diff --git a/ci/test_thirdparty_awkward.sh b/ci/test_thirdparty_awkward.sh
index 6349ac1c2..b1ba442f3 100755
--- a/ci/test_thirdparty_awkward.sh
+++ b/ci/test_thirdparty_awkward.sh
@@ -74,6 +74,21 @@ index 39080a34..0eb3940f 100644
      array = rng.integers(50, size=1000)
 EOF
 
+patch -p1 <<'EOF'
+diff --git a/pyproject.toml b/pyproject.toml
+index 78ecfba9..80a25474 100644
+--- a/pyproject.toml
++++ b/pyproject.toml
+@@ -136,6 +136,7 @@ filterwarnings = [
+     "ignore:.*np\\.MachAr.*:DeprecationWarning",
+     "ignore:module 'sre_.*' is deprecated:DeprecationWarning",
+     "ignore:Jitify is performing a one-time only warm-up",
++    "ignore:Context.call_conv is deprecated.",
+ ]
+ log_cli_level = "INFO"
+ testpaths = ["tests", "tests-cuda", "tests-cuda-kernels", "tests-cuda-kernels-explicit"]
+EOF
+
 rapids-logger "Generate awkward tests"
 nox -s prepare -- --tests
 
diff --git a/numba_cuda/numba/cuda/cgutils.py b/numba_cuda/numba/cuda/cgutils.py
index fee2489bc..93e33642a 100644
--- a/numba_cuda/numba/cuda/cgutils.py
+++ b/numba_cuda/numba/cuda/cgutils.py
@@ -764,12 +764,12 @@ def _dbg():
     with if_unlikely(builder, out_of_bounds_upper):
         if config.FULL_TRACEBACKS:
             _dbg()
-        context.call_conv.return_user_exc(builder, IndexError, (msg,))
+        context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,))
     out_of_bounds_lower = builder.icmp_signed("<", ind, ind.type(0))
     with if_unlikely(builder, out_of_bounds_lower):
         if config.FULL_TRACEBACKS:
             _dbg()
-        context.call_conv.return_user_exc(builder, IndexError, (msg,))
+        context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,))
 
 
 def get_item_pointer2(
@@ -936,7 +936,7 @@ def guard_null(context, builder, value, exc_tuple):
     with builder.if_then(is_scalar_zero(builder, value), likely=False):
         exc = exc_tuple[0]
         exc_args = exc_tuple[1:] or None
-        context.call_conv.return_user_exc(builder, exc, exc_args)
+        context.fndesc.call_conv.return_user_exc(builder, exc, exc_args)
 
 
 def guard_memory_error(context, builder, pointer, msg=None):
@@ -946,7 +946,7 @@ def guard_memory_error(context, builder, pointer, msg=None):
     assert isinstance(pointer.type, ir.PointerType), pointer.type
     exc_args = (msg,) if msg else ()
     with builder.if_then(is_null(builder, pointer), likely=False):
-        context.call_conv.return_user_exc(builder, MemoryError, exc_args)
+        context.fndesc.call_conv.return_user_exc(builder, MemoryError, exc_args)
 
 
 @contextmanager
diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py
index 511d28eb4..cbe1dfac2 100644
--- a/numba_cuda/numba/cuda/compiler.py
+++ b/numba_cuda/numba/cuda/compiler.py
@@ -29,7 +29,7 @@
 from numba.cuda.cudadrv.linkable_code import LinkableCode
 from numba.cuda.descriptor import cuda_target
 from numba.cuda.flags import CUDAFlags
-from numba.cuda.target import CUDACABICallConv
+from numba.cuda.core.callconv import CUDACABICallConv, CUDACallConv
 from numba.cuda.core.compiler import CompilerBase
 from numba.cuda.core.compiler_machinery import (
     FunctionPass,
@@ -699,6 +699,8 @@ def compile_cuda(
     cc=None,
     max_registers=None,
     lto=False,
+    abi="numba",
+    abi_info=None,
 ):
     if cc is None:
         raise ValueError("Compute Capability must be supplied")
@@ -741,6 +743,12 @@ def compile_cuda(
     flags.max_registers = max_registers
     flags.lto = lto
 
+    if abi == "c":
+        flags.call_conv = CUDACABICallConv(targetctx)
+
+    if abi_info is not None:
+        flags.abi_info = abi_info
+
     with utils.numba_target_override():
         cres = compile_extra(
             typingctx=typingctx,
@@ -759,57 +767,6 @@ def compile_cuda(
     return cres
 
 
-def cabi_wrap_function(
-    context, lib, fndesc, wrapper_function_name, nvvm_options
-):
-    """
-    Wrap a Numba ABI function in a C ABI wrapper at the NVVM IR level.
-
-    The C ABI wrapper will have the same name as the source Python function.
-    """
-    # The wrapper will be contained in a new library that links to the wrapped
-    # function's library
-    library = lib.codegen.create_library(
-        f"{lib.name}_function_",
-        entry_name=wrapper_function_name,
-        nvvm_options=nvvm_options,
-    )
-    library.add_linking_library(lib)
-
-    # Determine the caller (C ABI) and wrapper (Numba ABI) function types
-    argtypes = fndesc.argtypes
-    restype = fndesc.restype
-    c_call_conv = CUDACABICallConv(context)
-    wrapfnty = c_call_conv.get_function_type(restype, argtypes)
-    fnty = context.call_conv.get_function_type(fndesc.restype, argtypes)
-
-    # Create a new module and declare the callee
-    wrapper_module = context.create_module("cuda.cabi.wrapper")
-    func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name)
-
-    # Define the caller - populate it with a call to the callee and return
-    # its return value
-
-    wrapfn = ir.Function(wrapper_module, wrapfnty, wrapper_function_name)
-    builder = ir.IRBuilder(wrapfn.append_basic_block(""))
-
-    arginfo = context.get_arg_packer(argtypes)
-    callargs = arginfo.from_arguments(builder, wrapfn.args)
-    # We get (status, return_value), but we ignore the status since we
-    # can't propagate it through the C ABI anyway
-    _, return_value = context.call_conv.call_function(
-        builder, func, restype, argtypes, callargs
-    )
-    builder.ret(return_value)
-
-    if config.DUMP_LLVM:
-        utils.dump_llvm(fndesc, wrapper_module)
-
-    library.add_ir_module(wrapper_module)
-    library.finalize()
-    return library
-
-
 def kernel_fixup(kernel, debug):
     if debug:
         exc_helper = add_exception_store_helper(kernel)
@@ -934,7 +891,7 @@ def define_error_gv(postfix):
     # Implement status check / exception store logic
 
     status_code = helper_func.args[0]
-    call_conv = cuda_target.target_context.call_conv
+    call_conv = CUDACallConv(cuda_target.target_context)
     status = call_conv._get_return_status(builder, status_code)
 
     # Check error status
@@ -1118,23 +1075,16 @@ def _compile_pyfunc_with_fixup(
         nvvm_options=nvvm_options,
         cc=cc,
         forceinline=forceinline,
+        abi=abi,
+        abi_info=abi_info,
     )
     resty = cres.signature.return_type
 
     if resty and not device and resty != types.void:
         raise TypeError("CUDA kernel must have void return type.")
 
-    tgt = cres.target_context
-
-    if device:
-        lib = cres.library
-        if abi == "c":
-            wrapper_name = abi_info.get("abi_name", pyfunc.__name__)
-            lib = cabi_wrap_function(
-                tgt, lib, cres.fndesc, wrapper_name, nvvm_options
-            )
-    else:
-        lib = cres.library
+    lib = cres.library
+    if not device:
         kernel = lib.get_function(cres.fndesc.llvm_func_name)
         lib._entry_name = cres.fndesc.llvm_func_name
         kernel_fixup(kernel, debug)
@@ -1355,7 +1305,9 @@ def compile_ptx_for_current_device(
     )
 
 
-def declare_device_function(name, restype, argtypes, link, use_cooperative):
+def declare_device_function(
+    name, restype, argtypes, link, use_cooperative, abi
+):
     from .descriptor import cuda_target
 
     typingctx = cuda_target.typing_context
@@ -1376,9 +1328,18 @@ def declare_device_function(name, restype, argtypes, link, use_cooperative):
         lib.add_linking_file(file)
     lib.use_cooperative = use_cooperative
 
+    if abi == "numba":
+        call_conv = CUDACallConv(targetctx)
+    elif abi == "c":
+        call_conv = CUDACABICallConv(targetctx)
+    else:
+        raise NotImplementedError(f"Unsupported ABI: {abi}")
+
     # ExternalFunctionDescriptor provides a lowering implementation for calling
     # external functions
-    fndesc = funcdesc.ExternalFunctionDescriptor(name, restype, argtypes)
+    fndesc = funcdesc.ExternalFunctionDescriptor(
+        name, restype, argtypes, call_conv
+    )
     targetctx.insert_user_function(extfn, fndesc, libs=(lib,))
 
     return device_function_template
diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py
index f09e512f9..c518f9cb8 100644
--- a/numba_cuda/numba/cuda/core/base.py
+++ b/numba_cuda/numba/cuda/core/base.py
@@ -468,19 +468,6 @@ def get_external_function_type(self, fndesc):
         fnty = llvmir.FunctionType(restype, argtypes)
         return fnty
 
-    def declare_function(self, module, fndesc):
-        fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes)
-        fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name)
-        self.call_conv.decorate_function(
-            fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias
-        )
-        if fndesc.inline:
-            fn.attributes.add("alwaysinline")
-            # alwaysinline overrides optnone
-            fn.attributes.discard("noinline")
-            fn.attributes.discard("optnone")
-        return fn
-
     def declare_external_function(self, module, fndesc):
         fnty = self.get_external_function_type(fndesc)
         fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name)
@@ -975,7 +962,7 @@ def call_internal(self, builder, fndesc, sig, args):
             builder, fndesc, sig, args
         )
         with cgutils.if_unlikely(builder, status.is_error):
-            self.call_conv.return_status_propagate(builder, status)
+            fndesc.call_conv.return_status_propagate(builder, status)
 
         res = imputils.fix_returning_optional(self, builder, sig, status, res)
         return res
@@ -986,8 +973,8 @@ def call_internal_no_propagate(self, builder, fndesc, sig, args):
         """
         # Add call to the generated function
         llvm_mod = builder.module
-        fn = self.declare_function(llvm_mod, fndesc)
-        status, res = self.call_conv.call_function(
+        fn = fndesc.declare_function(llvm_mod)
+        status, res = fndesc.call_conv.call_function(
             builder, fn, sig.return_type, sig.args, args
         )
         return status, res
diff --git a/numba_cuda/numba/cuda/core/callconv.py b/numba_cuda/numba/cuda/core/callconv.py
index c929c25b6..ad69ca21c 100644
--- a/numba_cuda/numba/cuda/core/callconv.py
+++ b/numba_cuda/numba/cuda/core/callconv.py
@@ -3,6 +3,7 @@
 
 from numba.cuda import types
 from numba.cuda import cgutils
+from numba.cuda import itanium_mangler
 from collections import namedtuple
 
 from llvmlite import ir
@@ -161,6 +162,11 @@ def _get_arg_packer(self, argtypes):
         """
         return self.context.get_arg_packer(argtypes)
 
+    def mangler(self, name, argtypes, *, abi_tags=(), uid=None):
+        return itanium_mangler.mangle(
+            name, argtypes, abi_tags=abi_tags, uid=uid
+        )
+
 
 class MinimalCallConv(BaseCallConv):
     """
@@ -341,6 +347,93 @@ def get_exception(self, exc_id):
             return exc, exc_args, locinfo
 
 
+class CUDACallConv(MinimalCallConv):
+    def decorate_function(self, fn, args, fe_argtypes, noalias=False):
+        """
+        Set names and attributes of function arguments.
+        """
+        assert not noalias
+        arginfo = self._get_arg_packer(fe_argtypes)
+        # Do not prefix "arg." on argument name, so that nvvm compiler
+        # can track debug info of argument more accurately
+        arginfo.assign_names(self.get_arguments(fn), args)
+        fn.args[0].name = ".ret"
+
+
+class CUDACABICallConv(BaseCallConv):
+    """
+    Calling convention aimed at matching the CUDA C/C++ ABI. The implemented
+    function signature is:
+
+        <Python return type> (<Python arguments>)
+
+    Exceptions are unsupported in this convention.
+    """
+
+    def _make_call_helper(self, builder):
+        # Call helpers are used to help report exceptions back to Python, so
+        # none is required here.
+        return None
+
+    def return_value(self, builder, retval):
+        return builder.ret(retval)
+
+    def return_user_exc(
+        self, builder, exc, exc_args=None, loc=None, func_name=None
+    ):
+        msg = "Python exceptions are unsupported in the CUDA C/C++ ABI"
+        raise NotImplementedError(msg)
+
+    def return_status_propagate(self, builder, status):
+        msg = "Return status is unsupported in the CUDA C/C++ ABI"
+        raise NotImplementedError(msg)
+
+    def get_function_type(self, restype, argtypes):
+        """
+        Get the LLVM IR Function type for *restype* and *argtypes*.
+        """
+        arginfo = self._get_arg_packer(argtypes)
+        argtypes = list(arginfo.argument_types)
+        fnty = ir.FunctionType(self.get_return_type(restype), argtypes)
+        return fnty
+
+    def decorate_function(self, fn, args, fe_argtypes, noalias=False):
+        """
+        Set names and attributes of function arguments.
+        """
+        assert not noalias
+        arginfo = self._get_arg_packer(fe_argtypes)
+        arginfo.assign_names(self.get_arguments(fn), ["arg." + a for a in args])
+
+    def get_arguments(self, func):
+        """
+        Get the Python-level arguments of LLVM *func*.
+        """
+        return func.args
+
+    def call_function(self, builder, callee, resty, argtys, args):
+        """
+        Call the Numba-compiled *callee*.
+        """
+        arginfo = self._get_arg_packer(argtys)
+        realargs = arginfo.as_arguments(builder, args)
+        code = builder.call(callee, realargs)
+        # No status required as we don't support exceptions or a distinct None
+        # value in a C ABI.
+        status = None
+        out = self.context.get_returned_value(builder, resty, code)
+        return status, out
+
+    def get_return_type(self, ty):
+        return self.context.data_model_manager[ty].get_return_type()
+
+    def mangler(self, name, argtypes, *, abi_tags=None, uid=None):
+        if name.startswith(".NumbaEnv."):
+            func_name = name.split(".")[-1]
+            return f"_ZN08NumbaEnv{func_name}"
+        return name.split(".")[-1]
+
+
 class ErrorModel:
     def __init__(self, call_conv):
         self.call_conv = call_conv
@@ -385,8 +478,8 @@ class NumpyErrorModel(ErrorModel):
 }
 
 
-def create_error_model(model_name, context):
+def create_error_model(model_name, call_conv):
     """
     Create an error model instance for the given target context.
     """
-    return error_models[model_name](context.call_conv)
+    return error_models[model_name](call_conv)
diff --git a/numba_cuda/numba/cuda/core/compiler.py b/numba_cuda/numba/cuda/core/compiler.py
index 19523a33d..c6a037e53 100644
--- a/numba_cuda/numba/cuda/core/compiler.py
+++ b/numba_cuda/numba/cuda/core/compiler.py
@@ -10,6 +10,8 @@
 from numba.cuda.core.untyped_passes import ExtractByteCode, FixupArgs
 from numba.cuda.core.targetconfig import ConfigStack
 
+from numba.cuda.core.callconv import CUDACallConv
+
 
 class _CompileStatus:
     """
@@ -66,7 +68,11 @@ def _make_subtarget(targetctx, flags):
         subtargetoptions["enable_nrt"] = True
     if flags.fastmath:
         subtargetoptions["fastmath"] = flags.fastmath
-    error_model = callconv.create_error_model(flags.error_model, targetctx)
+
+    # Only the CUDA Calling convention can raise exceptions, so we assume here
+    # that it is suitable for creating the error model.
+    call_conv = CUDACallConv(targetctx)
+    error_model = callconv.create_error_model(flags.error_model, call_conv)
     subtargetoptions["error_model"] = error_model
 
     return targetctx.subtarget(**subtargetoptions)
diff --git a/numba_cuda/numba/cuda/core/funcdesc.py b/numba_cuda/numba/cuda/core/funcdesc.py
index f6ed3638d..150272929 100644
--- a/numba_cuda/numba/cuda/core/funcdesc.py
+++ b/numba_cuda/numba/cuda/core/funcdesc.py
@@ -8,7 +8,7 @@
 from collections import defaultdict
 import importlib
 
-from numba.cuda import types
+from numba.cuda import types, cgutils
 from numba.cuda import itanium_mangler
 from numba.cuda.utils import _dynamic_modname, _dynamic_module
 
@@ -54,6 +54,8 @@ class FunctionDescriptor:
         "noalias",
         "abi_tags",
         "uid",
+        "call_conv",
+        "abi_info",
     )
 
     def __init__(
@@ -76,6 +78,8 @@ def __init__(
         global_dict=None,
         abi_tags=(),
         uid=None,
+        call_conv=None,
+        abi_info=None,
     ):
         self.native = native
         self.modname = modname
@@ -103,12 +107,17 @@ def __init__(
         # be chosen at link time.
         qualprefix = qualifying_prefix(self.modname, self.qualname)
         self.uid = uid
-        self.mangled_name = mangler(
-            qualprefix,
-            self.argtypes,
-            abi_tags=abi_tags,
-            uid=uid,
-        )
+
+        if abi_info is not None and "abi_name" in abi_info:
+            self.mangled_name = abi_info["abi_name"]
+        else:
+            self.mangled_name = mangler(
+                qualprefix,
+                self.argtypes,
+                abi_tags=abi_tags,
+                uid=uid,
+            )
+
         if env_name is None:
             env_name = mangler(
                 ".NumbaEnv.{}".format(qualprefix),
@@ -120,6 +129,8 @@ def __init__(
         self.inline = inline
         self.noalias = noalias
         self.abi_tags = abi_tags
+        self.call_conv = call_conv
+        self.abi_info = abi_info
 
     def lookup_globals(self):
         """
@@ -219,6 +230,8 @@ def _from_python_function(
         inline=False,
         noalias=False,
         abi_tags=(),
+        call_conv=None,
+        abi_info=None,
     ):
         (
             qualname,
@@ -247,9 +260,24 @@ def _from_python_function(
             global_dict=global_dict,
             abi_tags=abi_tags,
             uid=func_ir.func_id.unique_id,
+            call_conv=call_conv,
+            abi_info=abi_info,
         )
         return self
 
+    def declare_function(self, module):
+        fnty = self.call_conv.get_function_type(self.restype, self.argtypes)
+        fn = cgutils.get_or_insert_function(module, fnty, self.mangled_name)
+        self.call_conv.decorate_function(
+            fn, self.args, self.argtypes, noalias=self.noalias
+        )
+        if self.inline:
+            fn.attributes.add("alwaysinline")
+            # alwaysinline overrides optnone
+            fn.attributes.discard("noinline")
+            fn.attributes.discard("optnone")
+        return fn
+
 
 class PythonFunctionDescriptor(FunctionDescriptor):
     """
@@ -269,6 +297,8 @@ def from_specialized_function(
         inline,
         noalias,
         abi_tags,
+        call_conv,
+        abi_info,
     ):
         """
         Build a FunctionDescriptor for a given specialization of a Python
@@ -284,6 +314,8 @@ def from_specialized_function(
             inline=inline,
             noalias=noalias,
             abi_tags=abi_tags,
+            call_conv=call_conv,
+            abi_info=abi_info,
         )
 
     @classmethod
@@ -308,7 +340,7 @@ class ExternalFunctionDescriptor(FunctionDescriptor):
 
     __slots__ = ()
 
-    def __init__(self, name, restype, argtypes):
+    def __init__(self, name, restype, argtypes, call_conv):
         args = ["arg%d" % i for i in range(len(argtypes))]
 
         def mangler(a, x, abi_tags, uid=None):
@@ -327,4 +359,5 @@ def mangler(a, x, abi_tags, uid=None):
             kws=None,
             mangler=mangler,
             argtypes=argtypes,
+            call_conv=call_conv,
         )
diff --git a/numba_cuda/numba/cuda/core/generators.py b/numba_cuda/numba/cuda/core/generators.py
index 63d68dbed..27d3422d0 100644
--- a/numba_cuda/numba/cuda/core/generators.py
+++ b/numba_cuda/numba/cuda/core/generators.py
@@ -76,7 +76,10 @@ def __init__(self, lower):
         self.geninfo = lower.generator_info
         self.gentype = self.get_generator_type()
         self.gendesc = GeneratorDescriptor.from_generator_fndesc(
-            lower.func_ir, self.fndesc, self.gentype, self.context.mangler
+            lower.func_ir,
+            self.fndesc,
+            self.gentype,
+            self.fndesc.call_conv.mangler,
         )
         # Helps packing non-omitted arguments into a structure
         self.arg_packer = self.context.get_data_packer(self.fndesc.argtypes)
diff --git a/numba_cuda/numba/cuda/core/imputils.py b/numba_cuda/numba/cuda/core/imputils.py
index 8cb07adb9..751022675 100644
--- a/numba_cuda/numba/cuda/core/imputils.py
+++ b/numba_cuda/numba/cuda/core/imputils.py
@@ -212,15 +212,19 @@ def user_function(fndesc, libs):
     A wrapper inserting code calling Numba-compiled *fndesc*.
     """
 
-    def imp(context, builder, sig, args):
-        func = context.declare_function(builder.module, fndesc)
+    def imp(context, builder, sig, args, fndesc=fndesc):
+        func = fndesc.declare_function(builder.module)
         # env=None assumes this is a nopython function
-        status, retval = context.call_conv.call_function(
+        status, retval = fndesc.call_conv.call_function(
             builder, func, fndesc.restype, fndesc.argtypes, args
         )
-        with cgutils.if_unlikely(builder, status.is_error):
-            context.call_conv.return_status_propagate(builder, status)
+
+        if status is not None:
+            with cgutils.if_unlikely(builder, status.is_error):
+                fndesc.call_conv.return_status_propagate(builder, status)
+
         assert sig.return_type == fndesc.restype
+
         # Reconstruct optional return type
         retval = fix_returning_optional(context, builder, sig, status, retval)
         # If the data representations don't match up
@@ -240,10 +244,10 @@ def user_generator(gendesc, libs):
     A wrapper inserting code calling Numba-compiled *gendesc*.
     """
 
-    def imp(context, builder, sig, args):
-        func = context.declare_function(builder.module, gendesc)
+    def imp(context, builder, sig, args, gendesc=gendesc):
+        func = gendesc.declare_function(builder.module)
         # env=None assumes this is a nopython function
-        status, retval = context.call_conv.call_function(
+        status, retval = gendesc.call_conv.call_function(
             builder, func, gendesc.restype, gendesc.argtypes, args
         )
         # Return raw status for caller to process StopIteration
diff --git a/numba_cuda/numba/cuda/core/optional.py b/numba_cuda/numba/cuda/core/optional.py
index 8cad47935..e7880e7e0 100644
--- a/numba_cuda/numba/cuda/core/optional.py
+++ b/numba_cuda/numba/cuda/core/optional.py
@@ -124,6 +124,6 @@ def optional_to_any(context, builder, fromty, toty, val):
     validbit = cgutils.as_bool_bit(builder, optval.valid)
     with builder.if_then(builder.not_(validbit), likely=False):
         msg = "expected %s, got None" % (fromty.type,)
-        context.call_conv.return_user_exc(builder, TypeError, (msg,))
+        context.fndesc.call_conv.return_user_exc(builder, TypeError, (msg,))
 
     return context.cast(builder, optval.data, fromty.type, toty)
diff --git a/numba_cuda/numba/cuda/core/pythonapi.py b/numba_cuda/numba/cuda/core/pythonapi.py
index 749b3651d..cc8dfe9bd 100644
--- a/numba_cuda/numba/cuda/core/pythonapi.py
+++ b/numba_cuda/numba/cuda/core/pythonapi.py
@@ -230,7 +230,7 @@ def emit_environment_sentry(
                 )
                 self.builder.ret(self.get_null_object())
             else:
-                self.context.call_conv.return_user_exc(
+                self.context.fndesc.call_conv.return_user_exc(
                     self.builder,
                     RuntimeError,
                     (f"missing Environment: {debug_msg}",),
@@ -1771,7 +1771,7 @@ def call_jit_code(self, func, sig, args):
             with has_err:
                 builder.store(status.is_error, is_error_ptr)
                 # Set error state in the Python interpreter
-                self.context.call_conv.raise_error(builder, self, status)
+                self.context.fndesc.call_conv.raise_error(builder, self, status)
             with no_err:
                 # Handle returned value
                 res = imputils.fix_returning_optional(
diff --git a/numba_cuda/numba/cuda/core/typed_passes.py b/numba_cuda/numba/cuda/core/typed_passes.py
index fa5aefda7..8cd5f3d84 100644
--- a/numba_cuda/numba/cuda/core/typed_passes.py
+++ b/numba_cuda/numba/cuda/core/typed_passes.py
@@ -31,6 +31,7 @@
     compute_cfg_from_blocks,
     is_operator_or_getitem,
 )
+from numba.cuda.core.callconv import CUDACallConv
 
 from numba.cuda.core import postproc, rewrites, funcdesc, config
 
@@ -325,6 +326,12 @@ def run_pass(self, state):
         metadata = state.metadata
         pre_stats = passmanagers.dump_refprune_stats()
 
+        call_conv = flags.call_conv
+        if call_conv is None:
+            call_conv = CUDACallConv(state.targetctx)
+
+        mangler = call_conv.mangler
+
         msg = "Function %s failed at nopython mode lowering" % (
             state.func_id.func_name,
         )
@@ -336,10 +343,12 @@ def run_pass(self, state):
                     typemap,
                     restype,
                     calltypes,
-                    mangler=targetctx.mangler,
+                    mangler=mangler,
                     inline=flags.forceinline,
                     noalias=flags.noalias,
                     abi_tags=[flags.get_mangle_string()],
+                    call_conv=call_conv,
+                    abi_info=flags.abi_info,
                 )
             )
 
diff --git a/numba_cuda/numba/cuda/cpython/builtins.py b/numba_cuda/numba/cuda/cpython/builtins.py
index 8a5eeac09..71d833956 100644
--- a/numba_cuda/numba/cuda/cpython/builtins.py
+++ b/numba_cuda/numba/cuda/cpython/builtins.py
@@ -446,7 +446,7 @@ def next_impl(context, builder, sig, args):
     res = call_iternext(context, builder, iterty, iterval)
 
     with builder.if_then(builder.not_(res.is_valid()), likely=False):
-        context.call_conv.return_user_exc(builder, StopIteration, ())
+        context.fndesc.call_conv.return_user_exc(builder, StopIteration, ())
 
     return res.yielded_value()
 
diff --git a/numba_cuda/numba/cuda/cpython/charseq.py b/numba_cuda/numba/cuda/cpython/charseq.py
index d6738a6f1..b964aecc9 100644
--- a/numba_cuda/numba/cuda/cpython/charseq.py
+++ b/numba_cuda/numba/cuda/cpython/charseq.py
@@ -244,7 +244,7 @@ def unicode_to_bytes_cast(context, builder, fromty, toty, val):
     src_length = uni_str.length
 
     with builder.if_then(notkind1):
-        context.call_conv.return_user_exc(
+        context.fndesc.call_conv.return_user_exc(
             builder,
             ValueError,
             ("cannot cast higher than 8-bit unicode_type to bytes",),
@@ -315,7 +315,7 @@ def unicode_to_unicode_charseq(context, builder, fromty, toty, val):
                 in_val = builder.zext(builder.load(in_ptr), dstint_t)
                 builder.store(in_val, builder.gep(dst, [loop.index]))
         else:
-            context.call_conv.return_user_exc(
+            context.fndesc.call_conv.return_user_exc(
                 builder,
                 ValueError,
                 (
@@ -331,7 +331,7 @@ def unicode_to_unicode_charseq(context, builder, fromty, toty, val):
                 in_val = builder.zext(builder.load(in_ptr), dstint_t)
                 builder.store(in_val, builder.gep(dst, [loop.index]))
         else:
-            context.call_conv.return_user_exc(
+            context.fndesc.call_conv.return_user_exc(
                 builder,
                 ValueError,
                 (
diff --git a/numba_cuda/numba/cuda/cpython/iterators.py b/numba_cuda/numba/cuda/cpython/iterators.py
index df7a87b75..e3606070f 100644
--- a/numba_cuda/numba/cuda/cpython/iterators.py
+++ b/numba_cuda/numba/cuda/cpython/iterators.py
@@ -164,4 +164,4 @@ def iternext_zip(context, builder, sig, args, result):  # noqa: F811
         builder,
         builder.and_(status.is_error, builder.not_(status.is_stop_iteration)),
     ):
-        context.call_conv.return_status_propagate(builder, status)
+        context.fndesc.call_conv.return_status_propagate(builder, status)
diff --git a/numba_cuda/numba/cuda/cpython/listobj.py b/numba_cuda/numba/cuda/cpython/listobj.py
index 68be31663..9e2f1532f 100644
--- a/numba_cuda/numba/cuda/cpython/listobj.py
+++ b/numba_cuda/numba/cuda/cpython/listobj.py
@@ -117,7 +117,7 @@ def guard_index(self, idx, msg):
         Raise an error if the index is out of bounds.
         """
         with self._builder.if_then(self.is_out_of_bounds(idx), likely=False):
-            self._context.call_conv.return_user_exc(
+            self._context.fndesc.call_conv.return_user_exc(
                 self._builder, IndexError, (msg,)
             )
 
@@ -347,7 +347,7 @@ def allocate(cls, context, builder, list_type, nitems):
         """
         ok, self = cls.allocate_ex(context, builder, list_type, nitems)
         with builder.if_then(builder.not_(ok), likely=False):
-            context.call_conv.return_user_exc(
+            context.fndesc.call_conv.return_user_exc(
                 builder, MemoryError, ("cannot allocate list",)
             )
         return self
@@ -384,7 +384,7 @@ def _payload_realloc(new_allocated):
                 ir.Constant(intp_t, payload_size),
             )
             with builder.if_then(ovf, likely=False):
-                context.call_conv.return_user_exc(
+                context.fndesc.call_conv.return_user_exc(
                     builder, MemoryError, ("cannot resize list",)
                 )
 
@@ -654,7 +654,9 @@ def setitem_list(context, builder, sig, args):  # noqa: F811
         with otherwise:
             with builder.if_then(builder.icmp_signed("!=", size_delta, zero)):
                 msg = "cannot resize extended list slice with step != 1"
-                context.call_conv.return_user_exc(builder, ValueError, (msg,))
+                context.fndesc.call_conv.return_user_exc(
+                    builder, ValueError, (msg,)
+                )
 
             with cgutils.for_range_slice_generic(
                 builder, slice.start, slice.stop, slice.step
@@ -693,7 +695,9 @@ def delitem_list(context, builder, sig, args):
         builder.icmp_signed("!=", slice.step, one), likely=False
     ):
         msg = "unsupported del list[start:stop:step] with step != 1"
-        context.call_conv.return_user_exc(builder, NotImplementedError, (msg,))
+        context.fndesc.call_conv.return_user_exc(
+            builder, NotImplementedError, (msg,)
+        )
 
     # Compute the real stop, e.g. for dest[2:0]
     start = slice.start
diff --git a/numba_cuda/numba/cuda/cpython/numbers.py b/numba_cuda/numba/cuda/cpython/numbers.py
index d3a24b934..4ffe3ad86 100644
--- a/numba_cuda/numba/cuda/cpython/numbers.py
+++ b/numba_cuda/numba/cuda/cpython/numbers.py
@@ -671,7 +671,8 @@ def real_divmod(context, builder, x, y):
     floatty = x.type
 
     module = builder.module
-    fname = context.mangler(".numba.python.rem", [x.type])
+    call_conv = context.fndesc.call_conv
+    fname = call_conv.mangler(".numba.python.rem", [x.type])
     fnty = ir.FunctionType(floatty, (floatty, floatty, ir.PointerType(floatty)))
     fn = cgutils.get_or_insert_function(module, fnty, fname)
 
diff --git a/numba_cuda/numba/cuda/cpython/rangeobj.py b/numba_cuda/numba/cuda/cpython/rangeobj.py
index 00a75cba8..99b7903d1 100644
--- a/numba_cuda/numba/cuda/cpython/rangeobj.py
+++ b/numba_cuda/numba/cuda/cpython/rangeobj.py
@@ -158,7 +158,7 @@ def from_range_state(cls, context, builder, state):
 
             with cgutils.if_unlikely(builder, zero_step):
                 # step shouldn't be zero
-                context.call_conv.return_user_exc(
+                context.fndesc.call_conv.return_user_exc(
                     builder, ValueError, ("range() arg 3 must not be zero",)
                 )
 
diff --git a/numba_cuda/numba/cuda/cpython/slicing.py b/numba_cuda/numba/cuda/cpython/slicing.py
index 0899db994..ed9ae163f 100644
--- a/numba_cuda/numba/cuda/cpython/slicing.py
+++ b/numba_cuda/numba/cuda/cpython/slicing.py
@@ -240,13 +240,13 @@ def slice_indices(context, builder, sig, args):
     sli = context.make_helper(builder, sig.args[0], args[0])
 
     with builder.if_then(cgutils.is_neg_int(builder, length), likely=False):
-        context.call_conv.return_user_exc(
+        context.fndesc.call_conv.return_user_exc(
             builder, ValueError, ("length should not be negative",)
         )
     with builder.if_then(
         cgutils.is_scalar_zero(builder, sli.step), likely=False
     ):
-        context.call_conv.return_user_exc(
+        context.fndesc.call_conv.return_user_exc(
             builder, ValueError, ("slice step cannot be zero",)
         )
 
diff --git a/numba_cuda/numba/cuda/cpython/tupleobj.py b/numba_cuda/numba/cuda/cpython/tupleobj.py
index 5292d412d..0483a3960 100644
--- a/numba_cuda/numba/cuda/cpython/tupleobj.py
+++ b/numba_cuda/numba/cuda/cpython/tupleobj.py
@@ -251,7 +251,9 @@ def getitem_typed(context, builder, sig, args):
 
         # Always branch and raise IndexError
         with builder.if_then(cgutils.true_bit):
-            context.call_conv.return_user_exc(builder, IndexError, errmsg_oob)
+            context.fndesc.call_conv.return_user_exc(
+                builder, IndexError, errmsg_oob
+            )
         # This is unreachable in runtime,
         # but it exists to not terminate the current basicblock.
         res = context.get_constant_null(sig.return_type)
@@ -264,7 +266,9 @@ def getitem_typed(context, builder, sig, args):
         switch = builder.switch(idx, bbelse)
 
         with builder.goto_block(bbelse):
-            context.call_conv.return_user_exc(builder, IndexError, errmsg_oob)
+            context.fndesc.call_conv.return_user_exc(
+                builder, IndexError, errmsg_oob
+            )
 
         lrtty = context.get_value_type(sig.return_type)
         voidptrty = context.get_value_type(types.voidptr)
@@ -345,7 +349,9 @@ def getitem_unituple(context, builder, sig, args):
 
         # Always branch and raise IndexError
         with builder.if_then(cgutils.true_bit):
-            context.call_conv.return_user_exc(builder, IndexError, errmsg_oob)
+            context.fndesc.call_conv.return_user_exc(
+                builder, IndexError, errmsg_oob
+            )
         # This is unreachable in runtime,
         # but it exists to not terminate the current basicblock.
         res = context.get_constant_null(sig.return_type)
@@ -357,7 +363,9 @@ def getitem_unituple(context, builder, sig, args):
         switch = builder.switch(idx, bbelse)
 
         with builder.goto_block(bbelse):
-            context.call_conv.return_user_exc(builder, IndexError, errmsg_oob)
+            context.fndesc.call_conv.return_user_exc(
+                builder, IndexError, errmsg_oob
+            )
 
         lrtty = context.get_value_type(tupty.dtype)
         with builder.goto_block(bbend):
diff --git a/numba_cuda/numba/cuda/decorators.py b/numba_cuda/numba/cuda/decorators.py
index b2ff5932e..6a9f9301a 100644
--- a/numba_cuda/numba/cuda/decorators.py
+++ b/numba_cuda/numba/cuda/decorators.py
@@ -285,7 +285,7 @@ def autojitwrapper(func):
                 return disp
 
 
-def declare_device(name, sig, link=None, use_cooperative=False):
+def declare_device(name, sig, link=None, use_cooperative=False, abi="numba"):
     """
     Declare the signature of a foreign function. Returns a descriptor that can
     be used to call the function from a Python kernel.
@@ -295,7 +295,11 @@ def declare_device(name, sig, link=None, use_cooperative=False):
     :param sig: The Numba signature of the function.
     :param link: External code to link when calling the function.
     :param use_cooperative: External code requires cooperative launch.
+    :param abi: The ABI to use for the function. "numba" for Numba ABI, "c" for C ABI.
     """
+    if abi not in ("numba", "c"):
+        raise NotImplementedError(f"Unsupported ABI: {abi}")
+
     if link is None:
         link = tuple()
     else:
@@ -308,7 +312,7 @@ def declare_device(name, sig, link=None, use_cooperative=False):
         raise TypeError(msg)
 
     template = declare_device_function(
-        name, restype, argtypes, link, use_cooperative
+        name, restype, argtypes, link, use_cooperative, abi
     )
 
     return template.key
diff --git a/numba_cuda/numba/cuda/flags.py b/numba_cuda/numba/cuda/flags.py
index 37ff995b6..51d9eb658 100644
--- a/numba_cuda/numba/cuda/flags.py
+++ b/numba_cuda/numba/cuda/flags.py
@@ -9,6 +9,8 @@
     InlineOptions,
 )
 
+from numba.cuda.core.callconv import BaseCallConv
+
 
 class Flags(TargetConfig):
     __slots__ = ()
@@ -161,6 +163,24 @@ def _optional_int_type(x):
         return x
 
 
+def _call_conv_options_type(x):
+    if x is None:
+        return None
+
+    else:
+        assert isinstance(x, BaseCallConv)
+        return x
+
+
+def _abi_info_options_type(x):
+    if x is None:
+        return {}
+
+    else:
+        assert isinstance(x, dict)
+        return x
+
+
 class CUDAFlags(Flags):
     nvvm_options = Option(
         type=_nvvm_options_type,
@@ -176,3 +196,7 @@ class CUDAFlags(Flags):
         type=_optional_int_type, default=None, doc="Max registers"
     )
     lto = Option(type=bool, default=False, doc="Enable Link-time Optimization")
+
+    call_conv = Option(type=_call_conv_options_type, default=None, doc="")
+
+    abi_info = Option(type=_abi_info_options_type, default=None, doc="ABI info")
diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py
index c0b18b40a..6184aff06 100644
--- a/numba_cuda/numba/cuda/lowering.py
+++ b/numba_cuda/numba/cuda/lowering.py
@@ -99,7 +99,7 @@ def __init__(self, context, library, fndesc, func_ir, metadata=None):
 
     @property
     def call_conv(self):
-        return self.context.call_conv
+        return self.fndesc.call_conv
 
     def init(self):
         pass
@@ -354,7 +354,7 @@ def create_cfunc_wrapper(self):
 
     def setup_function(self, fndesc):
         # Setup function
-        self.function = self.context.declare_function(self.module, fndesc)
+        self.function = fndesc.declare_function(self.module)
         if self.flags.dbg_optnone:
             attrset = self.function.attributes
             if "alwaysinline" not in attrset:
@@ -1209,7 +1209,7 @@ def _lower_call_RecursiveCall(self, fnty, expr, signature):
             expr.kws,
         )
         rec_ov = fnty.get_overloads(signature.args)
-        mangler = self.context.mangler or default_mangler
+        mangler = self.call_conv.mangler or default_mangler
         abi_tags = self.fndesc.abi_tags
         mangled_name = mangler(
             rec_ov.qualname, signature.args, abi_tags=abi_tags, uid=rec_ov.uid
diff --git a/numba_cuda/numba/cuda/memory_management/nrt_context.py b/numba_cuda/numba/cuda/memory_management/nrt_context.py
index 3e171779a..f3e6e5f73 100644
--- a/numba_cuda/numba/cuda/memory_management/nrt_context.py
+++ b/numba_cuda/numba/cuda/memory_management/nrt_context.py
@@ -415,7 +415,7 @@ def get_nrt_api(self, builder):
     def eh_check(self, builder):
         """Check if an exception is raised"""
         ctx = self._context
-        cc = ctx.call_conv
+        cc = ctx.fndesc.call_conv
         # Inspect the excinfo argument on the function
         trystatus = cc.check_try_status(builder)
         excinfo = trystatus.excinfo
@@ -428,11 +428,11 @@ def eh_check(self, builder):
     def eh_try(self, builder):
         """Begin a try-block."""
         ctx = self._context
-        cc = ctx.call_conv
+        cc = ctx.fndesc.call_conv
         cc.set_try_status(builder)
 
     def eh_end_try(self, builder):
         """End a try-block"""
         ctx = self._context
-        cc = ctx.call_conv
+        cc = ctx.fndesc.call_conv
         cc.unset_try_status(builder)
diff --git a/numba_cuda/numba/cuda/misc/gdb_hook.py b/numba_cuda/numba/cuda/misc/gdb_hook.py
index 2c8bd3c8d..805f952e7 100644
--- a/numba_cuda/numba/cuda/misc/gdb_hook.py
+++ b/numba_cuda/numba/cuda/misc/gdb_hook.py
@@ -162,14 +162,14 @@ def init_gdb_codegen(
     invalid_write = builder.icmp_signed(">", stat, int32_t(12))
     with builder.if_then(invalid_write, likely=False):
         msg = "Internal error: `snprintf` buffer would have overflowed."
-        cgctx.call_conv.return_user_exc(builder, RuntimeError, (msg,))
+        cgctx.fndesc.call_conv.return_user_exc(builder, RuntimeError, (msg,))
 
     # fork, check pids etc
     child_pid = builder.call(fork, tuple())
     fork_failed = builder.icmp_signed("==", child_pid, int32_t(-1))
     with builder.if_then(fork_failed, likely=False):
         msg = "Internal error: `fork` failed."
-        cgctx.call_conv.return_user_exc(builder, RuntimeError, (msg,))
+        cgctx.fndesc.call_conv.return_user_exc(builder, RuntimeError, (msg,))
 
     is_child = builder.icmp_signed("==", child_pid, zero_i32t)
     with builder.if_else(is_child) as (then, orelse):
diff --git a/numba_cuda/numba/cuda/np/arrayobj.py b/numba_cuda/numba/cuda/np/arrayobj.py
index 115fb63f2..2fcaf4d8c 100644
--- a/numba_cuda/numba/cuda/np/arrayobj.py
+++ b/numba_cuda/numba/cuda/np/arrayobj.py
@@ -672,7 +672,7 @@ def array_item(context, builder, sig, args):
         builder.icmp_signed("!=", nitems, nitems.type(1)), likely=False
     ):
         msg = "item(): can only convert an array of size 1 to a Python scalar"
-        context.call_conv.return_user_exc(builder, ValueError, (msg,))
+        context.fndesc.call_conv.return_user_exc(builder, ValueError, (msg,))
 
     return load_item(context, builder, aryty, ary.data)
 
@@ -691,7 +691,9 @@ def array_itemset(context, builder, sig, args):
             builder.icmp_signed("!=", nitems, nitems.type(1)), likely=False
         ):
             msg = "itemset(): can only write to an array of size 1"
-            context.call_conv.return_user_exc(builder, ValueError, (msg,))
+            context.fndesc.call_conv.return_user_exc(
+                builder, ValueError, (msg,)
+            )
 
         store_item(context, builder, aryty, val, ary.data)
         return context.get_dummy_value()
@@ -1478,7 +1480,9 @@ def _bc_adjust_dimension(context, builder, shapes, strides, target_shape):
         # Check error
         with builder.if_then(builder.not_(accepted), likely=False):
             msg = "cannot broadcast source array for assignment"
-            context.call_conv.return_user_exc(builder, ValueError, (msg,))
+            context.fndesc.call_conv.return_user_exc(
+                builder, ValueError, (msg,)
+            )
         # Truncate extra shapes, strides
         shapes = shapes[nd_diff:]
         strides = strides[nd_diff:]
@@ -2422,7 +2426,9 @@ def array_reshape(context, builder, sig, args):
 
     with builder.if_then(fail):
         msg = "incompatible shape for array"
-        context.call_conv.return_user_exc(builder, NotImplementedError, (msg,))
+        context.fndesc.call_conv.return_user_exc(
+            builder, NotImplementedError, (msg,)
+        )
 
     ret = make_array(retty)(context, builder)
     populate_array(
@@ -3095,7 +3101,7 @@ def array_view(context, builder, sig, args):
 
     with builder.if_then(fail):
         msg = "new type not compatible with array"
-        context.call_conv.return_user_exc(builder, ValueError, (msg,))
+        context.fndesc.call_conv.return_user_exc(builder, ValueError, (msg,))
 
     res = ret._getvalue()
     return impl_ret_borrowed(context, builder, sig.return_type, res)
@@ -4720,7 +4726,7 @@ def _empty_nd_impl(context, builder, arrtype, shapes):
     with builder.if_then(overflow, likely=False):
         # Raise same error as numpy, see:
         # https://github.com/numpy/numpy/blob/2a488fe76a0f732dc418d03b452caace161673da/numpy/core/src/multiarray/ctors.c#L1095-L1101    # noqa: E501
-        context.call_conv.return_user_exc(
+        context.fndesc.call_conv.return_user_exc(
             builder,
             ValueError,
             (
@@ -4804,7 +4810,7 @@ def safecast_intp(context, builder, src_t, src):
         elif src_t.width >= intp_width:
             is_larger = builder.icmp_signed(">", src, maxval)
             with builder.if_then(is_larger, likely=False):
-                context.call_conv.return_user_exc(
+                context.fndesc.call_conv.return_user_exc(
                     builder,
                     ValueError,
                     ("Cannot safely convert value to intp",),
@@ -4829,7 +4835,7 @@ def safecast_intp(context, builder, src_t, src):
     for shape in shapes:
         is_neg = builder.icmp_signed("<", shape, zero)
         with cgutils.if_unlikely(builder, is_neg):
-            context.call_conv.return_user_exc(
+            context.fndesc.call_conv.return_user_exc(
                 builder, ValueError, ("negative dimensions not allowed",)
             )
 
@@ -5759,7 +5765,9 @@ def codegen(context, builder, sig, args):
         is_incompatible = cgutils.is_not_null(builder, rem)
         with builder.if_then(is_incompatible, likely=False):
             msg = "buffer size must be a multiple of element size"
-            context.call_conv.return_user_exc(builder, ValueError, (msg,))
+            context.fndesc.call_conv.return_user_exc(
+                builder, ValueError, (msg,)
+            )
 
         shape = cgutils.pack_array(builder, [builder.sdiv(nbytes, ll_itemsize)])
         strides = cgutils.pack_array(builder, [ll_itemsize])
@@ -6014,7 +6022,7 @@ def check_sequence_shape(context, builder, seqty, seq, shapes):
     """
 
     def _fail():
-        context.call_conv.return_user_exc(
+        context.fndesc.call_conv.return_user_exc(
             builder, ValueError, ("incompatible sequence shape",)
         )
 
@@ -6169,7 +6177,7 @@ def _normalize_axis(context, builder, func_name, ndim, axis):
     )
     with builder.if_then(axis_out_of_bounds, likely=False):
         msg = "%s(): axis out of bounds" % func_name
-        context.call_conv.return_user_exc(builder, IndexError, (msg,))
+        context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,))
 
     return axis
 
@@ -6489,7 +6497,7 @@ def _np_concatenate(context, builder, arrtys, arrs, retty, axis):
                         is_ok, builder.icmp_signed("==", sh, ret_sh)
                     )
                 with builder.if_then(builder.not_(is_ok), likely=False):
-                    context.call_conv.return_user_exc(
+                    context.fndesc.call_conv.return_user_exc(
                         builder,
                         ValueError,
                         (
@@ -6538,7 +6546,7 @@ def _np_stack(context, builder, arrtys, arrs, retty, axis):
         ):
             is_ok = builder.and_(is_ok, builder.icmp_signed("==", sh, orig_sh))
             with builder.if_then(builder.not_(is_ok), likely=False):
-                context.call_conv.return_user_exc(
+                context.fndesc.call_conv.return_user_exc(
                     builder,
                     ValueError,
                     ("np.stack(): all input arrays must have the same shape",),
diff --git a/numba_cuda/numba/cuda/np/math/numbers.py b/numba_cuda/numba/cuda/np/math/numbers.py
index cf0875b9c..b4b20cd49 100644
--- a/numba_cuda/numba/cuda/np/math/numbers.py
+++ b/numba_cuda/numba/cuda/np/math/numbers.py
@@ -657,7 +657,8 @@ def real_divmod(context, builder, x, y):
     floatty = x.type
 
     module = builder.module
-    fname = context.mangler(".numba.python.rem", [x.type])
+    call_conv = context.fndesc.call_conv
+    fname = call_conv.mangler(".numba.python.rem", [x.type])
     fnty = ir.FunctionType(floatty, (floatty, floatty, ir.PointerType(floatty)))
     fn = cgutils.get_or_insert_function(module, fnty, fname)
 
diff --git a/numba_cuda/numba/cuda/np/npyimpl.py b/numba_cuda/numba/cuda/np/npyimpl.py
index a25779d9a..a43e011eb 100644
--- a/numba_cuda/numba/cuda/np/npyimpl.py
+++ b/numba_cuda/numba/cuda/np/npyimpl.py
@@ -507,7 +507,9 @@ def make_intp_const(val):
             if loc is not None:
                 msg += '\nFile "%s", line %d, ' % (loc.filename, loc.line)
 
-            context.call_conv.return_user_exc(builder, ValueError, (msg,))
+            context.fndesc.call_conv.return_user_exc(
+                builder, ValueError, (msg,)
+            )
 
     real_array_ty = array_ty.as_array
 
@@ -799,11 +801,11 @@ def generate(self, *args):
             for val, inty, outty in zip(args, osig.args, isig.args)
         ]
         if self.cres.objectmode:
-            func_type = self.context.call_conv.get_function_type(
+            func_type = self.context.fndesc.call_conv.get_function_type(
                 types.pyobject, [types.pyobject] * len(isig.args)
             )
         else:
-            func_type = self.context.call_conv.get_function_type(
+            func_type = self.context.fndesc.call_conv.get_function_type(
                 isig.return_type, isig.args
             )
         module = self.builder.block.function.module
@@ -812,7 +814,7 @@ def generate(self, *args):
         )
         entry_point.attributes.add("alwaysinline")
 
-        _, res = self.context.call_conv.call_function(
+        _, res = self.context.fndesc.call_conv.call_function(
             self.builder, entry_point, isig.return_type, isig.args, cast_args
         )
         return self.cast(res, isig.return_type, osig.return_type)
diff --git a/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py b/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py
index d7b497abe..1e6dc266e 100644
--- a/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py
+++ b/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py
@@ -149,12 +149,12 @@ def to_double(coef):
     pred2 = builder.icmp_signed("!=", s2, two)
 
     with cgutils.if_unlikely(builder, pred1):
-        context.call_conv.return_user_exc(
+        context.fndesc.call_conv.return_user_exc(
             builder, ValueError, ("Domain has wrong number of elements.",)
         )
 
     with cgutils.if_unlikely(builder, pred2):
-        context.call_conv.return_user_exc(
+        context.fndesc.call_conv.return_user_exc(
             builder, ValueError, ("Window has wrong number of elements.",)
         )
 
diff --git a/numba_cuda/numba/cuda/target.py b/numba_cuda/numba/cuda/target.py
index 8e2c9a4fb..cbe39946a 100644
--- a/numba_cuda/numba/cuda/target.py
+++ b/numba_cuda/numba/cuda/target.py
@@ -29,7 +29,6 @@
 from numba.cuda.debuginfo import CUDADIBuilder
 from numba.cuda.flags import CUDAFlags
 from numba.cuda.models import cuda_data_manager
-from numba.cuda.core.callconv import BaseCallConv, MinimalCallConv
 from numba.cuda.core import config, targetconfig
 
 
@@ -266,14 +265,14 @@ def nonconst_module_attrs(self):
         )
         return nonconsts_with_mod
 
-    @cached_property
+    @property
     def call_conv(self):
-        return CUDACallConv(self)
-
-    def mangler(self, name, argtypes, *, abi_tags=(), uid=None):
-        return itanium_mangler.mangle(
-            name, argtypes, abi_tags=abi_tags, uid=uid
+        warnings.warn(
+            "Context.call_conv is deprecated. "
+            "Use FunctionDescriptor.call_conv instead.",
+            DeprecationWarning,
         )
+        return self.fndesc.call_conv
 
     def make_constant_array(self, builder, aryty, arr):
         """
@@ -422,84 +421,3 @@ def _compile_subroutine_no_cache(
             # Allow inlining the function inside callers
             self.active_code_library.add_linking_library(cres.library)
             return cres
-
-
-class CUDACallConv(MinimalCallConv):
-    def decorate_function(self, fn, args, fe_argtypes, noalias=False):
-        """
-        Set names and attributes of function arguments.
-        """
-        assert not noalias
-        arginfo = self._get_arg_packer(fe_argtypes)
-        # Do not prefix "arg." on argument name, so that nvvm compiler
-        # can track debug info of argument more accurately
-        arginfo.assign_names(self.get_arguments(fn), args)
-        fn.args[0].name = ".ret"
-
-
-class CUDACABICallConv(BaseCallConv):
-    """
-    Calling convention aimed at matching the CUDA C/C++ ABI. The implemented
-    function signature is:
-
-        <Python return type> (<Python arguments>)
-
-    Exceptions are unsupported in this convention.
-    """
-
-    def _make_call_helper(self, builder):
-        # Call helpers are used to help report exceptions back to Python, so
-        # none is required here.
-        return None
-
-    def return_value(self, builder, retval):
-        return builder.ret(retval)
-
-    def return_user_exc(
-        self, builder, exc, exc_args=None, loc=None, func_name=None
-    ):
-        msg = "Python exceptions are unsupported in the CUDA C/C++ ABI"
-        raise NotImplementedError(msg)
-
-    def return_status_propagate(self, builder, status):
-        msg = "Return status is unsupported in the CUDA C/C++ ABI"
-        raise NotImplementedError(msg)
-
-    def get_function_type(self, restype, argtypes):
-        """
-        Get the LLVM IR Function type for *restype* and *argtypes*.
-        """
-        arginfo = self._get_arg_packer(argtypes)
-        argtypes = list(arginfo.argument_types)
-        fnty = ir.FunctionType(self.get_return_type(restype), argtypes)
-        return fnty
-
-    def decorate_function(self, fn, args, fe_argtypes, noalias=False):
-        """
-        Set names and attributes of function arguments.
-        """
-        assert not noalias
-        arginfo = self._get_arg_packer(fe_argtypes)
-        arginfo.assign_names(self.get_arguments(fn), ["arg." + a for a in args])
-
-    def get_arguments(self, func):
-        """
-        Get the Python-level arguments of LLVM *func*.
-        """
-        return func.args
-
-    def call_function(self, builder, callee, resty, argtys, args):
-        """
-        Call the Numba-compiled *callee*.
-        """
-        arginfo = self._get_arg_packer(argtys)
-        realargs = arginfo.as_arguments(builder, args)
-        code = builder.call(callee, realargs)
-        # No status required as we don't support exceptions or a distinct None
-        # value in a C ABI.
-        status = None
-        out = self.context.get_returned_value(builder, resty, code)
-        return status, out
-
-    def get_return_type(self, ty):
-        return self.context.data_model_manager[ty].get_return_type()
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py
index ee3691ca1..c2b66bb67 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py
@@ -179,7 +179,6 @@ def check_debug_info(self, ptx):
     # in that module anyway. So this test can only be expected to fail until we
     # have a proper way of generating device functions with the C ABI without
     # requiring the hack of generating a wrapper.
-    @unittest.expectedFailure
     def test_device_function_with_debug(self):
         # See Issue #6719 - this ensures that compilation with debug succeeds
         # with CUDA 11.2 / NVVM 7.0 onwards. Previously it failed because NVVM
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
index 3fa62728b..6698e6337 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py
@@ -496,5 +496,139 @@ def kernel(r, x):
         np.testing.assert_equal(r, x * 5)
 
 
+times2_cabi_cu = cuda.CUSource("""
+extern "C" __device__
+int times2(int a)
+{
+  return a * 2;
+}
+""")
+
+# 0-arg, 2-arg, and void-return C-ABI device functions.
+const42_cabi_cu = cuda.CUSource("""
+extern "C" __device__
+int const42()
+{
+  return 42;
+}
+""")
+
+add2_cabi_cu = cuda.CUSource("""
+extern "C" __device__
+int add2(int a, int b)
+{
+  return a + b;
+}
+""")
+
+consume_cabi_cu = cuda.CUSource("""
+extern "C" __device__
+void consume(int a)
+{
+  // Prevent trivial removal while still having no externally-visible effect.
+  asm volatile("" : : "r"(a) : "memory");
+}
+""")
+
+consume_cabi_pointer_cu = cuda.CUSource("""
+extern "C" __device__
+void consume(int *a)
+{
+  *a = 42;
+}
+""")
+
+
+@skip_on_cudasim("External functions unsupported in the simulator")
+class TestDeclareDeviceCABI(CUDATestCase):
+    def test_declare_device_cabi_basic(self):
+        times2 = cuda.declare_device(
+            "times2", "int32(int32)", link=times2_cabi_cu, abi="c"
+        )
+
+        @cuda.jit
+        def kernel(r, x):
+            i = cuda.grid(1)
+            if i < len(r):
+                r[i] = times2(x[i])
+
+        x = np.arange(10, dtype=np.int32)
+        r = np.empty_like(x)
+        kernel[1, 32](r, x)
+        np.testing.assert_equal(r, x * 2)
+
+    def test_declare_device_cabi_zero_args(self):
+        const42 = cuda.declare_device(
+            "const42", "int32()", link=const42_cabi_cu, abi="c"
+        )
+
+        @cuda.jit
+        def kernel(r, x):
+            i = cuda.grid(1)
+            if i < len(r):
+                r[i] = x[i] + const42()
+
+        x = np.arange(10, dtype=np.int32)
+        r = np.empty_like(x)
+        kernel[1, 32](r, x)
+        np.testing.assert_equal(r, x + 42)
+
+    def test_declare_device_cabi_two_args(self):
+        add2 = cuda.declare_device(
+            "add2", "int32(int32, int32)", link=add2_cabi_cu, abi="c"
+        )
+
+        @cuda.jit
+        def kernel(r, x):
+            i = cuda.grid(1)
+            if i < len(r):
+                r[i] = add2(x[i], i)
+
+        x = np.arange(10, dtype=np.int32)
+        r = np.empty_like(x)
+        kernel[1, 32](r, x)
+        np.testing.assert_equal(r, x + np.arange(10, dtype=np.int32))
+
+    def test_declare_device_cabi_void_return(self):
+        consume = cuda.declare_device(
+            "consume", "void(int32)", link=consume_cabi_cu, abi="c"
+        )
+
+        @cuda.jit
+        def kernel(r, x):
+            i = cuda.grid(1)
+            if i < len(r):
+                # The call itself is what we want to validate; output must remain
+                # correct, proving compilation + linking + invocation succeeded.
+                consume(x[i])
+                r[i] = x[i] * 2
+
+        x = np.arange(10, dtype=np.int32)
+        r = np.empty_like(x)
+        kernel[1, 32](r, x)
+        np.testing.assert_equal(r, x * 2)
+
+    def test_declare_device_cabi_pointer_return(self):
+        ffi = cffi.FFI()
+        consume = cuda.declare_device(
+            "consume",
+            types.void(types.CPointer(types.int32)),
+            link=consume_cabi_pointer_cu,
+            abi="c",
+        )
+
+        @cuda.jit
+        def kernel(x):
+            i = cuda.grid(1)
+            if i < len(x):
+                ptr = ffi.from_buffer(x[i:])
+                consume(ptr)
+
+        x = np.zeros(10, dtype=np.int32)
+
+        kernel[1, 32](x)
+        np.testing.assert_equal(x, 42)
+
+
 if __name__ == "__main__":
     unittest.main()