Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions ci/test_thirdparty_awkward.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,21 @@ index 39080a34..0eb3940f 100644
array = rng.integers(50, size=1000)
EOF

patch -p1 <<'EOF'
diff --git a/pyproject.toml b/pyproject.toml
index 78ecfba9..80a25474 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -136,6 +136,7 @@ filterwarnings = [
"ignore:.*np\\.MachAr.*:DeprecationWarning",
"ignore:module 'sre_.*' is deprecated:DeprecationWarning",
"ignore:Jitify is performing a one-time only warm-up",
+ "ignore:Context.call_conv is deprecated.",
]
log_cli_level = "INFO"
testpaths = ["tests", "tests-cuda", "tests-cuda-kernels", "tests-cuda-kernels-explicit"]
EOF

rapids-logger "Generate awkward tests"
nox -s prepare -- --tests

Expand Down
8 changes: 4 additions & 4 deletions numba_cuda/numba/cuda/cgutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,12 +764,12 @@ def _dbg():
with if_unlikely(builder, out_of_bounds_upper):
if config.FULL_TRACEBACKS:
_dbg()
context.call_conv.return_user_exc(builder, IndexError, (msg,))
context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,))
out_of_bounds_lower = builder.icmp_signed("<", ind, ind.type(0))
with if_unlikely(builder, out_of_bounds_lower):
if config.FULL_TRACEBACKS:
_dbg()
context.call_conv.return_user_exc(builder, IndexError, (msg,))
context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,))


def get_item_pointer2(
Expand Down Expand Up @@ -936,7 +936,7 @@ def guard_null(context, builder, value, exc_tuple):
with builder.if_then(is_scalar_zero(builder, value), likely=False):
exc = exc_tuple[0]
exc_args = exc_tuple[1:] or None
context.call_conv.return_user_exc(builder, exc, exc_args)
context.fndesc.call_conv.return_user_exc(builder, exc, exc_args)


def guard_memory_error(context, builder, pointer, msg=None):
Expand All @@ -946,7 +946,7 @@ def guard_memory_error(context, builder, pointer, msg=None):
assert isinstance(pointer.type, ir.PointerType), pointer.type
exc_args = (msg,) if msg else ()
with builder.if_then(is_null(builder, pointer), likely=False):
context.call_conv.return_user_exc(builder, MemoryError, exc_args)
context.fndesc.call_conv.return_user_exc(builder, MemoryError, exc_args)


@contextmanager
Expand Down
93 changes: 27 additions & 66 deletions numba_cuda/numba/cuda/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from numba.cuda.cudadrv.linkable_code import LinkableCode
from numba.cuda.descriptor import cuda_target
from numba.cuda.flags import CUDAFlags
from numba.cuda.target import CUDACABICallConv
from numba.cuda.core.callconv import CUDACABICallConv, CUDACallConv
from numba.cuda.core.compiler import CompilerBase
from numba.cuda.core.compiler_machinery import (
FunctionPass,
Expand Down Expand Up @@ -699,6 +699,8 @@ def compile_cuda(
cc=None,
max_registers=None,
lto=False,
abi="numba",
abi_info=None,
):
if cc is None:
raise ValueError("Compute Capability must be supplied")
Expand Down Expand Up @@ -741,6 +743,12 @@ def compile_cuda(
flags.max_registers = max_registers
flags.lto = lto

if abi == "c":
flags.call_conv = CUDACABICallConv(targetctx)

if abi_info is not None:
flags.abi_info = abi_info

with utils.numba_target_override():
cres = compile_extra(
typingctx=typingctx,
Expand All @@ -759,57 +767,6 @@ def compile_cuda(
return cres


def cabi_wrap_function(
    context, lib, fndesc, wrapper_function_name, nvvm_options
):
    """
    Wrap a Numba ABI function in a C ABI wrapper at the NVVM IR level.

    The C ABI wrapper will have the same name as the source Python function.

    Parameters
    ----------
    context : target context used for type layout, modules, and call conv.
    lib : codegen library containing the already-compiled Numba-ABI function.
    fndesc : function descriptor of the wrapped function (argtypes, restype,
        llvm_func_name).
    wrapper_function_name : symbol name to give the C ABI entry point.
    nvvm_options : options forwarded to the new library's NVVM compilation.

    Returns
    -------
    A new, finalized library whose entry point is the C ABI wrapper and which
    links against *lib*.
    """
    # The wrapper will be contained in a new library that links to the wrapped
    # function's library
    library = lib.codegen.create_library(
        f"{lib.name}_function_",
        entry_name=wrapper_function_name,
        nvvm_options=nvvm_options,
    )
    library.add_linking_library(lib)

    # Determine the caller (C ABI) and wrapper (Numba ABI) function types
    argtypes = fndesc.argtypes
    restype = fndesc.restype
    c_call_conv = CUDACABICallConv(context)
    wrapfnty = c_call_conv.get_function_type(restype, argtypes)
    fnty = context.call_conv.get_function_type(fndesc.restype, argtypes)

    # Create a new module and declare the callee
    wrapper_module = context.create_module("cuda.cabi.wrapper")
    func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name)

    # Define the caller - populate it with a call to the callee and return
    # its return value

    wrapfn = ir.Function(wrapper_module, wrapfnty, wrapper_function_name)
    builder = ir.IRBuilder(wrapfn.append_basic_block(""))

    # Repack the raw C ABI arguments into the form the Numba ABI callee
    # expects before forwarding them.
    arginfo = context.get_arg_packer(argtypes)
    callargs = arginfo.from_arguments(builder, wrapfn.args)
    # We get (status, return_value), but we ignore the status since we
    # can't propagate it through the C ABI anyway
    _, return_value = context.call_conv.call_function(
        builder, func, restype, argtypes, callargs
    )
    builder.ret(return_value)

    if config.DUMP_LLVM:
        utils.dump_llvm(fndesc, wrapper_module)

    library.add_ir_module(wrapper_module)
    library.finalize()
    return library


def kernel_fixup(kernel, debug):
if debug:
exc_helper = add_exception_store_helper(kernel)
Expand Down Expand Up @@ -934,7 +891,7 @@ def define_error_gv(postfix):
# Implement status check / exception store logic

status_code = helper_func.args[0]
call_conv = cuda_target.target_context.call_conv
call_conv = CUDACallConv(cuda_target.target_context)
status = call_conv._get_return_status(builder, status_code)

# Check error status
Expand Down Expand Up @@ -1118,23 +1075,16 @@ def _compile_pyfunc_with_fixup(
nvvm_options=nvvm_options,
cc=cc,
forceinline=forceinline,
abi=abi,
abi_info=abi_info,
)
resty = cres.signature.return_type

if resty and not device and resty != types.void:
raise TypeError("CUDA kernel must have void return type.")
Comment on lines 1080 to 1084
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed cabi_wrap_function call for device functions with C ABI. Verify this was intentional and that device functions with abi="c" still work correctly without the wrapper.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is indeed intentional, because functions / kernels can bear their own calling convention, rather than using a target-wide calling convention. cabi_wrap_function was meant to patch a function with Numba ABI to a caller that expects a CABI function. The fix in this PR is simply switching the kernel to bear a CUDACABICallConv and should work as expected.


tgt = cres.target_context

if device:
lib = cres.library
if abi == "c":
wrapper_name = abi_info.get("abi_name", pyfunc.__name__)
lib = cabi_wrap_function(
tgt, lib, cres.fndesc, wrapper_name, nvvm_options
)
else:
lib = cres.library
lib = cres.library
if not device:
kernel = lib.get_function(cres.fndesc.llvm_func_name)
lib._entry_name = cres.fndesc.llvm_func_name
kernel_fixup(kernel, debug)
Expand Down Expand Up @@ -1355,7 +1305,9 @@ def compile_ptx_for_current_device(
)


def declare_device_function(name, restype, argtypes, link, use_cooperative):
def declare_device_function(
name, restype, argtypes, link, use_cooperative, abi
):
from .descriptor import cuda_target

typingctx = cuda_target.typing_context
Expand All @@ -1376,9 +1328,18 @@ def declare_device_function(name, restype, argtypes, link, use_cooperative):
lib.add_linking_file(file)
lib.use_cooperative = use_cooperative

if abi == "numba":
call_conv = CUDACallConv(targetctx)
elif abi == "c":
call_conv = CUDACABICallConv(targetctx)
else:
raise NotImplementedError(f"Unsupported ABI: {abi}")

# ExternalFunctionDescriptor provides a lowering implementation for calling
# external functions
fndesc = funcdesc.ExternalFunctionDescriptor(name, restype, argtypes)
fndesc = funcdesc.ExternalFunctionDescriptor(
name, restype, argtypes, call_conv
)
targetctx.insert_user_function(extfn, fndesc, libs=(lib,))

return device_function_template
Expand Down
19 changes: 3 additions & 16 deletions numba_cuda/numba/cuda/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,19 +468,6 @@ def get_external_function_type(self, fndesc):
fnty = llvmir.FunctionType(restype, argtypes)
return fnty

def declare_function(self, module, fndesc):
    """
    Declare the function described by *fndesc* in LLVM *module* and return
    the (possibly pre-existing) llvmlite function object.

    The function type and argument decoration come from the target's calling
    convention; the symbol name is the descriptor's mangled name.
    """
    fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes)
    fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name)
    self.call_conv.decorate_function(
        fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias
    )
    if fndesc.inline:
        fn.attributes.add("alwaysinline")
        # alwaysinline overrides optnone
        fn.attributes.discard("noinline")
        fn.attributes.discard("optnone")
    return fn

def declare_external_function(self, module, fndesc):
fnty = self.get_external_function_type(fndesc)
fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name)
Expand Down Expand Up @@ -975,7 +962,7 @@ def call_internal(self, builder, fndesc, sig, args):
builder, fndesc, sig, args
)
with cgutils.if_unlikely(builder, status.is_error):
self.call_conv.return_status_propagate(builder, status)
fndesc.call_conv.return_status_propagate(builder, status)

res = imputils.fix_returning_optional(self, builder, sig, status, res)
return res
Expand All @@ -986,8 +973,8 @@ def call_internal_no_propagate(self, builder, fndesc, sig, args):
"""
# Add call to the generated function
llvm_mod = builder.module
fn = self.declare_function(llvm_mod, fndesc)
status, res = self.call_conv.call_function(
fn = fndesc.declare_function(llvm_mod)
status, res = fndesc.call_conv.call_function(
builder, fn, sig.return_type, sig.args, args
)
return status, res
Expand Down
97 changes: 95 additions & 2 deletions numba_cuda/numba/cuda/core/callconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from numba.cuda import types
from numba.cuda import cgutils
from numba.cuda import itanium_mangler
from collections import namedtuple

from llvmlite import ir
Expand Down Expand Up @@ -161,6 +162,11 @@ def _get_arg_packer(self, argtypes):
"""
return self.context.get_arg_packer(argtypes)

def mangler(self, name, argtypes, *, abi_tags=(), uid=None):
    """
    Mangle *name* for *argtypes* using Itanium C++-style mangling.

    This is the default symbol-naming scheme for call conventions; subclasses
    (e.g. a C ABI convention) may override it to produce unmangled names.
    """
    return itanium_mangler.mangle(
        name, argtypes, abi_tags=abi_tags, uid=uid
    )


class MinimalCallConv(BaseCallConv):
"""
Expand Down Expand Up @@ -341,6 +347,93 @@ def get_exception(self, exc_id):
return exc, exc_args, locinfo


class CUDACallConv(MinimalCallConv):
    def decorate_function(self, fn, args, fe_argtypes, noalias=False):
        """
        Name the LLVM arguments of *fn* after the Python-level arguments.
        """
        assert not noalias
        # The first LLVM argument carries the return value.
        fn.args[0].name = ".ret"
        # Use the bare argument names (no "arg." prefix) so that the NVVM
        # compiler can track debug info for each argument more accurately.
        packer = self._get_arg_packer(fe_argtypes)
        packer.assign_names(self.get_arguments(fn), args)


class CUDACABICallConv(BaseCallConv):
    """
    Calling convention aimed at matching the CUDA C/C++ ABI. The implemented
    function signature is:

        <Python return type> (<Python arguments>)

    Exceptions are unsupported in this convention: there is no status slot
    through which an error could be reported back to the caller.
    """

    def _make_call_helper(self, builder):
        # Call helpers are used to help report exceptions back to Python, so
        # none is required here.
        return None

    def return_value(self, builder, retval):
        # Return the value directly, C-style; no status packing.
        return builder.ret(retval)

    def return_user_exc(
        self, builder, exc, exc_args=None, loc=None, func_name=None
    ):
        msg = "Python exceptions are unsupported in the CUDA C/C++ ABI"
        raise NotImplementedError(msg)

    def return_status_propagate(self, builder, status):
        msg = "Return status is unsupported in the CUDA C/C++ ABI"
        raise NotImplementedError(msg)

    def get_function_type(self, restype, argtypes):
        """
        Get the LLVM IR Function type for *restype* and *argtypes*.
        """
        arginfo = self._get_arg_packer(argtypes)
        argtypes = list(arginfo.argument_types)
        fnty = ir.FunctionType(self.get_return_type(restype), argtypes)
        return fnty

    def decorate_function(self, fn, args, fe_argtypes, noalias=False):
        """
        Set names and attributes of function arguments.
        """
        assert not noalias
        arginfo = self._get_arg_packer(fe_argtypes)
        arginfo.assign_names(self.get_arguments(fn), ["arg." + a for a in args])

    def get_arguments(self, func):
        """
        Get the Python-level arguments of LLVM *func*.
        """
        # Unlike the Numba ABI, there is no leading return-value argument to
        # skip: every LLVM argument is a Python-level argument.
        return func.args

    def call_function(self, builder, callee, resty, argtys, args):
        """
        Call the Numba-compiled *callee*.

        Returns a ``(status, out)`` pair where *status* is always None, to
        keep the interface parallel with the exception-capable conventions.
        """
        arginfo = self._get_arg_packer(argtys)
        realargs = arginfo.as_arguments(builder, args)
        code = builder.call(callee, realargs)
        # No status required as we don't support exceptions or a distinct None
        # value in a C ABI.
        status = None
        out = self.context.get_returned_value(builder, resty, code)
        return status, out

    def get_return_type(self, ty):
        # The C ABI returns the data model's native return type directly.
        return self.context.data_model_manager[ty].get_return_type()

    def mangler(self, name, argtypes, *, abi_tags=None, uid=None):
        """
        Produce an unmangled (C-style) symbol name from a dotted Numba name.
        """
        if name.startswith(".NumbaEnv."):
            # Environment globals keep a fixed mangled prefix.
            # NOTE(review): "_ZN08NumbaEnv" is assumed to match the naming
            # used by consumers of these environment symbols; the "08" length
            # prefix is unusual for Itanium mangling of the 8-char "NumbaEnv"
            # — confirm against the code that looks these symbols up.
            func_name = name.split(".")[-1]
            return f"_ZN08NumbaEnv{func_name}"
        # Otherwise expose the bare function name, as C callers expect.
        return name.split(".")[-1]


class ErrorModel:
def __init__(self, call_conv):
self.call_conv = call_conv
Expand Down Expand Up @@ -385,8 +478,8 @@ class NumpyErrorModel(ErrorModel):
}


def create_error_model(model_name, context):
def create_error_model(model_name, call_conv):
"""
Create an error model instance for the given target context.
"""
return error_models[model_name](context.call_conv)
return error_models[model_name](call_conv)
8 changes: 7 additions & 1 deletion numba_cuda/numba/cuda/core/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from numba.cuda.core.untyped_passes import ExtractByteCode, FixupArgs
from numba.cuda.core.targetconfig import ConfigStack

from numba.cuda.core.callconv import CUDACallConv


class _CompileStatus:
"""
Expand Down Expand Up @@ -66,7 +68,11 @@ def _make_subtarget(targetctx, flags):
subtargetoptions["enable_nrt"] = True
if flags.fastmath:
subtargetoptions["fastmath"] = flags.fastmath
error_model = callconv.create_error_model(flags.error_model, targetctx)

# Only the CUDA Calling convention can raise exceptions, so we assume here
# that it is suitable for creating the error model.
call_conv = CUDACallConv(targetctx)
error_model = callconv.create_error_model(flags.error_model, call_conv)
subtargetoptions["error_model"] = error_model

return targetctx.subtarget(**subtargetoptions)
Expand Down
Loading
Loading