112 changes: 108 additions & 4 deletions numba_cuda/numba/cuda/compiler.py
@@ -1,4 +1,5 @@
from llvmlite import ir
from collections import namedtuple
from numba.core import ir as numba_ir
from numba.core import (
cgutils,
@@ -14,7 +15,6 @@
sanitize_compile_result_entries,
CompilerBase,
DefaultPassBuilder,
CompileResult,
)
from numba.core.compiler_lock import global_compiler_lock
from numba.core.compiler_machinery import (
@@ -45,25 +45,129 @@
# The CUDACompileResult (CCR) has a specially-defined entry point equal to its
# id. This is because the entry point is used as a key into a dict of
# overloads by the base dispatcher. The id of the CCR is the only small and
# unique property of a CompileResult in the CUDA target (cf. the CPU target,
# unique property of a CUDACompileResult in the CUDA target (cf. the CPU target,
# which uses its entry_point, which is a pointer value).
#
# This does feel a little hackish, and there are two ways in which this could
# be improved:
#
# 1. We could change the core of Numba so that each CompileResult has its own
# 1. We could change the CUDACompileResult so that each instance has its own
# unique ID that can be used as a key - e.g. a count, similar to the way in
# which types have unique counts.
# 2. At some future time when kernel launch uses a compiled function, the entry
# point will no longer need to be a synthetic value, but will instead be a
# pointer to the compiled function as in the CPU target.
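
To make the mechanism above concrete, here is a minimal sketch (editorial illustration only, not part of this change) of keying a dispatcher-style overloads dict by a synthetic entry point. All names are hypothetical; the only point is that id() provides a small, unique, hashable key even though no compiled function pointer exists yet.

```python
# Editorial sketch, not part of this diff: hypothetical names throughout.
class _FakeCompileResult:
    @property
    def entry_point(self):
        # No function pointer is available for a CUDA kernel at this stage,
        # so the object's id stands in as a small, unique, hashable key.
        return id(self)


overloads = {}
for cres in (_FakeCompileResult(), _FakeCompileResult()):
    overloads[cres.entry_point] = cres

# Each live result maps to a distinct key; ids are only guaranteed unique
# among objects that are alive at the same time.
assert len(overloads) == 2
```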

CR_FIELDS = [
"typing_context",
"target_context",
"entry_point",
"typing_error",
"type_annotation",
"signature",
"objectmode",
"lifted",
"fndesc",
"library",
"call_helper",
"environment",
"metadata",
# List of functions to call to initialize on unserialization
# (i.e. cache load).
"reload_init",
"referenced_envs",
]


class CUDACompileResult(namedtuple("_CompileResult", CR_FIELDS)):
"""
A structure holding results from the compilation of a function.
"""

__slots__ = ()

class CUDACompileResult(CompileResult):
@property
def entry_point(self):
return id(self)

def _reduce(self):
"""
Reduce a CUDACompileResult to picklable components.
"""
libdata = self.library.serialize_using_object_code()
# Make it (un)picklable efficiently
typeann = str(self.type_annotation)
fndesc = self.fndesc
# These don't need to be pickled and may fail to do so
fndesc.typemap = fndesc.calltypes = None
# The CUDA target does not reference environments
referenced_envs = tuple()
return (
libdata,
self.fndesc,
self.environment,
self.signature,
self.objectmode,
self.lifted,
typeann,
self.reload_init,
referenced_envs,
)

@classmethod
def _rebuild(
cls,
target_context,
libdata,
fndesc,
env,
signature,
objectmode,
lifted,
typeann,
reload_init,
referenced_envs,
):
if reload_init:
# Re-run all initialization functions (e.g. on a cache load)
for fn in reload_init:
fn()

library = target_context.codegen().unserialize_library(libdata)
cfunc = target_context.get_executable(library, fndesc, env)
cr = cls(
target_context=target_context,
typing_context=target_context.typing_context,
library=library,
environment=env,
entry_point=cfunc,
fndesc=fndesc,
type_annotation=typeann,
signature=signature,
objectmode=objectmode,
lifted=lifted,
typing_error=None,
call_helper=None,
metadata=None, # Do not store, arbitrary & potentially large!
reload_init=reload_init,
referenced_envs=referenced_envs,
)

# Load environments (a no-op for the CUDA target, which references none)
for env in referenced_envs:
library.codegen.set_env(env.env_name, env)

return cr
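
As a side note, the _reduce/_rebuild pair above follows the usual serialize-then-reconstruct shape used for caching. A self-contained toy analogue of that round trip (hypothetical names, not the Numba API):

```python
# Editorial sketch of the reduce/rebuild round trip; not the Numba API.
import pickle
from collections import namedtuple

ToyResult = namedtuple("ToyResult", ["library", "signature", "metadata"])


def reduce_result(res):
    # Keep only the picklable pieces; large or rebuildable fields are dropped.
    return (res.library.encode("utf-8"), res.signature)


def rebuild_result(payload):
    libdata, signature = payload
    # metadata is not stored, mirroring the comment in _rebuild above.
    return ToyResult(library=libdata.decode("utf-8"), signature=signature,
                     metadata=None)


original = ToyResult(library="<object code>", signature="(int64,)",
                     metadata={"large": "stuff"})
payload = pickle.dumps(reduce_result(original))
restored = rebuild_result(pickle.loads(payload))
assert restored.signature == original.signature
```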

@property
def codegen(self):
return self.target_context.codegen()

def dump(self, tab=""):
print(f"{tab}DUMP {type(self).__name__} {self.entry_point}")
self.signature.dump(tab=tab + " ")
print(f"{tab}END DUMP")


def cuda_compile_result(**entries):
entries = sanitize_compile_result_entries(entries)