Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions numba_cuda/numba/cuda/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def get_lto_ptx(self, cc=None):

cc = self._ensure_cc(cc)

linker = driver._Linker.new(
linker = driver._Linker(
max_registers=self._max_registers,
cc=cc,
additional_flags=["-ptx"],
Expand Down Expand Up @@ -308,7 +308,7 @@ def get_cubin(self, cc=None):
print(ptx)
print("=" * 80)

linker = driver._Linker.new(
linker = driver._Linker(
max_registers=self._max_registers, cc=cc, lto=self._lto
)
self._link_all(linker, cc, ignore_nonlto=False)
Expand Down
283 changes: 37 additions & 246 deletions numba_cuda/numba/cuda/cudadrv/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@
c_int,
byref,
c_size_t,
c_char,
c_char_p,
addressof,
c_void_p,
c_uint8,
)
Expand Down Expand Up @@ -2241,7 +2238,7 @@ def get_global_symbol(self, name):
)


class Function:
class CudaPythonFunction:
griddim = 1, 1, 1
blockdim = 1, 1, 1
stream = 0
Expand Down Expand Up @@ -2296,73 +2293,38 @@ def set_shared_memory_carveout(self, carveout):


# Alias for backward compatibility
CudaPythonFunction = Function
Function = CudaPythonFunction


class _LinkerBase(metaclass=ABCMeta):
"""Abstract base class for linkers"""

@classmethod
def new(
cls,
max_registers=0,
class _Linker:
def __init__(
self,
max_registers=None,
lineinfo=False,
cc=None,
lto=None,
additional_flags=None,
):
linker = _Linker

params = (max_registers, lineinfo, cc)
if linker is _Linker:
params = (*params, lto, additional_flags)
else:
if lto or additional_flags:
raise ValueError("LTO and additional flags require nvjitlink")

return linker(*params)

@abstractmethod
def __init__(self, max_registers, lineinfo, cc):
# LTO unsupported in Numba at present, but the pynvjitlink linker
# (https://github.com/rapidsai/pynvjitlink) supports it,
self.lto = False

@property
@abstractmethod
def info_log(self):
"""Return the info log from the linker invocation"""

@property
@abstractmethod
def error_log(self):
"""Return the error log from the linker invocation"""

@abstractmethod
def add_ptx(self, ptx, name):
"""Add PTX source in a string to the link"""

def add_cu(self, cu, name):
"""Add CUDA source in a string to the link. The name of the source
file should be specified in `name`."""
ptx, log = nvrtc.compile(cu, name, self.cc)

if config.DUMP_ASSEMBLY:
print(("ASSEMBLY %s" % name).center(80, "-"))
print(ptx)
print("=" * 80)

# Link the program's PTX using the normal linker mechanism
ptx_name = os.path.splitext(name)[0] + ".ptx"
self.add_ptx(ptx.encode(), ptx_name)

@abstractmethod
def add_data(self, data, kind, name):
"""Add in-memory data to the link"""
arch = f"sm_{cc[0]}{cc[1]}"
self.max_registers = max_registers if max_registers else None
self.lineinfo = lineinfo
self.cc = cc
self.arch = arch
if lto is False:
# WAR for apparent nvjitlink issue
lto = None
self.lto = lto
self.additional_flags = additional_flags

@abstractmethod
def add_file(self, path, kind):
"""Add code from a file to the link"""
self.options = LinkerOptions(
max_register_count=self.max_registers,
lineinfo=lineinfo,
arch=arch,
link_time_optimization=lto,
)
self._complete = False
self._object_codes = []
self.linker = None # need at least one program

def add_cu_file(self, path):
cu = cached_file_read(path, how="rb")
Expand Down Expand Up @@ -2449,47 +2411,9 @@ def add_file_guess_ext(self, path_or_code, ignore_nonlto=False):
path_or_code.data, path_or_code.kind, path_or_code.name
)

@abstractmethod
def complete(self):
"""Complete the link. Returns (cubin, size)

cubin is a pointer to a internal buffer of cubin owned by the linker;
thus, it should be loaded before the linker is destroyed.
"""


class _Linker(_LinkerBase):
def __init__(
self,
max_registers=None,
lineinfo=False,
cc=None,
lto=None,
additional_flags=None,
):
arch = f"sm_{cc[0]}{cc[1]}"
self.max_registers = max_registers if max_registers else None
self.lineinfo = lineinfo
self.cc = cc
self.arch = arch
if lto is False:
# WAR for apparent nvjitlink issue
lto = None
self.lto = lto
self.additional_flags = additional_flags

self.options = LinkerOptions(
max_register_count=self.max_registers,
lineinfo=lineinfo,
arch=arch,
link_time_optimization=lto,
)
self._complete = False
self._object_codes = []
self.linker = None # need at least one program

@property
def info_log(self):
"""Return the info log from the linker invocation"""
if not self.linker:
raise ValueError("Not Initialized")
if self._complete:
Expand All @@ -2498,17 +2422,21 @@ def info_log(self):

@property
def error_log(self):
"""Return the error log from the linker invocation"""
if not self.linker:
raise ValueError("Not Initialized")
if self._complete:
return self._error_log
raise RuntimeError("Link not yet complete.")

def add_ptx(self, ptx, name="<cudapy-ptx>"):
"""Add PTX source in a string to the link"""
obj = ObjectCode.from_ptx(ptx, name=name)
self._object_codes.append(obj)

def add_cu(self, cu, name="<cudapy-cu>"):
"""Add CUDA source in a string to the link. The name of the source
file should be specified in `name`."""
obj, log = nvrtc.compile(cu, name, self.cc, ltoir=self.lto)

if not self.lto and config.DUMP_ASSEMBLY:
Expand Down Expand Up @@ -2538,6 +2466,7 @@ def add_library(self, lib, name="<cudapy-lib>"):
self._object_codes.append(obj)

def add_file(self, path, kind):
"""Add code from a file to the link"""
try:
data = cached_file_read(path, how="rb")
except FileNotFoundError:
Expand All @@ -2546,6 +2475,7 @@ def add_file(self, path, kind):
self.add_data(data, kind, name)

def add_data(self, data, kind, name):
"""Add in-memory data to the link"""
if kind == FILE_EXTENSION_MAP["ptx"]:
fn = self.add_ptx
elif kind == FILE_EXTENSION_MAP["cubin"]:
Expand Down Expand Up @@ -2589,157 +2519,18 @@ def close(self):
self.linker.close()

def complete(self):
"""Complete the link. Returns (cubin, size)

cubin is a pointer to a internal buffer of cubin owned by the linker;
thus, it should be loaded before the linker is destroyed.
"""
self.linker = Linker(*self._object_codes, options=self.options)
result = self.linker.link("cubin")
self.close()
self._complete = True
return result


class CtypesLinker(_LinkerBase):
"""
Links for current device if no CC given
"""

def __init__(self, max_registers=0, lineinfo=False, cc=None):
super().__init__(max_registers, lineinfo, cc)

logsz = config.CUDA_LOG_SIZE
linkerinfo = (c_char * logsz)()
linkererrors = (c_char * logsz)()

options = {
enums.CU_JIT_INFO_LOG_BUFFER: addressof(linkerinfo),
enums.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: c_void_p(logsz),
enums.CU_JIT_ERROR_LOG_BUFFER: addressof(linkererrors),
enums.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: c_void_p(logsz),
enums.CU_JIT_LOG_VERBOSE: c_void_p(1),
}
if max_registers:
options[enums.CU_JIT_MAX_REGISTERS] = c_void_p(max_registers)
if lineinfo:
options[enums.CU_JIT_GENERATE_LINE_INFO] = c_void_p(1)

self.cc = cc
if cc is None:
# No option value is needed, but we need something as a placeholder
options[enums.CU_JIT_TARGET_FROM_CUCONTEXT] = 1
else:
cc_val = cc[0] * 10 + cc[1]
options[enums.CU_JIT_TARGET] = c_void_p(cc_val)

raw_keys = list(options.keys())
raw_values = list(options.values())

option_keys = (drvapi.cu_jit_option * len(raw_keys))(*raw_keys)
option_vals = (c_void_p * len(raw_values))(*raw_values)

self.handle = handle = drvapi.cu_link_state()
driver.cuLinkCreate(
len(raw_keys), option_keys, option_vals, byref(self.handle)
)

weakref.finalize(self, driver.cuLinkDestroy, handle)

self.linker_info_buf = linkerinfo
self.linker_errors_buf = linkererrors

self._keep_alive = [linkerinfo, linkererrors, option_keys, option_vals]

@property
def info_log(self):
return self.linker_info_buf.value.decode("utf8")

@property
def error_log(self):
return self.linker_errors_buf.value.decode("utf8")

def add_cubin(self, cubin, name="<unnamed-cubin>"):
return self._add_data(enums.CU_JIT_INPUT_CUBIN, cubin, name)

def add_ptx(self, ptx, name="<unnamed-ptx>"):
return self._add_data(enums.CU_JIT_INPUT_PTX, ptx, name)

def add_object(self, object_, name="<unnamed-object>"):
return self._add_data(enums.CU_JIT_INPUT_OBJECT, object_, name)

def add_fatbin(self, fatbin, name="<unnamed-fatbin>"):
return self._add_data(enums.CU_JIT_INPUT_FATBINARY, fatbin, name)

def add_library(self, library, name="<unnamed-library>"):
return self._add_data(enums.CU_JIT_INPUT_LIBRARY, library, name)

def _add_data(self, input_type, data, name):
data_buffer = c_char_p(data)
name_buffer = c_char_p(name.encode("utf8"))
self._keep_alive += [data_buffer, name_buffer]
try:
driver.cuLinkAddData(
self.handle,
input_type,
data_buffer,
len(data),
name_buffer,
0,
None,
None,
)
except CudaAPIError as e:
raise LinkerError("%s\n%s" % (e, self.error_log))

def add_data(self, data, kind, name=None):
# We pass the name as **kwargs to ensure the default name for the input
# type is used if none is supplied
kws = {}
if name is not None:
kws["name"] = name

if kind == FILE_EXTENSION_MAP["cubin"]:
self.add_cubin(data, **kws)
elif kind == FILE_EXTENSION_MAP["fatbin"]:
self.add_fatbin(data, **kws)
elif kind == FILE_EXTENSION_MAP["a"]:
self.add_library(data, **kws)
elif kind == FILE_EXTENSION_MAP["ptx"]:
self.add_ptx(data, **kws)
elif kind == FILE_EXTENSION_MAP["o"]:
self.add_object(data, **kws)
elif kind == FILE_EXTENSION_MAP["ltoir"]:
raise LinkerError("Ctypes linker cannot link LTO-IR")
else:
raise LinkerError(f"Don't know how to link {kind}")

def add_file(self, path, kind):
pathbuf = c_char_p(path.encode("utf8"))
self._keep_alive.append(pathbuf)

try:
driver.cuLinkAddFile(self.handle, kind, pathbuf, 0, None, None)
except CudaAPIError as e:
if e.code == enums.CUDA_ERROR_FILE_NOT_FOUND:
msg = f"{path} not found"
else:
msg = "%s\n%s" % (e, self.error_log)
raise LinkerError(msg)

def complete(self):
cubin_buf = c_void_p(0)
size = c_size_t(0)

try:
driver.cuLinkComplete(self.handle, byref(cubin_buf), byref(size))
except CudaAPIError as e:
raise LinkerError("%s\n%s" % (e, self.error_log))

size = size.value
assert size > 0, "linker returned a zero sized cubin"
del self._keep_alive[:]

# We return a copy of the cubin because it's owned by the linker
cubin_ptr = ctypes.cast(cubin_buf, ctypes.POINTER(ctypes.c_char))
return bytes(np.ctypeslib.as_array(cubin_ptr, shape=(size,)))


# -----------------------------------------------------------------------------


Expand Down
2 changes: 1 addition & 1 deletion numba_cuda/numba/cuda/memory_management/nrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def _compile_memsys_module(self):
cc = get_current_device().compute_capability

# Create a new linker instance and add the cu file
linker = _Linker.new(cc=cc, lto=_have_nvjitlink())
linker = _Linker(max_registers=0, cc=cc, lto=_have_nvjitlink())
linker.add_cu_file(memsys_mod)

# Complete the linker and create a module from it
Expand Down
Loading