Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 26 additions & 37 deletions cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,48 +14,40 @@

LIBDL_PATH = ctypes.util.find_library("dl") or "libdl.so.2"
LIBDL = ctypes.CDLL(LIBDL_PATH)
LIBDL.dladdr.argtypes = [ctypes.c_void_p, ctypes.c_void_p]
LIBDL.dladdr.restype = ctypes.c_int
LIBDL.dlinfo.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p]
LIBDL.dlinfo.restype = ctypes.c_int
Comment on lines -17 to +32

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a bit nervous about us using either dladdr or dlinfo since none of them is POSIX-compliant. Since dladdr has been used, switching to dlinfo does not make it more "non-compliant", so it's still a step forward. We just have to see if it runs everywhere...

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since none of them is POSIX-compliant

Do you think the ChatGPT conclusion below is reasonable?

My ChatGPT prompt:


This makes me want to ask:

Does CUDA run on any systems that are not Linux or Windows?

(Windows dynamic loading is handled separately. I.e. basically I'm only asking: does CUDA run on any Unix system that is not Linux?)

Do you know?


ChatGPT said:

Thought for 19s

Short answer: No. Today CUDA is officially supported on Linux (various distros/arches, including aarch64/Jetson) and Windows (including WSL2 Linux environments on Windows). There’s no current macOS runtime support, and no official support for other Unix-like OSes (e.g., *BSD).

So if your scope is “Linux only,” it’s reasonable to lean on glibc-specific behavior (e.g., dlinfo)—CUDA isn’t targeting musl/Alpine or non-Linux Unix platforms.




class DlInfo(ctypes.Structure):
"""Structure used by dladdr to return information about a loaded symbol."""
# RTLD_DI_LINKMAP is the dlinfo request code used to obtain a handle's link_map.
# It first appeared in 2004-era glibc; the value 2 is universally correct on Linux for all practical purposes.
RTLD_DI_LINKMAP = 2
Comment thread
leofang marked this conversation as resolved.


class LinkMap(ctypes.Structure):
# Minimal fields we need; layout matches glibc's struct link_map
_fields_ = (
("dli_fname", ctypes.c_char_p), # path to .so
("dli_fbase", ctypes.c_void_p),
("dli_sname", ctypes.c_char_p),
("dli_saddr", ctypes.c_void_p),
("l_addr", ctypes.c_void_p),
("l_name", ctypes.c_char_p),
("l_ld", ctypes.c_void_p),
("l_next", ctypes.c_void_p),
("l_prev", ctypes.c_void_p),
)


def abs_path_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> Optional[str]:
"""Get the absolute path of a loaded dynamic library on Linux.

Args:
libname: The name of the library
handle: The library handle
def abs_path_for_dynamic_library(libname: str, handle: ctypes.CDLL) -> str:
lm_ptr = ctypes.POINTER(LinkMap)()
rc = LIBDL.dlinfo(ctypes.c_void_p(handle._handle), RTLD_DI_LINKMAP, ctypes.byref(lm_ptr))
if rc == 0 and lm_ptr and lm_ptr.contents.l_name:
path: str = lm_ptr.contents.l_name.decode()
if path:
return path

Returns:
The absolute path to the library file, or None if no expected symbol is found
raise OSError(f"abs_path_for_dynamic_library failed for {libname=!r}")
Comment thread
leofang marked this conversation as resolved.
Outdated

Raises:
OSError: If dladdr fails to get information about the symbol
"""
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import EXPECTED_LIB_SYMBOLS

for symbol_name in EXPECTED_LIB_SYMBOLS[libname]:
symbol = getattr(handle, symbol_name, None)
if symbol is not None:
break
else:
return None

addr = ctypes.cast(symbol, ctypes.c_void_p)
info = DlInfo()
if LIBDL.dladdr(addr, ctypes.byref(info)) == 0:
raise OSError(f"dladdr failed for {libname=!r}")
return info.dli_fname.decode() # type: ignore[no-any-return]
def get_candidate_sonames(libname: str) -> list[str]:
    """Return the sonames to try for ``libname``, in lookup order.

    Any sonames listed for ``libname`` in ``SUPPORTED_LINUX_SONAMES`` come
    first; the unversioned ``lib<libname>.so`` is always appended last.
    """
    known_sonames = SUPPORTED_LINUX_SONAMES.get(libname, ())
    return [*known_sonames, f"lib{libname}.so"]


def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]:
Expand All @@ -72,9 +64,8 @@ def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]:
>>> if loaded is not None:
... print(f"Library already loaded from {loaded.abs_path}")
"""
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import SUPPORTED_LINUX_SONAMES

for soname in SUPPORTED_LINUX_SONAMES.get(libname, ()):
for soname in get_candidate_sonames(libname):
try:
handle = ctypes.CDLL(soname, mode=os.RTLD_NOLOAD)
except OSError:
Expand All @@ -96,9 +87,7 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]:
Raises:
RuntimeError: If the library is loaded but no expected symbol is found
"""
candidate_sonames = list(SUPPORTED_LINUX_SONAMES.get(libname, ()))
candidate_sonames.append(f"lib{libname}.so")
for soname in candidate_sonames:
for soname in get_candidate_sonames(libname):
try:
handle = ctypes.CDLL(soname, CDLL_MODE)
abs_path = abs_path_for_dynamic_library(libname, handle)
Expand Down
11 changes: 4 additions & 7 deletions cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
from typing import Optional

from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY,
SUPPORTED_WINDOWS_DLLS,
)

# Mirrors WinBase.h (unfortunately not defined already elsewhere)
WINBASE_LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
Expand Down Expand Up @@ -110,7 +114,6 @@ def check_if_already_loaded_from_elsewhere(libname: str) -> Optional[LoadedDL]:
>>> if loaded is not None:
... print(f"Library already loaded from {loaded.abs_path}")
"""
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import SUPPORTED_WINDOWS_DLLS

for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()):
handle = kernel32.GetModuleHandleW(dll_name)
Expand All @@ -129,8 +132,6 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]:
Returns:
A LoadedDL object if successful, None if the library cannot be loaded
"""
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import SUPPORTED_WINDOWS_DLLS

for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()):
handle = kernel32.LoadLibraryExW(dll_name, None, 0)
if handle:
Expand All @@ -153,10 +154,6 @@ def load_with_abs_path(libname: str, found_path: str) -> LoadedDL:
Raises:
RuntimeError: If the DLL cannot be loaded
"""
from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import (
LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY,
)

if libname in LIBNAMES_REQUIRING_OS_ADD_DLL_DIRECTORY:
add_dll_directory(found_path)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# SUPPORTED_LIBNAMES
# SUPPORTED_WINDOWS_DLLS
# SUPPORTED_LINUX_SONAMES
# EXPECTED_LIB_SYMBOLS

import sys

Expand Down Expand Up @@ -401,39 +400,3 @@ def is_suppressed_dll_file(path_basename: str) -> bool:
# nvrtc64_120_0.dll
return path_basename.endswith(".alt.dll") or "-builtins" in path_basename
return path_basename.startswith(("cudart32_", "nvvm32"))


# Maps each library name to a tuple of symbol names; the symbols are tried in
# order until one is found on the loaded library handle.
# Based on `nm -D --defined-only` output for Linux x86_64 distributions.
EXPECTED_LIB_SYMBOLS = {
    # nvJitLink lists two candidates because neither one covers every release.
    "nvJitLink": (
        "__nvJitLinkCreate_12_0",  # 12.0 through 12.9
        "nvJitLinkVersion",  # 12.3 and up
    ),
    "nvrtc": ("nvrtcVersion",),
    "nvvm": ("nvvmVersion",),
    "cudart": ("cudaRuntimeGetVersion",),
    "nvfatbin": ("nvFatbinVersion",),
    # cu* libraries
    "cublas": ("cublasGetVersion",),
    "cublasLt": ("cublasLtGetVersion",),
    "cufft": ("cufftGetVersion",),
    "cufftw": ("fftwf_malloc",),
    "curand": ("curandGetVersion",),
    "cusolver": ("cusolverGetVersion",),
    "cusolverMg": ("cusolverMgCreate",),
    "cusparse": ("cusparseGetVersion",),
    # npp* libraries
    "nppc": ("nppGetLibVersion",),
    "nppial": ("nppiAdd_32f_C1R_Ctx",),
    "nppicc": ("nppiColorToGray_8u_C3C1R_Ctx",),
    "nppidei": ("nppiCopy_8u_C1R_Ctx",),
    "nppif": ("nppiFilterSobelHorizBorder_8u_C1R_Ctx",),
    "nppig": ("nppiResize_8u_C1R_Ctx",),
    "nppim": ("nppiErode_8u_C1R_Ctx",),
    "nppist": ("nppiMean_8u_C1R_Ctx",),
    "nppisu": ("nppiFree",),
    "nppitc": ("nppiThreshold_8u_C1R_Ctx",),
    "npps": ("nppsAdd_32f_Ctx",),
    # remaining libraries
    "nvblas": ("dgemm",),
    "cufile": ("cuFileGetVersion",),
    # "cufile_rdma": ("rdma_buffer_reg",),
    "nvjpeg": ("nvjpegCreate",),
}
2 changes: 1 addition & 1 deletion cuda_pathfinder/cuda/pathfinder/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

__version__ = "1.1.1a0"
__version__ = "1.1.1a1"
6 changes: 0 additions & 6 deletions cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,6 @@ def test_supported_libnames_windows_libnames_requiring_os_add_dll_directory_cons
)


def test_supported_libnames_all_expected_lib_symbols_consistency():
    """Check that EXPECTED_LIB_SYMBOLS has exactly the names in SUPPORTED_LIBNAMES_ALL."""
    all_libnames = tuple(sorted(supported_nvidia_libs.SUPPORTED_LIBNAMES_ALL))
    libnames_with_symbols = tuple(sorted(supported_nvidia_libs.EXPECTED_LIB_SYMBOLS.keys()))
    assert all_libnames == libnames_with_symbols


def test_runtime_error_on_non_64bit_python():
with (
patch("struct.calcsize", return_value=3), # fake 24-bit pointer
Expand Down
Loading