Skip to content

Commit 4e0b423

Browse files
committed
hook: Add conditional library initialization in cuGetProcAddress()
initialize_libnvshare() is our bootstrapping function, which, among other things, obtains the original CUDA function addresses needed for our hook logic. We currently call initialize_libnvshare() only when the user application makes its first call to cuInit(), which we assume is **always** the first CUDA Driver API function that it calls. However, CUDA >=11.3 applications that use the Runtime API call cuGetProcAddress() before cuInit(), which means that in these cases cuGetProcAddress() is the first function called. So far we have been lucky, as the first call that such apps make is cuGetProcAddress("cuInit"), which falls into our custom handling cases and does not invoke the original cuGetProcAddress(), to which we don't yet have a pointer because we haven't bootstrapped CUDA. To address the aforementioned case and avoid potential NULL pointer dereferences, add a one-time call to initialize_libnvshare() in cuGetProcAddress(). Signed-off-by: Xinyuan Lyu <[email protected]>
1 parent 67bed3f commit 4e0b423

File tree

1 file changed

+13
-3
lines changed

1 file changed

+13
-3
lines changed

src/hook.c

+13-3
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ struct cuda_mem_allocation {
9393
/* Linked list that holds all memory allocations of current application. */
9494
struct cuda_mem_allocation *cuda_allocation_list = NULL;
9595

96+
/* Once-only guards: each initialization step is executed at most once per CUDA process. */
97+
static pthread_once_t init_libnvshare_done = PTHREAD_ONCE_INIT;
98+
static pthread_once_t init_done = PTHREAD_ONCE_INIT;
99+
96100
/* Load real CUDA {Driver API, NVML} functions and bootstrap auxiliary stuff. */
97101
static void bootstrap_cuda(void)
98102
{
@@ -510,6 +514,14 @@ void *dlsym_234(void *handle, const char *symbol)
510514
CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion,
511515
cuuint64_t flags)
512516
{
517+
/**
518+
* cuGetProcAddress() will be called before cuInit() in CUDA
519+
* Runtime API (version >=11.3), so cuGetProcAddress() should also serve as
520+
* an entrypoint.
521+
* Otherwise, real_cuGetProcAddress may be a NULL pointer when it is called.
522+
*/
523+
true_or_exit(pthread_once(&init_libnvshare_done, initialize_libnvshare) == 0);
524+
true_or_exit(pthread_once(&init_done, initialize_client) == 0);
513525
CUresult result = CUDA_SUCCESS;
514526

515527
if (real_cuGetProcAddress == NULL) return CUDA_ERROR_NOT_INITIALIZED;
@@ -553,7 +565,7 @@ CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion,
553565
CUresult cuMemAlloc(CUdeviceptr *dptr, size_t bytesize)
554566
{
555567
static int got_max_mem_size = 0;
556-
size_t junk;
568+
size_t junk;
557569
CUresult result = CUDA_SUCCESS;
558570

559571

@@ -659,8 +671,6 @@ CUresult cuMemGetInfo(size_t *free, size_t *total)
659671
CUresult cuInit(unsigned int flags)
660672
{
661673
CUresult result = CUDA_SUCCESS;
662-
static pthread_once_t init_libnvshare_done = PTHREAD_ONCE_INIT;
663-
static pthread_once_t init_done = PTHREAD_ONCE_INIT;
664674

665675
true_or_exit(pthread_once(&init_libnvshare_done, initialize_libnvshare) == 0);
666676
true_or_exit(pthread_once(&init_done, initialize_client) == 0);

0 commit comments

Comments
 (0)