@@ -5849,16 +5849,43 @@ void ggml_init_cublas() {
58495849 cudaDeviceProp prop;
58505850 CUDA_CHECK (cudaGetDeviceProperties (&prop, id));
58515851 fprintf (stderr, " Device %d: %s, compute capability %d.%d" , id, prop.name , prop.major , prop.minor );
5852+
58525853#if defined(CUDA_USE_MEMORY_POOL)
5853- // configure memory pool
5854- cudaError_t err = cudaDeviceGetMemPool (&g_cudaMemPools[id], id);
5855- if (err == cudaSuccess) {
5856- size_t treshold = UINT64_MAX;
5857- CUDA_CHECK (cudaMemPoolSetAttribute (g_cudaMemPools[id], cudaMemPoolAttrReleaseThreshold, &treshold));
5858- fprintf (stderr, " , CUDA memory pool is supported\n " );
5859- } else {
5860- g_cudaMemPools[id] = nullptr ;
5861- fprintf (stderr, " , CUDA memory pool is not supported\n " );
5854+ bool support_mem_pool = true ;
5855+ #if CUDART_VERSION >= 12000
5856+ support_mem_pool = (prop.memoryPoolsSupported == 1 );
5857+ #endif
5858+ if (support_mem_pool) {
5859+ cudaError_t err = cudaDeviceGetMemPool (&g_cudaMemPools[id], id);
5860+ if (err == cudaSuccess) {
5861+ size_t treshold = UINT64_MAX;
5862+ err = (cudaMemPoolSetAttribute (g_cudaMemPools[id], cudaMemPoolAttrReleaseThreshold, &treshold));
5863+ if (err == cudaSuccess) {
5864+ fprintf (stderr, " , CUDA memory pool is supported\n " );
5865+ } else {
5866+ g_cudaMemPools[id] = nullptr ;
5867+ fprintf (stderr, " , CUDA memory pool is not supported (release threshold error)\n " );
5868+ }
5869+ } else {
5870+ g_cudaMemPools[id] = nullptr ;
5871+ fprintf (stderr, " , CUDA memory pool is not supported (cant load default pool)\n " );
5872+ }
5873+ // test alloc/dealloc
5874+ if (err == cudaSuccess) {
5875+ void *testPtr;
5876+ size_t testSize = 1024 ;
5877+ err = cudaMallocFromPoolAsync (&testPtr, testSize, g_cudaMemPools[id], g_cudaStreams[id][0 ]);
5878+ if (err == cudaSuccess) {
5879+ err = cudaFreeAsync (testPtr, g_cudaStreams[id][0 ]);
5880+ if (err != cudaSuccess) {
5881+ g_cudaMemPools[id] = nullptr ;
5882+ fprintf (stderr, " , CUDA memory pool is not supported (deallocation failed)\n " );
5883+ }
5884+ } else {
5885+ g_cudaMemPools[id] = nullptr ;
5886+ fprintf (stderr, " , CUDA memory pool is not supported (allocation failed)\n " );
5887+ }
5888+ }
58625889 }
58635890#endif
58645891 g_tensor_split[id] = total_vram;
0 commit comments