Skip to content

Commit 56e5162

Browse files
All memory pool operations are checked during the init phase. For CUDA 12+, device properties are checked.
1 parent 815bf1a commit 56e5162

File tree

1 file changed

+36
-9
lines changed

1 file changed

+36
-9
lines changed

ggml-cuda.cu

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5849,16 +5849,43 @@ void ggml_init_cublas() {
58495849
cudaDeviceProp prop;
58505850
CUDA_CHECK(cudaGetDeviceProperties(&prop, id));
58515851
fprintf(stderr, " Device %d: %s, compute capability %d.%d", id, prop.name, prop.major, prop.minor);
5852+
58525853
#if defined(CUDA_USE_MEMORY_POOL)
5853-
// configure memory pool
5854-
cudaError_t err = cudaDeviceGetMemPool(&g_cudaMemPools[id], id);
5855-
if (err == cudaSuccess) {
5856-
size_t treshold = UINT64_MAX;
5857-
CUDA_CHECK(cudaMemPoolSetAttribute(g_cudaMemPools[id], cudaMemPoolAttrReleaseThreshold, &treshold));
5858-
fprintf(stderr, ", CUDA memory pool is supported\n");
5859-
} else {
5860-
g_cudaMemPools[id] = nullptr;
5861-
fprintf(stderr, ", CUDA memory pool is not supported\n");
5854+
bool support_mem_pool = true;
5855+
#if CUDART_VERSION >= 12000
5856+
support_mem_pool = (prop.memoryPoolsSupported == 1);
5857+
#endif
5858+
if (support_mem_pool) {
5859+
cudaError_t err = cudaDeviceGetMemPool(&g_cudaMemPools[id], id);
5860+
if (err == cudaSuccess) {
5861+
size_t treshold = UINT64_MAX;
5862+
err = (cudaMemPoolSetAttribute(g_cudaMemPools[id], cudaMemPoolAttrReleaseThreshold, &treshold));
5863+
if (err == cudaSuccess) {
5864+
fprintf(stderr, ", CUDA memory pool is supported\n");
5865+
} else {
5866+
g_cudaMemPools[id] = nullptr;
5867+
fprintf(stderr, ", CUDA memory pool is not supported (release threshold error)\n");
5868+
}
5869+
} else {
5870+
g_cudaMemPools[id] = nullptr;
5871+
fprintf(stderr, ", CUDA memory pool is not supported (cant load default pool)\n");
5872+
}
5873+
// test alloc/dealoc
5874+
if (err == cudaSuccess) {
5875+
void *testPtr;
5876+
size_t testSize = 1024;
5877+
err = cudaMallocFromPoolAsync(&testPtr, testSize, g_cudaMemPools[id], g_cudaStreams[id][0]);
5878+
if (err == cudaSuccess) {
5879+
err = cudaFreeAsync(testPtr, g_cudaStreams[id][0]);
5880+
if (err != cudaSuccess) {
5881+
g_cudaMemPools[id] = nullptr;
5882+
fprintf(stderr, ", CUDA memory pool is not supported (deallocation failed)\n");
5883+
}
5884+
} else {
5885+
g_cudaMemPools[id] = nullptr;
5886+
fprintf(stderr, ", CUDA memory pool is not supported (allocation failed)\n");
5887+
}
5888+
}
58625889
}
58635890
#endif
58645891
g_tensor_split[id] = total_vram;

0 commit comments

Comments (0)