Implement reverse lookup (Ptr->Tuple) for CUDNN descriptors. (#1948)

JuliaGPU · Aug 19, 2023 · 4b87ec0 · 4b87ec0
1 parent dc16d92
commit 4b87ec0
Showing 1 changed file with 40 additions and 4 deletions.
diff --git a/lib/cudnn/src/convolution.jl b/lib/cudnn/src/convolution.jl
@@ -120,6 +120,28 @@ function cudnnConvolutionForwardAD(w, x, bias, z; y, activation, convDesc, wDesc
     return y
 end
 
+function cudnnGetConvolutionDescriptor(d::cudnnConvolutionDescriptor)
+    # Read the settings stored in `d` back into plain Julia values. The number of
+    # convolution dimensions is not known up front, so each output buffer is sized
+    # for the largest count requested here; the count in use is read back below.
+    maxdims = CUDNN_DIM_MAX - 2
+    ndimsref = Ref{Cint}(maxdims)
+    padbuf = Vector{Cint}(undef, maxdims)
+    stridebuf = Vector{Cint}(undef, maxdims)
+    dilationbuf = Vector{Cint}(undef, maxdims)
+    moderef = Ref{cuDNN.cudnnConvolutionMode_t}(CUDNN_CONVOLUTION)
+    dtyperef = Ref{cuDNN.cudnnDataType_t}(cuDNN.CUDNN_DATA_FLOAT)
+
+    cudnnGetConvolutionNdDescriptor(d, maxdims, ndimsref, padbuf, stridebuf, dilationbuf,
+                                    moderef, dtyperef)
+
+    # Keep only the first `used` entries of each buffer, converted to tuples.
+    used = ndimsref[]
+    firstused(buf) = Tuple(buf[1:used])
+    return (juliaDataType(dtyperef[]), moderef[], used,
+            firstused(padbuf), firstused(stridebuf), firstused(dilationbuf))
+end
+
 # Helper for cudnnConvolutionDescriptor
 function cudnnSetConvolutionDescriptor(
     ptr::cudnnConvolutionDescriptor_t,
@@ -179,9 +201,15 @@ const cudnnConvolutionFwdAlgoPerfCacheLock = ReentrantLock()
 It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
 """
 function cudnnConvolutionFwdAlgoPerf(xDesc, x, wDesc, w, convDesc, yDesc, y, biasDesc, activation, allocateTmpBuf=true)
-    key = (xDesc, wDesc, convDesc, biasDesc, activation)
+    xDesc_native = cudnnGetTensorDescriptor(xDesc)
+    wDesc_native = cudnnGetFilterDescriptor(wDesc)
+    convDesc_native = cudnnGetConvolutionDescriptor(convDesc)
+    biasDesc_native = (isnothing(biasDesc) ? nothing
+                                           : cudnnGetTensorDescriptor(biasDesc))
+
+    key = (xDesc_native, wDesc_native, convDesc_native, biasDesc, activation)
     val = lock(cudnnConvolutionFwdAlgoPerfCacheLock) do
-         get(cudnnConvolutionFwdAlgoPerfCache, key, nothing)
+        get(cudnnConvolutionFwdAlgoPerfCache, key, nothing)
     end
     if val === nothing
         requestedAlgoCount = Int(CUDNN_CONVOLUTION_FWD_ALGO_COUNT)
@@ -210,7 +238,11 @@ const cudnnConvolutionBwdDataAlgoPerfCacheLock = ReentrantLock()
 It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
 """
 function cudnnConvolutionBwdDataAlgoPerf(wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, allocateTmpBuf=true)
-    key = (wDesc, dyDesc, convDesc)
+    wDesc_native = cudnnGetFilterDescriptor(wDesc)
+    dyDesc_native = cudnnGetTensorDescriptor(dyDesc)
+    convDesc_native = cudnnGetConvolutionDescriptor(convDesc)
+
+    key = (wDesc_native, dyDesc_native, convDesc_native)
     val = lock(cudnnConvolutionBwdDataAlgoPerfCacheLock) do
         get(cudnnConvolutionBwdDataAlgoPerfCache, key, nothing)
     end
@@ -241,7 +273,11 @@ const cudnnConvolutionBwdFilterAlgoPerfCacheLock = ReentrantLock()
 It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
 """
 function cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, dyDesc, dy, convDesc, dwDesc, dw, allocateTmpBuf=true)
-    key = (xDesc, dyDesc, convDesc)
+    xDesc_native = cudnnGetTensorDescriptor(xDesc)
+    dyDesc_native = cudnnGetTensorDescriptor(dyDesc)
+    convDesc_native = cudnnGetConvolutionDescriptor(convDesc)
+
+    key = (xDesc_native, dyDesc_native, convDesc_native)
     val = lock(cudnnConvolutionBwdFilterAlgoPerfCacheLock) do
         get(cudnnConvolutionBwdFilterAlgoPerfCache, (xDesc, dyDesc, convDesc), nothing)
     end