[Cublas] Add bfloat16 support when dispatching to cuBLAS kernels

annanyapr · annanyapr · commit 172d7c1d8711 · 2025-04-06T20:03:00.000-04:00
diff --git a/python/tvm/relax/backend/cuda/cublas.py b/python/tvm/relax/backend/cuda/cublas.py
@@ -43,6 +43,7 @@ def _is_supported_dtype(lhs_dtype, rhs_dtype, out_dtype):
         (lhs_dtype == "float16" and rhs_dtype == "float16")
         or (lhs_dtype == "float32" and rhs_dtype == "float32")
         or (lhs_dtype == "int8" and rhs_dtype == "int8")
+        or (lhs_dtype == "bfloat16" and rhs_dtype == "bfloat16")
     )
 
 
diff --git a/src/runtime/contrib/cublas/cublas.cc b/src/runtime/contrib/cublas/cublas.cc
@@ -162,6 +162,8 @@ void CallCublasLt(cublasLtHandle_t hdl, cudaStream_t stream,
 
   if (TypeMatch(A->dtype, kDLFloat, 16)) {
     ab_type = CUDA_R_16F;
+  } else if(TypeMatch(A->dtype, kDLBfloat, 16)){
+    ab_type = CUDA_R_16BF;
   } else if (TypeMatch(A->dtype, kDLInt, 8)) {
     ab_type = CUDA_R_8I;
   } else if (TypeMatch(A->dtype, DataType::TypeCode::kFloat8_e4m3fn, 8)) {
@@ -171,6 +173,8 @@ void CallCublasLt(cublasLtHandle_t hdl, cudaStream_t stream,
 
   if (TypeMatch(C->dtype, kDLFloat, 16)) {
     c_type = CUDA_R_16F;
+  } else if(TypeMatch(C->dtype, kDLBfloat, 16)){
+    c_type = CUDA_R_16BF;
   } else if (TypeMatch(C->dtype, kDLInt, 32)) {
     c_type = CUDA_R_32I;
     compute_type = CUBLAS_COMPUTE_32I;
diff --git a/src/runtime/contrib/cublas/cublas_utils.h b/src/runtime/contrib/cublas/cublas_utils.h
@@ -116,6 +116,11 @@ inline cudaDataType_t GetCudaDataType(DLDataType type) {
       case 64:
         return CUDA_R_64F;
     }
+  } else if (type.code == kDLBfloat){
+    switch (type.bits) {
+      case 16:
+        return CUDA_R_16BF;
+    }
   }
   LOG(FATAL) << "Unsupported cuda type";
 }

Original file line number	Diff line number	Diff line change
`@@ -43,6 +43,7 @@ def _is_supported_dtype(lhs_dtype, rhs_dtype, out_dtype):`
`43`	`43`	`(lhs_dtype == "float16" and rhs_dtype == "float16")`
`44`	`44`	`or (lhs_dtype == "float32" and rhs_dtype == "float32")`
`45`	`45`	`or (lhs_dtype == "int8" and rhs_dtype == "int8")`
	`46`	`+ or (lhs_dtype == "bfloat16" and rhs_dtype == "bfloat16")`
`46`	`47`	`)`
`47`	`48`
`48`	`49`
Original file line number	Diff line number	Diff line change
`@@ -116,6 +116,11 @@ inline cudaDataType_t GetCudaDataType(DLDataType type) {`
`116`	`116`	`case 64:`
`117`	`117`	`return CUDA_R_64F;`
`118`	`118`	`}`
	`119`	`+ } else if (type.code == kDLBfloat){`
	`120`	`+ switch (type.bits) {`
	`121`	`+ case 16:`
	`122`	`+ return CUDA_R_16BF;`
	`123`	`+ }`
`119`	`124`	`}`
`120`	`125`	`LOG(FATAL) << "Unsupported cuda type";`
`121`	`126`	`}`