apache · MasterJH5574 · Jul 18, 2023 · Jul 16, 2023 · Jul 16, 2023 · Jul 16, 2023
diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h
@@ -48,7 +48,8 @@ enum DeviceAttrKind : int {
   kMaxRegistersPerBlock = 9,
   kGcnArch = 10,
   kApiVersion = 11,
-  kDriverVersion = 12
+  kDriverVersion = 12,
+  kL2CacheSizeBytes = 13,  // L2 cache size in bytes; the Python Device property queries it as 13
 };
 
 #ifdef TVM_KALLOC_ALIGNMENT

diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py
@@ -488,6 +488,22 @@ def driver_version(self):
         """
         return self._GetDeviceAttr(self.device_type, self.device_id, 12)
 
+    @property
+    def l2_cache_size_bytes(self):
+        """Size of this device's L2 cache, in bytes.
+
+        Returns
+        -------
+        l2_cache_size_bytes : int or None
+            The L2 cache size in bytes, as reported by the device runtime API.
+
+        Note
+        ----
+        OpenCL reports less than the real L2 cache size; Vulkan does not support this attribute.
+        """
+        attr_kind = 13  # DeviceAttrKind::kL2CacheSizeBytes
+        return self._GetDeviceAttr(self.device_type, self.device_id, attr_kind)
+
     def texture_spatial_limit(self):
         """Returns limits for textures by spatial dimensions
 

diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py
@@ -244,6 +244,10 @@ def supports_cooperative_matrix(self):
     def features(self):
         return TargetFeatures(self)
 
+    @property
+    def l2_cache_size_bytes(self):
+        return int(self.attrs.get("l2_cache_size_bytes", 0))  # 0 is the default passed to get()
+
     def get_kind_attr(self, attr_name):
         """Get additional attribute about the target kind.
 

diff --git a/src/runtime/cuda/cuda_device_api.cc b/src/runtime/cuda/cuda_device_api.cc
@@ -105,6 +105,12 @@ class CUDADeviceAPI final : public DeviceAPI {
       }
       case kDriverVersion:
         return;
+      case kL2CacheSizeBytes: {  // Report the device L2 cache size in bytes.
+        int l2_size = 0;  // braces scope this initialized local so later case labels stay legal
+        CUDA_CALL(cudaDeviceGetAttribute(&l2_size, cudaDevAttrL2CacheSize, dev.device_id));
+        *rv = l2_size;
+        return;  // result already stored; skip the shared `*rv = value` after the switch
+      }
     }
     *rv = value;
   }

diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
@@ -198,6 +198,13 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
       *rv = std::string(value);
       break;
     }
+    case kL2CacheSizeBytes:
+      // NOTE(Zihao): the reported size does not match the real L2 cache size on CUDA/AMD GPUs.
+      cl_ulong cache_bytes;
+      OPENCL_CALL(clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(cache_bytes),
+                                  &cache_bytes, nullptr));
+      *rv = static_cast<int64_t>(cache_bytes);
+      break;
   }
 }
 

diff --git a/src/runtime/rocm/rocm_device_api.cc b/src/runtime/rocm/rocm_device_api.cc
@@ -122,6 +122,11 @@ class ROCMDeviceAPI final : public DeviceAPI {
       }
       case kDriverVersion:
         return;
+      case kL2CacheSizeBytes:  // Report the device L2 cache size in bytes.
+        int l2_size;
+        ROCM_CALL(hipDeviceGetAttribute(&l2_size, hipDeviceAttributeL2CacheSize, device.device_id));
+        *rv = l2_size;
+        return;  // skip the trailing `*rv = value`, which would overwrite the result
     }
     *rv = value;
   }

diff --git a/src/runtime/vulkan/vulkan_device_api.cc b/src/runtime/vulkan/vulkan_device_api.cc
@@ -160,6 +160,12 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
       *rv = os.str();
       break;
     }
+
+    case kL2CacheSizeBytes:
+      // Vulkan does not have a standalone API to measure the L2 cache size (and
+      // vkCmdPipelineBarrier will flush the L2 texture cache), so 0 is reported here.
+      *rv = 0;
+      break;
   }
 }