From 63650aa107f368e174ffc0b5b19c1a29bc985f2a Mon Sep 17 00:00:00 2001 From: yzh119 Date: Sun, 16 Jul 2023 19:35:42 +0000 Subject: [PATCH 1/5] init commit --- include/tvm/runtime/device_api.h | 3 ++- python/tvm/_ffi/runtime_ctypes.py | 6 ++++++ python/tvm/target/target.py | 6 +++++- src/runtime/cuda/cuda_device_api.cc | 6 ++++++ src/runtime/opencl/opencl_device_api.cc | 6 ++++++ src/runtime/rocm/rocm_device_api.cc | 5 +++++ src/runtime/vulkan/vulkan_device_api.cc | 4 ++++ 7 files changed, 34 insertions(+), 2 deletions(-) diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index d3c2f9ba3857..e5ce20432941 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -48,7 +48,8 @@ enum DeviceAttrKind : int { kMaxRegistersPerBlock = 9, kGcnArch = 10, kApiVersion = 11, - kDriverVersion = 12 + kDriverVersion = 12, + kGlobalMemCacheSizeBytes = 13, }; #ifdef TVM_KALLOC_ALIGNMENT diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py index 363843018927..4e40456d04fe 100644 --- a/python/tvm/_ffi/runtime_ctypes.py +++ b/python/tvm/_ffi/runtime_ctypes.py @@ -487,6 +487,12 @@ def driver_version(self): """ return self._GetDeviceAttr(self.device_type, self.device_id, 12) + + @property + def global_mem_cache_size(self): + """TODO(Zihao) + """ + return self._GetDeviceAttr(self.device_type, self.device_id, 13) def texture_spatial_limit(self): """Returns limits for textures by spatial dimensions diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py index 0c834c5f026e..57b551da2bd8 100644 --- a/python/tvm/target/target.py +++ b/python/tvm/target/target.py @@ -243,7 +243,11 @@ def supports_cooperative_matrix(self): @property def features(self): return TargetFeatures(self) - + + @property + def global_mem_cache_size(self): + return int(self.attrs.get("global_mem_cache_size", 0)) + def get_kind_attr(self, attr_name): """Get additional attribute about the target kind. 
diff --git a/src/runtime/cuda/cuda_device_api.cc b/src/runtime/cuda/cuda_device_api.cc index 71788e52999a..665f3ea02b20 100644 --- a/src/runtime/cuda/cuda_device_api.cc +++ b/src/runtime/cuda/cuda_device_api.cc @@ -105,6 +105,12 @@ class CUDADeviceAPI final : public DeviceAPI { } case kDriverVersion: return; + case kGlobalMemCacheSizeBytes: + // Get size of device l2 cache size in bytes. + int l2_size = 0; + CUDA_CALL(cudaDeviceGetAttribute(&l2_size, cudaDevAttrL2CacheSize, dev.device_id)); + *rv = l2_size; + return; } *rv = value; } diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc index f3eb8d83a210..def7f931ea55 100644 --- a/src/runtime/opencl/opencl_device_api.cc +++ b/src/runtime/opencl/opencl_device_api.cc @@ -198,6 +198,12 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) *rv = std::string(value); break; } + case kGlobalMemCacheSizeBytes: + cl_ulong value; + OPENCL_CALL(clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(value), &value, + nullptr)); + *rv = static_cast<int64_t>(value); + break; } } diff --git a/src/runtime/rocm/rocm_device_api.cc b/src/runtime/rocm/rocm_device_api.cc index 4e758b7fd977..c7718623806c 100644 --- a/src/runtime/rocm/rocm_device_api.cc +++ b/src/runtime/rocm/rocm_device_api.cc @@ -122,6 +122,11 @@ class ROCMDeviceAPI final : public DeviceAPI { } case kDriverVersion: return; + case kGlobalMemCacheSizeBytes: + // Get size of device l2 cache size in bytes. 
+ int l2_size; + ROCM_CALL(hipDeviceGetAttribute(l2_size, hipDeviceAttributeL2CacheSize, device.device_id)); + *rv = l2_size; } *rv = value; } diff --git a/src/runtime/vulkan/vulkan_device_api.cc b/src/runtime/vulkan/vulkan_device_api.cc index 108741525602..aba3c5e98e0c 100644 --- a/src/runtime/vulkan/vulkan_device_api.cc +++ b/src/runtime/vulkan/vulkan_device_api.cc @@ -160,6 +160,10 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) *rv = os.str(); break; } + + case kGlobalMemCacheSizeBytes: + *rv = 256 * 1024 * 1024; // return 256mb by default + break; } } From 8579e7a050074b632e09f15c4c83202fda756479 Mon Sep 17 00:00:00 2001 From: yzh119 Date: Sun, 16 Jul 2023 20:06:24 +0000 Subject: [PATCH 2/5] rename cpp files --- include/tvm/runtime/device_api.h | 2 +- src/runtime/cuda/cuda_device_api.cc | 2 +- src/runtime/opencl/opencl_device_api.cc | 3 ++- src/runtime/rocm/rocm_device_api.cc | 2 +- src/runtime/vulkan/vulkan_device_api.cc | 6 ++++-- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index e5ce20432941..654018565716 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -49,7 +49,7 @@ enum DeviceAttrKind : int { kGcnArch = 10, kApiVersion = 11, kDriverVersion = 12, - kGlobalMemCacheSizeBytes = 13, + kL2CacheSizeBytes = 13, }; #ifdef TVM_KALLOC_ALIGNMENT diff --git a/src/runtime/cuda/cuda_device_api.cc b/src/runtime/cuda/cuda_device_api.cc index 665f3ea02b20..21416f619fae 100644 --- a/src/runtime/cuda/cuda_device_api.cc +++ b/src/runtime/cuda/cuda_device_api.cc @@ -105,7 +105,7 @@ class CUDADeviceAPI final : public DeviceAPI { } case kDriverVersion: return; - case kGlobalMemCacheSizeBytes: + case kL2CacheSizeBytes: // Get size of device l2 cache size in bytes. 
int l2_size = 0; CUDA_CALL(cudaDeviceGetAttribute(&l2_size, cudaDevAttrL2CacheSize, dev.device_id)); diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc index def7f931ea55..0d1f4af2bbf1 100644 --- a/src/runtime/opencl/opencl_device_api.cc +++ b/src/runtime/opencl/opencl_device_api.cc @@ -198,7 +198,8 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) *rv = std::string(value); break; } - case kGlobalMemCacheSizeBytes: + case kL2CacheSizeBytes: + // NOTE(Zihao): this API cannot reflect the real L2 cache size in both CUDA/AMD GPUs. cl_ulong value; OPENCL_CALL(clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(value), &value, nullptr)); diff --git a/src/runtime/rocm/rocm_device_api.cc b/src/runtime/rocm/rocm_device_api.cc index c7718623806c..c848f38d54a0 100644 --- a/src/runtime/rocm/rocm_device_api.cc +++ b/src/runtime/rocm/rocm_device_api.cc @@ -122,7 +122,7 @@ class ROCMDeviceAPI final : public DeviceAPI { } case kDriverVersion: return; - case kGlobalMemCacheSizeBytes: + case kL2CacheSizeBytes: // Get size of device l2 cache size in bytes. int l2_size; ROCM_CALL(hipDeviceGetAttribute(l2_size, hipDeviceAttributeL2CacheSize, device.device_id)); diff --git a/src/runtime/vulkan/vulkan_device_api.cc b/src/runtime/vulkan/vulkan_device_api.cc index aba3c5e98e0c..fb505e5dcd94 100644 --- a/src/runtime/vulkan/vulkan_device_api.cc +++ b/src/runtime/vulkan/vulkan_device_api.cc @@ -161,8 +161,10 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) break; } - case kGlobalMemCacheSizeBytes: - *rv = 256 * 1024 * 1024; // return 256mb by default + case kL2CacheSizeBytes: + // Vulkan do not have standalone APIs to measure L2 cache size, + // and the vkCmdPipelineBarrier will flush L2 texture cache. 
+ *rv = 0; break; } } From 3e851b103396d924ec1ad56767ae98cd0718c760 Mon Sep 17 00:00:00 2001 From: yzh119 Date: Sun, 16 Jul 2023 20:11:49 +0000 Subject: [PATCH 3/5] rename python api --- python/tvm/_ffi/runtime_ctypes.py | 16 +++++++++++++--- python/tvm/target/target.py | 8 ++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py index 4e40456d04fe..87997b534146 100644 --- a/python/tvm/_ffi/runtime_ctypes.py +++ b/python/tvm/_ffi/runtime_ctypes.py @@ -487,10 +487,20 @@ def driver_version(self): """ return self._GetDeviceAttr(self.device_type, self.device_id, 12) - + @property - def global_mem_cache_size(self): - """TODO(Zihao) + def l2_cache_size_bytes(self): + """Return the size of the device L2 cache in bytes + + Returns + ------- + l2_cache_size_bytes : int or None + The size of the device L2 cache in bytes returned by device runtime API. + + Note + ---- + The value returned by opencl's API is smaller than real device L2 cache size. + Vulkan does not support this attribute. """ return self._GetDeviceAttr(self.device_type, self.device_id, 13) diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py index 57b551da2bd8..0117420c2140 100644 --- a/python/tvm/target/target.py +++ b/python/tvm/target/target.py @@ -243,11 +243,11 @@ def supports_cooperative_matrix(self): @property def features(self): return TargetFeatures(self) - + @property - def global_mem_cache_size(self): - return int(self.attrs.get("global_mem_cache_size", 0)) - + def l2_cache_size_bytes(self): + return int(self.attrs.get("l2_cache_size_bytes", 0)) + def get_kind_attr(self, attr_name): """Get additional attribute about the target kind. 
From bbce2b6be4c27c8ba6b9a36fbaf5204d33b62f50 Mon Sep 17 00:00:00 2001 From: yzh119 Date: Mon, 17 Jul 2023 03:09:24 +0000 Subject: [PATCH 4/5] fix rocm --- python/tvm/_ffi/runtime_ctypes.py | 6 ++++-- src/runtime/rocm/rocm_device_api.cc | 2 +- src/runtime/vulkan/vulkan_device_api.cc | 6 ------ 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py index 87997b534146..7836f4224769 100644 --- a/python/tvm/_ffi/runtime_ctypes.py +++ b/python/tvm/_ffi/runtime_ctypes.py @@ -492,15 +492,17 @@ def driver_version(self): def l2_cache_size_bytes(self): """Return the size of the device L2 cache in bytes + Supported devices include CUDA/ROCM/OpenCL. + Returns ------- l2_cache_size_bytes : int or None The size of the device L2 cache in bytes returned by device runtime API. + Return None if the device does not support this feature. Note ---- - The value returned by opencl's API is smaller than real device L2 cache size. - Vulkan does not support this attribute. + The value returned by opencl's API is smaller than actual device L2 cache size. """ return self._GetDeviceAttr(self.device_type, self.device_id, 13) diff --git a/src/runtime/rocm/rocm_device_api.cc b/src/runtime/rocm/rocm_device_api.cc index c848f38d54a0..c2fb42ee360a 100644 --- a/src/runtime/rocm/rocm_device_api.cc +++ b/src/runtime/rocm/rocm_device_api.cc @@ -125,7 +125,7 @@ class ROCMDeviceAPI final : public DeviceAPI { case kL2CacheSizeBytes: // Get size of device l2 cache size in bytes. 
int l2_size; - ROCM_CALL(hipDeviceGetAttribute(l2_size, hipDeviceAttributeL2CacheSize, device.device_id)); + ROCM_CALL(hipDeviceGetAttribute(&l2_size, hipDeviceAttributeL2CacheSize, device.device_id)); *rv = l2_size; } *rv = value; diff --git a/src/runtime/vulkan/vulkan_device_api.cc b/src/runtime/vulkan/vulkan_device_api.cc index fb505e5dcd94..108741525602 100644 --- a/src/runtime/vulkan/vulkan_device_api.cc +++ b/src/runtime/vulkan/vulkan_device_api.cc @@ -160,12 +160,6 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) *rv = os.str(); break; } - - case kL2CacheSizeBytes: - // Vulkan do not have standalone APIs to measure L2 cache size, - // and the vkCmdPipelineBarrier will flush L2 texture cache. - *rv = 0; - break; } } From 330b07c220f08bf26253eac2d5520186434fea03 Mon Sep 17 00:00:00 2001 From: yzh119 Date: Mon, 17 Jul 2023 07:29:56 +0000 Subject: [PATCH 5/5] fix warning --- src/runtime/metal/metal_device_api.mm | 2 ++ src/runtime/vulkan/vulkan_device_api.cc | 3 +++ 2 files changed, 5 insertions(+) diff --git a/src/runtime/metal/metal_device_api.mm b/src/runtime/metal/metal_device_api.mm index 0f1be0cc95ea..1e60019f144d 100644 --- a/src/runtime/metal/metal_device_api.mm +++ b/src/runtime/metal/metal_device_api.mm @@ -81,6 +81,8 @@ return; case kDriverVersion: return; + case kL2CacheSizeBytes: + return; } }; } diff --git a/src/runtime/vulkan/vulkan_device_api.cc b/src/runtime/vulkan/vulkan_device_api.cc index 108741525602..d67746856cfc 100644 --- a/src/runtime/vulkan/vulkan_device_api.cc +++ b/src/runtime/vulkan/vulkan_device_api.cc @@ -160,6 +160,9 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv) *rv = os.str(); break; } + + case kL2CacheSizeBytes: + break; } }