Skip to content

Commit 3cd6673

Browse files
authored
[Runtime] Allow query of available device memory through DeviceAPI (#16994)
* [Runtime] Allow query of available device memory through DeviceAPI Prior to this commit, the total device memory could be queried through the `DeviceAPI` interface, but the currently available device memory could not. This functionality may be useful for debugging, or for validating available memory prior to model execution. This commit implements the property `Device.available_global_memory`, which queries the `DeviceAttrKind::kAvailableGlobalMemory`. Support for this query, like all device attribute queries, may vary across different backends, and will return `None` for backends that do not support this query. This commit currently implements support for `kAvailableGlobalMemory` only for TVM's CUDA backend. * Updated docstring to fix copy/paste typo * Lint fix, cover all enum values in case/switch * Fix ROCm compilation warning
1 parent afb6416 commit 3cd6673

File tree

7 files changed

+86
-22
lines changed

7 files changed

+86
-22
lines changed

include/tvm/runtime/device_api.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ enum DeviceAttrKind : int {
5151
kDriverVersion = 12,
5252
kL2CacheSizeBytes = 13,
5353
kTotalGlobalMemory = 14,
54+
kAvailableGlobalMemory = 15,
5455
};
5556

5657
#ifdef TVM_KALLOC_ALIGNMENT

python/tvm/_ffi/runtime_ctypes.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,11 +539,25 @@ def total_global_memory(self):
539539
Returns
540540
-------
541541
total_global_memory : int or None
542-
Return the global memory available on device in bytes.
542+
Return the total size of global memory on device in bytes.
543543
Return None if the device does not support this feature.
544544
"""
545545
return self._GetDeviceAttr(self.device_type, self.device_id, 14)
546546

547+
@property
548+
def available_global_memory(self):
549+
"""Return size of the available global memory.
550+
551+
Supported devices include CUDA.
552+
553+
Returns
554+
-------
555+
available_global_memory : int or None
556+
Return the amount of unallocated global memory on device in bytes.
557+
Return None if the device does not support this feature.
558+
"""
559+
return self._GetDeviceAttr(self.device_type, self.device_id, 15)
560+
547561
def texture_spatial_limit(self):
548562
"""Returns limits for textures by spatial dimensions
549563

src/runtime/cuda/cuda_device_api.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,12 @@ class CUDADeviceAPI final : public DeviceAPI {
121121
*rv = total_global_memory;
122122
return;
123123
}
124+
case kAvailableGlobalMemory: {
125+
size_t free_mem, total_mem;
126+
CUDA_CALL(cudaMemGetInfo(&free_mem, &total_mem));
127+
*rv = static_cast<int64_t>(free_mem);
128+
return;
129+
}
124130
}
125131
*rv = value;
126132
}

src/runtime/opencl/opencl_device_api.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,12 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
214214
*rv = static_cast<int64_t>(total_global_memory);
215215
return;
216216
}
217+
218+
case kAvailableGlobalMemory:
219+
// Not currently implemented. Based on
220+
// https://stackoverflow.com/a/3568223, may not be implementable
221+
// at all through OpenCL API.
222+
break;
217223
}
218224
}
219225

src/runtime/rocm/rocm_device_api.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,10 @@ class ROCMDeviceAPI final : public DeviceAPI {
136136
*rv = total_global_memory;
137137
return;
138138
}
139+
140+
case kAvailableGlobalMemory:
141+
// Not currently implemented.
142+
break;
139143
}
140144
*rv = value;
141145
}

src/runtime/vulkan/vulkan_device_api.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,11 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
168168
*rv = device(index).compute_memory_size;
169169
return;
170170
}
171+
172+
case kAvailableGlobalMemory:
173+
// Not currently implemented. Will only be implementable for
174+
// devices that support the VK_EXT_memory_budget extension.
175+
break;
171176
}
172177
}
173178

tests/python/all-platform-minimal-test/test_runtime_ndarray.py

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,33 +16,63 @@
1616
# under the License.
1717
"""Basic runtime enablement test."""
1818

19-
import tvm
20-
from tvm import te
19+
import math
20+
21+
import pytest
2122
import numpy as np
23+
24+
import tvm
2225
import tvm.testing
26+
from tvm import te
27+
28+
dtype = tvm.testing.parameter("uint8", "int8", "uint16", "int16", "uint32", "int32", "float32")
29+
30+
31+
def test_nd_create(target, dev, dtype):
32+
x = np.random.randint(0, 10, size=(3, 4))
33+
x = np.array(x, dtype=dtype)
34+
y = tvm.nd.array(x, device=dev)
35+
z = y.copyto(dev)
36+
assert y.dtype == x.dtype
37+
assert y.shape == x.shape
38+
assert isinstance(y, tvm.nd.NDArray)
39+
np.testing.assert_equal(x, y.numpy())
40+
np.testing.assert_equal(x, z.numpy())
41+
42+
# no need here, just to test usablity
43+
dev.sync()
44+
45+
46+
def test_memory_usage(target, dev, dtype):
47+
available_memory_before = dev.available_global_memory
48+
if available_memory_before is None:
49+
pytest.skip(reason=f"Target '{target}' does not support queries of available memory")
50+
51+
arr = tvm.nd.empty([1024, 1024], dtype=dtype, device=dev)
52+
available_memory_after = dev.available_global_memory
53+
54+
num_elements = math.prod(arr.shape)
55+
element_nbytes = tvm.runtime.DataType(dtype).itemsize()
56+
expected_memory_after = available_memory_before - num_elements * element_nbytes
57+
58+
# Allocations may be padded out to provide alignment, to match a
59+
# page boundary, due to additional device-side bookkeeping
60+
# required by the TVM backend or the driver, etc. Therefore, the
61+
# available memory may decrease by more than the requested amount.
62+
assert available_memory_after <= expected_memory_after
2363

64+
# TVM's NDArray type is a reference-counted handle to the
65+
# underlying reference. After the last reference to an NDArray is
66+
# cleared, the backing allocation will be freed.
67+
del arr
2468

25-
@tvm.testing.uses_gpu
26-
def test_nd_create():
27-
for target, dev in tvm.testing.enabled_targets():
28-
for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32"]:
29-
x = np.random.randint(0, 10, size=(3, 4))
30-
x = np.array(x, dtype=dtype)
31-
y = tvm.nd.array(x, device=dev)
32-
z = y.copyto(dev)
33-
assert y.dtype == x.dtype
34-
assert y.shape == x.shape
35-
assert isinstance(y, tvm.nd.NDArray)
36-
np.testing.assert_equal(x, y.numpy())
37-
np.testing.assert_equal(x, z.numpy())
38-
# no need here, just to test usablity
39-
dev.sync()
69+
assert dev.available_global_memory == available_memory_before
4070

4171

4272
def test_fp16_conversion():
4373
n = 100
4474

45-
for (src, dst) in [("float32", "float16"), ("float16", "float32")]:
75+
for src, dst in [("float32", "float16"), ("float16", "float32")]:
4676
A = te.placeholder((n,), dtype=src)
4777
B = te.compute((n,), lambda i: A[i].astype(dst))
4878

@@ -66,6 +96,4 @@ def test_dtype():
6696

6797

6898
if __name__ == "__main__":
69-
test_nd_create()
70-
test_fp16_conversion()
71-
test_dtype()
99+
tvm.testing.main()

0 commit comments

Comments
 (0)