Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add python wrapper for system memory resource #1605

Merged
merged 16 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions python/rmm/rmm/_lib/memory_resource.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ cdef class CudaMemoryResource(DeviceMemoryResource):
cdef class ManagedMemoryResource(DeviceMemoryResource):
pass

# Declaration of the Python wrapper around rmm::mr::system_memory_resource;
# implementation lives in memory_resource.pyx.
cdef class SystemMemoryResource(DeviceMemoryResource):
    pass

cdef class CudaAsyncMemoryResource(DeviceMemoryResource):
pass

Expand All @@ -77,6 +80,9 @@ cdef class CallbackMemoryResource(DeviceMemoryResource):
cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor):
pass

# Declaration of the Python wrapper around rmm::mr::sam_headroom_resource_adaptor;
# implementation lives in memory_resource.pyx.
cdef class SamHeadroomResourceAdaptor(UpstreamResourceAdaptor):
    pass

cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor):
cdef object _log_file_name
cpdef get_file_name(self)
Expand Down
103 changes: 89 additions & 14 deletions python/rmm/rmm/_lib/memory_resource.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ cdef extern from "rmm/mr/device/managed_memory_resource.hpp" \
cdef cppclass managed_memory_resource(device_memory_resource):
managed_memory_resource() except +

# Binding for the default-constructible C++ rmm::mr::system_memory_resource.
cdef extern from "rmm/mr/device/system_memory_resource.hpp" \
        namespace "rmm::mr" nogil:
    cdef cppclass system_memory_resource(device_memory_resource):
        system_memory_resource() except +

cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
namespace "rmm::mr" nogil:

Expand Down Expand Up @@ -170,6 +175,13 @@ cdef extern from "rmm/mr/device/limiting_resource_adaptor.hpp" \
size_t get_allocated_bytes() except +
size_t get_allocation_limit() except +

# Binding for the C++ rmm::mr::sam_headroom_resource_adaptor, templated on the
# upstream resource type; constructed from an upstream pointer plus a headroom
# size in bytes.
cdef extern from "rmm/mr/device/sam_headroom_resource_adaptor.hpp" \
        namespace "rmm::mr" nogil:
    cdef cppclass sam_headroom_resource_adaptor[Upstream](device_memory_resource):
        sam_headroom_resource_adaptor(
            Upstream* upstream_mr,
            size_t headroom) except +

cdef extern from "rmm/mr/device/logging_resource_adaptor.hpp" \
namespace "rmm::mr" nogil:
cdef cppclass logging_resource_adaptor[Upstream](device_memory_resource):
Expand Down Expand Up @@ -366,6 +378,20 @@ cdef class ManagedMemoryResource(DeviceMemoryResource):
pass


cdef class SystemMemoryResource(DeviceMemoryResource):
    def __cinit__(self):
        # Take ownership of a freshly constructed C++ resource.
        self.c_obj.reset(new system_memory_resource())

    def __init__(self):
        """
        Memory resource that uses ``malloc``/``free`` for
        allocation/deallocation.
        """


cdef class PoolMemoryResource(UpstreamResourceAdaptor):

def __cinit__(
Expand Down Expand Up @@ -723,6 +749,43 @@ cdef class LimitingResourceAdaptor(UpstreamResourceAdaptor):
self.c_obj.get())
)[0].get_allocation_limit()

cdef class SamHeadroomResourceAdaptor(UpstreamResourceAdaptor):
    def __cinit__(
        self,
        DeviceMemoryResource upstream_mr,
        size_t headroom
    ):
        # The C++ adaptor is instantiated with system_memory_resource as its
        # Upstream template parameter, so only a SystemMemoryResource wrapper
        # is a valid upstream here.
        if not isinstance(upstream_mr, SystemMemoryResource):
            raise TypeError(
                "SamHeadroomResourceAdaptor requires a SystemMemoryResource"
            )
        # Cast is safe: the isinstance check above guarantees the wrapped
        # C++ object is a system_memory_resource.
        cdef system_memory_resource *sys_mr =\
            <system_memory_resource *> upstream_mr.get_mr()
        self.c_obj.reset(
            new sam_headroom_resource_adaptor[system_memory_resource](
                sys_mr,
                headroom
            )
        )

    def __init__(
        self,
        DeviceMemoryResource upstream_mr,
        size_t headroom
    ):
        """
        Memory resource that adapts a system memory resource to allocate
        memory while reserving a headroom of GPU memory.

        Parameters
        ----------
        upstream_mr : DeviceMemoryResource
            The upstream memory resource. Must be a ``SystemMemoryResource``;
            a ``TypeError`` is raised otherwise.
        headroom : size_t
            Size in bytes of the reserved GPU memory headroom.
        """
        pass


cdef class LoggingResourceAdaptor(UpstreamResourceAdaptor):
def __cinit__(
Expand Down Expand Up @@ -1026,32 +1089,27 @@ cdef _per_device_mrs = defaultdict(CudaMemoryResource)
cpdef void _initialize(
bool pool_allocator=False,
bool managed_memory=False,
bool system_memory=False,
object initial_pool_size=None,
object maximum_pool_size=None,
object system_memory_headroom_size=None,
object devices=0,
bool logging=False,
object log_file_name=None,
) except *:
"""
Initializes RMM library using the options passed
"""
if managed_memory and system_memory:
raise ValueError("managed_memory and system_memory cannot both be True")

if managed_memory:
upstream = ManagedMemoryResource
elif system_memory:
upstream = SystemMemoryResource
else:
upstream = CudaMemoryResource

if pool_allocator:
typ = PoolMemoryResource
args = (upstream(),)
kwargs = dict(
initial_pool_size=initial_pool_size,
maximum_pool_size=maximum_pool_size
)
else:
typ = upstream
args = ()
kwargs = {}

cdef DeviceMemoryResource mr
cdef int original_device

Expand All @@ -1077,13 +1135,30 @@ cpdef void _initialize(
for device in devices:
setDevice(device)

base_mr = upstream()

if system_memory and system_memory_headroom_size is not None:
base_mr = SamHeadroomResourceAdaptor(
base_mr,
system_memory_headroom_size
)
else:
base_mr = upstream()

if pool_allocator:
base_mr = PoolMemoryResource(
base_mr,
initial_pool_size=initial_pool_size,
maximum_pool_size=maximum_pool_size
)

if logging:
mr = LoggingResourceAdaptor(
typ(*args, **kwargs),
base_mr,
log_file_name
)
else:
mr = typ(*args, **kwargs)
mr = base_mr

set_per_device_resource(device, mr)

Expand Down
4 changes: 4 additions & 0 deletions python/rmm/rmm/mr.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
ManagedMemoryResource,
PoolMemoryResource,
PrefetchResourceAdaptor,
SamHeadroomResourceAdaptor,
StatisticsResourceAdaptor,
SystemMemoryResource,
TrackingResourceAdaptor,
UpstreamResourceAdaptor,
_flush_logs,
Expand Down Expand Up @@ -54,7 +56,9 @@
"ManagedMemoryResource",
"PoolMemoryResource",
"PrefetchResourceAdaptor",
"SamHeadroomResourceAdaptor",
"StatisticsResourceAdaptor",
"SystemMemoryResource",
"TrackingResourceAdaptor",
"FailureCallbackResourceAdaptor",
"UpstreamResourceAdaptor",
Expand Down
19 changes: 18 additions & 1 deletion python/rmm/rmm/rmm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,8 +27,10 @@ def __init__(self, errcode, msg):
def reinitialize(
pool_allocator=False,
managed_memory=False,
system_memory=False,
initial_pool_size=None,
maximum_pool_size=None,
system_memory_headroom_size=None,
devices=0,
logging=False,
log_file_name=None,
Expand All @@ -45,6 +47,8 @@ def reinitialize(
performance.
managed_memory : bool, default False
If True, use managed memory for device memory allocation
system_memory : bool, default False
If True, use system allocated memory for device memory allocation
initial_pool_size : int, default None
When `pool_allocator` is True, this indicates the initial pool size in
bytes. By default, 1/2 of the total GPU memory is used.
Expand All @@ -53,6 +57,12 @@ def reinitialize(
When `pool_allocator` is True, this indicates the maximum pool size in
bytes. By default, the total available memory on the GPU is used.
When `pool_allocator` is False, this argument is ignored if provided.
system_memory_headroom_size : int, default None
When `system_memory` is True, this indicates the headroom size in bytes
to be reserved for CUDA calls not using system memory. By default, the
headroom is 0 and the total available memory on the GPU can be used by
system memory.
When `system_memory` is False, this argument is ignored if provided.
devices : int or List[int], default 0
GPU device IDs to register. By default registers only GPU 0.
logging : bool, default False
Expand All @@ -75,14 +85,21 @@ def reinitialize(
with device ID ``1``. Use `rmm.get_log_filenames()` to get the log file
names corresponding to each device.
"""
if managed_memory and system_memory:
raise ValueError(
"managed_memory and system_memory cannot both be True"
)

for func, args, kwargs in reversed(_reinitialize_hooks):
func(*args, **kwargs)

mr._initialize(
pool_allocator=pool_allocator,
managed_memory=managed_memory,
system_memory=system_memory,
initial_pool_size=initial_pool_size,
maximum_pool_size=maximum_pool_size,
system_memory_headroom_size=system_memory_headroom_size,
devices=devices,
logging=logging,
log_file_name=log_file_name,
Expand Down
48 changes: 46 additions & 2 deletions python/rmm/rmm/tests/test_rmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
_runtime_version >= 11020
)

# Whether the current device reports pageable-memory-access support
# (cudaDevAttrPageableMemoryAccess); used to skip system-memory tests.
# NOTE(review): the attribute value is an int used for truthiness here —
# nonzero means supported.
_SYSTEM_MEMORY_SUPPORTED = rmm._cuda.gpu.getDeviceAttribute(
    cudart.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess,
    rmm._cuda.gpu.getDevice(),
)


def array_tester(dtype, nelem, alloc):
# data
Expand Down Expand Up @@ -91,6 +96,27 @@ def test_rmm_modes(dtype, nelem, alloc, managed, pool):
array_tester(dtype, nelem, alloc)


@pytest.mark.skipif(
    not _SYSTEM_MEMORY_SUPPORTED,
    reason="System memory not supported",
)
@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
@pytest.mark.parametrize(
    "system, pool",
    [(False, False), (False, True), (True, False), (True, True)],
)
def test_rmm_modes_system_memory(dtype, nelem, alloc, system, pool):
    # Allocation must work with the resource configured before this test.
    assert rmm.is_initialized()
    array_tester(dtype, nelem, alloc)

    # Switch to the requested system-memory / pool combination and verify
    # allocation still works.
    rmm.reinitialize(pool_allocator=pool, system_memory=system)
    assert rmm.is_initialized()
    array_tester(dtype, nelem, alloc)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
Expand Down Expand Up @@ -410,7 +436,12 @@ def test_pool_memory_resource(dtype, nelem, alloc):
[
lambda: rmm.mr.CudaMemoryResource(),
lambda: rmm.mr.ManagedMemoryResource(),
],
]
+ (
[lambda: rmm.mr.SystemMemoryResource()]
if _SYSTEM_MEMORY_SUPPORTED
else []
),
)
def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream):
mr = rmm.mr.FixedSizeMemoryResource(
Expand All @@ -432,7 +463,12 @@ def test_fixed_size_memory_resource(dtype, nelem, alloc, upstream):
lambda: rmm.mr.PoolMemoryResource(
rmm.mr.CudaMemoryResource(), 1 << 20
),
],
]
+ (
[lambda: rmm.mr.SystemMemoryResource()]
if _SYSTEM_MEMORY_SUPPORTED
else []
),
)
def test_binning_memory_resource(dtype, nelem, alloc, upstream_mr):
upstream = upstream_mr()
Expand Down Expand Up @@ -476,6 +512,14 @@ def test_reinitialize_initial_pool_size_gt_max():
assert "Initial pool size exceeds the maximum pool size" in str(e.value)


def test_reinitialize_both_managed_and_system():
    # managed_memory and system_memory are mutually exclusive options.
    with pytest.raises(
        ValueError,
        match="managed_memory and system_memory cannot both be True",
    ):
        rmm.reinitialize(managed_memory=True, system_memory=True)


@pytest.mark.parametrize("dtype", _dtypes)
@pytest.mark.parametrize("nelem", _nelems)
@pytest.mark.parametrize("alloc", _allocs)
Expand Down
Loading