Skip to content

Commit afe237c

Browse files
authored
cuda_async_memory_resource built on cudaMallocAsync (rapidsai#676)
This PR adds a new device memory resource, `cuda_async_memory_resource`, which uses `cudaMallocAsync`. Closes rapidsai#671 Merging this also depends on CI support for CUDA 11.2 TODO: - [x] Extend tests and benchmarks to exercise the new resource - [x] Implement `get_mem_info` correctly. - [x] ~Consider a constructor which takes a CUDA memory pool handle to use (currently uses the default pool)~ Edit: leave this for a followup because pools have multiple parameters and requirements aren't clear. - [ ] Test on a system without cudaMallocAsync support to verify that compiling with CUDA 11.2 but running on an earlier version fails gracefully Authors: - Mark Harris (@harrism) Approvers: - Jake Hemstad (@jrhemstad) - Keith Kraus (@kkraus14) - Leo Fang (@leofang) - Rong Ou (@rongou) URL: rapidsai#676
1 parent e899c68 commit afe237c

File tree

8 files changed

+218
-8
lines changed

8 files changed

+218
-8
lines changed

benchmarks/random_allocations/random_allocations.cpp

+14-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
1818

1919
#include <rmm/mr/device/arena_memory_resource.hpp>
2020
#include <rmm/mr/device/binning_memory_resource.hpp>
21+
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
2122
#include <rmm/mr/device/cuda_memory_resource.hpp>
2223
#include <rmm/mr/device/device_memory_resource.hpp>
2324
#include <rmm/mr/device/owning_wrapper.hpp>
@@ -157,6 +158,8 @@ void uniform_random_allocations(rmm::mr::device_memory_resource& mr,
157158
/// MR factory functions
158159
inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
159160

161+
inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); }
162+
160163
inline auto make_pool()
161164
{
162165
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
@@ -235,6 +238,9 @@ void declare_benchmark(std::string name)
235238
{
236239
if (name == "cuda")
237240
BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_mr, &make_cuda)->Apply(benchmark_range);
241+
if (name == "cuda_async")
242+
BENCHMARK_CAPTURE(BM_RandomAllocations, cuda_async_mr, &make_cuda_async)
243+
->Apply(benchmark_range);
238244
else if (name == "binning")
239245
BENCHMARK_CAPTURE(BM_RandomAllocations, binning_mr, &make_binning)->Apply(benchmark_range);
240246
else if (name == "pool")
@@ -288,6 +294,9 @@ int main(int argc, char** argv)
288294
std::map<std::string, MRFactoryFunc> const funcs({{"arena", &make_arena},
289295
{"binning", &make_binning},
290296
{"cuda", &make_cuda},
297+
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
298+
{"cuda_async", &make_cuda_async},
299+
#endif
291300
{"pool", &make_pool}});
292301
auto resource = args["resource"].as<std::string>();
293302

@@ -309,7 +318,11 @@ int main(int argc, char** argv)
309318
std::string mr_name = args["resource"].as<std::string>();
310319
declare_benchmark(mr_name);
311320
} else {
321+
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
322+
std::array<std::string, 5> mrs{"pool", "binning", "arena", "cuda_async", "cuda"};
323+
#else
312324
std::array<std::string, 4> mrs{"pool", "binning", "arena", "cuda"};
325+
#endif
313326
std::for_each(std::cbegin(mrs), std::cend(mrs), [](auto const& s) { declare_benchmark(s); });
314327
}
315328
::benchmark::RunSpecifiedBenchmarks();

include/rmm/detail/error.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ class out_of_range : public std::out_of_range {
130130
#define GET_RMM_FAIL_MACRO(_1, _2, NAME, ...) NAME
131131
#define RMM_FAIL_2(_what, _exception_type) \
132132
throw _exception_type{"RMM failure at:" __FILE__ ":" RMM_STRINGIFY(__LINE__) ": " _what};
133-
#define RMM_FAIL_1(_what) RMM_FAIL_2(_call, rmm::logic_error)
133+
#define RMM_FAIL_1(_what) RMM_FAIL_2(_what, rmm::logic_error)
134134

135135
/**
136136
* @brief Error checking macro for CUDA runtime API functions.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/*
2+
* Copyright (c) 2021, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include <rmm/detail/error.hpp>
19+
#include <rmm/mr/device/device_memory_resource.hpp>
20+
#include "rmm/cuda_stream_view.hpp"
21+
22+
#include <cuda_runtime_api.h>
23+
24+
#if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync
25+
#define RMM_CUDA_MALLOC_ASYNC_SUPPORT
26+
#endif
27+
28+
namespace rmm {
29+
namespace mr {
30+
31+
/**
32+
* @brief `device_memory_resource` derived class that uses `cudaMallocAsync`/`cudaFreeAsync` for
33+
* allocation/deallocation.
34+
*/
35+
class cuda_async_memory_resource final : public device_memory_resource {
36+
public:
37+
/**
38+
* @brief Default constructor
39+
*
40+
* @throws rmm::runtime_error if the CUDA version does not support `cudaMallocAsync`
41+
*/
42+
cuda_async_memory_resource()
43+
{
44+
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
45+
// Check if cudaMallocAsync Memory pool supported
46+
int device{0};
47+
RMM_CUDA_TRY(cudaGetDevice(&device));
48+
int v{0};
49+
auto e = cudaDeviceGetAttribute(&v, cudaDevAttrMemoryPoolsSupported, device);
50+
RMM_EXPECTS(e == cudaSuccess && v == 1,
51+
"cudaMallocAsync not supported with this CUDA driver/runtime version");
52+
#else
53+
RMM_FAIL("cudaMallocAsync not supported");
54+
#endif
55+
}
56+
57+
~cuda_async_memory_resource() = default;
58+
cuda_async_memory_resource(cuda_async_memory_resource const&) = default;
59+
cuda_async_memory_resource(cuda_async_memory_resource&&) = default;
60+
cuda_async_memory_resource& operator=(cuda_async_memory_resource const&) = default;
61+
cuda_async_memory_resource& operator=(cuda_async_memory_resource&&) = default;
62+
63+
/**
64+
* @brief Query whether the resource supports use of non-null CUDA streams for
65+
* allocation/deallocation. `cuda_memory_resource` does not support streams.
66+
*
67+
* @returns bool true
68+
*/
69+
bool supports_streams() const noexcept override { return true; }
70+
71+
/**
72+
* @brief Query whether the resource supports the get_mem_info API.
73+
*
74+
* @return true
75+
*/
76+
bool supports_get_mem_info() const noexcept override { return false; }
77+
78+
private:
79+
/**
80+
* @brief Allocates memory of size at least `bytes` using cudaMalloc.
81+
*
82+
* The returned pointer has at least 256B alignment.
83+
*
84+
* @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
85+
*
86+
* @param bytes The size, in bytes, of the allocation
87+
* @return void* Pointer to the newly allocated memory
88+
*/
89+
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
90+
{
91+
void* p{nullptr};
92+
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
93+
if (bytes > 0) { RMM_CUDA_TRY(cudaMallocAsync(&p, bytes, stream.value()), rmm::bad_alloc); }
94+
#else
95+
(void)bytes;
96+
(void)stream;
97+
#endif
98+
return p;
99+
}
100+
101+
/**
102+
* @brief Deallocate memory pointed to by \p p.
103+
*
104+
* @throws Nothing.
105+
*
106+
* @param p Pointer to be deallocated
107+
*/
108+
void do_deallocate(void* p, std::size_t, rmm::cuda_stream_view stream) override
109+
{
110+
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
111+
if (p != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(p, stream.value())); }
112+
#else
113+
(void)p;
114+
(void)stream;
115+
#endif
116+
}
117+
118+
/**
119+
* @brief Compare this resource to another.
120+
*
121+
* @throws Nothing.
122+
*
123+
* @param other The other resource to compare to
124+
* @return true If the two resources are equivalent
125+
* @return false If the two resources are not equal
126+
*/
127+
bool do_is_equal(device_memory_resource const& other) const noexcept override
128+
{
129+
return dynamic_cast<cuda_async_memory_resource const*>(&other) != nullptr;
130+
}
131+
132+
/**
133+
* @brief Get free and available memory for memory resource
134+
*
135+
* @throws `rmm::cuda_error` if unable to retrieve memory info.
136+
*
137+
* @return std::pair contaiing free_size and total_size of memory
138+
*/
139+
std::pair<size_t, size_t> do_get_mem_info(rmm::cuda_stream_view) const override
140+
{
141+
return std::make_pair(0, 0);
142+
}
143+
};
144+
145+
} // namespace mr
146+
} // namespace rmm

tests/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ ConfigureTest(DEVICE_MR_TEST "${DEVICE_MR_TEST_SRC}")
9898
set(POOL_MR_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/pool_mr_tests.cpp")
9999
ConfigureTest(POOL_MR_TEST "${POOL_MR_TEST_SRC}")
100100

101+
# cuda_async mr tests
102+
set(CUDA_ASYNC_MR_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/cuda_async_mr_tests.cpp")
103+
ConfigureTest(CUDA_ASYNC_MR_TEST "${CUDA_ASYNC_MR_TEST_SRC}")
104+
101105
# thrust allocator tests
102106

103107
set(THRUST_ALLOCATOR_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/mr/device/thrust_allocator_tests.cu")
+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright (c) 2021, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <rmm/detail/error.hpp>
18+
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
19+
20+
#include <gtest/gtest.h>
21+
22+
namespace rmm {
23+
namespace test {
24+
namespace {
25+
26+
using cuda_async_mr = rmm::mr::cuda_async_memory_resource;
27+
28+
TEST(PoolTest, ThrowIfNotSupported)
29+
{
30+
auto construct_mr = []() { cuda_async_mr mr; };
31+
#ifndef RMM_CUDA_MALLOC_ASYNC_SUPPORT
32+
EXPECT_THROW(construct_mr(), rmm::logic_error);
33+
#else
34+
EXPECT_NO_THROW(construct_mr());
35+
#endif
36+
}
37+
38+
} // namespace
39+
} // namespace test
40+
} // namespace rmm

tests/mr/device/mr_test.hpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
2222
#include <rmm/cuda_stream_view.hpp>
2323
#include <rmm/mr/device/arena_memory_resource.hpp>
2424
#include <rmm/mr/device/binning_memory_resource.hpp>
25+
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
2526
#include <rmm/mr/device/cuda_memory_resource.hpp>
2627
#include <rmm/mr/device/device_memory_resource.hpp>
2728
#include <rmm/mr/device/fixed_size_memory_resource.hpp>
@@ -224,6 +225,8 @@ struct mr_test : public ::testing::TestWithParam<mr_factory> {
224225
/// MR factory functions
225226
inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
226227

228+
inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); }
229+
227230
inline auto make_managed() { return std::make_shared<rmm::mr::managed_memory_resource>(); }
228231

229232
inline auto make_pool()

tests/mr/device/mr_tests.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, NVIDIA CORPORATION.
2+
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -14,9 +14,10 @@
1414
* limitations under the License.
1515
*/
1616

17-
#include <rmm/mr/device/per_device_resource.hpp>
1817
#include "mr_test.hpp"
1918

19+
#include <rmm/mr/device/per_device_resource.hpp>
20+
2021
#include <gtest/gtest.h>
2122

2223
namespace rmm {
@@ -26,6 +27,9 @@ namespace {
2627
INSTANTIATE_TEST_CASE_P(ResourceTests,
2728
mr_test,
2829
::testing::Values(mr_factory{"CUDA", &make_cuda},
30+
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
31+
mr_factory{"CUDA_Async", &make_cuda_async},
32+
#endif
2933
mr_factory{"Managed", &make_managed},
3034
mr_factory{"Pool", &make_pool},
3135
mr_factory{"Arena", &make_arena},

tests/mr/device/pool_mr_tests.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2019, NVIDIA CORPORATION.
2+
* Copyright (c) 2021, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -14,14 +14,14 @@
1414
* limitations under the License.
1515
*/
1616

17+
#include <rmm/detail/aligned.hpp>
1718
#include <rmm/detail/error.hpp>
1819
#include <rmm/device_buffer.hpp>
1920
#include <rmm/mr/device/cuda_memory_resource.hpp>
2021
#include <rmm/mr/device/device_memory_resource.hpp>
22+
#include <rmm/mr/device/limiting_resource_adaptor.hpp>
2123
#include <rmm/mr/device/per_device_resource.hpp>
2224
#include <rmm/mr/device/pool_memory_resource.hpp>
23-
#include "rmm/detail/aligned.hpp"
24-
#include "rmm/mr/device/limiting_resource_adaptor.hpp"
2525

2626
#include <gtest/gtest.h>
2727

0 commit comments

Comments
 (0)