/*
 * Copyright (c) 2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
| 20 | +#include "rmm/cuda_stream_view.hpp" |

#include <cuda_runtime_api.h>

#if CUDART_VERSION >= 11020  // 11.2 introduced cudaMallocAsync
#define RMM_CUDA_MALLOC_ASYNC_SUPPORT
#endif

namespace rmm {
namespace mr {

/**
 * @brief `device_memory_resource` derived class that uses `cudaMallocAsync`/`cudaFreeAsync` for
 * allocation/deallocation.
 */
class cuda_async_memory_resource final : public device_memory_resource {
 public:
  /**
   * @brief Default constructor
   *
   * @throws rmm::logic_error if the CUDA runtime/driver version does not support
   * `cudaMallocAsync`
   */
  cuda_async_memory_resource()
  {
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
    // Check whether the current device supports cudaMallocAsync memory pools
    int device{0};
    RMM_CUDA_TRY(cudaGetDevice(&device));
    int v{0};
    auto e = cudaDeviceGetAttribute(&v, cudaDevAttrMemoryPoolsSupported, device);
    RMM_EXPECTS(e == cudaSuccess && v == 1,
                "cudaMallocAsync not supported with this CUDA driver/runtime version");
#else
    RMM_FAIL("cudaMallocAsync not supported");
#endif
  }

  ~cuda_async_memory_resource() = default;
  cuda_async_memory_resource(cuda_async_memory_resource const&) = default;
  cuda_async_memory_resource(cuda_async_memory_resource&&) = default;
  cuda_async_memory_resource& operator=(cuda_async_memory_resource const&) = default;
  cuda_async_memory_resource& operator=(cuda_async_memory_resource&&) = default;

  /**
   * @brief Query whether the resource supports use of non-null CUDA streams for
   * allocation/deallocation. `cuda_async_memory_resource` supports streams.
   *
   * @returns true
   */
  bool supports_streams() const noexcept override { return true; }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return false
   */
  bool supports_get_mem_info() const noexcept override { return false; }

 private:
  /**
   * @brief Allocates memory of size at least `bytes` using `cudaMallocAsync`.
   *
   * The returned pointer has at least 256B alignment.
   *
   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
   *
   * @param bytes The size, in bytes, of the allocation
   * @param stream Stream on which to perform the allocation
   * @return void* Pointer to the newly allocated memory
   */
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    void* p{nullptr};
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
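    // Stream-ordered allocation: the returned pointer may be used by work enqueued on `stream`
    // after this call (or on streams/threads that synchronize with it) without any device-wide
    // synchronization.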
    if (bytes > 0) { RMM_CUDA_TRY(cudaMallocAsync(&p, bytes, stream.value()), rmm::bad_alloc); }
#else
    (void)bytes;
    (void)stream;
#endif
    return p;
  }

  /**
   * @brief Deallocate memory pointed to by \p p.
   *
   * @throws Nothing.
   *
   * @param p Pointer to be deallocated
   * @param stream Stream on which to perform the deallocation
   */
  void do_deallocate(void* p, std::size_t, rmm::cuda_stream_view stream) override
  {
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
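    // Stream-ordered free: the memory is returned to the pool once all work enqueued on `stream`
    // before this call has completed, and may then be reused by later stream-ordered allocations.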
    if (p != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(p, stream.value())); }
#else
    (void)p;
    (void)stream;
#endif
  }

  /**
   * @brief Compare this resource to another.
   *
   * @throws Nothing.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   */
  bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    return dynamic_cast<cuda_async_memory_resource const*>(&other) != nullptr;
  }

  /**
   * @brief Get free and available memory for memory resource.
   *
   * @throws Nothing. This resource does not support the get_mem_info API, so the result is
   * always `{0, 0}`.
   *
   * @return std::pair containing free_size and total_size of memory
   */
  std::pair<size_t, size_t> do_get_mem_info(rmm::cuda_stream_view) const override
  {
    return std::make_pair(0, 0);
  }
};

}  // namespace mr
}  // namespace rmm
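
// Example usage (illustrative sketch only, not part of this header): constructing the resource,
// installing it as the current device resource, and making stream-ordered allocations. Assumes a
// CUDA 11.2+ runtime and a driver that supports cudaMallocAsync; the includes below are the
// standard RMM headers for per-device resources and streams.
//
//   #include <rmm/cuda_stream.hpp>
//   #include <rmm/mr/device/cuda_async_memory_resource.hpp>
//   #include <rmm/mr/device/per_device_resource.hpp>
//
//   void example()
//   {
//     rmm::mr::cuda_async_memory_resource mr;     // throws if cudaMallocAsync is unavailable
//     rmm::mr::set_current_device_resource(&mr);  // route subsequent allocations through it
//
//     rmm::cuda_stream stream;                       // non-default stream
//     void* p = mr.allocate(1024, stream.view());    // stream-ordered allocation
//     mr.deallocate(p, 1024, stream.view());         // stream-ordered deallocation
//   }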