|
14 | 14 | * limitations under the License. |
15 | 15 | */ |
16 | 16 | #include "ub_allocator.h" |
| 17 | +#include "tensorrt_llm/common/opUtils.h" |
| 18 | +#include <set> |
| 19 | +#include <stdexcept> |
17 | 20 |
|
18 | 21 | namespace tensorrt_llm::runtime::ub |
19 | 22 | { |
20 | 23 | UserBufferAllocator& UserBufferAllocator::Instance() |
21 | 24 | { |
22 | | - static UserBufferAllocator _; |
23 | | - return _; |
| 25 | + if (use_nccl_symmetric) |
| 26 | + { |
| 27 | + static NCCLUserBufferAllocator _; |
| 28 | + return _; |
| 29 | + } |
| 30 | + else |
| 31 | + { |
| 32 | + static UserBufferAllocator _; |
| 33 | + return _; |
| 34 | + } |
24 | 35 | } |
25 | 36 |
|
26 | | -void UserBufferAllocator::initialize(tensorrt_llm::runtime::WorldConfig const& world_config) |
| 37 | +void UserBufferAllocator::initialize(tensorrt_llm::runtime::WorldConfig const& worldConfig) |
27 | 38 | { |
28 | | - if (!is_initialized()) |
| 39 | + if (!isInitialized()) |
29 | 40 | { |
30 | | - ub_comm_ = nullptr; |
31 | | - world_config_ = world_config; |
32 | | - create_communicator_grouped2(&ub_comm_, world_config_); |
33 | | - TLLM_CHECK(ub_comm_ != nullptr); |
34 | | - is_initialized_ = true; |
| 41 | + mUbComm = nullptr; |
| 42 | + mWorldConfig = worldConfig; |
| 43 | + create_communicator_grouped2(&mUbComm, worldConfig); |
| 44 | + TLLM_CHECK(mUbComm != nullptr); |
| 45 | + mIsInitialized = true; |
35 | 46 | } |
36 | 47 | } |
37 | 48 |
|
38 | | -bool UserBufferAllocator::is_initialized() |
| 49 | +bool UserBufferAllocator::isInitialized() |
39 | 50 | { |
40 | | - return is_initialized_; |
| 51 | + return mIsInitialized; |
41 | 52 | } |
42 | 53 |
|
43 | | -UBBuffer UserBufferAllocator::register_ub_buffer(size_t bytes) |
| 54 | +UBBuffer UserBufferAllocator::registerUBBuffer(size_t bytes) |
44 | 55 | { |
45 | | - TLLM_CHECK(is_initialized()); |
| 56 | + TLLM_CHECK(isInitialized()); |
46 | 57 | void* addr = nullptr; |
47 | 58 | int handle = -1; |
48 | | - handle = register_user_buffer_collective((void**) &addr, bytes, ub_comm_); |
| 59 | + handle = register_user_buffer_collective((void**) &addr, bytes, mUbComm); |
49 | 60 | return {addr, handle, bytes}; |
50 | 61 | } |
51 | 62 |
|
52 | 63 | UBBuffer UserBufferAllocator::allocate(size_t bytes) |
53 | 64 | { |
54 | | - TLLM_CHECK(is_initialized()); |
55 | | - auto ub_buffer = register_ub_buffer(bytes); |
| 65 | + TLLM_CHECK(isInitialized()); |
| 66 | + auto ub_buffer = registerUBBuffer(bytes); |
56 | 67 | TLLM_CHECK(!ub_buffer.invalid()); |
57 | | - buffers_.push_back(ub_buffer); |
| 68 | + mBuffers.push_back(ub_buffer); |
58 | 69 | return ub_buffer; |
59 | 70 | } |
60 | 71 |
|
61 | 72 | void UserBufferAllocator::deallocate(void* addr) {} |
62 | 73 |
|
63 | 74 | UBBuffer UserBufferAllocator::get(int idx) |
64 | 75 | { |
65 | | - TLLM_CHECK(is_initialized() && idx < buffers_.size() && !buffers_[idx].invalid()); |
66 | | - return buffers_[idx]; |
| 76 | + TLLM_CHECK(isInitialized() && idx < mBuffers.size() && !mBuffers[idx].invalid()); |
| 77 | + return mBuffers[idx]; |
67 | 78 | } |
68 | 79 |
|
69 | 80 | communicator* UserBufferAllocator::comm() |
70 | 81 | { |
71 | | - TLLM_CHECK(is_initialized()); |
72 | | - return ub_comm_; |
| 82 | + TLLM_CHECK(isInitialized()); |
| 83 | + return mUbComm; |
| 84 | +} |
| 85 | + |
| 86 | +void NCCLUserBufferAllocator::initialize(tensorrt_llm::runtime::WorldConfig const& worldConfig) |
| 87 | +{ |
| 88 | + if (!isInitialized()) |
| 89 | + { |
| 90 | + TLLM_LOG_INFO("Initializing NCCLUserBufferAllocator"); |
| 91 | + std::set<int> group; |
| 92 | + for (int i = 0; i < worldConfig.getSize(); i++) |
| 93 | + { |
| 94 | + group.insert(i); |
| 95 | + } |
| 96 | + mComm = getComm(group); |
| 97 | + mIsInitialized = true; |
| 98 | + } |
73 | 99 | } |
| 100 | + |
| 101 | +UBBuffer NCCLUserBufferAllocator::registerUBBuffer(size_t bytes) |
| 102 | +{ |
| 103 | + TLLM_CHECK(isInitialized()); |
| 104 | + UBBuffer ub_buffer; |
| 105 | + |
| 106 | + auto& ncclHelper = getNCCLHelper(); |
| 107 | + if (!ncclHelper.isLoaded()) |
| 108 | + { |
| 109 | + TLLM_THROW("NCCL library could not be loaded for dynamic symbol access"); |
| 110 | + } |
| 111 | + |
| 112 | + auto ncclMemAllocFunc = ncclHelper.getNCCLMemAlloc(); |
| 113 | + auto ncclCommWindowRegisterFunc = ncclHelper.getNCCLCommWindowRegister(); |
| 114 | + |
| 115 | + NCCLCHECK(ncclMemAllocFunc(&ub_buffer.addr, bytes)); |
| 116 | + NCCLCHECK(ncclCommWindowRegisterFunc((*mComm), ub_buffer.addr, bytes, &ub_buffer.window, NCCL_WIN_COLL_SYMMETRIC)); |
| 117 | + ub_buffer.handle = 5; |
| 118 | + ub_buffer.size = bytes; |
| 119 | + return ub_buffer; |
| 120 | +} |
| 121 | + |
| 122 | +// Static member definitions |
| 123 | +std::unique_ptr<NCCLHelper> NCCLUserBufferAllocator::mNCCLHelper = nullptr; |
| 124 | + |
| 125 | +NCCLHelper& NCCLUserBufferAllocator::getNCCLHelper() |
| 126 | +{ |
| 127 | + if (!mNCCLHelper) |
| 128 | + { |
| 129 | + mNCCLHelper = std::make_unique<NCCLHelper>(); |
| 130 | + } |
| 131 | + return *mNCCLHelper; |
| 132 | +} |
| 133 | + |
| 134 | +// NCCLHelper implementation |
| 135 | +NCCLHelper::NCCLHelper() |
| 136 | + : mLibraryHandle(nullptr) |
| 137 | + , mNCCLCommWindowRegister(nullptr) |
| 138 | + , mNCCLMemAlloc(nullptr) |
| 139 | + , mIsLoaded(false) |
| 140 | +{ |
| 141 | + loadNCCLLibrary(); |
| 142 | +} |
| 143 | + |
| 144 | +NCCLHelper::~NCCLHelper() |
| 145 | +{ |
| 146 | + if (mLibraryHandle) |
| 147 | + { |
| 148 | +#ifdef _WIN32 |
| 149 | + FreeLibrary(mLibraryHandle); |
| 150 | +#else |
| 151 | + dlclose(mLibraryHandle); |
| 152 | +#endif |
| 153 | + mLibraryHandle = nullptr; |
| 154 | + } |
| 155 | +} |
| 156 | + |
| 157 | +void NCCLHelper::loadNCCLLibrary() |
| 158 | +{ |
| 159 | + try |
| 160 | + { |
| 161 | +#ifdef _WIN32 |
| 162 | + char const* libraryNames[] = {"nccl.dll"}; |
| 163 | +#else |
| 164 | + char const* libraryNames[] = {"libnccl.so"}; |
| 165 | +#endif |
| 166 | + |
| 167 | + for (int i = 0; libraryNames[i] != nullptr; ++i) |
| 168 | + { |
| 169 | + mLibraryHandle = loadLibraryHandle(libraryNames[i]); |
| 170 | + if (mLibraryHandle) |
| 171 | + { |
| 172 | + TLLM_LOG_INFO("Successfully loaded NCCL library: %s", libraryNames[i]); |
| 173 | + break; |
| 174 | + } |
| 175 | + } |
| 176 | + |
| 177 | + if (!mLibraryHandle) |
| 178 | + { |
| 179 | + TLLM_LOG_WARNING("Failed to load NCCL library"); |
| 180 | + return; |
| 181 | + } |
| 182 | + |
| 183 | + // Load the required symbols |
| 184 | + mNCCLCommWindowRegister |
| 185 | + = reinterpret_cast<ncclCommWindowRegisterFunc>(getSymbolAddress(mLibraryHandle, "ncclCommWindowRegister")); |
| 186 | + |
| 187 | + mNCCLMemAlloc = reinterpret_cast<ncclMemAllocFunc>(getSymbolAddress(mLibraryHandle, "ncclMemAlloc")); |
| 188 | + |
| 189 | + if (mNCCLCommWindowRegister == nullptr) |
| 190 | + { |
| 191 | + TLLM_LOG_WARNING("Failed to load ncclCommWindowRegister symbol, NCCL symmetric will not be supported."); |
| 192 | + } |
| 193 | + |
| 194 | + if (mNCCLMemAlloc) |
| 195 | + { |
| 196 | + mIsLoaded = true; |
| 197 | + } |
| 198 | + else |
| 199 | + { |
| 200 | + TLLM_LOG_WARNING("Failed to load required NCCL symbols"); |
| 201 | + } |
| 202 | + } |
| 203 | + catch (std::exception const& e) |
| 204 | + { |
| 205 | + TLLM_LOG_WARNING("Exception while loading NCCL library: %s", e.what()); |
| 206 | + } |
| 207 | +} |
| 208 | + |
| 209 | +void* NCCLHelper::loadLibraryHandle(char const* libName) |
| 210 | +{ |
| 211 | +#ifdef _WIN32 |
| 212 | + return LoadLibraryA(libName); |
| 213 | +#else |
| 214 | + return dlopen(libName, RTLD_LAZY | RTLD_GLOBAL); |
| 215 | +#endif |
| 216 | +} |
| 217 | + |
| 218 | +void* NCCLHelper::getSymbolAddress(void* handle, char const* symbolName) |
| 219 | +{ |
| 220 | + if (!handle) |
| 221 | + { |
| 222 | + return nullptr; |
| 223 | + } |
| 224 | + |
| 225 | +#ifdef _WIN32 |
| 226 | + return GetProcAddress(static_cast<HMODULE>(handle), symbolName); |
| 227 | +#else |
| 228 | + return dlsym(handle, symbolName); |
| 229 | +#endif |
| 230 | +} |
| 231 | + |
| 232 | +NCCLHelper::ncclCommWindowRegisterFunc NCCLHelper::getNCCLCommWindowRegister() |
| 233 | +{ |
| 234 | + return mNCCLCommWindowRegister; |
| 235 | +} |
| 236 | + |
| 237 | +NCCLHelper::ncclMemAllocFunc NCCLHelper::getNCCLMemAlloc() |
| 238 | +{ |
| 239 | + return mNCCLMemAlloc; |
| 240 | +} |
| 241 | + |
| 242 | +bool NCCLHelper::isLoaded() const |
| 243 | +{ |
| 244 | + return mIsLoaded; |
| 245 | +} |
| 246 | + |
| 247 | +bool UserBufferAllocator::use_nccl_symmetric = false; |
| 248 | + |
74 | 249 | }; // namespace tensorrt_llm::runtime::ub |
0 commit comments