diff --git a/source/adapters/native_cpu/context.hpp b/source/adapters/native_cpu/context.hpp index 30bfb31d71..0595f211d9 100644 --- a/source/adapters/native_cpu/context.hpp +++ b/source/adapters/native_cpu/context.hpp @@ -10,13 +10,141 @@ #pragma once +#include +#include #include #include "common.hpp" #include "device.hpp" +#include "ur/ur.hpp" + +namespace native_cpu { +struct usm_alloc_info { + ur_usm_type_t type; + const void *base_ptr; + size_t size; + ur_device_handle_t device; + ur_usm_pool_handle_t pool; + + // We store a pointer to the actual allocation because it is needed when + // freeing memory. + void *base_alloc_ptr; + constexpr usm_alloc_info(ur_usm_type_t type, const void *base_ptr, + size_t size, ur_device_handle_t device, + ur_usm_pool_handle_t pool, void *base_alloc_ptr) + : type(type), base_ptr(base_ptr), size(size), device(device), pool(pool), + base_alloc_ptr(base_alloc_ptr) {} +}; + +constexpr usm_alloc_info usm_alloc_info_null_entry(UR_USM_TYPE_UNKNOWN, nullptr, + 0, nullptr, nullptr, + nullptr); + +constexpr size_t alloc_header_size = sizeof(usm_alloc_info); + +// Computes the padding that we need to add to ensure the +// pointer returned by UR is aligned as the user requested. +static size_t get_padding(uint32_t alignment) { + assert(alignment >= alignof(usm_alloc_info) && + "memory not aligned to usm_alloc_info"); + if (!alignment || alloc_header_size % alignment == 0) + return 0; + size_t padd = 0; + if (alignment <= alloc_header_size) { + padd = alignment - (alloc_header_size % alignment); + } else { + padd = alignment - alloc_header_size; + } + return padd; +} + +// In order to satisfy the MemAllocInfo queries we allocate extra memory +// for the native_cpu::usm_alloc_info struct. +// To satisfy the alignment requirements we "pad" the memory +// allocation so that the pointer returned to the user +// always satisfies (ptr % align) == 0. +static inline void *malloc_impl(uint32_t alignment, size_t size) { + void *ptr = nullptr; + assert(alignment >= alignof(usm_alloc_info) && + "memory not aligned to usm_alloc_info"); +#ifdef _MSC_VER + ptr = _aligned_malloc(alloc_header_size + get_padding(alignment) + size, + alignment); + +#else + ptr = std::aligned_alloc(alignment, + alloc_header_size + get_padding(alignment) + size); +#endif + return ptr; +} + +// The info struct is retrieved by subtracting its size from the pointer +// returned to the user. +static inline uint8_t *get_alloc_info_addr(const void *ptr) { + return (uint8_t *)const_cast(ptr) - alloc_header_size; +} + +static usm_alloc_info get_alloc_info(void *ptr) { + return *(usm_alloc_info *)get_alloc_info_addr(ptr); +} + +} // namespace native_cpu struct ur_context_handle_t_ : RefCounted { ur_context_handle_t_(ur_device_handle_t_ *phDevices) : _device{phDevices} {} ur_device_handle_t _device; + + ur_result_t remove_alloc(void *ptr) { + std::lock_guard lock(alloc_mutex); + const native_cpu::usm_alloc_info &info = native_cpu::get_alloc_info(ptr); + UR_ASSERT(info.type != UR_USM_TYPE_UNKNOWN, + UR_RESULT_ERROR_INVALID_MEM_OBJECT); +#ifdef _MSC_VER + _aligned_free(info.base_alloc_ptr); +#else + free(info.base_alloc_ptr); +#endif + allocations.erase(ptr); + return UR_RESULT_SUCCESS; + } + + const native_cpu::usm_alloc_info & + get_alloc_info_entry(const void *ptr) const { + auto it = allocations.find(ptr); + if (it == allocations.end()) { + return native_cpu::usm_alloc_info_null_entry; + } + + return *(native_cpu::usm_alloc_info *)native_cpu::get_alloc_info_addr(ptr); + } + + void *add_alloc(uint32_t alignment, ur_usm_type_t type, size_t size, + ur_usm_pool_handle_t pool) { + std::lock_guard lock(alloc_mutex); + // We need to ensure that we align to at least alignof(usm_alloc_info), + // otherwise its start address may be unaligned. + alignment = + std::max(alignment, alignof(native_cpu::usm_alloc_info)); + void *alloc = native_cpu::malloc_impl(alignment, size); + if (!alloc) + return nullptr; + // Compute the address of the pointer that we'll return to the user. + void *ptr = native_cpu::alloc_header_size + + native_cpu::get_padding(alignment) + (uint8_t *)alloc; + uint8_t *info_addr = native_cpu::get_alloc_info_addr(ptr); + if (!info_addr) + return nullptr; + // Do a placement new of the alloc_info to avoid allocation and copy + auto info = new (info_addr) + native_cpu::usm_alloc_info(type, ptr, size, this->_device, pool, alloc); + if (!info) + return nullptr; + allocations.insert(ptr); + return ptr; + } + +private: + std::mutex alloc_mutex; + std::set allocations; }; diff --git a/source/adapters/native_cpu/usm.cpp b/source/adapters/native_cpu/usm.cpp index 45ac0596f3..dcae1881f1 100644 --- a/source/adapters/native_cpu/usm.cpp +++ b/source/adapters/native_cpu/usm.cpp @@ -8,90 +8,98 @@ // //===----------------------------------------------------------------------===// +#include "ur/ur.hpp" #include "ur_api.h" #include "common.hpp" +#include "context.hpp" +#include -UR_APIEXPORT ur_result_t UR_APICALL -urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, - ur_usm_pool_handle_t pool, size_t size, void **ppMem) { - std::ignore = hContext; - std::ignore = pUSMDesc; - std::ignore = pool; +namespace native_cpu { +static ur_result_t alloc_helper(ur_context_handle_t hContext, + const ur_usm_desc_t *pUSMDesc, size_t size, + void **ppMem, ur_usm_type_t type) { + auto alignment = pUSMDesc ? pUSMDesc->align : 1u; + UR_ASSERT((alignment & (alignment - 1)) == 0, UR_RESULT_ERROR_INVALID_VALUE); UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); // TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE); - *ppMem = malloc(size); + auto *ptr = hContext->add_alloc(alignment, type, size, nullptr); + UR_ASSERT(ptr != nullptr, UR_RESULT_ERROR_OUT_OF_RESOURCES); + *ppMem = ptr; return UR_RESULT_SUCCESS; } +} // namespace native_cpu + +UR_APIEXPORT ur_result_t UR_APICALL +urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, void **ppMem) { + std::ignore = pool; + + return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem, + UR_USM_TYPE_HOST); +} + UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, size_t size, void **ppMem) { - std::ignore = hContext; std::ignore = hDevice; - std::ignore = pUSMDesc; std::ignore = pool; - UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); - // TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented - UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE); - - *ppMem = malloc(size); - - return UR_RESULT_SUCCESS; + return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem, + UR_USM_TYPE_DEVICE); } UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, size_t size, void **ppMem) { - std::ignore = hContext; std::ignore = hDevice; - std::ignore = pUSMDesc; std::ignore = pool; - UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); - // TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented - UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE); - - *ppMem = malloc(size); - - return UR_RESULT_SUCCESS; + return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem, + UR_USM_TYPE_SHARED); } UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, void *pMem) { - std::ignore = hContext; UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_POINTER); - free(pMem); + auto res = hContext->remove_alloc(pMem); - return UR_RESULT_SUCCESS; + return res; } UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, ur_usm_alloc_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - std::ignore = hContext; - std::ignore = pMem; - std::ignore = propName; - std::ignore = propSize; - std::ignore = pPropValue; - std::ignore = pPropSizeRet; + UR_ASSERT(pMem != nullptr, UR_RESULT_ERROR_INVALID_NULL_POINTER); UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + if (propName == UR_USM_ALLOC_INFO_BASE_PTR) { + // TODO: logic to compute base ptr given ptr + DIE_NO_IMPLEMENTATION; + } + const native_cpu::usm_alloc_info &alloc_info = + hContext->get_alloc_info_entry(pMem); switch (propName) { case UR_USM_ALLOC_INFO_TYPE: - // Todo implement this in context - return ReturnValue(UR_USM_TYPE_DEVICE); + return ReturnValue(alloc_info.type); + case UR_USM_ALLOC_INFO_SIZE: + return ReturnValue(alloc_info.size); + case UR_USM_ALLOC_INFO_DEVICE: + return ReturnValue(alloc_info.device); + case UR_USM_ALLOC_INFO_POOL: + return ReturnValue(alloc_info.pool); default: DIE_NO_IMPLEMENTATION; }