Skip to content

Commit

Permalink
POC: use UMF CUDA provider
Browse files Browse the repository at this point in the history
Signed-off-by: Lukasz Dorau <[email protected]>
  • Loading branch information
ldorau committed Dec 18, 2024
1 parent 39df031 commit ece4587
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 8 deletions.
13 changes: 13 additions & 0 deletions source/adapters/cuda/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

#include <ur/ur.hpp>

#include <umf/providers/provider_cuda.h>

#include "common.hpp"

struct ur_device_handle_t_ {
Expand All @@ -23,6 +25,7 @@ struct ur_device_handle_t_ {
std::atomic_uint32_t RefCount;
ur_platform_handle_t Platform;
uint32_t DeviceIndex;
umf_memory_provider_handle_t umfCUDAprovider[UMF_MEMORY_TYPE_SHARED];

static constexpr uint32_t MaxWorkItemDimensions = 3u;
size_t MaxWorkItemSizes[MaxWorkItemDimensions];
Expand Down Expand Up @@ -115,6 +118,16 @@ struct ur_device_handle_t_ {

// Returns the value stored in the NumComputeUnits member.
uint32_t getNumComputeUnits() const noexcept { return NumComputeUnits; };

// Stores the UMF CUDA memory provider to use for the given USM memory type.
//
// The provider table is indexed by `memType - 1`, so only the memory types in
// the range [UMF_MEMORY_TYPE_HOST, UMF_MEMORY_TYPE_SHARED] have a slot
// (assumes the enumerators are consecutive starting at 1 — this matches how
// the table is sized and how the platform code iterates over them).
//
// Throws UR_RESULT_ERROR_INVALID_ARGUMENT for any other memory type instead
// of writing outside the table.
void setUmfCUDAprovider(umf_usm_memory_type_t memType,
                        umf_memory_provider_handle_t _umfCUDAprovider) {
  if (memType < UMF_MEMORY_TYPE_HOST || memType > UMF_MEMORY_TYPE_SHARED) {
    throw UR_RESULT_ERROR_INVALID_ARGUMENT;
  }
  umfCUDAprovider[(int)memType - 1] = _umfCUDAprovider;
}

// Returns the UMF CUDA memory provider registered for the given USM memory
// type.
//
// The provider table is indexed by `memType - 1`, so only the memory types in
// the range [UMF_MEMORY_TYPE_HOST, UMF_MEMORY_TYPE_SHARED] have a slot.
// For any other memory type this returns nullptr rather than reading outside
// the table; callers are expected to treat a null handle as "no provider".
umf_memory_provider_handle_t
getUmfCUDAprovider(umf_usm_memory_type_t memType) {
  if (memType < UMF_MEMORY_TYPE_HOST || memType > UMF_MEMORY_TYPE_SHARED) {
    return nullptr;
  }
  return umfCUDAprovider[(int)memType - 1];
}

// bookkeeping for mipmappedArray leaks in Mapping external Memory
std::map<CUarray, CUmipmappedArray> ChildCuarrayFromMipmapMap;
};
Expand Down
62 changes: 62 additions & 0 deletions source/adapters/cuda/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "common.hpp"
#include "context.hpp"
#include "device.hpp"
#include "umf_helpers.hpp"

#include <cassert>
#include <cuda.h>
Expand Down Expand Up @@ -111,6 +112,67 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries,
Result = UR_RESULT_ERROR_OUT_OF_RESOURCES;
throw;
}

// Create one UMF CUDA memory provider per (device, USM memory type) pair and
// register it on the corresponding device handle.
//
// A single params object is reused for every provider. It is destroyed on
// both the success path and every failure path, so it cannot leak when one
// of the UMF calls fails part-way through.
try {
  // Converts a failing UMF status into the ur_result_t exception that the
  // handlers below expect.
  auto throwOnUmfError = [](umf_result_t umf_result) {
    if (umf_result != UMF_RESULT_SUCCESS) {
      ur_result_t Err = umf::umf2urResult(umf_result);
      throw Err;
    }
  };

  umf_cuda_memory_provider_params_handle_t cu_memory_provider_params =
      nullptr;
  throwOnUmfError(
      umfCUDAMemoryProviderParamsCreate(&cu_memory_provider_params));

  try {
    for (int i = 0; i < NumDevices; ++i) {
      ur_device_handle_t_ *device_handle = Platform.Devices[i].get();
      CUdevice device = device_handle->get();
      CUcontext context = device_handle->getNativeContext();

      // Context and device are the same for every memory type of this
      // device, so they are set once per device rather than once per type.
      throwOnUmfError(umfCUDAMemoryProviderParamsSetContext(
          cu_memory_provider_params, context));
      throwOnUmfError(umfCUDAMemoryProviderParamsSetDevice(
          cu_memory_provider_params, device));

      for (int memType = UMF_MEMORY_TYPE_HOST;
           memType <= UMF_MEMORY_TYPE_SHARED; memType++) {
        throwOnUmfError(umfCUDAMemoryProviderParamsSetMemoryType(
            cu_memory_provider_params, (umf_usm_memory_type_t)memType));

        umf_memory_provider_handle_t umfCUDAprovider = nullptr;
        throwOnUmfError(umfMemoryProviderCreate(umfCUDAMemoryProviderOps(),
                                                cu_memory_provider_params,
                                                &umfCUDAprovider));

        device_handle->setUmfCUDAprovider((umf_usm_memory_type_t)memType,
                                          umfCUDAprovider);
      }
    }
  } catch (...) {
    // Release the params object before propagating the failure to the
    // outer handlers, which translate it into Result.
    umfCUDAMemoryProviderParamsDestroy(cu_memory_provider_params);
    throw;
  }

  umfCUDAMemoryProviderParamsDestroy(cu_memory_provider_params);
} catch (ur_result_t Err) {
  Result = Err;
  throw Err;
} catch (...) {
  Result = UR_RESULT_ERROR_OUT_OF_RESOURCES;
  throw;
}
},
Result);

Expand Down
49 changes: 41 additions & 8 deletions source/adapters/cuda/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
return UR_RESULT_SUCCESS;
}

ur_result_t USMFreeImpl(ur_context_handle_t, void *Pointer) {
ur_result_t USMFreeImpl(ur_context_handle_t hContext, void *Pointer) {
ur_result_t Result = UR_RESULT_SUCCESS;
try {
unsigned int IsManaged;
Expand All @@ -114,13 +114,29 @@ ur_result_t USMFreeImpl(ur_context_handle_t, void *Pointer) {
(CUdeviceptr)Pointer));
UR_ASSERT(Type == CU_MEMORYTYPE_DEVICE || Type == CU_MEMORYTYPE_HOST,
UR_RESULT_ERROR_INVALID_MEM_OBJECT);

std::vector<ur_device_handle_t> Devices = hContext->getDevices();
ur_device_handle_t Device0 = Devices[0];

if (IsManaged || Type == CU_MEMORYTYPE_DEVICE) {
// Memory allocated with cuMemAlloc and cuMemAllocManaged must be freed
// with cuMemFree
UR_CHECK_ERROR(cuMemFree((CUdeviceptr)Pointer));
// UR_CHECK_ERROR(cuMemFree((CUdeviceptr)Pointer));
umf_memory_provider_handle_t umfCUDAprovider =
Device0->getUmfCUDAprovider(
umf_usm_memory_type_t::UMF_MEMORY_TYPE_DEVICE);
umf_result_t umf_result = umfMemoryProviderFree(umfCUDAprovider, Pointer,
0 /* size is unknown */);
UR_CHECK_ERROR(umf::umf2urResult(umf_result));
} else {
// Memory allocated with cuMemAllocHost must be freed with cuMemFreeHost
UR_CHECK_ERROR(cuMemFreeHost(Pointer));
// UR_CHECK_ERROR(cuMemFreeHost(Pointer));
umf_memory_provider_handle_t umfCUDAprovider =
Device0->getUmfCUDAprovider(
umf_usm_memory_type_t::UMF_MEMORY_TYPE_HOST);
umf_result_t umf_result = umfMemoryProviderFree(umfCUDAprovider, Pointer,
0 /* size is unknown */);
UR_CHECK_ERROR(umf::umf2urResult(umf_result));
}
} catch (ur_result_t Err) {
Result = Err;
Expand All @@ -143,7 +159,12 @@ ur_result_t USMDeviceAllocImpl(void **ResultPtr, ur_context_handle_t,
uint32_t Alignment) {
try {
ScopedContext Active(Device);
UR_CHECK_ERROR(cuMemAlloc((CUdeviceptr *)ResultPtr, Size));
// UR_CHECK_ERROR(cuMemAlloc((CUdeviceptr *)ResultPtr, Size));
umf_memory_provider_handle_t umfCUDAprovider = Device->getUmfCUDAprovider(
umf_usm_memory_type_t::UMF_MEMORY_TYPE_DEVICE);
umf_result_t umf_result =
umfMemoryProviderAlloc(umfCUDAprovider, Size, Alignment, ResultPtr);
UR_CHECK_ERROR(umf::umf2urResult(umf_result));
} catch (ur_result_t Err) {
return Err;
}
Expand All @@ -164,8 +185,13 @@ ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t,
uint32_t Alignment) {
try {
ScopedContext Active(Device);
UR_CHECK_ERROR(cuMemAllocManaged((CUdeviceptr *)ResultPtr, Size,
CU_MEM_ATTACH_GLOBAL));
// UR_CHECK_ERROR(cuMemAllocManaged((CUdeviceptr *)ResultPtr, Size,
// CU_MEM_ATTACH_GLOBAL));
umf_memory_provider_handle_t umfCUDAprovider = Device->getUmfCUDAprovider(
umf_usm_memory_type_t::UMF_MEMORY_TYPE_SHARED);
umf_result_t umf_result =
umfMemoryProviderAlloc(umfCUDAprovider, Size, Alignment, ResultPtr);
UR_CHECK_ERROR(umf::umf2urResult(umf_result));
} catch (ur_result_t Err) {
return Err;
}
Expand All @@ -179,11 +205,18 @@ ur_result_t USMSharedAllocImpl(void **ResultPtr, ur_context_handle_t,
return UR_RESULT_SUCCESS;
}

ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t,
ur_result_t USMHostAllocImpl(void **ResultPtr, ur_context_handle_t hContext,
ur_usm_host_mem_flags_t, size_t Size,
uint32_t Alignment) {
try {
UR_CHECK_ERROR(cuMemAllocHost(ResultPtr, Size));
// UR_CHECK_ERROR(cuMemAllocHost(ResultPtr, Size));
std::vector<ur_device_handle_t> Devices = hContext->getDevices();
ur_device_handle_t Device0 = Devices[0];
umf_memory_provider_handle_t umfCUDAprovider = Device0->getUmfCUDAprovider(
umf_usm_memory_type_t::UMF_MEMORY_TYPE_HOST);
umf_result_t umf_result =
umfMemoryProviderAlloc(umfCUDAprovider, Size, Alignment, ResultPtr);
UR_CHECK_ERROR(umf::umf2urResult(umf_result));
} catch (ur_result_t Err) {
return Err;
}
Expand Down

0 comments on commit ece4587

Please sign in to comment.