99// ===----------------------------------------------------------------------===//
1010
1111#include " memory.hpp"
12+
1213#include " ../ur_interface_loader.hpp"
1314#include " context.hpp"
1415
@@ -53,57 +54,69 @@ void ur_usm_handle_t::unmapHostPtr(void * /*pMappedPtr*/,
5354 /* nop */
5455}
5556
// Obtains the command list used for host-side buffer copies in this file.
// The descriptor requests an in-order list on hDevice's Compute queue-group
// ordinal with copy offload enabled; the list itself is an immediate command
// list taken from hContext's command list cache, created in synchronous queue
// mode at normal priority. The returned RAII handle owns the list.
57+ static v2::raii::command_list_unique_handle
58+ getSyncCommandListForCopy (ur_context_handle_t hContext,
59+ ur_device_handle_t hDevice) {
60+ v2::command_list_desc_t listDesc;
61+ listDesc.IsInOrder = true ;
62+ listDesc.Ordinal =
63+ hDevice
64+ ->QueueGroup [ur_device_handle_t_::queue_group_info_t ::type::Compute]
65+ .ZeOrdinal ;
66+ listDesc.CopyOffloadEnable = true ;
67+ return hContext->getCommandListCache ().getImmediateCommandList (
68+ hDevice->ZeDevice , listDesc, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS,
69+ ZE_COMMAND_QUEUE_PRIORITY_NORMAL, std::nullopt );
70+ }
71+
// Copies `size` bytes from `src` to `dst` by appending a Level Zero memory
// copy to the command list returned by getSyncCommandListForCopy(). No signal
// event and no wait events are passed to the append.
// Returns UR_RESULT_SUCCESS when the append succeeds; any exception raised in
// the body is translated to a ur_result_t through exceptionToResult().
// NOTE(review): there is no explicit wait after the append - this presumably
// relies on the list's synchronous queue mode for the copy to be complete on
// return; confirm.
72+ static ur_result_t synchronousZeCopy (ur_context_handle_t hContext,
73+ ur_device_handle_t hDevice, void *dst,
74+ const void *src, size_t size) try {
75+ auto commandList = getSyncCommandListForCopy (hContext, hDevice);
76+
77+ ZE2UR_CALL (zeCommandListAppendMemoryCopy,
78+ (commandList.get (), dst, src, size, nullptr , 0 , nullptr ));
79+
80+ return UR_RESULT_SUCCESS;
81+ } catch (...) {
82+ return exceptionToResult (std::current_exception ());
83+ }
84+
// Constructs an integrated (host-backed) buffer of `size` bytes.
// Behaviour on the "+" side of this change:
//  - hostPtr is offered to maybeImportUSM(); when the import succeeds the
//    buffer uses hostPtr directly and the import is released through
//    doZeUSMRelease() when `ptr` is destroyed.
//  - otherwise UR_USM_TYPE_HOST memory is allocated from the context's default
//    USM pool (returned to that pool, with an error log on failure, when `ptr`
//    is destroyed); if hostPtr is non-null its contents are copied in with
//    synchronousZeCopy() on the context's first device and hostPtr is stored
//    in writeBackPtr.
// The "-" side (an explicit "already USM" probe via getMemoryAttrs and a
// std::memcpy initial copy) is what this change removes.
// NOTE(review): maybeImportUSM() is now reached even when hostPtr is null -
// presumably it returns false for a null pointer; confirm.
5685ur_integrated_buffer_handle_t ::ur_integrated_buffer_handle_t (
5786 ur_context_handle_t hContext, void *hostPtr, size_t size,
5887 device_access_mode_t accessMode)
5988 : ur_mem_buffer_t (hContext, size, accessMode) {
60- if (hostPtr) {
61- // Host pointer provided - check if it's already USM or needs import
62- ZeStruct<ze_memory_allocation_properties_t > memProps;
63- auto ret =
64- getMemoryAttrs (hContext->getZeHandle (), hostPtr, nullptr , &memProps);
65-
66- if (ret == UR_RESULT_SUCCESS && memProps.type != ZE_MEMORY_TYPE_UNKNOWN) {
67- // Already a USM allocation - just use it directly without import
68- this ->ptr = usm_unique_ptr_t (hostPtr, [](void *) {});
69- return ;
70- }
71-
72- // Not USM - try to import it
73- bool hostPtrImported =
74- maybeImportUSM (hContext->getPlatform ()->ZeDriverHandleExpTranslated ,
75- hContext->getZeHandle (), hostPtr, size);
76-
77- if (hostPtrImported) {
78- // Successfully imported - use it with release
79- this ->ptr = usm_unique_ptr_t (hostPtr, [hContext](void *ptr) {
80- ZeUSMImport.doZeUSMRelease (
81- hContext->getPlatform ()->ZeDriverHandleExpTranslated , ptr);
82- });
83- // No copy-back needed for imported pointers
84- return ;
85- }
86-
87- // Import failed - allocate backing buffer and set up copy-back
88- }
89+ bool hostPtrImported =
90+ maybeImportUSM (hContext->getPlatform ()->ZeDriverHandleExpTranslated ,
91+ hContext->getZeHandle (), hostPtr, size);
92+
93+ if (hostPtrImported) {
94+ this ->ptr = usm_unique_ptr_t (hostPtr, [hContext](void *ptr) {
95+ ZeUSMImport.doZeUSMRelease (
96+ hContext->getPlatform ()->ZeDriverHandleExpTranslated , ptr);
97+ });
98+ } else {
99+ void *rawPtr;
100+ // Use HOST memory for integrated GPUs to enable zero-copy device access
101+ UR_CALL_THROWS (hContext->getDefaultUSMPool ()->allocate (
102+ hContext, nullptr , nullptr , UR_USM_TYPE_HOST, size, &rawPtr));
89103
90- // No host pointer, or import failed - allocate new USM host memory
91- void *rawPtr;
92- UR_CALL_THROWS (hContext->getDefaultUSMPool ()->allocate (
93- hContext, nullptr , nullptr , UR_USM_TYPE_HOST, size, &rawPtr));
104+ this ->ptr = usm_unique_ptr_t (rawPtr, [hContext](void *ptr) {
105+ auto ret = hContext->getDefaultUSMPool ()->free (ptr);
106+ if (ret != UR_RESULT_SUCCESS) {
107+ UR_LOG (ERR, " Failed to free host memory: {}" , ret);
108+ }
109+ });
94110
95- this ->ptr = usm_unique_ptr_t (rawPtr, [hContext](void *ptr) {
96- auto ret = hContext->getDefaultUSMPool ()->free (ptr);
97- if (ret != UR_RESULT_SUCCESS) {
98- UR_LOG (ERR, " Failed to free host memory: {}" , ret);
111+ if (hostPtr) {
112+ // Initial copy using Level Zero for USM HOST memory
113+ auto hDevice = hContext->getDevices ()[0 ];
114+ UR_CALL_THROWS (
115+ synchronousZeCopy (hContext, hDevice, this ->ptr .get (), hostPtr, size));
116+ // Set writeBackPtr to enable map/unmap copy-back (but NOT destructor
117+ // copy-back)
118+ writeBackPtr = hostPtr;
99119 }
100- });
101-
102- if (hostPtr) {
103- // Copy data from user pointer to our backing buffer
104- std::memcpy (this ->ptr .get (), hostPtr, size);
105- // Remember to copy back on destruction
106- writeBackPtr = hostPtr;
107120 }
108121}
109122
@@ -119,12 +132,6 @@ ur_integrated_buffer_handle_t::ur_integrated_buffer_handle_t(
119132 });
120133}
121134
122- ur_integrated_buffer_handle_t ::~ur_integrated_buffer_handle_t () {
123- if (writeBackPtr) {
124- std::memcpy (writeBackPtr, this ->ptr .get (), size);
125- }
126- }
127-
128135void *ur_integrated_buffer_handle_t ::getDevicePtr(
129136 ur_device_handle_t /* hDevice*/ , device_access_mode_t /* access*/ ,
130137 size_t offset, size_t /* size*/ , ze_command_list_handle_t /* cmdList*/ ,
@@ -140,7 +147,11 @@ void *ur_integrated_buffer_handle_t::mapHostPtr(
140147 void *mappedPtr = ur_cast<char *>(writeBackPtr) + offset;
141148
142149 if (flags & UR_MAP_FLAG_READ) {
143- std::memcpy (mappedPtr, ur_cast<char *>(ptr.get ()) + offset, mapSize);
150+ // Use Level Zero copy for USM HOST memory to ensure GPU visibility
151+ auto hDevice = hContext->getDevices ()[0 ];
152+ UR_CALL_THROWS (synchronousZeCopy (hContext, hDevice, mappedPtr,
153+ ur_cast<char *>(ptr.get ()) + offset,
154+ mapSize));
144155 }
145156
146157 // Track this mapping for unmap
@@ -172,8 +183,11 @@ void ur_integrated_buffer_handle_t::unmapHostPtr(
172183
173184 if (mappedRegion->flags &
174185 (UR_MAP_FLAG_WRITE | UR_MAP_FLAG_WRITE_INVALIDATE_REGION)) {
175- std::memcpy (ur_cast<char *>(ptr.get ()) + mappedRegion->offset ,
176- mappedRegion->ptr .get (), mappedRegion->size );
186+ // Use Level Zero copy for USM HOST memory to ensure GPU visibility
187+ auto hDevice = hContext->getDevices ()[0 ];
188+ UR_CALL_THROWS (synchronousZeCopy (
189+ hContext, hDevice, ur_cast<char *>(ptr.get ()) + mappedRegion->offset ,
190+ mappedRegion->ptr .get (), mappedRegion->size ));
177191 }
178192
179193 mappedRegions.erase (mappedRegion);
@@ -182,32 +196,11 @@ void ur_integrated_buffer_handle_t::unmapHostPtr(
182196 // No op for zero-copy path, memory is synced
183197}
184198
185- static v2::raii::command_list_unique_handle
186- getSyncCommandListForCopy (ur_context_handle_t hContext,
187- ur_device_handle_t hDevice) {
188- v2::command_list_desc_t listDesc;
189- listDesc.IsInOrder = true ;
190- listDesc.Ordinal =
191- hDevice
192- ->QueueGroup [ur_device_handle_t_::queue_group_info_t ::type::Compute]
193- .ZeOrdinal ;
194- listDesc.CopyOffloadEnable = true ;
195- return hContext->getCommandListCache ().getImmediateCommandList (
196- hDevice->ZeDevice , listDesc, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS,
197- ZE_COMMAND_QUEUE_PRIORITY_NORMAL, std::nullopt );
198- }
199-
200- static ur_result_t synchronousZeCopy (ur_context_handle_t hContext,
201- ur_device_handle_t hDevice, void *dst,
202- const void *src, size_t size) try {
203- auto commandList = getSyncCommandListForCopy (hContext, hDevice);
204-
205- ZE2UR_CALL (zeCommandListAppendMemoryCopy,
206- (commandList.get (), dst, src, size, nullptr , 0 , nullptr ));
207-
208- return UR_RESULT_SUCCESS;
209- } catch (...) {
210- return exceptionToResult (std::current_exception ());
// Destructor is intentionally empty: it performs no copy into writeBackPtr.
// With this body, data reaches the user's host pointer only through
// mapHostPtr() with UR_MAP_FLAG_READ, which copies from `ptr` into
// writeBackPtr + offset.
199+ ur_integrated_buffer_handle_t ::~ur_integrated_buffer_handle_t () {
200+ // Do NOT do automatic copy-back in destructor - it causes heap corruption
201+ // because writeBackPtr may be freed by SYCL runtime before buffer destructor
202+ // runs. Copy-back happens via explicit map/unmap operations (see
203+ // mapHostPtr/unmapHostPtr).
211204}
212205
213206void *ur_discrete_buffer_handle_t ::allocateOnDevice(ur_device_handle_t hDevice,
@@ -618,6 +611,12 @@ ur_result_t urMemBufferCreate(ur_context_handle_t hContext,
618611 void *hostPtr = pProperties ? pProperties->pHost : nullptr ;
619612 auto accessMode = ur_mem_buffer_t::getDeviceAccessMode (flags);
620613
614+ // For integrated devices, use zero-copy host buffers. The integrated buffer
615+ // constructor will handle all cases:
616+ // 1. No host pointer - allocate USM host memory
617+ // 2. Host pointer is already USM - no dedicated path; treated like cases 3/4
618+ // 3. Host pointer can be imported - import it
619+ // 4. Otherwise - allocate USM, copy in; copy-back only on map/unmap
621620 if (useHostBuffer (hContext)) {
622621 *phBuffer = ur_mem_handle_t_::create<ur_integrated_buffer_handle_t >(
623622 hContext, hostPtr, size, accessMode);
0 commit comments