88//
99// ===----------------------------------------------------------------------===//
1010
11- #include " memory.hpp"
1211#include " ../ur_interface_loader.hpp"
1312#include " context.hpp"
13+ #include " memory.hpp"
1414
1515#include " ../helpers/memory_helpers.hpp"
1616#include " ../image_common.hpp"
@@ -53,57 +53,69 @@ void ur_usm_handle_t::unmapHostPtr(void * /*pMappedPtr*/,
5353 /* nop */
5454}
5555
56+ static v2::raii::command_list_unique_handle
57+ getSyncCommandListForCopy (ur_context_handle_t hContext,
58+ ur_device_handle_t hDevice) {
59+ v2::command_list_desc_t listDesc;
60+ listDesc.IsInOrder = true ;
61+ listDesc.Ordinal =
62+ hDevice
63+ ->QueueGroup [ur_device_handle_t_::queue_group_info_t ::type::Compute]
64+ .ZeOrdinal ;
65+ listDesc.CopyOffloadEnable = true ;
66+ return hContext->getCommandListCache ().getImmediateCommandList (
67+ hDevice->ZeDevice , listDesc, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS,
68+ ZE_COMMAND_QUEUE_PRIORITY_NORMAL, std::nullopt );
69+ }
70+
71+ static ur_result_t synchronousZeCopy (ur_context_handle_t hContext,
72+ ur_device_handle_t hDevice, void *dst,
73+ const void *src, size_t size) try {
74+ auto commandList = getSyncCommandListForCopy (hContext, hDevice);
75+
76+ ZE2UR_CALL (zeCommandListAppendMemoryCopy,
77+ (commandList.get (), dst, src, size, nullptr , 0 , nullptr ));
78+
79+ return UR_RESULT_SUCCESS;
80+ } catch (...) {
81+ return exceptionToResult (std::current_exception ());
82+ }
83+
5684ur_integrated_buffer_handle_t ::ur_integrated_buffer_handle_t (
5785 ur_context_handle_t hContext, void *hostPtr, size_t size,
5886 device_access_mode_t accessMode)
5987 : ur_mem_buffer_t (hContext, size, accessMode) {
60- if (hostPtr) {
61- // Host pointer provided - check if it's already USM or needs import
62- ZeStruct<ze_memory_allocation_properties_t > memProps;
63- auto ret =
64- getMemoryAttrs (hContext->getZeHandle (), hostPtr, nullptr , &memProps);
65-
66- if (ret == UR_RESULT_SUCCESS && memProps.type != ZE_MEMORY_TYPE_UNKNOWN) {
67- // Already a USM allocation - just use it directly without import
68- this ->ptr = usm_unique_ptr_t (hostPtr, [](void *) {});
69- return ;
70- }
71-
72- // Not USM - try to import it
73- bool hostPtrImported =
74- maybeImportUSM (hContext->getPlatform ()->ZeDriverHandleExpTranslated ,
75- hContext->getZeHandle (), hostPtr, size);
76-
77- if (hostPtrImported) {
78- // Successfully imported - use it with release
79- this ->ptr = usm_unique_ptr_t (hostPtr, [hContext](void *ptr) {
80- ZeUSMImport.doZeUSMRelease (
81- hContext->getPlatform ()->ZeDriverHandleExpTranslated , ptr);
82- });
83- // No copy-back needed for imported pointers
84- return ;
85- }
86-
87- // Import failed - allocate backing buffer and set up copy-back
88- }
88+ bool hostPtrImported =
89+ maybeImportUSM (hContext->getPlatform ()->ZeDriverHandleExpTranslated ,
90+ hContext->getZeHandle (), hostPtr, size);
91+
92+ if (hostPtrImported) {
93+ this ->ptr = usm_unique_ptr_t (hostPtr, [hContext](void *ptr) {
94+ ZeUSMImport.doZeUSMRelease (
95+ hContext->getPlatform ()->ZeDriverHandleExpTranslated , ptr);
96+ });
97+ } else {
98+ void *rawPtr;
99+ // Use HOST memory for integrated GPUs to enable zero-copy device access
100+ UR_CALL_THROWS (hContext->getDefaultUSMPool ()->allocate (
101+ hContext, nullptr , nullptr , UR_USM_TYPE_HOST, size, &rawPtr));
89102
90- // No host pointer, or import failed - allocate new USM host memory
91- void *rawPtr;
92- UR_CALL_THROWS (hContext->getDefaultUSMPool ()->allocate (
93- hContext, nullptr , nullptr , UR_USM_TYPE_HOST, size, &rawPtr));
103+ this ->ptr = usm_unique_ptr_t (rawPtr, [hContext](void *ptr) {
104+ auto ret = hContext->getDefaultUSMPool ()->free (ptr);
105+ if (ret != UR_RESULT_SUCCESS) {
106+ UR_LOG (ERR, " Failed to free host memory: {}" , ret);
107+ }
108+ });
94109
95- this ->ptr = usm_unique_ptr_t (rawPtr, [hContext](void *ptr) {
96- auto ret = hContext->getDefaultUSMPool ()->free (ptr);
97- if (ret != UR_RESULT_SUCCESS) {
98- UR_LOG (ERR, " Failed to free host memory: {}" , ret);
110+ if (hostPtr) {
111+ // Initial copy using Level Zero for USM HOST memory
112+ auto hDevice = hContext->getDevices ()[0 ];
113+ UR_CALL_THROWS (
114+ synchronousZeCopy (hContext, hDevice, this ->ptr .get (), hostPtr, size));
115+ // Set writeBackPtr to enable map/unmap copy-back (but NOT destructor
116+ // copy-back)
117+ writeBackPtr = hostPtr;
99118 }
100- });
101-
102- if (hostPtr) {
103- // Copy data from user pointer to our backing buffer
104- std::memcpy (this ->ptr .get (), hostPtr, size);
105- // Remember to copy back on destruction
106- writeBackPtr = hostPtr;
107119 }
108120}
109121
@@ -119,12 +131,6 @@ ur_integrated_buffer_handle_t::ur_integrated_buffer_handle_t(
119131 });
120132}
121133
122- ur_integrated_buffer_handle_t ::~ur_integrated_buffer_handle_t () {
123- if (writeBackPtr) {
124- std::memcpy (writeBackPtr, this ->ptr .get (), size);
125- }
126- }
127-
128134void *ur_integrated_buffer_handle_t ::getDevicePtr(
129135 ur_device_handle_t /* hDevice*/ , device_access_mode_t /* access*/ ,
130136 size_t offset, size_t /* size*/ , ze_command_list_handle_t /* cmdList*/ ,
@@ -140,7 +146,11 @@ void *ur_integrated_buffer_handle_t::mapHostPtr(
140146 void *mappedPtr = ur_cast<char *>(writeBackPtr) + offset;
141147
142148 if (flags & UR_MAP_FLAG_READ) {
143- std::memcpy (mappedPtr, ur_cast<char *>(ptr.get ()) + offset, mapSize);
149+ // Use Level Zero copy for USM HOST memory to ensure GPU visibility
150+ auto hDevice = hContext->getDevices ()[0 ];
151+ UR_CALL_THROWS (synchronousZeCopy (hContext, hDevice, mappedPtr,
152+ ur_cast<char *>(ptr.get ()) + offset,
153+ mapSize));
144154 }
145155
146156 // Track this mapping for unmap
@@ -172,8 +182,11 @@ void ur_integrated_buffer_handle_t::unmapHostPtr(
172182
173183 if (mappedRegion->flags &
174184 (UR_MAP_FLAG_WRITE | UR_MAP_FLAG_WRITE_INVALIDATE_REGION)) {
175- std::memcpy (ur_cast<char *>(ptr.get ()) + mappedRegion->offset ,
176- mappedRegion->ptr .get (), mappedRegion->size );
185+ // Use Level Zero copy for USM HOST memory to ensure GPU visibility
186+ auto hDevice = hContext->getDevices ()[0 ];
187+ UR_CALL_THROWS (synchronousZeCopy (
188+ hContext, hDevice, ur_cast<char *>(ptr.get ()) + mappedRegion->offset ,
189+ mappedRegion->ptr .get (), mappedRegion->size ));
177190 }
178191
179192 mappedRegions.erase (mappedRegion);
@@ -182,32 +195,11 @@ void ur_integrated_buffer_handle_t::unmapHostPtr(
182195 // No op for zero-copy path, memory is synced
183196}
184197
185- static v2::raii::command_list_unique_handle
186- getSyncCommandListForCopy (ur_context_handle_t hContext,
187- ur_device_handle_t hDevice) {
188- v2::command_list_desc_t listDesc;
189- listDesc.IsInOrder = true ;
190- listDesc.Ordinal =
191- hDevice
192- ->QueueGroup [ur_device_handle_t_::queue_group_info_t ::type::Compute]
193- .ZeOrdinal ;
194- listDesc.CopyOffloadEnable = true ;
195- return hContext->getCommandListCache ().getImmediateCommandList (
196- hDevice->ZeDevice , listDesc, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS,
197- ZE_COMMAND_QUEUE_PRIORITY_NORMAL, std::nullopt );
198- }
199-
200- static ur_result_t synchronousZeCopy (ur_context_handle_t hContext,
201- ur_device_handle_t hDevice, void *dst,
202- const void *src, size_t size) try {
203- auto commandList = getSyncCommandListForCopy (hContext, hDevice);
204-
205- ZE2UR_CALL (zeCommandListAppendMemoryCopy,
206- (commandList.get (), dst, src, size, nullptr , 0 , nullptr ));
207-
208- return UR_RESULT_SUCCESS;
209- } catch (...) {
210- return exceptionToResult (std::current_exception ());
198+ ur_integrated_buffer_handle_t ::~ur_integrated_buffer_handle_t () {
199+ // Deliberately no copy-back in the destructor: it caused heap corruption
200+ // because writeBackPtr may already be freed by the SYCL runtime before this
201+ // destructor runs. Copy-back happens via explicit map/unmap operations (see
202+ // mapHostPtr/unmapHostPtr).
211203}
212204
213205void *ur_discrete_buffer_handle_t ::allocateOnDevice(ur_device_handle_t hDevice,
@@ -618,6 +610,12 @@ ur_result_t urMemBufferCreate(ur_context_handle_t hContext,
618610 void *hostPtr = pProperties ? pProperties->pHost : nullptr ;
619611 auto accessMode = ur_mem_buffer_t::getDeviceAccessMode (flags);
620612
613+ // For integrated devices, use zero-copy host buffers. The integrated buffer
614+ // constructor will handle all cases:
615+ // 1. No host pointer - allocate USM host memory
616+ // 2. Host pointer can be imported - import it and use it directly
617+ // 3. Otherwise - allocate USM host memory and copy the host data in;
618+ // copy-back happens only on map/unmap, not on destruction
621619 if (useHostBuffer (hContext)) {
622620 *phBuffer = ur_mem_handle_t_::create<ur_integrated_buffer_handle_t >(
623621 hContext, hostPtr, size, accessMode);
0 commit comments