-
Notifications
You must be signed in to change notification settings - Fork 795
[SYCL][L0] Fix memory leak in pi Queue #2905
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 11 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
798d912
[SYCL][L0] Fix memory leak in pi Queue
bso-intel 4178e67
replaced piQueueRetain with refcount++ when it is already guarded by …
bso-intel c7acf5d
Merge branch 'sycl' into retain-queue
bso-intel 9b57bd6
fixed removed mutex issue
bso-intel c7bc99a
Merge branch 'retain-queue' of https://github.com/bso-intel/llvm into …
bso-intel 95ab889
Merge branch 'sycl' of https://github.com/bso-intel/llvm into retain-q…
bso-intel ced1abf
outlined common codes into a helper function
bso-intel f027318
fixed locked error
bso-intel 7dd2c84
accommodated review comments
bso-intel 9e3b287
added static scope specifier
bso-intel 180f435
Fixed bug of lock in piQueueRelease
bso-intel 06c0c30
reverted moving the lock in the front due to conflict in piEventsWait
bso-intel File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -406,6 +406,40 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *CallStr, | |
| return mapError(Result); | ||
| #define ZE_CALL_NOCHECK(Call) ZeCall().doCall(Call, #Call, false) | ||
|
|
||
| // This helper function increments the reference counter of the Queue | ||
| // without guarding with a lock. | ||
| // It is the caller's responsibility to make sure the lock is acquired | ||
| // on the Queue that is passed in. | ||
| inline static void piQueueRetainNoLock(pi_queue Queue) { Queue->RefCount++; } | ||
|
|
||
| // This helper function creates a pi_event and associate a pi_queue. | ||
| // Note that the caller of this function must have acquired lock on the Queue | ||
| // that is passed in. | ||
| // \param Queue pi_queue to associate with a new event. | ||
| // \param Event a pointer to hold the newly created pi_event | ||
| // \param CommandType various command type determined by the caller | ||
| // \param ZeCommandList the handle to associate with the newly created event | ||
| inline static pi_result | ||
| createEventAndAssociateQueue(pi_queue Queue, pi_event *Event, | ||
| pi_command_type CommandType, | ||
| ze_command_list_handle_t ZeCommandList) { | ||
| pi_result Res = piEventCreate(Queue->Context, Event); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = CommandType; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
|
|
||
| // We need to increment the reference counter here to avoid pi_queue | ||
| // being released before the associated pi_event is released because | ||
| // piEventRelease requires access to the associated pi_queue. | ||
| // In piEventRelease, the reference counter of the Queue is decremented | ||
| // to release it. | ||
| piQueueRetainNoLock(Queue); | ||
| return PI_SUCCESS; | ||
| } | ||
|
|
||
| pi_result _pi_device::initialize() { | ||
| uint32_t numQueueGroups = 0; | ||
| ZE_CALL(zeDeviceGetCommandQueueGroupProperties(ZeDevice, &numQueueGroups, | ||
|
|
@@ -1988,34 +2022,44 @@ pi_result piQueueRetain(pi_queue Queue) { | |
| // Lock automatically releases when this goes out of scope. | ||
| std::lock_guard<std::mutex> lock(Queue->PiQueueMutex); | ||
|
|
||
| ++(Queue->RefCount); | ||
| piQueueRetainNoLock(Queue); | ||
| return PI_SUCCESS; | ||
| } | ||
|
|
||
| pi_result piQueueRelease(pi_queue Queue) { | ||
| PI_ASSERT(Queue, PI_INVALID_QUEUE); | ||
| // We need to use a bool variable here to check the condition that | ||
| // RefCount becomes zero atomically with PiQueueMutex lock. | ||
| // Then, we can release the lock before we remove the Queue below. | ||
| bool RefCountZero = false; | ||
| { | ||
| std::lock_guard<std::mutex> Lock(Queue->PiQueueMutex); | ||
| Queue->RefCount--; | ||
| if (Queue->RefCount == 0) | ||
| RefCountZero = true; | ||
|
|
||
| if (RefCountZero) { | ||
| // It is possible to get to here and still have an open command list | ||
| // if no wait or finish ever occurred for this queue. But still need | ||
| // // TODO: o make sure commands get executed. | ||
| if (auto Res = Queue->executeOpenCommandList()) | ||
| return Res; | ||
|
|
||
| // Lock automatically releases when this goes out of scope. | ||
| std::lock_guard<std::mutex> lock(Queue->PiQueueMutex); | ||
|
|
||
| if (--(Queue->RefCount) == 0) { | ||
| // It is possible to get to here and still have an open command list | ||
| // if no wait or finish ever occurred for this queue. But still need | ||
| // to make sure commands get executed. | ||
| if (auto Res = Queue->executeOpenCommandList()) | ||
| return Res; | ||
| // Destroy all the fences created associated with this queue. | ||
| for (const auto &MapEntry : Queue->ZeCommandListFenceMap) { | ||
| ZE_CALL(zeFenceDestroy(MapEntry.second)); | ||
| } | ||
| Queue->ZeCommandListFenceMap.clear(); | ||
| ZE_CALL(zeCommandQueueDestroy(Queue->ZeCommandQueue)); | ||
| Queue->ZeCommandQueue = nullptr; | ||
|
|
||
| // Destroy all the fences created associated with this queue. | ||
| for (const auto &MapEntry : Queue->ZeCommandListFenceMap) { | ||
| ZE_CALL(zeFenceDestroy(MapEntry.second)); | ||
| zePrint("piQueueRelease NumTimesClosedFull %d, NumTimesClosedEarly %d\n", | ||
| Queue->NumTimesClosedFull, Queue->NumTimesClosedEarly); | ||
| } | ||
| Queue->ZeCommandListFenceMap.clear(); | ||
| ZE_CALL(zeCommandQueueDestroy(Queue->ZeCommandQueue)); | ||
| Queue->ZeCommandQueue = nullptr; | ||
|
|
||
| zePrint("piQueueRelease NumTimesClosedFull %d, NumTimesClosedEarly %d\n", | ||
| Queue->NumTimesClosedFull, Queue->NumTimesClosedEarly); | ||
| } | ||
|
|
||
| if (RefCountZero) | ||
| delete Queue; | ||
| return PI_SUCCESS; | ||
| } | ||
|
|
||
|
|
@@ -3397,13 +3441,11 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, | |
| return Res; | ||
|
|
||
| ze_event_handle_t ZeEvent = nullptr; | ||
| auto Res = piEventCreate(Kernel->Program->Context, Event); | ||
| pi_result Res = createEventAndAssociateQueue( | ||
| Queue, Event, PI_COMMAND_TYPE_NDRANGE_KERNEL, ZeCommandList); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = PI_COMMAND_TYPE_NDRANGE_KERNEL; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
| ZeEvent = (*Event)->ZeEvent; | ||
|
|
||
| // Save the kernel in the event, so that when the event is signalled | ||
| // the code can do a piKernelRelease on this kernel. | ||
|
|
@@ -3416,8 +3458,6 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, | |
| // in CommandData. | ||
| piKernelRetain(Kernel); | ||
|
|
||
| ZeEvent = (*Event)->ZeEvent; | ||
|
|
||
| ze_event_handle_t *ZeEventWaitList = | ||
| _pi_event::createZeEventList(NumEventsInWaitList, EventWaitList); | ||
| if (!ZeEventWaitList) | ||
|
|
@@ -3682,6 +3722,11 @@ pi_result piEventRelease(pi_event Event) { | |
| auto Context = Event->Context; | ||
| ZE_CALL(Context->decrementAliveEventsInPool(Event->ZeEventPool)); | ||
|
|
||
| // We intentionally incremented the reference counter when an event is | ||
| // created so that we can avoid pi_queue is released before the associated | ||
| // pi_event is released. Here we have to decrement it so pi_queue | ||
| // can be released successfully. | ||
| piQueueRelease(Event->Queue); | ||
| delete Event; | ||
| } | ||
| return PI_SUCCESS; | ||
|
|
@@ -3871,14 +3916,10 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, | |
|
|
||
| ze_event_handle_t ZeEvent = nullptr; | ||
| if (Event) { | ||
| auto Res = piEventCreate(Queue->Context, Event); | ||
| auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER, | ||
| ZeCommandList); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = PI_COMMAND_TYPE_USER; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
|
|
||
| ZeEvent = (*Event)->ZeEvent; | ||
| } | ||
|
|
||
|
|
@@ -3945,7 +3986,7 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst, | |
| pi_bool BlockingWrite, size_t Size, const void *Src, | ||
| pi_uint32 NumEventsInWaitList, | ||
| const pi_event *EventWaitList, pi_event *Event) { | ||
|
|
||
| PI_ASSERT(Queue, PI_INVALID_QUEUE); | ||
| // Get a new command list to be used on this call | ||
| ze_command_list_handle_t ZeCommandList = nullptr; | ||
| ze_fence_handle_t ZeFence = nullptr; | ||
|
|
@@ -3955,14 +3996,10 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst, | |
|
|
||
| ze_event_handle_t ZeEvent = nullptr; | ||
| if (Event) { | ||
| auto Res = piEventCreate(Queue->Context, Event); | ||
| auto Res = | ||
| createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = CommandType; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
|
|
||
| ZeEvent = (*Event)->ZeEvent; | ||
| } | ||
|
|
||
|
|
@@ -4005,7 +4042,7 @@ static pi_result enqueueMemCopyRectHelper( | |
| size_t DstSlicePitch, pi_bool Blocking, pi_uint32 NumEventsInWaitList, | ||
| const pi_event *EventWaitList, pi_event *Event) { | ||
|
|
||
| PI_ASSERT(Region && SrcOrigin && DstOrigin, PI_INVALID_VALUE); | ||
| PI_ASSERT(Region && SrcOrigin && DstOrigin && Queue, PI_INVALID_VALUE); | ||
|
|
||
| // Get a new command list to be used on this call | ||
| ze_command_list_handle_t ZeCommandList = nullptr; | ||
|
|
@@ -4016,14 +4053,10 @@ static pi_result enqueueMemCopyRectHelper( | |
|
|
||
| ze_event_handle_t ZeEvent = nullptr; | ||
| if (Event) { | ||
| auto Res = piEventCreate(Queue->Context, Event); | ||
| auto Res = | ||
| createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = CommandType; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
|
|
||
| ZeEvent = (*Event)->ZeEvent; | ||
| } | ||
|
|
||
|
|
@@ -4188,7 +4221,7 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr, | |
| const void *Pattern, size_t PatternSize, size_t Size, | ||
| pi_uint32 NumEventsInWaitList, | ||
| const pi_event *EventWaitList, pi_event *Event) { | ||
|
|
||
| PI_ASSERT(Queue, PI_INVALID_QUEUE); | ||
| // Get a new command list to be used on this call | ||
| ze_command_list_handle_t ZeCommandList = nullptr; | ||
| ze_fence_handle_t ZeFence = nullptr; | ||
|
|
@@ -4198,14 +4231,10 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr, | |
|
|
||
| ze_event_handle_t ZeEvent = nullptr; | ||
| if (Event) { | ||
| auto Res = piEventCreate(Queue->Context, Event); | ||
| auto Res = | ||
| createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = CommandType; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
|
|
||
| ZeEvent = (*Event)->ZeEvent; | ||
| } | ||
|
|
||
|
|
@@ -4282,15 +4311,14 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer, | |
| ze_fence_handle_t ZeFence = nullptr; | ||
| ze_event_handle_t ZeEvent = nullptr; | ||
|
|
||
| // Lock automatically releases when this goes out of scope. | ||
| std::lock_guard<std::mutex> lock(Queue->PiQueueMutex); | ||
|
|
||
| if (Event) { | ||
| auto Res = piEventCreate(Queue->Context, Event); | ||
| auto Res = createEventAndAssociateQueue( | ||
| Queue, Event, PI_COMMAND_TYPE_MEM_BUFFER_MAP, ZeCommandList); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = PI_COMMAND_TYPE_MEM_BUFFER_MAP; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
|
|
||
| ZeEvent = (*Event)->ZeEvent; | ||
| } | ||
|
|
||
|
|
@@ -4324,9 +4352,6 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer, | |
| return Buffer->addMapping(*RetMap, Offset, Size); | ||
| } | ||
|
|
||
| // Lock automatically releases when this goes out of scope. | ||
| std::lock_guard<std::mutex> lock(Queue->PiQueueMutex); | ||
|
|
||
| // For discrete devices we need a command list | ||
| if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList, | ||
| &ZeFence)) | ||
|
|
@@ -4380,15 +4405,15 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr, | |
| PI_ASSERT(Event, PI_INVALID_EVENT); | ||
|
|
||
| ze_event_handle_t ZeEvent = nullptr; | ||
|
|
||
| // Lock automatically releases when this goes out of scope. | ||
| std::lock_guard<std::mutex> lock(Queue->PiQueueMutex); | ||
|
|
||
|
||
| if (Event) { | ||
| auto Res = piEventCreate(Queue->Context, Event); | ||
| auto Res = createEventAndAssociateQueue( | ||
| Queue, Event, PI_COMMAND_TYPE_MEM_BUFFER_UNMAP, ZeCommandList); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = PI_COMMAND_TYPE_MEM_BUFFER_UNMAP; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
|
|
||
| ZeEvent = (*Event)->ZeEvent; | ||
| } | ||
|
|
||
|
|
@@ -4420,9 +4445,6 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr, | |
| return PI_SUCCESS; | ||
| } | ||
|
|
||
| // Lock automatically releases when this goes out of scope. | ||
| std::lock_guard<std::mutex> lock(Queue->PiQueueMutex); | ||
|
|
||
| if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList, | ||
| &ZeFence)) | ||
| return Res; | ||
|
|
@@ -4521,7 +4543,7 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, | |
| size_t RowPitch, size_t SlicePitch, | ||
| pi_uint32 NumEventsInWaitList, | ||
| const pi_event *EventWaitList, pi_event *Event) { | ||
|
|
||
| PI_ASSERT(Queue, PI_INVALID_QUEUE); | ||
| // Get a new command list to be used on this call | ||
| ze_command_list_handle_t ZeCommandList = nullptr; | ||
| ze_fence_handle_t ZeFence = nullptr; | ||
|
|
@@ -4531,14 +4553,10 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, | |
|
|
||
| ze_event_handle_t ZeEvent = nullptr; | ||
| if (Event) { | ||
| auto Res = piEventCreate(Queue->Context, Event); | ||
| auto Res = | ||
| createEventAndAssociateQueue(Queue, Event, CommandType, ZeCommandList); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = CommandType; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
|
|
||
| ZeEvent = (*Event)->ZeEvent; | ||
| } | ||
|
|
||
|
|
@@ -5138,14 +5156,10 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, | |
| // TODO: do we need to create a unique command type for this? | ||
| ze_event_handle_t ZeEvent = nullptr; | ||
| if (Event) { | ||
| auto Res = piEventCreate(Queue->Context, Event); | ||
| auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER, | ||
| ZeCommandList); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = PI_COMMAND_TYPE_USER; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
|
|
||
| ZeEvent = (*Event)->ZeEvent; | ||
| } | ||
|
|
||
|
|
@@ -5198,14 +5212,10 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, | |
| // TODO: do we need to create a unique command type for this? | ||
| ze_event_handle_t ZeEvent = nullptr; | ||
| if (Event) { | ||
| auto Res = piEventCreate(Queue->Context, Event); | ||
| auto Res = createEventAndAssociateQueue(Queue, Event, PI_COMMAND_TYPE_USER, | ||
| ZeCommandList); | ||
| if (Res != PI_SUCCESS) | ||
| return Res; | ||
|
|
||
| (*Event)->Queue = Queue; | ||
| (*Event)->CommandType = PI_COMMAND_TYPE_USER; | ||
| (*Event)->ZeCommandList = ZeCommandList; | ||
|
|
||
| ZeEvent = (*Event)->ZeEvent; | ||
| } | ||
|
|
||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't move this up because piEventsWait is trying to acquire the lock. So reverting this change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you can still have this lock specially for createEventAndAssociateQueue, before line 4318
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that's what did. I moved it inside the { } so the lock_guard is only used for createEventAndAssociateQueue. There is a separate lock_guard in the original line below.