Skip to content

Commit 463336e

Browse files
author
Sergey Kanaev
committed
[SYCL] Fix possible failure when enqueuing only a single host-task
Signed-off-by: Sergey Kanaev <[email protected]>
1 parent 358ec04 commit 463336e

File tree

2 files changed

+84
-9
lines changed

2 files changed

+84
-9
lines changed

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -156,18 +156,33 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
156156
}
157157

158158
void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
  // Record describing all commands/allocations attached to MemObj; looked up
  // under the graph lock below and then used across the later phases.
  MemObjRecord *Record = nullptr;
  std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);

  // Phase 1: exclusive lock only long enough to fetch the record.
  {
    lockSharedTimedMutex(Lock);

    Record = MGraphBuilder.getMemObjRecord(MemObj);
    if (!Record)
      // No operations were performed on the mem object
      return;

    Lock.unlock();
  }

  // Phase 2: wait for outstanding commands to finish. A shared (reader) lock
  // suffices here since this only enqueues and awaits events; holding the
  // exclusive lock while waiting could deadlock with host-task completion.
  // NOTE(review): Record is used after the exclusive lock was dropped —
  // presumably no concurrent removal of the same record can race here; verify.
  {
    std::shared_lock<std::shared_timed_mutex> SharedLock(MGraphLock);
    waitForRecordToFinish(Record);
  }

  // Phase 3: re-acquire the exclusive lock to tear down the record and
  // everything attached to it.
  {
    lockSharedTimedMutex(Lock);
    MGraphBuilder.decrementLeafCountersForRecord(Record);
    MGraphBuilder.cleanupCommandsForRecord(Record);
    MGraphBuilder.removeRecordForMemObj(MemObj);
  }
}
172187

173188
EventImplPtr Scheduler::addHostAccessor(Requirement *Req) {
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
5+
6+
#include <CL/sycl.hpp>
7+
#include <CL/sycl/backend/opencl.hpp>
8+
#include <CL/sycl/detail/cl.h>
9+
10+
using namespace cl::sycl;
11+
using namespace cl::sycl::access;
12+
13+
static constexpr size_t BUFFER_SIZE = 1024;
14+
15+
template <typename T>
16+
class Modifier;
17+
18+
template <typename T>
19+
class Init;
20+
21+
template <typename DataT>
22+
void copy(buffer<DataT, 1> &Src, buffer<DataT, 1> &Dst, queue &Q) {
23+
Q.submit([&](handler &CGH) {
24+
auto SrcA = Src.template get_access<mode::read>(CGH);
25+
auto DstA = Dst.template get_access<mode::write>(CGH);
26+
27+
CGH.codeplay_host_task([=]() {
28+
for (size_t Idx = 0; Idx < SrcA.get_count(); ++Idx)
29+
DstA[Idx] = SrcA[Idx];
30+
});
31+
});
32+
}
33+
34+
template <typename DataT>
35+
void init(buffer<DataT, 1> &B1, buffer<DataT, 1> &B2, queue &Q) {
36+
Q.submit([&](handler &CGH) {
37+
auto Acc1 = B1.template get_access<mode::write>(CGH);
38+
auto Acc2 = B2.template get_access<mode::write>(CGH);
39+
40+
CGH.parallel_for<Init<DataT>>(BUFFER_SIZE, [=](item<1> Id) {
41+
Acc1[Id] = -1;
42+
Acc2[Id] = -2;
43+
});
44+
});
45+
}
46+
47+
void test() {
48+
queue Q;
49+
buffer<int, 1> Buffer1{BUFFER_SIZE};
50+
buffer<int, 1> Buffer2{BUFFER_SIZE};
51+
52+
init<int>(Buffer1, Buffer2, Q);
53+
54+
copy(Buffer1, Buffer2, Q);
55+
}
56+
57+
int main() {
  test();
  // Implicit return 0 from main per the C++ standard.
}

0 commit comments

Comments
 (0)