Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify execution graph via analysis of dependencies #15724

Draft
wants to merge 10 commits into
base: sycl
Choose a base branch
from
7 changes: 7 additions & 0 deletions sycl/source/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,13 @@ class Command {
return MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess;
}

bool isCleanupSubject() const {
return MLeafCounter && // if already no leaves, can't be duplicate
MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess &&
// host task may be available by ThreadPool, can't cleanup it up
!isHostTask();
}

// Shows that command could not be enqueued, now it may be true for empty task
// only
bool isEnqueueBlocked() const {
Expand Down
156 changes: 114 additions & 42 deletions sycl/source/detail/scheduler/graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,28 @@ Scheduler::GraphBuilder::GraphBuilder() {
EnableAlways)
MPrintOptionsArray[AfterAddHostAcc] = true;
}

MAllocateDependency = [this](Command *Dependant, Command *Dependency,
const MemObjRecord *Record,
LeavesCollection::EnqueueListT &ToEnqueue) {
// Add the old leaf as a dependency for the new one by duplicating one
// of the requirements for the current record
DepDesc Dep = findDepForRecord(Dependant, Record);
Dep.MDepCommand = Dependency;
std::vector<Command *> ToCleanUp;
Command *ConnectionCmd = Dependant->addDep(Dep, ToCleanUp);
if (ConnectionCmd)
ToEnqueue.push_back(ConnectionCmd);

unsigned old = Dependency->MLeafCounter;
--(Dependency->MLeafCounter);
assert(old > Dependency->MLeafCounter &&
"Leaf counter should be decremented");
if (Dependency->readyForCleanup())
ToCleanUp.push_back(Dependency);
for (Command *Cmd : ToCleanUp)
cleanupCommand(Cmd);
};
}

static bool markNodeAsVisited(Command *Cmd, std::vector<Command *> &Visited) {
Expand Down Expand Up @@ -188,24 +210,6 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
return Record;

const size_t LeafLimit = 8;
LeavesCollection::AllocateDependencyF AllocateDependency =
[this](Command *Dependant, Command *Dependency, MemObjRecord *Record,
LeavesCollection::EnqueueListT &ToEnqueue) {
// Add the old leaf as a dependency for the new one by duplicating one
// of the requirements for the current record
DepDesc Dep = findDepForRecord(Dependant, Record);
Dep.MDepCommand = Dependency;
std::vector<Command *> ToCleanUp;
Command *ConnectionCmd = Dependant->addDep(Dep, ToCleanUp);
if (ConnectionCmd)
ToEnqueue.push_back(ConnectionCmd);

--(Dependency->MLeafCounter);
if (Dependency->readyForCleanup())
ToCleanUp.push_back(Dependency);
for (Command *Cmd : ToCleanUp)
cleanupCommand(Cmd);
};

const ContextImplPtr &InteropCtxPtr = Req->MSYCLMemObj->getInteropContext();
if (InteropCtxPtr) {
Expand All @@ -225,7 +229,7 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
Dev, InteropCtxPtr, /*AsyncHandler=*/{}, /*PropertyList=*/{}}};

MemObject->MRecord.reset(
new MemObjRecord{InteropCtxPtr, LeafLimit, AllocateDependency});
new MemObjRecord{InteropCtxPtr, LeafLimit, MAllocateDependency});
std::vector<Command *> ToEnqueue;
getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr,
ToEnqueue);
Expand All @@ -234,28 +238,39 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
"alloca or exceeding the leaf limit).");
} else
MemObject->MRecord.reset(new MemObjRecord{queue_impl::getContext(Queue),
LeafLimit, AllocateDependency});
LeafLimit, MAllocateDependency});

MMemObjs.push_back(MemObject);
return MemObject->MRecord.get();
}

void Scheduler::GraphBuilder::updateLeaves(const std::set<Command *> &Cmds,
MemObjRecord *Record,
access::mode AccessMode,
std::vector<Command *> &ToCleanUp) {
void Scheduler::GraphBuilder::updateLeaves(
Command *NewCmd, const std::set<Command *> &Cmds, MemObjRecord *Record,
access::mode AccessMode, const MapOfDependentCmds &DependentCmdsOfNewCmd,
const QueueImplPtr &Queue, std::vector<Command *> &ToCleanUp,
std::vector<Command *> &ToEnqueue) {

const bool ReadOnlyReq = AccessMode == access::mode::read;
if (ReadOnlyReq)
return;

for (Command *Cmd : Cmds) {
bool WasLeaf = Cmd->MLeafCounter > 0;
Cmd->MLeafCounter -= Record->MReadLeaves.remove(Cmd);
Cmd->MLeafCounter -= Record->MWriteLeaves.remove(Cmd);
if (WasLeaf && Cmd->readyForCleanup()) {
ToCleanUp.push_back(Cmd);
if (!ReadOnlyReq) {
bool WasLeaf = Cmd->MLeafCounter > 0;
Cmd->MLeafCounter -= Record->MReadLeaves.remove(Cmd);
Cmd->MLeafCounter -= Record->MWriteLeaves.remove(Cmd);
if (WasLeaf && Cmd->readyForCleanup()) {
ToCleanUp.push_back(Cmd);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to capture what we discussed here. Instead of adding a new condition where we send commands to cleanup, I think a better solution would be excluding such dependencies from leaves (where both the newest node and its dependency are leaves of the same memory object) and let the usual cleanup path handle it. That can be done by adding a redundant dependency to the graph, the same way we're keeping graph width under control by imposing a leaf limit on each memory object.

That approach would also cover more cases: it's not needed that every dependency of the old command is covered by the new one for us to be able to clean it up. We should only care about how many memory objects the old command is a leaf for, other dependencies shouldn't be relevant.

}
}
if (detectDuplicates(Cmd, DependentCmdsOfNewCmd))
commandToCleanup(NewCmd, Cmd, ToEnqueue);
#if 1
// CGType::CopyAccToAcc implementation requires that dependent command is
// not a cleanup subject. So disable this code for now.

// For in-order queue, we may cleanup all dependent command from our queue
if (Queue && Queue->isInOrder() && Cmd->getQueue() == Queue &&
Cmd->isCleanupSubject())
commandToCleanup(NewCmd, Cmd, ToEnqueue);
#endif
}
}

Expand All @@ -269,6 +284,32 @@ void Scheduler::GraphBuilder::addNodeToLeaves(
++Cmd->MLeafCounter;
}

bool Scheduler::GraphBuilder::detectDuplicates(
Command *DepCommand, const MapOfDependentCmds &DependentCmdsOfNewCmd) {
if (!DepCommand->isCleanupSubject())
return false;
// any dependence of DepCommand already covered by NewCmd
return std::all_of(
DepCommand->MDeps.begin(), DepCommand->MDeps.end(),
[&DependentCmdsOfNewCmd](const DepDesc &DepOfDep) {
return DependentCmdsOfNewCmd.isMemObjExist(
std::make_pair(DepOfDep.MDepRequirement->MSYCLMemObj,
DepOfDep.MDepRequirement->MAccessMode));
});
}

void Scheduler::GraphBuilder::commandToCleanup(
Command *NewCmd, Command *DepCommand, std::vector<Command *> &ToEnqueue) {
for (const DepDesc &DepOfDep : DepCommand->MDeps) {
MemObjRecord *Record =
getMemObjRecord(DepOfDep.MDepRequirement->MSYCLMemObj);
if (Record->MReadLeaves.remove(DepCommand))
MAllocateDependency(NewCmd, DepCommand, Record, ToEnqueue);
if (Record->MWriteLeaves.remove(DepCommand))
MAllocateDependency(NewCmd, DepCommand, Record, ToEnqueue);
}
}

UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
std::vector<Command *> &ToEnqueue) {
Expand All @@ -289,7 +330,9 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
if (ConnCmd)
ToEnqueue.push_back(ConnCmd);
}
updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp);
const MapOfDependentCmds DependentCmdsOfNewCmd(UpdateCommand->MDeps);
updateLeaves(UpdateCommand, Deps, Record, Req->MAccessMode,
DependentCmdsOfNewCmd, Queue, ToCleanUp, ToEnqueue);
addNodeToLeaves(Record, UpdateCommand, Req->MAccessMode, ToEnqueue);
for (Command *Cmd : ToCleanUp)
cleanupCommand(Cmd);
Expand Down Expand Up @@ -404,7 +447,9 @@ Command *Scheduler::GraphBuilder::insertMemoryMove(
if (ConnCmd)
ToEnqueue.push_back(ConnCmd);
}
updateLeaves(Deps, Record, access::mode::read_write, ToCleanUp);
const MapOfDependentCmds DependentCmdsOfNewCmd(NewCmd->MDeps);
updateLeaves(NewCmd, Deps, Record, access::mode::read_write,
DependentCmdsOfNewCmd, Queue, ToCleanUp, ToEnqueue);
addNodeToLeaves(Record, NewCmd, access::mode::read_write, ToEnqueue);
for (Command *Cmd : ToCleanUp)
cleanupCommand(Cmd);
Expand Down Expand Up @@ -448,7 +493,10 @@ Command *Scheduler::GraphBuilder::remapMemoryObject(
if (ConnCmd)
ToEnqueue.push_back(ConnCmd);

updateLeaves(Deps, Record, access::mode::read_write, ToCleanUp);
const MapOfDependentCmds DependentCmdsOfNewCmd(UnMapCmd->MDeps);
updateLeaves(UnMapCmd, Deps, Record, access::mode::read_write,
DependentCmdsOfNewCmd, LinkedAllocaCmd->getQueue(), ToCleanUp,
ToEnqueue);
addNodeToLeaves(Record, MapCmd, access::mode::read_write, ToEnqueue);
for (Command *Cmd : ToCleanUp)
cleanupCommand(Cmd);
Expand Down Expand Up @@ -492,7 +540,9 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req,
ToEnqueue.push_back(ConnCmd);
}

updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp);
const MapOfDependentCmds DependentCmdsOfNewCmd(MemCpyCmd->MDeps);
updateLeaves(MemCpyCmd, Deps, Record, Req->MAccessMode, DependentCmdsOfNewCmd,
SrcAllocaCmd->getQueue(), ToCleanUp, ToEnqueue);
addNodeToLeaves(Record, MemCpyCmd, Req->MAccessMode, ToEnqueue);
for (Command *Cmd : ToCleanUp)
cleanupCommand(Cmd);
Expand Down Expand Up @@ -627,7 +677,7 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record,
// A helper function for finding a command dependency on a specific memory
// object
DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd,
MemObjRecord *Record) {
const MemObjRecord *Record) {
for (const DepDesc &DD : Cmd->MDeps) {
if (getMemObjRecord(DD.MDepRequirement->MSYCLMemObj) == Record) {
return DD;
Expand Down Expand Up @@ -817,7 +867,9 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
if (ConnCmd)
ToEnqueue.push_back(ConnCmd);
}
updateLeaves(Deps, Record, Req->MAccessMode, ToCleanUp);
const MapOfDependentCmds DependentCmdsOfNewCmd(AllocaCmd->MDeps);
updateLeaves(AllocaCmd, Deps, Record, Req->MAccessMode,
DependentCmdsOfNewCmd, Queue, ToCleanUp, ToEnqueue);
addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode, ToEnqueue);
}
}
Expand Down Expand Up @@ -874,11 +926,13 @@ EmptyCommand *Scheduler::GraphBuilder::addEmptyCmd(

const std::vector<DepDesc> &Deps = Cmd->MDeps;
std::vector<Command *> ToCleanUp;
const MapOfDependentCmds DependentCmdsOfNewCmd(EmptyCmd->MDeps);
for (const DepDesc &Dep : Deps) {
const Requirement *Req = Dep.MDepRequirement;
MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);

updateLeaves({Cmd}, Record, Req->MAccessMode, ToCleanUp);
updateLeaves(EmptyCmd, {Cmd}, Record, Req->MAccessMode,
DependentCmdsOfNewCmd, nullptr, ToCleanUp, ToEnqueue);
addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue);
}
for (Command *Cmd : ToCleanUp)
Expand Down Expand Up @@ -1015,18 +1069,28 @@ Command *Scheduler::GraphBuilder::addCG(
// Node dependencies can be modified further when adding the node to leaves,
// iterate over their copy.
// FIXME employ a reference here to eliminate copying of a vector

const MapOfDependentCmds DependentCmdsOfNewCmd(NewCmd->MDeps);

std::vector<DepDesc> Deps = NewCmd->MDeps;
for (DepDesc &Dep : Deps) {
const Requirement *Req = Dep.MDepRequirement;
MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);
updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode, ToCleanUp);
updateLeaves(NewCmd.get(), {Dep.MDepCommand}, Record, Req->MAccessMode,
DependentCmdsOfNewCmd, Queue, ToCleanUp, ToEnqueue);
addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode, ToEnqueue);
}

// Register all the events as dependencies
for (detail::EventImplPtr e : Events) {
for (const detail::EventImplPtr &e : Events) {
if (Command *ConnCmd = NewCmd->addDep(e, ToCleanUp))
ToEnqueue.push_back(ConnCmd);

// If NewCmd depends on another command, and all dependences of that command
// already covered by NewCmd, can move the command in ToCleanUp
if (auto *Cmd = static_cast<Command *>(e->getCommand()))
if (detectDuplicates(Cmd, DependentCmdsOfNewCmd))
commandToCleanup(NewCmd.get(), Cmd, ToEnqueue);
}

if (MPrintOptionsArray[AfterAddCG])
Expand Down Expand Up @@ -1325,6 +1389,7 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate(
}
}

const MapOfDependentCmds DependentCmdsOfNewCmd(NewCmd->MDeps);
// Set new command as user for dependencies and update leaves.
// Node dependencies can be modified further when adding the node to leaves,
// iterate over their copy.
Expand All @@ -1333,7 +1398,8 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate(
for (DepDesc &Dep : Deps) {
const Requirement *Req = Dep.MDepRequirement;
MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj);
updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode, ToCleanUp);
updateLeaves(NewCmd.get(), {Dep.MDepCommand}, Record, Req->MAccessMode,
DependentCmdsOfNewCmd, Queue, ToCleanUp, ToEnqueue);
addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode, ToEnqueue);
}

Expand All @@ -1345,6 +1411,12 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate(
}
if (Command *ConnCmd = NewCmd->addDep(e, ToCleanUp))
ToEnqueue.push_back(ConnCmd);

// If NewCmd depends on another command, and all dependences of that command
// already covered by NewCmd, can move the cmd in ToCleanUp
if (auto *Cmd = static_cast<Command *>(e->getCommand()))
if (detectDuplicates(Cmd, DependentCmdsOfNewCmd))
commandToCleanup(NewCmd.get(), Cmd, ToEnqueue);
}

if (MPrintOptionsArray[AfterAddCG])
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/scheduler/leaves_collection.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ class LeavesCollection {
using EnqueueListT = std::vector<Command *>;

// Make first command depend on the second
using AllocateDependencyF =
std::function<void(Command *, Command *, MemObjRecord *, EnqueueListT &)>;
using AllocateDependencyF = std::function<void(
Command *, Command *, const MemObjRecord *, EnqueueListT &)>;

template <bool IsConst> class IteratorT;

Expand Down
Loading
Loading