From 90b8a3b8f1095e4762ffc2f6b965c9d6e698fb00 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 14 Nov 2025 12:50:34 -0800 Subject: [PATCH 1/8] work --- src/passes/DeadArgumentElimination.cpp | 35 +++++++++++--------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/passes/DeadArgumentElimination.cpp b/src/passes/DeadArgumentElimination.cpp index 605b69eeb86..c73b4e7f06a 100644 --- a/src/passes/DeadArgumentElimination.cpp +++ b/src/passes/DeadArgumentElimination.cpp @@ -240,26 +240,21 @@ struct DAE : public Pass { scanner.walkModuleCode(module); // Scan all the functions. scanner.run(getPassRunner(), module); - // Combine all the info. - struct CallContext { - Call* call; - Function* func; - }; + // Combine all the info from the scan. std::vector> allCalls(numFunctions); std::vector tailCallees(numFunctions); std::vector hasUnseenCalls(numFunctions); - // Track the function in which relevant expressions exist. When we modify - // those expressions we will need to mark the function's info as stale. - std::unordered_map expressionFuncs; + // For each function, the set of callers. + std::vector> callers; + for (auto& [func, info] : infoMap) { for (auto& [name, calls] : info.calls) { - auto& allCallsToName = allCalls[indexes[name]]; + auto targetIndex = indexes[name]; + auto& allCallsToName = allCalls[targetIndex]; allCallsToName.insert(allCallsToName.end(), calls.begin(), calls.end()); - for (auto* call : calls) { - expressionFuncs[call] = func; - } + callers[targetIndex].insert(func); } for (auto& callee : info.tailCallees) { tailCallees[indexes[callee]] = true; @@ -305,9 +300,9 @@ struct DAE : public Pass { assert(func.is()); infoMap[func].markStale(); }; - auto markCallersStale = [&](const std::vector& calls) { - for (auto* call : calls) { - markStale(expressionFuncs[call]); + auto markCallersStale = [&](Index index) { + for (auto caller : callers[index]) { + markStale(caller); } }; @@ -339,7 +334,7 @@ struct DAE : public Pass { if (refineReturnTypes(func, calls, module)) { refinedReturnTypes = true; markStale(name); - markCallersStale(calls); + markCallersStale(index); } auto optimizedIndexes = ParamUtils::applyConstantValues({func}, calls, {}, module); @@ -382,7 +377,7 @@ struct DAE : public Pass { // Success! worthOptimizing.insert(func); markStale(name); - markCallersStale(calls); + markCallersStale(index); } if (outcome == ParamUtils::RemovalOutcome::Failure) { callTargetsToLocalize.insert(name); @@ -424,15 +419,15 @@ struct DAE : public Pass { } if (removeReturnValue(func.get(), calls, module)) { // We should optimize the callers. - for (auto* call : calls) { - worthOptimizing.insert(module->getFunction(expressionFuncs[call])); + for (auto caller : callers[index]) { + worthOptimizing.insert(module->getFunction(caller)); } } // TODO Removing a drop may also open optimization opportunities in the // callers. worthOptimizing.insert(func.get()); markStale(name); - markCallersStale(calls); + markCallersStale(index); } } if (!callTargetsToLocalize.empty()) { From 54f34950eebdc5d0b474c7dfe8885bb24533cdcb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 14 Nov 2025 12:55:39 -0800 Subject: [PATCH 2/8] fix --- src/passes/DeadArgumentElimination.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/DeadArgumentElimination.cpp b/src/passes/DeadArgumentElimination.cpp index c73b4e7f06a..3de03a4ffe4 100644 --- a/src/passes/DeadArgumentElimination.cpp +++ b/src/passes/DeadArgumentElimination.cpp @@ -247,7 +247,7 @@ struct DAE : public Pass { std::vector hasUnseenCalls(numFunctions); // For each function, the set of callers. - std::vector> callers; + std::vector> callers(numFunctions); for (auto& [func, info] : infoMap) { for (auto& [name, calls] : info.calls) { From a74631136bae92e711a096664185ba55b089963d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 14 Nov 2025 17:00:00 -0800 Subject: [PATCH 3/8] mor --- src/passes/DeadArgumentElimination.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/passes/DeadArgumentElimination.cpp b/src/passes/DeadArgumentElimination.cpp index 3de03a4ffe4..ec3c8d3b9a0 100644 --- a/src/passes/DeadArgumentElimination.cpp +++ b/src/passes/DeadArgumentElimination.cpp @@ -218,6 +218,14 @@ struct DAE : public Pass { } } + // For each function, the set of callers. This is somewhat expensive to + // compute, so we don't do it in every iteration. Rarely, a call may vanish + // (due to applying a constant param that then lets the optimizer remove it), + // and in such cases we are over-approximating the set of callers here, which + // can lead to a little wasted work, but this is still more efficient than + // computing callers precisely in each iteration. + std::vector> callers; + bool iteration(Module* module, DAEFunctionInfoMap& infoMap) { allDroppedCalls.clear(); @@ -246,15 +254,10 @@ struct DAE : public Pass { std::vector tailCallees(numFunctions); std::vector hasUnseenCalls(numFunctions); - // For each function, the set of callers. - std::vector> callers(numFunctions); - for (auto& [func, info] : infoMap) { for (auto& [name, calls] : info.calls) { - auto targetIndex = indexes[name]; - auto& allCallsToName = allCalls[targetIndex]; + auto& allCallsToName = allCalls[indexes[name]]; allCallsToName.insert(allCallsToName.end(), calls.begin(), calls.end()); - callers[targetIndex].insert(func); } for (auto& callee : info.tailCallees) { tailCallees[indexes[callee]] = true; @@ -273,6 +276,15 @@ struct DAE : public Pass { } } + if (callers.empty()) { + callers.resize(numFunctions); + for (auto& [func, info] : infoMap) { + for (auto& [name, calls] : info.calls) { + callers[indexes[name]].insert(func); + } + } + } + // Track which functions we changed that are worth re-optimizing at the end. std::unordered_set worthOptimizing; From 3369eb440f4531f335027c65f71ae26b1410fe3b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 17 Nov 2025 09:27:32 -0800 Subject: [PATCH 4/8] comment --- src/passes/DeadArgumentElimination.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/passes/DeadArgumentElimination.cpp b/src/passes/DeadArgumentElimination.cpp index 016436e500f..7fb0f9e65b8 100644 --- a/src/passes/DeadArgumentElimination.cpp +++ b/src/passes/DeadArgumentElimination.cpp @@ -218,12 +218,18 @@ struct DAE : public Pass { } } - // For each function, the set of callers. This is somewhat expensive to - // compute, so we don't do it in every iteration. Rarely, a call may vanish - // (due to applying a constant param that then lets the optimizer remove it), - // and in such cases we are over-approximating the set of callers here, which - // can lead to a little wasted work, but this is still more efficient than - // computing callers precisely in each iteration. + // For each function, the set of callers. This is used to propagate changes, + // e.g. if we remove a return value from a function, the calls might benefit + // from optimization. It is ok if this is an over-approximation, that is, if + // we think there are more callers than there are, as it would just lead to + // unneeded extra scanning of calling functions (in the example just given, if + // a caller did not actually call, they would not benefit from optimization, + // but no harm is done, and no optimization is missed). Such over- + // approximation can happen in later optimization iterations: We may manage to + // remove a call from a function to another (say, after applying a constant + // param, we see the call is not reached). This is somewhat rare, and the cost + // of computing this map is significant, so we compute it once at the start + // and then use that possibly-over-approximating data. std::vector> callers; bool iteration(Module* module, DAEFunctionInfoMap& infoMap) { @@ -276,9 +282,8 @@ struct DAE : public Pass { } } + // See comment above, we compute callers once and never again. if (callers.empty()) { - // This is faster, but confirm no regress... maybe recompute once if no other opts kick in, from outside? - //a ctually thisis fine... it only does more unneeded work at worst! comment up top. but test on more code, and with dae-opt too callers.resize(numFunctions); for (auto& [func, info] : infoMap) { for (auto& [name, calls] : info.calls) { From 927c81e6ab68bb06cfba38c81a74e128c54a22f3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 17 Nov 2025 11:18:46 -0800 Subject: [PATCH 5/8] Update src/passes/DeadArgumentElimination.cpp Co-authored-by: Thomas Lively --- src/passes/DeadArgumentElimination.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/DeadArgumentElimination.cpp b/src/passes/DeadArgumentElimination.cpp index 7fb0f9e65b8..7cb76bfa049 100644 --- a/src/passes/DeadArgumentElimination.cpp +++ b/src/passes/DeadArgumentElimination.cpp @@ -223,7 +223,7 @@ struct DAE : public Pass { // from optimization. It is ok if this is an over-approximation, that is, if // we think there are more callers than there are, as it would just lead to // unneeded extra scanning of calling functions (in the example just given, if - // a caller did not actually call, they would not benefit from optimization, + // a caller did not actually call, they would not benefit from the extra optimization, // but no harm is done, and no optimization is missed). Such over- // approximation can happen in later optimization iterations: We may manage to // remove a call from a function to another (say, after applying a constant From 7bdccd5bc6197f19e1acd37a3d43290b9166b013 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 17 Nov 2025 11:19:43 -0800 Subject: [PATCH 6/8] format --- src/passes/DeadArgumentElimination.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/passes/DeadArgumentElimination.cpp b/src/passes/DeadArgumentElimination.cpp index 7cb76bfa049..c730c697269 100644 --- a/src/passes/DeadArgumentElimination.cpp +++ b/src/passes/DeadArgumentElimination.cpp @@ -223,8 +223,8 @@ struct DAE : public Pass { // from optimization. It is ok if this is an over-approximation, that is, if // we think there are more callers than there are, as it would just lead to // unneeded extra scanning of calling functions (in the example just given, if - // a caller did not actually call, they would not benefit from the extra optimization, - // but no harm is done, and no optimization is missed). Such over- + // a caller did not actually call, they would not benefit from the extra + // optimization, but no harm is done, and no optimization missed). Such over- // approximation can happen in later optimization iterations: We may manage to // remove a call from a function to another (say, after applying a constant // param, we see the call is not reached). This is somewhat rare, and the cost From 48d6a918c92bcdd87242bc2d4622264917091644 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 17 Nov 2025 11:24:23 -0800 Subject: [PATCH 7/8] compute as vectors --- src/passes/DeadArgumentElimination.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/passes/DeadArgumentElimination.cpp b/src/passes/DeadArgumentElimination.cpp index c730c697269..12f2f08cf80 100644 --- a/src/passes/DeadArgumentElimination.cpp +++ b/src/passes/DeadArgumentElimination.cpp @@ -34,6 +34,7 @@ // watch for here). // +#include #include #include @@ -230,7 +231,7 @@ struct DAE : public Pass { // param, we see the call is not reached). This is somewhat rare, and the cost // of computing this map is significant, so we compute it once at the start // and then use that possibly-over-approximating data. - std::vector> callers; + std::vector> callers; bool iteration(Module* module, DAEFunctionInfoMap& infoMap) { allDroppedCalls.clear(); @@ -284,12 +285,19 @@ struct DAE : public Pass { // See comment above, we compute callers once and never again. if (callers.empty()) { - callers.resize(numFunctions); + // Compute first as sets, to deduplicate. + std::vector> callersSets(numFunctions); for (auto& [func, info] : infoMap) { for (auto& [name, calls] : info.calls) { - callers[indexes[name]].insert(func); + callersSets[indexes[name]].insert(func); } } + // Copy into efficient vectors. + callers.resize(numFunctions); + for (Index i = 0; i < numFunctions; ++i) { + auto& set = callersSets[i]; + std::copy(set.begin(), set.end(), std::back_inserter(callers[i])); + } } // Track which functions we changed that are worth re-optimizing at the end. From 0851b43661ca3ec6e32d62082474ef986543ef89 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 17 Nov 2025 15:20:06 -0800 Subject: [PATCH 8/8] Update src/passes/DeadArgumentElimination.cpp Co-authored-by: Thomas Lively --- src/passes/DeadArgumentElimination.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/DeadArgumentElimination.cpp b/src/passes/DeadArgumentElimination.cpp index 12f2f08cf80..42d8d87a6a9 100644 --- a/src/passes/DeadArgumentElimination.cpp +++ b/src/passes/DeadArgumentElimination.cpp @@ -296,7 +296,7 @@ struct DAE : public Pass { callers.resize(numFunctions); for (Index i = 0; i < numFunctions; ++i) { auto& set = callersSets[i]; - std::copy(set.begin(), set.end(), std::back_inserter(callers[i])); + callers[i] = std::vector(set.begin(), set.end()); } }