[CoroCleanup] Noop coroutine elision for load-and-call pattern#179154
[CoroCleanup] Noop coroutine elision for load-and-call pattern#179154
Conversation
|
@llvm/pr-subscribers-coroutines @llvm/pr-subscribers-llvm-transforms Author: Weibo He (NewSigma) ChangesWe assume that the resume/destroy functions of coroutines follow the void load_and_call() {
using Fn = void (*)(void *);
void *frame = __builtin_coro_noop();
Fn x = *reinterpret_cast<Fn *>(frame);
x(frame);
__builtin_printf("1\n");
}https://godbolt.org/z/5ovdEqa9f Full diff: https://github.com/llvm/llvm-project/pull/179154.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index 6b68cf5bc2c20..52386aa749bc9 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -8,6 +8,7 @@
#include "llvm/Transforms/Coroutines/CoroCleanup.h"
#include "CoroInternal.h"
+#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -15,6 +16,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -30,9 +32,25 @@ struct Lowerer : coro::LowererBase {
bool lower(Function &F);
private:
- void elideCoroNoop(IntrinsicInst *II);
void lowerCoroNoop(IntrinsicInst *II);
};
+
+// Recursively walk and eliminate resume/destroy call on noop coro
+class NoopCoroElider : public PtrUseVisitor<NoopCoroElider> {
+ using Base = PtrUseVisitor<NoopCoroElider>;
+
+ IRBuilder<> Builder;
+public:
+ NoopCoroElider(const DataLayout &DL, LLVMContext &C) : Base(DL), Builder(C) {}
+
+ void run(IntrinsicInst *II);
+
+ void visitLoadInst(LoadInst &I) { enqueueUsers(I); }
+ void visitCallBase(CallBase &CB);
+ void visitIntrinsicInst(IntrinsicInst &II);
+private:
+ bool tryEraseCallInvoke(Instruction *I);
+};
}
static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) {
@@ -73,6 +91,7 @@ bool Lowerer::lower(Function &F) {
bool IsPrivateAndUnprocessed = F.isPresplitCoroutine() && F.hasLocalLinkage();
bool Changed = false;
+ NoopCoroElider NCE(F.getDataLayout(), F.getContext());
SmallPtrSet<Instruction *, 8> DeadInsts{};
for (Instruction &I : instructions(F)) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
@@ -100,7 +119,7 @@ bool Lowerer::lower(Function &F) {
II->replaceAllUsesWith(ConstantTokenNone::get(Context));
break;
case Intrinsic::coro_noop:
- elideCoroNoop(II);
+ NCE.run(II);
if (!II->user_empty())
lowerCoroNoop(II);
break;
@@ -142,28 +161,6 @@ bool Lowerer::lower(Function &F) {
return Changed;
}
-void Lowerer::elideCoroNoop(IntrinsicInst *II) {
- for (User *U : make_early_inc_range(II->users())) {
- auto *Fn = dyn_cast<CoroSubFnInst>(U);
- if (Fn == nullptr)
- continue;
-
- auto *User = Fn->getUniqueUndroppableUser();
- if (auto *Call = dyn_cast<CallInst>(User)) {
- Call->eraseFromParent();
- Fn->eraseFromParent();
- continue;
- }
-
- if (auto *I = dyn_cast<InvokeInst>(User)) {
- Builder.SetInsertPoint(I);
- Builder.CreateBr(I->getNormalDest());
- I->eraseFromParent();
- Fn->eraseFromParent();
- }
- }
-}
-
void Lowerer::lowerCoroNoop(IntrinsicInst *II) {
if (!NoopCoro) {
LLVMContext &C = Builder.getContext();
@@ -200,6 +197,47 @@ void Lowerer::lowerCoroNoop(IntrinsicInst *II) {
II->replaceAllUsesWith(NoopCoroVoidPtr);
}
+void NoopCoroElider::run(IntrinsicInst *II) {
+ visitPtr(*II);
+
+ Worklist.clear();
+ VisitedUses.clear();
+}
+
+void NoopCoroElider::visitCallBase(CallBase &CB) {
+ auto *V = U->get();
+ bool ResumeOrDestroy = V == CB.getCalledOperand();
+ if (ResumeOrDestroy) {
+ bool Success = tryEraseCallInvoke(&CB);
+ assert(Success && "Unexpected CallBase");
+ RecursivelyDeleteTriviallyDeadInstructions(V);
+ }
+}
+
+void NoopCoroElider::visitIntrinsicInst(IntrinsicInst &II) {
+ if (auto *SubFn = dyn_cast<CoroSubFnInst>(&II)) {
+ auto *User = SubFn->getUniqueUndroppableUser();
+ if (!tryEraseCallInvoke(cast<Instruction>(User)))
+ return;
+ SubFn->eraseFromParent();
+ }
+}
+
+bool NoopCoroElider::tryEraseCallInvoke(Instruction *I) {
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ Call->eraseFromParent();
+ return true;
+ }
+
+ if (auto *II = dyn_cast<InvokeInst>(I)) {
+ Builder.SetInsertPoint(II);
+ Builder.CreateBr(II->getNormalDest());
+ II->eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
static bool declaresCoroCleanupIntrinsics(const Module &M) {
return coro::declaresIntrinsics(
M,
diff --git a/llvm/test/Transforms/Coroutines/coro-cleanup-noop-elide.ll b/llvm/test/Transforms/Coroutines/coro-cleanup-noop-elide.ll
new file mode 100644
index 0000000000000..6d9dd654b914a
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-cleanup-noop-elide.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -S -passes='coro-cleanup' | FileCheck %s
+
+; Tests that resume or destroy a no-op coroutine can be erased; Finally, erase coro.noop if it has no users.
+define void @erase() personality i32 0 {
+; CHECK-LABEL: define void @erase() personality i32 0 {
+; CHECK-NEXT: [[DONE:.*:]]
+; CHECK-NEXT: ret void
+;
+ %frame = call noundef ptr @llvm.coro.noop()
+ %resume = call ptr @llvm.coro.subfn.addr(ptr %frame, i8 0)
+ call fastcc void %resume(ptr %frame)
+ %destroy = call ptr @llvm.coro.subfn.addr(ptr %frame, i8 1)
+ invoke fastcc void %destroy(ptr %frame)
+ to label %done unwind label %unwind
+
+done:
+ ret void
+
+unwind:
+ %pad = landingpad { ptr, i32 }
+ catch ptr null
+ call void @terminate()
+ unreachable
+}
+
+; Tests the load-and-call pattern despite mismatched calling conventions. Prevent instcombine from breaking code.
+define void @load() personality i32 0 {
+; CHECK-LABEL: define void @load() personality i32 0 {
+; CHECK-NEXT: [[DONE:.*:]]
+; CHECK-NEXT: ret void
+;
+ %frame = call noundef ptr @llvm.coro.noop()
+ %resume = load ptr, ptr %frame, align 8
+ call void %resume(ptr %frame)
+ %destroy.addr = getelementptr inbounds nuw i8, ptr %frame, i64 8
+ %destroy = load ptr, ptr %destroy.addr, align 8
+ invoke void %destroy(ptr %frame)
+ to label %done unwind label %unwind
+
+done:
+ ret void
+
+unwind:
+ %pad = landingpad { ptr, i32 }
+ catch ptr null
+ call void @terminate()
+ unreachable
+}
+
+declare void @terminate() noreturn
diff --git a/llvm/test/Transforms/Coroutines/coro-cleanup-noop-erase.ll b/llvm/test/Transforms/Coroutines/coro-cleanup-noop-erase.ll
deleted file mode 100644
index 7fd9dc900ddb2..0000000000000
--- a/llvm/test/Transforms/Coroutines/coro-cleanup-noop-erase.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; Tests that resume or destroy a no-op coroutine can be erased; Finally, erase coro.noop if it has no users.
-; RUN: opt < %s -S -passes='coro-cleanup' | FileCheck %s
-
-define void @fn() personality i32 0 {
-; CHECK-LABEL: define void @fn() personality i32 0 {
-; CHECK-NEXT: [[DONE:.*:]]
-; CHECK-NEXT: ret void
-;
- %frame = call noundef ptr @llvm.coro.noop()
- %resume = call ptr @llvm.coro.subfn.addr(ptr %frame, i8 0)
- call fastcc void %resume(ptr %frame)
- %destroy = call ptr @llvm.coro.subfn.addr(ptr %frame, i8 1)
- invoke fastcc void %destroy(ptr %frame)
- to label %done unwind label %unwind
-
-done:
- ret void
-
-unwind:
- %pad = landingpad { ptr, i32 }
- catch ptr null
- unreachable
-}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
Thanks for the code review. |
|
cc @bgra8 . I tested your repro. Could you please confirm if it resolved the problem? |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/35545 Here is the relevant piece of the build log for the reference |
…call pattern" (#179289) There is a CI fail. Reverts llvm/llvm-project#179154
|
@NewSigma any ETA to reland the fix? |
llvm#179154)" (llvm#179431) There are multiple def-use paths from `coro.noop` to the calls of the resume/destroy functions. We should update the worklist before erasing instructions. The reversion passed tests with ASan enabled. Reland llvm#179154, which was reverted in llvm#179289 due to ASan failures.
…179154) We assume that the resume/destroy functions of coroutines follow the `fastcc` convention. If the convention mismatches, `InstCombine` would remove the function call. Specifically for the noop coroutine, the following code gives an inconsistent result between -O0 and -O1. This patch proposes that we carry out elision for this pattern. ``` C++ void load_and_call() { using Fn = void (*)(void *); void *frame = __builtin_coro_noop(); Fn x = *reinterpret_cast<Fn *>(frame); x(frame); __builtin_printf("1\n"); } ```
…n" (llvm#179289) There is a CI fail. Reverts llvm#179154
llvm#179154)" (llvm#179431) There are multiple def-use paths from `coro.noop` to the calls of the resume/destroy functions. We should update the worklist before erasing instructions. The reversion passed tests with ASan enabled. Reland llvm#179154, which was reverted in llvm#179289 due to ASan failures.
We assume that the resume/destroy functions of coroutines follow the
fastccconvention. If the convention mismatches,InstCombinewould remove the function call. Specifically for the noop coroutine, the following code gives an inconsistent result between -O0 and -O1. This patch proposes that we carry out elision for this pattern.https://godbolt.org/z/5ovdEqa9f