diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h index 24f0b7bf71bd3..a36fd746f4ab6 100644 --- a/src/llvm-gc-interface-passes.h +++ b/src/llvm-gc-interface-passes.h @@ -351,7 +351,7 @@ struct LateLowerGCFrame: private JuliaPassContext { void NoteOperandUses(State &S, BBState &BBS, Instruction &UI); void MaybeTrackDst(State &S, MemTransferInst *MI); - void MaybeTrackStore(State &S, StoreInst *I); + bool MaybeTrackStore(State &S, StoreInst *I); State LocalScan(Function &F); void ComputeLiveness(State &S); void ComputeLiveSets(State &S); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 0c92b4eb5bbb4..2d7ab213cc58d 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1148,50 +1148,44 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef PHINumbers, State &S) } } -// Look through instructions to find all possible allocas that might become the sret argument -static SmallSetVector FindSretAllocas(Value* SRetArg) { +// Look through selects and phis to find all possible alloca bases of a pointer. +// Returns an empty set if a non-alloca base is encountered. +static SmallSetVector FindAllocaBases(Value *V) { SmallSetVector allocas; - if (AllocaInst *OneSRet = dyn_cast(SRetArg)) { - allocas.insert(OneSRet); // Found it directly + if (AllocaInst *AI = dyn_cast(V)) { + allocas.insert(AI); // Found it directly } else { - SmallSetVector worklist; - worklist.insert(SRetArg); + SmallVector worklist; + SmallPtrSet visited; + worklist.push_back(V); + visited.insert(V); while (!worklist.empty()) { - Value *V = worklist.pop_back_val(); - if (AllocaInst *Alloca = dyn_cast(V->stripInBoundsOffsets())) { + Value *W = worklist.pop_back_val(); + if (AllocaInst *Alloca = dyn_cast(W->stripInBoundsOffsets())) { allocas.insert(Alloca); // Found a candidate } - else if (PHINode *Phi = dyn_cast(V)) { + else if (PHINode *Phi = dyn_cast(W)) { for (Value *Incoming : Phi->incoming_values()) { - worklist.insert(Incoming); + if (visited.insert(Incoming).second) + worklist.push_back(Incoming); } } - else if (SelectInst *SI = dyn_cast(V)) { - auto TrueBranch = SI->getTrueValue(); - auto FalseBranch = SI->getFalseValue(); - if (TrueBranch && FalseBranch) { - worklist.insert(TrueBranch); - worklist.insert(FalseBranch); - } - else { - llvm_dump(SI); - dbgs() << "Malformed Select\n"; - allocas.clear(); - return allocas; - } + else if (SelectInst *SI = dyn_cast(W)) { + if (visited.insert(SI->getTrueValue()).second) + worklist.push_back(SI->getTrueValue()); + if (visited.insert(SI->getFalseValue()).second) + worklist.push_back(SI->getFalseValue()); } else { - llvm_dump(V); - dbgs() << "Unexpected SRet argument\n"; allocas.clear(); return allocas; } } } - assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst* SRetAlloca) JL_NOTSAFEPOINT { - return (SRetAlloca->getArraySize() == allocas[0]->getArraySize() && - SRetAlloca->getAllocatedType() == allocas[0]->getAllocatedType()); + assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst *AI) JL_NOTSAFEPOINT { + return (AI->getArraySize() == allocas[0]->getArraySize() && + AI->getAllocatedType() == allocas[0]->getAllocatedType()); } )); return allocas; @@ -1259,7 +1253,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { size_t return_roots = atol(RetRootsAttr.getValueAsString().data()); assert(return_roots); HasDefBefore = true; - auto gc_allocas = FindSretAllocas(CI->getArgOperand(i)->stripInBoundsOffsets()); + auto gc_allocas = FindAllocaBases(CI->getArgOperand(i)->stripInBoundsOffsets()); // We know that with the right optimizations we can forward a sret directly from an argument // This hasn't been seen without adding IPO effects to julia functions but it's possible we need to handle that too if (gc_allocas.size() == 0) { @@ -1441,7 +1435,8 @@ State LateLowerGCFrame::LocalScan(Function &F) { } } else if (StoreInst *SI = dyn_cast(&I)) { NoteOperandUses(S, BBS, I); - MaybeTrackStore(S, SI); + if (MaybeTrackStore(S, SI)) + BBS.FirstSafepointAfterFirstDef = BBS.FirstSafepoint; } else if (isa(&I)) { NoteOperandUses(S, BBS, I); } else if (auto *ASCI = dyn_cast(&I)) { @@ -1484,37 +1479,48 @@ State LateLowerGCFrame::LocalScan(Function &F) { // return Ptrs.size(); //} -void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) { +bool LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) { Value *PtrBase = I->getPointerOperand()->stripInBoundsOffsets(); auto tracked = CountTrackedPointers(I->getValueOperand()->getType()); if (!tracked.count) - return; // nothing to track is being stored - if (AllocaInst *AI = dyn_cast(PtrBase)) { + return false; // nothing to track is being stored + // Find all alloca bases, looking through selects and phis. + // LLVM's SROA/InstCombine can merge conditional alloca stores into a + // select/phi over alloca pointers (see #60985). + auto Allocas = FindAllocaBases(PtrBase); + if (Allocas.empty()) + return false; // assume it is rooted--TODO: should we be more conservative? + bool needsTrackedStore = false; + bool Tracked = false; + for (AllocaInst *AI : Allocas) { Type *STy = AI->getAllocatedType(); if (!AI->isStaticAlloca() || (isa(STy) && STy->getPointerAddressSpace() == AddressSpace::Tracked) || S.ArrayAllocas.count(AI)) - return; // already numbered this - auto tracked = CountTrackedPointers(STy); - if (tracked.count) { - assert(!tracked.derived); - if (tracked.all) { + continue; // already numbered this + auto allocaTracked = CountTrackedPointers(STy); + if (allocaTracked.count) { + assert(!allocaTracked.derived); + if (allocaTracked.all) { // track the Alloca directly - S.ArrayAllocas[AI] = tracked.count * cast(AI->getArraySize())->getZExtValue(); - return; + S.ArrayAllocas[AI] = allocaTracked.count * cast(AI->getArraySize())->getZExtValue(); + Tracked = true; + continue; } } - } - else { - return; // assume it is rooted--TODO: should we be more conservative? - } - // track the Store with a Shadow - //auto &Shadow = S.ShadowAllocas[AI]; - //if (!Shadow) - // Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, tracked.count), 0, "", MI); - //AI = Shadow; - //Value *Src = I->getValueOperand(); - //unsigned count = TrackWithShadow(Src, Src->getType(), false, AI, MI, TODO which slots are we actually clobbering?); - //assert(count == tracked.count); (void)count; - S.TrackedStores.push_back(std::make_pair(I, tracked.count)); + needsTrackedStore = true; + } + if (needsTrackedStore) { + // track the Store with a Shadow + //auto &Shadow = S.ShadowAllocas[AI]; + //if (!Shadow) + // Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, tracked.count), 0, "", MI); + //AI = Shadow; + //Value *Src = I->getValueOperand(); + //unsigned count = TrackWithShadow(Src, Src->getType(), false, AI, MI, TODO which slots are we actually clobbering?); + //assert(count == tracked.count); (void)count; + S.TrackedStores.push_back(std::make_pair(I, tracked.count)); + Tracked = true; + } + return Tracked; } /* diff --git a/test/llvmpasses/late-lower-gc-select-store.ll b/test/llvmpasses/late-lower-gc-select-store.ll new file mode 100644 index 0000000000000..39a82bff139a8 --- /dev/null +++ b/test/llvmpasses/late-lower-gc-select-store.ll @@ -0,0 +1,72 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s + +; Test that stores of GC-tracked values through select/phi of alloca pointers +; are properly rooted. This is a regression test for +; https://github.com/JuliaLang/julia/issues/60985 + +declare ptr @julia.get_pgcstack() + +declare void @safepoint() "julia.safepoint" + +; Store through a select of two allocas +define void @store_select(ptr addrspace(10) %val, i1 %cond) { + ; CHECK-LABEL: @store_select + ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 4) + %pgcstack = call ptr @julia.get_pgcstack() + %alloca1 = alloca [2 x ptr addrspace(10)], align 8 + %alloca2 = alloca [2 x ptr addrspace(10)], align 8 + %sel = select i1 %cond, ptr %alloca1, ptr %alloca2 + store ptr addrspace(10) %val, ptr %sel, align 8 + call void @safepoint() + ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret void +} + +; Store through a phi with a cycle (phi references itself via loop back-edge). +; This is a regression test for an infinite loop in FindAllocaBases. +define void @store_phi_cycle(ptr addrspace(10) %val, i1 %cond) { +top: + ; CHECK-LABEL: @store_phi_cycle + ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 4) + %pgcstack = call ptr @julia.get_pgcstack() + %alloca1 = alloca [2 x ptr addrspace(10)], align 8 + %alloca2 = alloca [2 x ptr addrspace(10)], align 8 + br label %loop + +loop: + %phi = phi ptr [ %alloca1, %top ], [ %sel, %loop ] + %sel = select i1 %cond, ptr %phi, ptr %alloca2 + store ptr addrspace(10) %val, ptr %sel, align 8 + call void @safepoint() + br i1 %cond, label %loop, label %exit + +exit: + ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret void +} + +; Store through a phi of two allocas +define void @store_phi(ptr addrspace(10) %val, i1 %cond) { +top: + ; CHECK-LABEL: @store_phi + ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 4) + %pgcstack = call ptr @julia.get_pgcstack() + %alloca1 = alloca [2 x ptr addrspace(10)], align 8 + %alloca2 = alloca [2 x ptr addrspace(10)], align 8 + br i1 %cond, label %left, label %right + +left: + br label %merge + +right: + br label %merge + +merge: + %phi = phi ptr [ %alloca1, %left ], [ %alloca2, %right ] + store ptr addrspace(10) %val, ptr %phi, align 8 + call void @safepoint() + ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret void +}