From a0a2e9a59b364261487bdd239eab5a219bfebb29 Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Wed, 4 Mar 2026 12:14:13 -0300 Subject: [PATCH 1/3] LateLowerGCFrame: Fix GC rooting for stores through select/phi of allocas LLVM's SROA/InstCombine can merge two conditional alloca stores into a select (or phi) over the alloca pointers: %. = select i1 %cond, ptr %alloca1, ptr %alloca2 store ptr addrspace(10) %gc_val, ptr %. In MaybeTrackStore, stripInBoundsOffsets() returns the select (not an alloca), so dyn_cast fails and the function returns early via the `else { return; }` branch. Neither alloca gets added to ArrayAllocas, no GC frame is created, and tracked values go unrooted, leading to GC corruption. This commit fixes two issues: 1. Rename FindSretAllocas to FindAllocaBases and reuse it in MaybeTrackStore to look through selects/phis when resolving the store target to its underlying alloca(s). 2. Fix the safepoint check in runOnFunction that was skipping GC frame creation when ArrayAllocas/TrackedStores had entries but no numbered def existed before the safepoint in the entry block. This was a pre-existing bug where even a simple `alloca ptr addrspace(10)` in an entry-block-only function would fail to get rooted. Fixes #60985 Co-Authored-By: Claude Opus 4.6 --- src/llvm-late-gc-lowering.cpp | 99 ++++++++++--------- test/llvmpasses/late-lower-gc-select-store.ll | 49 +++++++++ 2 files changed, 100 insertions(+), 48 deletions(-) create mode 100644 test/llvmpasses/late-lower-gc-select-store.ll diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 0c92b4eb5bbb4..12716f9e5f4ad 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1148,50 +1148,39 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef PHINumbers, State &S) } } -// Look through instructions to find all possible allocas that might become the sret argument -static SmallSetVector FindSretAllocas(Value* SRetArg) { +// Look through selects and phis to find all possible alloca bases of a pointer. +// Returns an empty set if a non-alloca base is encountered. +static SmallSetVector FindAllocaBases(Value *V) { SmallSetVector allocas; - if (AllocaInst *OneSRet = dyn_cast(SRetArg)) { - allocas.insert(OneSRet); // Found it directly + if (AllocaInst *AI = dyn_cast(V)) { + allocas.insert(AI); // Found it directly } else { SmallSetVector worklist; - worklist.insert(SRetArg); + worklist.insert(V); while (!worklist.empty()) { - Value *V = worklist.pop_back_val(); - if (AllocaInst *Alloca = dyn_cast(V->stripInBoundsOffsets())) { + Value *W = worklist.pop_back_val(); + if (AllocaInst *Alloca = dyn_cast(W->stripInBoundsOffsets())) { allocas.insert(Alloca); // Found a candidate } - else if (PHINode *Phi = dyn_cast(V)) { + else if (PHINode *Phi = dyn_cast(W)) { for (Value *Incoming : Phi->incoming_values()) { worklist.insert(Incoming); } } - else if (SelectInst *SI = dyn_cast(V)) { - auto TrueBranch = SI->getTrueValue(); - auto FalseBranch = SI->getFalseValue(); - if (TrueBranch && FalseBranch) { - worklist.insert(TrueBranch); - worklist.insert(FalseBranch); - } - else { - llvm_dump(SI); - dbgs() << "Malformed Select\n"; - allocas.clear(); - return allocas; - } + else if (SelectInst *SI = dyn_cast(W)) { + worklist.insert(SI->getTrueValue()); + worklist.insert(SI->getFalseValue()); } else { - llvm_dump(V); - dbgs() << "Unexpected SRet argument\n"; allocas.clear(); return allocas; } } } - assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst* SRetAlloca) JL_NOTSAFEPOINT { - return (SRetAlloca->getArraySize() == allocas[0]->getArraySize() && - SRetAlloca->getAllocatedType() == allocas[0]->getAllocatedType()); + assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst *AI) JL_NOTSAFEPOINT { + return (AI->getArraySize() == allocas[0]->getArraySize() && + AI->getAllocatedType() == allocas[0]->getAllocatedType()); } )); return allocas; @@ -1259,7 +1248,7 @@ State LateLowerGCFrame::LocalScan(Function &F) { size_t return_roots = atol(RetRootsAttr.getValueAsString().data()); assert(return_roots); HasDefBefore = true; - auto gc_allocas = FindSretAllocas(CI->getArgOperand(i)->stripInBoundsOffsets()); + auto gc_allocas = FindAllocaBases(CI->getArgOperand(i)->stripInBoundsOffsets()); // We know that with the right optimizations we can forward a sret directly from an argument // This hasn't been seen without adding IPO effects to julia functions but it's possible we need to handle that too if (gc_allocas.size() == 0) { @@ -1489,32 +1478,39 @@ void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) { auto tracked = CountTrackedPointers(I->getValueOperand()->getType()); if (!tracked.count) return; // nothing to track is being stored - if (AllocaInst *AI = dyn_cast(PtrBase)) { + // Find all alloca bases, looking through selects and phis. + // LLVM's SROA/InstCombine can merge conditional alloca stores into a + // select/phi over alloca pointers (see #60985). + auto Allocas = FindAllocaBases(PtrBase); + if (Allocas.empty()) + return; // assume it is rooted--TODO: should we be more conservative? + bool needsTrackedStore = false; + for (AllocaInst *AI : Allocas) { Type *STy = AI->getAllocatedType(); if (!AI->isStaticAlloca() || (isa(STy) && STy->getPointerAddressSpace() == AddressSpace::Tracked) || S.ArrayAllocas.count(AI)) - return; // already numbered this - auto tracked = CountTrackedPointers(STy); - if (tracked.count) { - assert(!tracked.derived); - if (tracked.all) { + continue; // already numbered this + auto allocaTracked = CountTrackedPointers(STy); + if (allocaTracked.count) { + assert(!allocaTracked.derived); + if (allocaTracked.all) { // track the Alloca directly - S.ArrayAllocas[AI] = tracked.count * cast(AI->getArraySize())->getZExtValue(); - return; + S.ArrayAllocas[AI] = allocaTracked.count * cast(AI->getArraySize())->getZExtValue(); + continue; } } + needsTrackedStore = true; } - else { - return; // assume it is rooted--TODO: should we be more conservative? + if (needsTrackedStore) { + // track the Store with a Shadow + //auto &Shadow = S.ShadowAllocas[AI]; + //if (!Shadow) + // Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, tracked.count), 0, "", MI); + //AI = Shadow; + //Value *Src = I->getValueOperand(); + //unsigned count = TrackWithShadow(Src, Src->getType(), false, AI, MI, TODO which slots are we actually clobbering?); + //assert(count == tracked.count); (void)count; + S.TrackedStores.push_back(std::make_pair(I, tracked.count)); } - // track the Store with a Shadow - //auto &Shadow = S.ShadowAllocas[AI]; - //if (!Shadow) - // Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, tracked.count), 0, "", MI); - //AI = Shadow; - //Value *Src = I->getValueOperand(); - //unsigned count = TrackWithShadow(Src, Src->getType(), false, AI, MI, TODO which slots are we actually clobbering?); - //assert(count == tracked.count); (void)count; - S.TrackedStores.push_back(std::make_pair(I, tracked.count)); } /* @@ -2529,12 +2525,19 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { if (pgcstack) { State S = LocalScan(F); // If there is no safepoint after the first reachable def, then we don't need any roots (even those for allocas) - if (std::any_of(S.BBStates.begin(), S.BBStates.end(), + // However, if ArrayAllocas or TrackedStores have entries, we need a GC frame + // to replace those allocas with frame slots, even when the only safepoint + // is in the entry block with no numbered def before it (#60985). + bool HasSafepointAfterDef = std::any_of(S.BBStates.begin(), S.BBStates.end(), [&F](auto BBS) { if (BBS.first == &F.getEntryBlock()) return BBS.second.FirstSafepointAfterFirstDef != -1; return BBS.second.HasSafepoint; - })) { + }); + bool HasAllocaTracking = !S.ArrayAllocas.empty() || !S.TrackedStores.empty(); + bool HasAnySafepoint = HasAllocaTracking && std::any_of(S.BBStates.begin(), S.BBStates.end(), + [](auto BBS) { return BBS.second.HasSafepoint; }); + if (HasSafepointAfterDef || HasAnySafepoint) { ComputeLiveness(S); auto Colors = ColorRoots(S); std::map> CallFrames; // = OptimizeCallFrames(S, Ordering); diff --git a/test/llvmpasses/late-lower-gc-select-store.ll b/test/llvmpasses/late-lower-gc-select-store.ll new file mode 100644 index 0000000000000..1d3c0edb81081 --- /dev/null +++ b/test/llvmpasses/late-lower-gc-select-store.ll @@ -0,0 +1,49 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s + +; Test that stores of GC-tracked values through select/phi of alloca pointers +; are properly rooted. This is a regression test for +; https://github.com/JuliaLang/julia/issues/60985 + +declare ptr @julia.get_pgcstack() + +declare void @safepoint() "julia.safepoint" + +; Store through a select of two allocas +define void @store_select(ptr addrspace(10) %val, i1 %cond) { + ; CHECK-LABEL: @store_select + ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 4) + %pgcstack = call ptr @julia.get_pgcstack() + %alloca1 = alloca [2 x ptr addrspace(10)], align 8 + %alloca2 = alloca [2 x ptr addrspace(10)], align 8 + %sel = select i1 %cond, ptr %alloca1, ptr %alloca2 + store ptr addrspace(10) %val, ptr %sel, align 8 + call void @safepoint() + ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret void +} + +; Store through a phi of two allocas +define void @store_phi(ptr addrspace(10) %val, i1 %cond) { +top: + ; CHECK-LABEL: @store_phi + ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 4) + %pgcstack = call ptr @julia.get_pgcstack() + %alloca1 = alloca [2 x ptr addrspace(10)], align 8 + %alloca2 = alloca [2 x ptr addrspace(10)], align 8 + br i1 %cond, label %left, label %right + +left: + br label %merge + +right: + br label %merge + +merge: + %phi = phi ptr [ %alloca1, %left ], [ %alloca2, %right ] + store ptr addrspace(10) %val, ptr %phi, align 8 + call void @safepoint() + ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret void +} From 5df73edcec574c757017f2fe5de1ba9570a05625 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 4 Mar 2026 20:19:11 +0000 Subject: [PATCH 2/3] LateLowerGCFrame: Fix infinite loop in FindAllocaBases with cyclic PHIs The SmallSetVector worklist removed elements from the set on pop_back_val(), allowing cyclic PHI graphs to re-insert and re-process the same values indefinitely. Use a separate SmallPtrSet to track visited values that persists across iterations. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/llvm-late-gc-lowering.cpp | 15 ++++++++---- test/llvmpasses/late-lower-gc-select-store.ll | 23 +++++++++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 12716f9e5f4ad..1fb5fc18d7f0c 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1156,8 +1156,10 @@ static SmallSetVector FindAllocaBases(Value *V) { allocas.insert(AI); // Found it directly } else { - SmallSetVector worklist; - worklist.insert(V); + SmallVector worklist; + SmallPtrSet visited; + worklist.push_back(V); + visited.insert(V); while (!worklist.empty()) { Value *W = worklist.pop_back_val(); if (AllocaInst *Alloca = dyn_cast(W->stripInBoundsOffsets())) { @@ -1165,12 +1167,15 @@ static SmallSetVector FindAllocaBases(Value *V) { } else if (PHINode *Phi = dyn_cast(W)) { for (Value *Incoming : Phi->incoming_values()) { - worklist.insert(Incoming); + if (visited.insert(Incoming).second) + worklist.push_back(Incoming); } } else if (SelectInst *SI = dyn_cast(W)) { - worklist.insert(SI->getTrueValue()); - worklist.insert(SI->getFalseValue()); + if (visited.insert(SI->getTrueValue()).second) + worklist.push_back(SI->getTrueValue()); + if (visited.insert(SI->getFalseValue()).second) + worklist.push_back(SI->getFalseValue()); } else { allocas.clear(); diff --git a/test/llvmpasses/late-lower-gc-select-store.ll b/test/llvmpasses/late-lower-gc-select-store.ll index 1d3c0edb81081..39a82bff139a8 100644 --- a/test/llvmpasses/late-lower-gc-select-store.ll +++ b/test/llvmpasses/late-lower-gc-select-store.ll @@ -24,6 +24,29 @@ define void @store_select(ptr addrspace(10) %val, i1 %cond) { ret void } +; Store through a phi with a cycle (phi references itself via loop back-edge). +; This is a regression test for an infinite loop in FindAllocaBases. +define void @store_phi_cycle(ptr addrspace(10) %val, i1 %cond) { +top: + ; CHECK-LABEL: @store_phi_cycle + ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 4) + %pgcstack = call ptr @julia.get_pgcstack() + %alloca1 = alloca [2 x ptr addrspace(10)], align 8 + %alloca2 = alloca [2 x ptr addrspace(10)], align 8 + br label %loop + +loop: + %phi = phi ptr [ %alloca1, %top ], [ %sel, %loop ] + %sel = select i1 %cond, ptr %phi, ptr %alloca2 + store ptr addrspace(10) %val, ptr %sel, align 8 + call void @safepoint() + br i1 %cond, label %loop, label %exit + +exit: + ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret void +} + ; Store through a phi of two allocas define void @store_phi(ptr addrspace(10) %val, i1 %cond) { top: From ff68a7d31f819ac55ecab67be05d262bb0f5c7f7 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 5 Mar 2026 14:14:17 +0000 Subject: [PATCH 3/3] LateLowerGCFrame: Treat tracked stores as defs for safepoint analysis Instead of the overzealous HasAnySafepoint check (which checked for any safepoint anywhere in the function), have MaybeTrackStore return whether it tracked an alloca and set FirstSafepointAfterFirstDef at the call site. This integrates tracked stores into the existing def/safepoint analysis, so GC frames are only created when a safepoint actually follows the store in program order. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/llvm-gc-interface-passes.h | 2 +- src/llvm-late-gc-lowering.cpp | 24 +++++++++++------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h index 24f0b7bf71bd3..a36fd746f4ab6 100644 --- a/src/llvm-gc-interface-passes.h +++ b/src/llvm-gc-interface-passes.h @@ -351,7 +351,7 @@ struct LateLowerGCFrame: private JuliaPassContext { void NoteOperandUses(State &S, BBState &BBS, Instruction &UI); void MaybeTrackDst(State &S, MemTransferInst *MI); - void MaybeTrackStore(State &S, StoreInst *I); + bool MaybeTrackStore(State &S, StoreInst *I); State LocalScan(Function &F); void ComputeLiveness(State &S); void ComputeLiveSets(State &S); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 1fb5fc18d7f0c..2d7ab213cc58d 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1435,7 +1435,8 @@ State LateLowerGCFrame::LocalScan(Function &F) { } } else if (StoreInst *SI = dyn_cast(&I)) { NoteOperandUses(S, BBS, I); - MaybeTrackStore(S, SI); + if (MaybeTrackStore(S, SI)) + BBS.FirstSafepointAfterFirstDef = BBS.FirstSafepoint; } else if (isa(&I)) { NoteOperandUses(S, BBS, I); } else if (auto *ASCI = dyn_cast(&I)) { @@ -1478,18 +1479,19 @@ State LateLowerGCFrame::LocalScan(Function &F) { // return Ptrs.size(); //} -void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) { +bool LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) { Value *PtrBase = I->getPointerOperand()->stripInBoundsOffsets(); auto tracked = CountTrackedPointers(I->getValueOperand()->getType()); if (!tracked.count) - return; // nothing to track is being stored + return false; // nothing to track is being stored // Find all alloca bases, looking through selects and phis. // LLVM's SROA/InstCombine can merge conditional alloca stores into a // select/phi over alloca pointers (see #60985). auto Allocas = FindAllocaBases(PtrBase); if (Allocas.empty()) - return; // assume it is rooted--TODO: should we be more conservative? + return false; // assume it is rooted--TODO: should we be more conservative? bool needsTrackedStore = false; + bool Tracked = false; for (AllocaInst *AI : Allocas) { Type *STy = AI->getAllocatedType(); if (!AI->isStaticAlloca() || (isa(STy) && STy->getPointerAddressSpace() == AddressSpace::Tracked) || S.ArrayAllocas.count(AI)) @@ -1500,6 +1502,7 @@ void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) { if (allocaTracked.all) { // track the Alloca directly S.ArrayAllocas[AI] = allocaTracked.count * cast(AI->getArraySize())->getZExtValue(); + Tracked = true; continue; } } @@ -1515,7 +1518,9 @@ void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) { //unsigned count = TrackWithShadow(Src, Src->getType(), false, AI, MI, TODO which slots are we actually clobbering?); //assert(count == tracked.count); (void)count; S.TrackedStores.push_back(std::make_pair(I, tracked.count)); + Tracked = true; } + return Tracked; } /* @@ -2530,19 +2535,12 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { if (pgcstack) { State S = LocalScan(F); // If there is no safepoint after the first reachable def, then we don't need any roots (even those for allocas) - // However, if ArrayAllocas or TrackedStores have entries, we need a GC frame - // to replace those allocas with frame slots, even when the only safepoint - // is in the entry block with no numbered def before it (#60985). - bool HasSafepointAfterDef = std::any_of(S.BBStates.begin(), S.BBStates.end(), + if (std::any_of(S.BBStates.begin(), S.BBStates.end(), [&F](auto BBS) { if (BBS.first == &F.getEntryBlock()) return BBS.second.FirstSafepointAfterFirstDef != -1; return BBS.second.HasSafepoint; - }); - bool HasAllocaTracking = !S.ArrayAllocas.empty() || !S.TrackedStores.empty(); - bool HasAnySafepoint = HasAllocaTracking && std::any_of(S.BBStates.begin(), S.BBStates.end(), - [](auto BBS) { return BBS.second.HasSafepoint; }); - if (HasSafepointAfterDef || HasAnySafepoint) { + })) { ComputeLiveness(S); auto Colors = ColorRoots(S); std::map> CallFrames; // = OptimizeCallFrames(S, Ordering);