Skip to content

Commit 70ab32f

Browse files
committed
Metal: Replace unreachable control flow with exit block branches.
1 parent dd6a8eb commit 70ab32f

File tree

2 files changed

+98
-1
lines changed

2 files changed

+98
-1
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
2020
[compat]
2121
ExprTools = "0.1"
2222
InteractiveUtils = "1"
23-
LLVM = "8, 9"
23+
LLVM = "9"
2424
Libdl = "1"
2525
Logging = "1"
2626
PrecompileTools = "1"

src/metal.jl

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,14 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
134134
entry::LLVM.Function)
135135
entry_fn = LLVM.name(entry)
136136

137+
# get rid of unreachable control flow (JuliaLang/Metal.jl#370)
138+
if job.config.target.macos < v"15"
139+
for f in functions(mod)
140+
replace_unreachable!(job, f)
141+
end
142+
isdebug(:metal) && verify(mod)
143+
end
144+
137145
# add kernel metadata
138146
if job.config.kernel
139147
entry = add_address_spaces!(job, mod, entry)
@@ -142,6 +150,7 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
142150

143151
add_module_metadata!(job, mod)
144152

153+
# JuliaLang/Metal.jl#113
145154
hide_noreturn!(mod)
146155
end
147156

@@ -1075,3 +1084,91 @@ function annotate_air_intrinsics!(@nospecialize(job::CompilerJob), mod::LLVM.Mod
10751084

10761085
return changed
10771086
end
1087+
1088+
# replace unreachable control flow with branches to the exit block
1089+
#
1090+
# before macOS 15, code generated by Julia 1.11 causes compilation failures in the back-end.
1091+
# the reduced example contains unreachable control flow executed divergently, so this is a
1092+
# similar issue as encountered with NVIDIA, albeit causing crashes instead of miscompiles.
1093+
#
1094+
# the proposed solution is to avoid (divergent) unreachable control flow, instead replacing
1095+
# it by branches to the exit block. since `unreachable` doesn't lower to anything that
1096+
# aborts the kernel anyway (can we fix this?), this transformation should be safe.
1097+
# Rewrite every `unreachable` terminator in `f` into a branch to a block that does nothing
# but return.
#
# Arguments:
# - `job`: the compiler job; not referenced by this function.
# - `f`: the LLVM function to rewrite in place.
#
# Returns `true` when the function was rewritten, and `false` when it was left untouched
# because it contains no `unreachable` instruction or no `ret` instruction.
function replace_unreachable!(@nospecialize(job::CompilerJob), f::LLVM.Function)
    # find unreachable instructions and exit blocks
    unreachables = Instruction[]
    exit_blocks = BasicBlock[]
    for bb in blocks(f), inst in instructions(bb)
        if isa(inst, LLVM.UnreachableInst)
            push!(unreachables, inst)
        end
        if isa(inst, LLVM.RetInst)
            push!(exit_blocks, bb)
        end
    end
    isempty(unreachables) && return false

    # if we don't have an exit block, we can't do much. we could insert a return, but that
    # would probably keep the problematic control flow just as it is.
    isempty(exit_blocks) && return false

    @dispose builder=IRBuilder() begin
        # if we have multiple exit blocks, take the last one, which is hopefully the least
        # divergent (assuming divergent control flow is the root of the problem here).
        exit_block = last(exit_blocks)

        ret = terminator(exit_block)
        returns_value = !isempty(operands(ret))

        # the exit block can only be reused as-is when it consists of a bare `ret void`.
        # when a value is returned, the blocks that used to end in `unreachable` become new
        # predecessors of the return block, and the returned value is not guaranteed to
        # dominate those paths. in that case we always split, so that the value flows
        # through a phi node that can receive `undef` from the new predecessors.
        if first(instructions(exit_block)) == ret && !returns_value
            return_block = exit_block
        else
            # split the exit block right before the ret, so that we only have to care about
            # the value that's returned, and not about any other SSA value in the block.
            return_block = BasicBlock(f, "ret")
            move_after(return_block, exit_block)

            # emit a return
            position!(builder, return_block)
            if returns_value
                # XXX: support aggregate returns?
                val = only(operands(ret))
                phi = phi!(builder, value_type(val))
                push!(incoming(phi), (val, exit_block))
                ret!(builder, phi)
            else
                ret!(builder)
            end

            # replace the original return with a branch to the new return block
            position!(builder, ret)
            br!(builder, return_block)
            unsafe_delete!(exit_block, ret)
        end

        # replace the unreachable with a branch to the return block
        for unreachable in unreachables
            bb = LLVM.parent(unreachable)

            # remove preceding traps to avoid reconstructing unreachable control flow
            prev = previnst(unreachable)
            if isa(prev, LLVM.CallInst) && name(called_operand(prev)) == "llvm.trap"
                unsafe_delete!(bb, prev)
            end

            # replace the unreachable with a branch to the return block
            position!(builder, unreachable)
            br!(builder, return_block)
            unsafe_delete!(bb, unreachable)

            # patch up any phi nodes in the return block: `bb` is a new predecessor, and
            # since it never produced a return value, it contributes `undef`.
            for inst in instructions(return_block)
                if isa(inst, LLVM.PHIInst)
                    undef = UndefValue(value_type(inst))
                    vals = incoming(inst)
                    push!(vals, (undef, bb))
                end
            end
        end
    end

    return true
end

0 commit comments

Comments
 (0)