@@ -134,6 +134,14 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
134134 entry:: LLVM.Function )
135135 entry_fn = LLVM. name (entry)
136136
137+ # get rid of unreachable control flow (JuliaLang/Metal.jl#370)
138+ if job. config. target. macos < v " 15"
139+ for f in functions (mod)
140+ replace_unreachable! (job, f)
141+ end
142+ isdebug (:metal ) && verify (mod)
143+ end
144+
137145 # add kernel metadata
138146 if job. config. kernel
139147 entry = add_address_spaces! (job, mod, entry)
@@ -142,6 +150,7 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
142150
143151 add_module_metadata! (job, mod)
144152
153+ # JuliaLang/Metal.jl#113
145154 hide_noreturn! (mod)
146155 end
147156
@@ -1075,3 +1084,91 @@ function annotate_air_intrinsics!(@nospecialize(job::CompilerJob), mod::LLVM.Mod
10751084
10761085 return changed
10771086end
1087+
# replace unreachable control flow with branches to the exit block
#
# before macOS 15, code generated by Julia 1.11 causes compilation failures in the back-end.
# the reduced example contains unreachable control flow executed divergently, so this is an
# issue similar to the one encountered with NVIDIA, albeit causing crashes instead of
# miscompiles.
#
# the proposed solution is to avoid (divergent) unreachable control flow, instead replacing
# it by branches to the exit block. since `unreachable` doesn't lower to anything that
# aborts the kernel anyway (can we fix this?), this transformation should be safe.
#
# returns `true` if `f` was modified, and `false` if it was left untouched (because it
# contains no `unreachable` instruction, or no `ret` instruction to branch to).
function replace_unreachable!(@nospecialize(job::CompilerJob), f::LLVM.Function)
    # find unreachable instructions and exit blocks
    unreachables = Instruction[]
    exit_blocks = BasicBlock[]
    for bb in blocks(f), inst in instructions(bb)
        if isa(inst, LLVM.UnreachableInst)
            push!(unreachables, inst)
        elseif isa(inst, LLVM.RetInst)
            push!(exit_blocks, bb)
        end
    end
    isempty(unreachables) && return false

    # if we don't have an exit block, we can't do much. we could insert a return, but that
    # would probably keep the problematic control flow just as it is.
    isempty(exit_blocks) && return false

    @dispose builder=IRBuilder() begin
        # if we have multiple exit blocks, take the last one, which is hopefully the least
        # divergent (assuming divergent control flow is the root of the problem here).
        exit_block = last(exit_blocks)

        ret = terminator(exit_block)
        returns_value = !isempty(operands(ret))
        if first(instructions(exit_block)) == ret && !returns_value
            # a block consisting of only `ret void` can be branched to from anywhere.
            return_block = exit_block
        else
            # split the exit block right before the ret, so that we only have to care about
            # the value that's returned, and not about any other SSA value in the block.
            #
            # this is also required when the block contains nothing but `ret %val`: the
            # returned value generally does not dominate the blocks that used to end in
            # `unreachable`, so it has to be routed through a phi node that can receive
            # `undef` for those new predecessors.
            return_block = BasicBlock(f, "ret")
            move_after(return_block, exit_block)

            # emit a return
            position!(builder, return_block)
            if returns_value
                # XXX: support aggregate returns?
                val = only(operands(ret))
                phi = phi!(builder, value_type(val))
                push!(incoming(phi), (val, exit_block))
                ret!(builder, phi)
            else
                ret!(builder)
            end

            # replace the original return with a branch to the new return block
            position!(builder, ret)
            br!(builder, return_block)
            unsafe_delete!(exit_block, ret)
        end

        # replace the unreachable with a branch to the return block
        for unreachable in unreachables
            bb = LLVM.parent(unreachable)

            # remove preceding traps to avoid reconstructing unreachable control flow
            prev = previnst(unreachable)
            if isa(prev, LLVM.CallInst) && name(called_operand(prev)) == "llvm.trap"
                unsafe_delete!(bb, prev)
            end

            # replace the unreachable with a branch to the return block
            position!(builder, unreachable)
            br!(builder, return_block)
            unsafe_delete!(bb, unreachable)

            # patch up any phi nodes in the return block: the value coming from a formerly
            # unreachable block is never meaningful, so `undef` suffices.
            for inst in instructions(return_block)
                if isa(inst, LLVM.PHIInst)
                    undef = UndefValue(value_type(inst))
                    vals = incoming(inst)
                    push!(vals, (undef, bb))
                end
            end
        end
    end

    return true
end
0 commit comments