diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index 0ff6943aa5408..cb8e9ce01d902 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -128,8 +128,7 @@ function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState) #@timeit "verify 2" verify_ir(ir) ir = compact!(ir) #@Base.show ("before_sroa", ir) - @timeit "domtree 2" domtree = construct_domtree(ir.cfg) - @timeit "SROA" ir = getfield_elim_pass!(ir, domtree) + @timeit "SROA" ir = getfield_elim_pass!(ir) #@Base.show ir.new_nodes #@Base.show ("after_sroa", ir) ir = adce_pass!(ir) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 621072527a334..2ed5e6355e399 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -283,11 +283,13 @@ function setindex!(x::IRCode, @nospecialize(repl), s::SSAValue) return x end - +# SSA values that need renaming struct OldSSAValue id::Int end +# SSA values that are in `new_new_nodes` of an `IncrementalCompact` and are to +# be actually inserted next time (they become `new_nodes` next time) struct NewSSAValue id::Int end @@ -500,9 +502,11 @@ mutable struct IncrementalCompact ir::IRCode result::InstructionStream result_bbs::Vector{BasicBlock} + ssa_rename::Vector{Any} bb_rename_pred::Vector{Int} bb_rename_succ::Vector{Int} + used_ssas::Vector{Int} late_fixup::Vector{Int} perm::Vector{Int} @@ -512,6 +516,7 @@ mutable struct IncrementalCompact # TODO: Switch these two to a min-heap of some sort pending_nodes::NewNodeStream # New nodes that were after the compaction point at insertion time pending_perm::Vector{Int} + # State idx::Int result_idx::Int @@ -519,6 +524,7 @@ mutable struct IncrementalCompact renamed_new_nodes::Bool cfg_transforms_enabled::Bool fold_constant_branches::Bool + function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false) # Sort by position with attach after nodes after regular ones perm = my_sortperm(Int[let new_node = code.new_nodes.info[i] @@ -820,7 +826,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} return values end -function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssa::Vector{Int}, do_rename_ssa::Bool) +function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, do_rename_ssa::Bool) id = val.id if id > length(ssanums) return val @@ -829,14 +835,14 @@ function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssa::Vector{Int val = ssanums[id] end if isa(val, SSAValue) - if used_ssa !== nothing - used_ssa[val.id] += 1 + if used_ssas !== nothing + used_ssas[val.id] += 1 end end return val end -function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssa::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool) +function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool) urs = userefs(stmt) for op in urs val = op[] @@ -844,7 +850,7 @@ function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssa::Vec push!(late_fixup, result_idx) end if isa(val, SSAValue) - val = renumber_ssa2(val, ssanums, used_ssa, do_rename_ssa) + val = renumber_ssa2(val, ssanums, used_ssas, do_rename_ssa) end if isa(val, OldSSAValue) || isa(val, NewSSAValue) push!(late_fixup, result_idx) @@ -871,7 +877,8 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: # Note: We recursively kill as many edges as are obviously dead. However, this # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or # worstcase during codegen). - preds, succs = compact.result_bbs[compact.bb_rename_succ[to]].preds, compact.result_bbs[compact.bb_rename_pred[from]].succs + preds = compact.result_bbs[compact.bb_rename_succ[to]].preds + succs = compact.result_bbs[compact.bb_rename_pred[from]].succs deleteat!(preds, findfirst(x->x === compact.bb_rename_pred[from], preds)::Int) deleteat!(succs, findfirst(x->x === compact.bb_rename_succ[to], succs)::Int) # Check if the block is now dead @@ -888,10 +895,13 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: compact.result[last(stmts)][:inst] = ReturnNode() end else - # We need to remove this edge from any phi nodes + # Remove this edge from all phi nodes in `to` block + # NOTE: It is possible for `to` to contain only `nothing` statements, + # so we must be careful to stop at its last statement if to < active_bb - idx = first(compact.result_bbs[compact.bb_rename_succ[to]].stmts) - while idx < length(compact.result) + stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts + idx = first(stmts) + while idx <= last(stmts) stmt = compact.result[idx][:inst] stmt === nothing && continue isa(stmt, PhiNode) || break @@ -903,8 +913,8 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: idx += 1 end else - idx = first(compact.ir.cfg.blocks[to].stmts) - for stmt in CompactPeekIterator(compact, idx) + stmts = compact.ir.cfg.blocks[to].stmts + for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) stmt === nothing && continue isa(stmt, PhiNode) || break i = findfirst(x-> x === from, stmt.edges) @@ -984,15 +994,56 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, late_fixup, result_idx, do_rename_ssa) result_idx += 1 elseif isa(stmt, PhiNode) - values = process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa) - if length(stmt.edges) == 1 && isassigned(values, 1) && + if compact.cfg_transforms_enabled + # Rename phi node edges + map!(i -> compact.bb_rename_pred[i], stmt.edges, stmt.edges) + + # Remove edges and values associated with dead blocks. Entries in + # `values` can be undefined when the phi node refers to something + # that is not defined. (This is not a sign that the compiler is + # unintentionally leaving some entries in `values` uninitialized.) + # For example, consider a reference to a variable that is only + # defined if some branch is taken. + # + # In order to leave undefined values undefined (undefined-ness is + # not a value we can copy), we copy only the edges and (defined) + # values we want to keep to new arrays initialized with undefined + # elements. + edges = Vector{Any}(undef, length(stmt.edges)) + values = Vector{Any}(undef, length(stmt.values)) + new_index = 1 + for old_index in 1:length(stmt.edges) + if stmt.edges[old_index] != -1 + edges[new_index] = stmt.edges[old_index] + if isassigned(stmt.values, old_index) + values[new_index] = stmt.values[old_index] + end + new_index += 1 + end + end + resize!(edges, new_index-1) + resize!(values, new_index-1) + else + edges = stmt.edges + values = stmt.values + end + + values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa) + # Don't remove the phi node if it is before the definition of its value + # because doing so can create forward references. This should only + # happen with dead loops, but can cause problems when optimization + # passes look at all code, dead or not. This check should be + # unnecessary when DCE can remove those dead loops entirely, so this is + # just to be safe. + before_def = isassigned(values, 1) && isa(values[1], OldSSAValue) && + idx < values[1].id + if length(edges) == 1 && isassigned(values, 1) && !before_def && length(compact.cfg_transforms_enabled ? compact.result_bbs[compact.bb_rename_succ[active_bb]].preds : compact.ir.cfg.blocks[active_bb].preds) == 1 # There's only one predecessor left - just replace it ssa_rename[idx] = values[1] else - edges = compact.cfg_transforms_enabled ? map!(i->compact.bb_rename_pred[i], stmt.edges, stmt.edges) : stmt.edges result[result_idx][:inst] = PhiNode(edges, values) result_idx += 1 end @@ -1085,10 +1136,19 @@ end struct CompactPeekIterator compact::IncrementalCompact start_idx::Int + end_idx::Int +end + +function CompactPeekIterator(compact::IncrementalCompact, start_idx::Int) + return CompactPeekIterator(compact, start_idx, 0) end entry_at_idx(entry::NewNodeInfo, idx::Int) = entry.attach_after ? entry.pos == idx - 1 : entry.pos == idx function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it.start_idx, it.compact.new_nodes_idx, 1)) + if it.end_idx > 0 && idx > it.end_idx + return nothing + end + # TODO: Take advantage of the fact that these arrays are sorted # TODO: this return value design is horrible compact = it.compact @@ -1146,7 +1206,7 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= # Move to next block compact.idx += 1 if finish_current_bb!(compact, active_bb, old_result_idx, true) - return iterate(compact, (compact.idx-1, active_bb + 1)) + return iterate(compact, (compact.idx, active_bb + 1)) else return Pair{Pair{Int, Int}, Any}(Pair{Int,Int}(compact.idx-1, old_result_idx), compact.result[old_result_idx][:inst]), (compact.idx, active_bb + 1) end @@ -1250,10 +1310,17 @@ function fixup_node(compact::IncrementalCompact, @nospecialize(stmt)) for ur in urs val = ur[] if isa(val, NewSSAValue) - ur[] = SSAValue(length(compact.result) + val.id) + val = SSAValue(length(compact.result) + val.id) elseif isa(val, OldSSAValue) - ur[] = compact.ssa_rename[val.id] + val = compact.ssa_rename[val.id] + end + if isa(val, SSAValue) && val.id <= length(compact.used_ssas) + # If `val.id` is greater than the length of `compact.result` or + # `compact.used_ssas`, this SSA value is in `new_new_nodes`, so + # don't count the use + compact.used_ssas[val.id] += 1 end + ur[] = val end return urs[] end diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index a2dc6d6e75f60..89c2b058bf0a7 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -524,7 +524,7 @@ function perform_lifting!(compact::IncrementalCompact, end assertion_counter = 0 -function getfield_elim_pass!(ir::IRCode, domtree::DomTree) +function getfield_elim_pass!(ir::IRCode) compact = IncrementalCompact(ir) insertions = Vector{Any}() defuses = IdDict{Int, Tuple{IdSet{Int}, SSADefUse}}() @@ -721,6 +721,13 @@ function getfield_elim_pass!(ir::IRCode, domtree::DomTree) used_ssas = copy(compact.used_ssas) simple_dce!(compact) ir = complete(compact) + + # Compute domtree, needed below, now that we have finished compacting the + # IR. This needs to be after we iterate through the IR with + # `IncrementalCompact` because removing dead blocks can invalidate the + # domtree. + @timeit "domtree 2" domtree = construct_domtree(ir.cfg) + # Now go through any mutable structs and see which ones we can eliminate for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) @@ -1034,30 +1041,49 @@ function cfg_simplify!(ir::IRCode) bbs = ir.cfg.blocks merge_into = zeros(Int, length(bbs)) merged_succ = zeros(Int, length(bbs)) + function follow_merge_into(idx::Int) + while merge_into[idx] != 0 + idx = merge_into[idx] + end + return idx + end + function follow_merged_succ(idx::Int) + while merged_succ[idx] != 0 + idx = merged_succ[idx] + end + return idx + end - # Walk the CFG at from the entry block and aggressively combine blocks + # Walk the CFG from the entry block and aggressively combine blocks for (idx, bb) in enumerate(bbs) if length(bb.succs) == 1 succ = bb.succs[1] if length(bbs[succ].preds) == 1 - merge_into[succ] = idx - merged_succ[idx] = succ + # Prevent cycles by making sure we don't end up back at `idx` + # by following what is to be merged into `succ` + if follow_merged_succ(succ) != idx + merge_into[succ] = idx + merged_succ[idx] = succ + end end end end + + # Assign new BB numbers max_bb_num = 1 bb_rename_succ = zeros(Int, length(bbs)) - # Lay out the basic blocks for i = 1:length(bbs) + # Drop blocks that will be merged away if merge_into[i] != 0 bb_rename_succ[i] = -1 - continue end - # Drop unreachable blocks + # Drop blocks with no predecessors if i != 1 && length(ir.cfg.blocks[i].preds) == 0 bb_rename_succ[i] = -1 end + bb_rename_succ[i] != 0 && continue + curr = i while true bb_rename_succ[curr] = max_bb_num @@ -1065,9 +1091,7 @@ function cfg_simplify!(ir::IRCode) # Now walk the chain of blocks we merged. # If we end in something that may fall through, # we have to schedule that block next - while merged_succ[curr] != 0 - curr = merged_succ[curr] - end + curr = follow_merged_succ(curr) terminator = ir.stmts[ir.cfg.blocks[curr].stmts[end]][:inst] if isa(terminator, GotoNode) || isa(terminator, ReturnNode) break @@ -1075,19 +1099,24 @@ function cfg_simplify!(ir::IRCode) curr += 1 end end + + # Figure out how predecessors should be renamed bb_rename_pred = zeros(Int, length(bbs)) for i = 1:length(bbs) if merged_succ[i] != 0 + # Block `i` should no longer be a predecessor (before renaming) + # because it is being merged with its sole successor bb_rename_pred[i] = -1 continue end - bbnum = i - while merge_into[bbnum] != 0 - bbnum = merge_into[bbnum] - end + bbnum = follow_merge_into(i) bb_rename_pred[i] = bb_rename_succ[bbnum] end + + # Compute map from new to old blocks result_bbs = Int[findfirst(j->i==j, bb_rename_succ) for i = 1:max_bb_num-1] + + # Compute new block lengths result_bbs_lengths = zeros(Int, max_bb_num-1) for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb @@ -1096,41 +1125,40 @@ function cfg_simplify!(ir::IRCode) ms = merged_succ[ms] end end + + # Compute statement indices the new blocks start at bb_starts = Vector{Int}(undef, 1+length(result_bbs_lengths)) bb_starts[1] = 1 for i = 1:length(result_bbs_lengths) bb_starts[i+1] = bb_starts[i] + result_bbs_lengths[i] end - # Figure out the pred and succ lists for each basic block in our merged result + cresult_bbs = let result_bbs = result_bbs, merged_succ = merged_succ, merge_into = merge_into, bbs = bbs, bb_rename_succ = bb_rename_succ - function compute_succs(i) - # Look at the original successor - orig_bb = result_bbs[i] - while merged_succ[orig_bb] != 0 - orig_bb = merged_succ[orig_bb] - end - newsuccs = map(i->bb_rename_succ[i], bbs[orig_bb].succs) - return newsuccs - end - function compute_preds(i) - orig_bb = result_bbs[i] - preds = bbs[orig_bb].preds - newpreds = map(preds) do pred - while merge_into[pred] != 0 - pred = merge_into[pred] - end - bb_rename_succ[pred] - end - return newpreds - end - BasicBlock[BasicBlock( - StmtRange(bb_starts[i], i + 1 > length(bb_starts) ? length(compact.result) : bb_starts[i + 1] - 1), - compute_preds(i), compute_succs(i)) for i = 1:length(result_bbs)] + + # Compute (renamed) successors and predecessors given (renamed) block + function compute_succs(i) + orig_bb = follow_merged_succ(result_bbs[i]) + return map(i -> bb_rename_succ[i], bbs[orig_bb].succs) end + function compute_preds(i) + orig_bb = result_bbs[i] + preds = bbs[orig_bb].preds + return map(pred -> bb_rename_pred[pred], preds) + end + + BasicBlock[ + BasicBlock(StmtRange(bb_starts[i], + i+1 > length(bb_starts) ? + length(compact.result) : bb_starts[i+1]-1), + compute_preds(i), + compute_succs(i)) + for i = 1:length(result_bbs)] + end + compact = IncrementalCompact(ir, true) # Run instruction compaction to produce the result, # but we're messing with the CFG @@ -1139,6 +1167,7 @@ function cfg_simplify!(ir::IRCode) compact.bb_rename_succ = bb_rename_succ compact.bb_rename_pred = bb_rename_pred compact.result_bbs = cresult_bbs + result_idx = 1 for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb while ms != 0 @@ -1158,7 +1187,6 @@ function cfg_simplify!(ir::IRCode) ms = merged_succ[ms] end end - compact.active_result_bb = length(bb_starts) return finish(compact) end diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index be7539cac4d9f..34bd185eb2e73 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -180,8 +180,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src, Any[], Any[Any, Any]) @test Core.Compiler.verify_ir(ir) === nothing - domtree = Core.Compiler.construct_domtree(ir.cfg) - ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir, domtree) + ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir) @test Core.Compiler.verify_ir(ir) === nothing end @@ -257,6 +256,30 @@ let m = Meta.@lower 1 + 1 @test isa(ret_2, Core.Compiler.ReturnNode) && ret_2.val == 2 end +let m = Meta.@lower 1 + 1 + # Test that CFG simplify doesn't try to merge every block in a loop into + # its predecessor + @assert Meta.isexpr(m, :thunk) + src = m.args[1]::Core.CodeInfo + src.code = Any[ + # Block 1 + Core.Compiler.GotoNode(2), + # Block 2 + Core.Compiler.GotoNode(3), + # Block 3 + Core.Compiler.GotoNode(1) + ] + nstmts = length(src.code) + src.ssavaluetypes = nstmts + src.codelocs = fill(Int32(1), nstmts) + src.ssaflags = fill(Int32(0), nstmts) + ir = Core.Compiler.inflate_ir(src) + Core.Compiler.verify_ir(ir) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) == 1 +end + # Issue #29213 function f_29213() while true diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index 3f7b9b02a728f..e61d4b0f8d5d1 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -1,7 +1,21 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +using Base.Meta using Core.IR const Compiler = Core.Compiler +using .Compiler: CFG, BasicBlock + +make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs) + +function make_ci(code) + ci = (Meta.@lower 1 + 1).args[1] + ci.code = code + nstmts = length(ci.code) + ci.ssavaluetypes = nstmts + ci.codelocs = fill(Int32(1), nstmts) + ci.ssaflags = fill(Int32(0), nstmts) + return ci +end # TODO: this test is broken #let code = Any[ @@ -28,7 +42,6 @@ const Compiler = Core.Compiler #end # Issue #31121 -using .Compiler: CFG, BasicBlock # We have the following CFG and corresponding DFS numbering: # @@ -47,7 +60,6 @@ using .Compiler: CFG, BasicBlock # than `3`, so the idom search missed that `1` is `3`'s semi-dominator). Here # we manually construct that CFG and verify that the DFS records the correct # parent. -make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs) let cfg = CFG(BasicBlock[ make_bb([] , [2, 3]), make_bb([1] , [4, 5]), @@ -135,9 +147,7 @@ end @test f32579(0, false) === false # Test for bug caused by renaming blocks improperly, related to PR #32145 -using Base.Meta -let ci = (Meta.@lower 1 + 1).args[1] - ci.code = [ +let ci = make_ci([ # block 1 Core.Compiler.GotoIfNot(Expr(:boundscheck), 6), # block 2 @@ -155,18 +165,13 @@ let ci = (Meta.@lower 1 + 1).args[1] Core.Compiler.ReturnNode(Core.SSAValue(8)), # block 6 Core.Compiler.ReturnNode(Core.SSAValue(8)) - ] - nstmts = length(ci.code) - ci.ssavaluetypes = nstmts - ci.codelocs = fill(Int32(1), nstmts) - ci.ssaflags = fill(Int32(0), nstmts) + ]) ir = Core.Compiler.inflate_ir(ci) ir = Core.Compiler.compact!(ir, true) @test Core.Compiler.verify_ir(ir) == nothing end # Test that GlobalRef in value position is non-canonical -using Base.Meta let ci = (Meta.@lower 1 + 1).args[1] ci.code = [ Expr(:call, GlobalRef(Main, :something_not_defined_please)) @@ -180,3 +185,51 @@ let ci = (Meta.@lower 1 + 1).args[1] ir = Core.Compiler.compact!(ir, true) @test_throws ErrorException Core.Compiler.verify_ir(ir, false) end + +# Issue #29107 +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoNode(6), + # Block 2 + # The following phi node gets deleted because it only has one edge, so + # the call to `something` is made to use the value of `something2()`, + # even though this value is defined after it. We don't want this to + # happen even though this block is dead because subsequent optimization + # passes may look at all code, dead or not. + Core.PhiNode(Any[2], Any[Core.SSAValue(4)]), + Expr(:call, :something, Core.SSAValue(2)), + Expr(:call, :something2), + Core.Compiler.GotoNode(2), + # Block 3 + Core.Compiler.ReturnNode(1000) + ]) + ir = Core.Compiler.inflate_ir(ci) + ir = Core.Compiler.compact!(ir, true) + # Make sure that if there is a call to `something` (block 2 should be + # removed entirely with working DCE), it doesn't use any SSA values that + # come after it. + for i in 1:length(ir.stmts) + s = ir.stmts[i] + if isa(s, Expr) && s.head == :call && s.args[1] == :something + if isa(s.args[2], SSAValue) + @test s.args[2].id <= i + end + end + end +end + +# Make sure dead blocks that are removed are not still referenced in live phi +# nodes +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoNode(3), + # Block 2 (no predecessors) + Core.Compiler.ReturnNode(3), + # Block 3 + Core.PhiNode(Any[1, 2], Any[100, 200]), + Core.Compiler.ReturnNode(Core.SSAValue(3)) + ]) + ir = Core.Compiler.inflate_ir(ci) + ir = Core.Compiler.compact!(ir, true) + @test Core.Compiler.verify_ir(ir) == nothing +end