diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index a42d02b8b70b4..7d41ddd2afe03 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -268,6 +268,31 @@ plus_saturate(x::Int, y::Int) = max(x, y, x+y) # known return type isknowntype(@nospecialize T) = (T == Union{}) || isconcretetype(T) +# return a flag that indicates whether a given line is on a return-path +# (meaning, is not on a branch that terminates in :unreachable) +function lines_return(a::Vector{Any}) + ir = fill(true, length(a)) + for i = length(a):-1:1 + ai = a[i] + if isa(ai, Expr) + if ai.head == :return + elseif ai.head == :unreachable + ir[i] = false + elseif ai.head == :gotoifnot + ln = ai.args[2] + ir[i] = ir[i+1] | (ln > i && ir[ln]) + else + ir[i] = ir[i+1] + end + elseif isa(ai, GotoNode) + ir[i] = ir[ai.label] + else + ir[i] = ir[i+1] + end + end + return ir +end + function statement_cost(ex::Expr, line::Int, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any}, params::Params) head = ex.head if is_meta_expr_head(head) @@ -304,6 +329,9 @@ function statement_cost(ex::Expr, line::Int, src::CodeInfo, spvals::SimpleVector elseif f === Main.Core.arrayref && length(ex.args) >= 3 atyp = argextype(ex.args[3], src, spvals, slottypes) return isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty + elseif f === Main.Core.arrayset && length(ex.args) >= 4 + atyp = argextype(ex.args[3], src, spvals, slottypes) + return isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty end fidx = find_tfunc(f) if fidx === nothing @@ -314,14 +342,18 @@ function statement_cost(ex::Expr, line::Int, src::CodeInfo, spvals::SimpleVector return T_FFUNC_COST[fidx] end return params.inline_nonleaf_penalty - elseif head === :foreigncall || head === :invoke - # Calls whose "return type" is Union{} do not actually return: - # they are errors. Since these are not part of the typical - # run-time of the function, we omit them from - # consideration. This way, non-inlined error branches do not - # prevent inlining. - extyp = line == -1 ? Any : src.ssavaluetypes[line] - return extyp === Union{} ? 0 : 20 + elseif head === :foreigncall + if isa(ex.args[1], QuoteNode) + sym = ex.args[1].value + for pr in T_CFUNC_COST + if sym === pr[1] + return pr[2] + end + end + end + return 20 + elseif head === :invoke + return 20 elseif head === :return a = ex.args[1] if a isa Expr @@ -360,6 +392,7 @@ end function inline_worthy(body::Array{Any,1}, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any}, params::Params, cost_threshold::Integer=params.inline_cost_threshold) bodycost::Int = 0 + lnr = lines_return(body) for line = 1:length(body) stmt = body[line] if stmt isa Expr @@ -372,6 +405,7 @@ function inline_worthy(body::Array{Any,1}, src::CodeInfo, spvals::SimpleVector, else continue end + lnr[line] || continue # only count lines that are not on error paths bodycost = plus_saturate(bodycost, thiscost) bodycost > cost_threshold && return false end diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index c33259f64dde7..d73791869d095 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -18,6 +18,7 @@ const T_IFUNC_COST = Vector{Int}(undef, N_IFUNC) const T_FFUNC_KEY = Vector{Any}() const T_FFUNC_VAL = Vector{Tuple{Int, Int, Any}}() const T_FFUNC_COST = Vector{Int}() +const T_CFUNC_COST = Vector{Tuple{Symbol, Int}}() # for ccalls that get special-cased by the compiler function find_tfunc(@nospecialize f) for i = 1:length(T_FFUNC_KEY) if T_FFUNC_KEY[i] === f @@ -37,6 +38,14 @@ const TYPENAME_NAME_FIELDINDEX = fieldindex(Core.TypeName, :name) const TYPENAME_MODULE_FIELDINDEX = fieldindex(Core.TypeName, :module) const TYPENAME_WRAPPER_FIELDINDEX = fieldindex(Core.TypeName, :wrapper) +########## +# ccalls # +########## +push!(T_CFUNC_COST, (:jl_array_ptr, 2)) +push!(T_CFUNC_COST, (:jl_value_ptr, 2)) +push!(T_CFUNC_COST, (:jl_array_isassigned, 5)) +push!(T_CFUNC_COST, (:jl_string_ptr, 2)) + ########## # tfuncs # ########## diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 89f5c0b05fb52..19b0baa94c680 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -229,6 +229,20 @@ end # using a function to ensure we can infer this @inline slot_id(s) = isa(s, SlotNumber) ? (s::SlotNumber).id : (s::TypedSlot).id +##################### +# sparams/slottypes # +##################### +function get_params_slottypes(@nospecialize(f), @nospecialize(argtypes)) + world = typemax(UInt) + params = Params(world) + match = _methods(f, argtypes, -1, world)[1] + mi = code_for_method(match[3], argtypes, match[2], world) + istate = InferenceState(InferenceResult(mi), + retrieve_code_info(mi), + false, params) + return istate.sp, istate.slottypes, params +end + ########### # options # ########### diff --git a/doc/src/devdocs/inference.md b/doc/src/devdocs/inference.md index df1f9082eb90e..68432baf48565 100644 --- a/doc/src/devdocs/inference.md +++ b/doc/src/devdocs/inference.md @@ -98,12 +98,10 @@ where `f` is your function and `tt` is the Tuple-type of the arguments: f = fill tt = Tuple{Float64, Tuple{Int,Int}} # Create the objects we need to interact with the compiler -params = Core.Compiler.Params(typemax(UInt)) -mi = Base.method_instances(f, tt)[1] +spvals, slottypes, params = Core.Compiler.get_params_slottypes(f, tt) ci = code_typed(f, tt)[1][1] -opt = Core.Compiler.OptimizationState(mi, params) # Calculate cost of each statement -cost(stmt::Expr) = Core.Compiler.statement_cost(stmt, -1, ci, opt.sp, opt.slottypes, opt.params) +cost(stmt::Expr) = Core.Compiler.statement_cost(stmt, -1, ci, spvals, slottypes, params) cost(stmt) = 0 cst = map(cost, ci.code) @@ -120,3 +118,13 @@ cst = map(cost, ci.code) The output is a `Vector{Int}` holding the estimated cost of each statement in `ci.code`. Note that `ci` includes the consequences of inlining callees, and consequently the costs do too. + +It's worth noting that `inline_worthy` does not count branches that do +not return, which correspond to error-paths in the code. +To determine which statements return, you can run + +```julia +lnr = Core.Compiler.lines_return(ci.code) +``` + +and then `sum(cst .* lnr)` is the aggregate cost of the call used by `inline_worthy`. diff --git a/doc/src/devdocs/ssair.md b/doc/src/devdocs/ssair.md index 96476d02c05d8..3f7bada4aa0f7 100644 --- a/doc/src/devdocs/ssair.md +++ b/doc/src/devdocs/ssair.md @@ -14,7 +14,7 @@ form representation, but the lack of such a representation ultimately proved pro ## New IR nodes -With the new IR representation, the compiler learned to handle four new IR nodes, Phi nodes, Pi +With the new IR representation, the compiler learned to handle four new IR nodes: Phi nodes, Pi nodes as well as PhiC nodes and Upsilon nodes (the latter two are only used for exception handling). ### Phi nodes and Pi nodes diff --git a/test/inline.jl b/test/inline.jl index 47d770977f016..ce116b43d9eff 100644 --- a/test/inline.jl +++ b/test/inline.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -using Test +using Test, Random """ Helper to walk the AST and call a function on every node. @@ -217,3 +217,54 @@ function f_div(x, y) return x end @test length(code_typed(f_div, (Int, Int))[1][1].code) > 1 + +# Marking of lines_return (inliner cost model) +function _lines_return_test_(x, n) + if x == 1 + y = rand() + error("expected $y, got $x") + end + s = 0.0 + for i = 1:n + s += i*x + end + return s +end + +@testset "lines_return" begin + ci = code_typed(_lines_return_test_, Tuple{Float64, Int})[1].first + ir = Core.Compiler.lines_return(ci.code) + i = 1 + # Find a line related to `rand` + while !isa(ci.code[i], Expr) || ci.code[i].head != :invoke || ci.code[i].args[2] != GlobalRef(Random, :dsfmt_fill_array_close1_open2!) + i += 1 + end + @test !ir[i] + # Find the call to `error` + while !isa(ci.code[i], Expr) || ci.code[i].head != :invoke || !isa(ci.code[i].args[2], GlobalRef) || ci.code[i].args[2].name != :error + i += 1 + end + @test !ir[i] + # Skip past the :unreachable and check that the rest of the lines + # are on a returning branch + i += 2 + while i <= length(ci.code) + @test ir[i] + i += 1 + end +end + +# cost assignment for arrayset and arrayref +@testset "inlining cost for arrayref and arrayset" begin + f(a, b) = a[2] = b[1] + v = [1.0, 2.0] + tt = Tuple{typeof(v), typeof(v)} + # Create the objects we need to interact with the compiler + spvals, slottypes, params = Core.Compiler.get_params_slottypes(f, tt) + ci = code_typed(f, tt)[1][1] + # Calculate cost of each statement + cost(stmt::Expr) = Core.Compiler.statement_cost(stmt, -1, ci, spvals, slottypes, params) + cost(stmt) = 0 + cst = map(cost, ci.code) + @test sum(cst) < 10 +end