From 1f8cb06ba2cadde7624184c3b53c39f4b2a85f1b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 00:25:55 -0400 Subject: [PATCH 01/29] Create `safe` kwarg for `@turbo` macro Currently, this macro does nothing. --- src/condense_loopset.jl | 5 +++-- src/constructors.jl | 25 +++++++++++++++---------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index d18a3ad7e..dac4c8357 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -876,7 +876,7 @@ Returns true if the element type is supported. @inline check_device(::ArrayInterface.CPUTuple) = true @inline check_device(x) = false -function check_args_call(ls::LoopSet) +function check_args_call(ls::LoopSet, safe::Bool) q = Expr(:call, lv(:check_args)) append!(q.args, ls.includedactualarrays) for r ∈ ls.outer_reductions @@ -969,6 +969,7 @@ function setup_call( v::Int8, thread::Int, warncheckarg::Int, + safe::Bool, ) # We outline/inline at the macro level by creating/not creating an anonymous function. # The old API instead was based on inlining or not inline the generated function, but @@ -986,7 +987,7 @@ function setup_call( warncheckarg > 0 && push!(warning.args, :(maxlog = $warncheckarg)) argfailure = Expr(:block, warning, argfailure) end - pushprepreamble!(ls, Expr(:if, check_args_call(ls), call, argfailure)) + pushprepreamble!(ls, Expr(:if, check_args_call(ls, safe), call, argfailure)) prepend_lnns!(ls.prepreamble, lnns) return ls.prepreamble end diff --git a/src/constructors.jl b/src/constructors.jl index bac498992..be2414808 100644 --- a/src/constructors.jl +++ b/src/constructors.jl @@ -52,12 +52,13 @@ function substitute_broadcast( v::Int8, threads::Int, warncheckarg::Int, + safe::Bool, ) ci = first(Meta.lower(LoopVectorization, q).args).code nargs = length(ci) - 1 ex = Expr(:block) syms = [gensym() for _ ∈ 1:nargs] - configarg = (inline, u₁, u₂, v, true, threads, warncheckarg) + configarg = (inline, u₁, u₂, v, true, threads, warncheckarg, safe) unroll_param_tup = Expr(:call, lv(:avx_config_val), :(Val{$configarg}()), staticexpr(0)) for n ∈ 1:nargs ciₙ = ci[n] @@ -102,6 +103,7 @@ function check_macro_kwarg( v::Int8, threads::Int, warncheckarg::Int, + safe::Bool, ) ((arg.head === :(=)) && (length(arg.args) == 2)) || throw(ArgumentError("macro kwarg should be of the form `argname = value`.")) @@ -132,6 +134,8 @@ function check_macro_kwarg( end elseif kw === :warn_check_args warncheckarg = convert(Int, value)::Int + elseif kw === :safe + safe = convert(Bool, value) else throw( ArgumentError( @@ -139,7 +143,7 @@ function check_macro_kwarg( ), ) end - inline, check_empty, u₁, u₂, v, threads, warncheckarg + inline, check_empty, u₁, u₂, v, threads, warncheckarg, safe end function process_args( args; @@ -150,12 +154,13 @@ function process_args( v::Int8 = zero(Int8), threads::Int = 1, warncheckarg::Int = 1, + safe::Bool = true, ) for arg ∈ args - inline, check_empty, u₁, u₂, v, threads, warncheckarg = - check_macro_kwarg(arg, inline, check_empty, u₁, u₂, v, threads, warncheckarg) + inline, check_empty, u₁, u₂, v, threads, warncheckarg, safe = + check_macro_kwarg(arg, inline, check_empty, u₁, u₂, v, threads, warncheckarg, safe) end - inline, check_empty, u₁, u₂, v, threads, warncheckarg + inline, check_empty, u₁, u₂, v, threads, warncheckarg, safe end # check if the body of loop is a block, if not convert it to a block issue#395 # and check if the range of loop is an enumerate, if it is replace it, issue#393 @@ -225,12 +230,12 @@ function turbo_macro(mod, src, q, args...) q = macroexpand(mod, q) if q.head === :for ls = LoopSet(q, mod) - inline, check_empty, u₁, u₂, v, threads, warncheckarg = process_args(args) - esc(setup_call(ls, q, src, inline, check_empty, u₁, u₂, v, threads, warncheckarg)) + inline, check_empty, u₁, u₂, v, threads, warncheckarg, safe = process_args(args) + esc(setup_call(ls, q, src, inline, check_empty, u₁, u₂, v, threads, warncheckarg, safe)) else - inline, check_empty, u₁, u₂, v, threads, warncheckarg = + inline, check_empty, u₁, u₂, v, threads, warncheckarg, safe = process_args(args, inline = true) - substitute_broadcast(q, Symbol(mod), inline, u₁, u₂, v, threads, warncheckarg) + substitute_broadcast(q, Symbol(mod), inline, u₁, u₂, v, threads, warncheckarg, safe) end end """ @@ -367,7 +372,7 @@ macro _turbo(arg, q) @assert q.head === :for q = macroexpand(__module__, q) inline, check_empty, u₁, u₂, v = - check_macro_kwarg(arg, false, false, zero(Int8), zero(Int8), zero(Int8), 1, 0) + check_macro_kwarg(arg, false, false, zero(Int8), zero(Int8), zero(Int8), 1, 0, true) ls = LoopSet(q, __module__) set_hw!(ls) def_outer_reduct_types!(ls) From 3e148f52651082b1e173924e85558b5d910a3f77 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 01:01:57 -0400 Subject: [PATCH 02/29] Run `can_avx` on each operator when checking loopset --- src/condense_loopset.jl | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index dac4c8357..643dd7b0c 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -876,7 +876,7 @@ Returns true if the element type is supported. @inline check_device(::ArrayInterface.CPUTuple) = true @inline check_device(x) = false -function check_args_call(ls::LoopSet, safe::Bool) +function check_args_call(ls::LoopSet) q = Expr(:call, lv(:check_args)) append!(q.args, ls.includedactualarrays) for r ∈ ls.outer_reductions @@ -885,6 +885,25 @@ function check_args_call(ls::LoopSet, safe::Bool) q end +""" + check_avx_safe(ls::LoopSet) + +Returns an expression of the form `true && can_avx(op1) && can_avx(op2) && ...` +""" +function check_avx_safe(ls::LoopSet) + q = Expr(:&&, true) + last = q + for op in operations(ls) + iscompute(op) || continue + c = callexpr(op.instruction) + pushfirst!(c.args, ArrayInterface.can_avx) + new_last = Expr(:&&, c) + push!(last.args, new_last) + last = new_last + end + q +end + make_fast(q) = Expr(:macrocall, Symbol("@fastmath"), LineNumberNode(@__LINE__, Symbol(@__FILE__)), q) make_crashy(q) = @@ -987,7 +1006,8 @@ function setup_call( warncheckarg > 0 && push!(warning.args, :(maxlog = $warncheckarg)) argfailure = Expr(:block, warning, argfailure) end - pushprepreamble!(ls, Expr(:if, check_args_call(ls, safe), call, argfailure)) + println(check_avx_safe(ls)) + pushprepreamble!(ls, Expr(:if, Expr(:&&, check_args_call(ls), Expr(:||, !safe, check_avx_safe(ls))), call, argfailure)) prepend_lnns!(ls.prepreamble, lnns) return ls.prepreamble end From 7a89027e9791a1e80720fcd7deb42a200aa4e101 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 01:06:49 -0400 Subject: [PATCH 03/29] Refactor `can_avx` test --- test/can_avx.jl | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/test/can_avx.jl b/test/can_avx.jl index b72d498fe..57097dcad 100644 --- a/test/can_avx.jl +++ b/test/can_avx.jl @@ -3,18 +3,16 @@ @testset "can_avx" begin - @test LoopVectorization.ArrayInterface.can_avx(log) - @test LoopVectorization.ArrayInterface.can_avx(log1p) - @test LoopVectorization.ArrayInterface.can_avx(exp) - @test LoopVectorization.ArrayInterface.can_avx(+) - @test LoopVectorization.ArrayInterface.can_avx(-) - @test LoopVectorization.ArrayInterface.can_avx(Base.FastMath.add_fast) - @test LoopVectorization.ArrayInterface.can_avx(/) - @test LoopVectorization.ArrayInterface.can_avx(sqrt) - @test LoopVectorization.ArrayInterface.can_avx(tanh_fast) - @test LoopVectorization.ArrayInterface.can_avx(sigmoid_fast) - @test LoopVectorization.ArrayInterface.can_avx(LoopVectorization.relu) - @test !LoopVectorization.ArrayInterface.can_avx(clenshaw) - @test !LoopVectorization.ArrayInterface.can_avx(println) + + good_operators = [log, log1p, exp, +, -, Base.FastMath.add_fast, /, sqrt, tanh_fast, sigmoid_fast, LoopVectorization.relu] + bad_operators = [clenshaw, println, SpecialFunctions.gamma] + + for op in good_operators + @test LoopVectorization.can_avx(op) + end + for op in bad_operators + @test !LoopVectorization.can_avx(op) + end + end From 3585ec9cacaff0048d5b196d909591fb6577e54b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 01:08:19 -0400 Subject: [PATCH 04/29] Add test for `safe=true` option in `@turbo` --- test/can_avx.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/can_avx.jl b/test/can_avx.jl index 57097dcad..5db4cf749 100644 --- a/test/can_avx.jl +++ b/test/can_avx.jl @@ -3,6 +3,7 @@ @testset "can_avx" begin + import SpecialFunctions good_operators = [log, log1p, exp, +, -, Base.FastMath.add_fast, /, sqrt, tanh_fast, sigmoid_fast, LoopVectorization.relu] bad_operators = [clenshaw, println, SpecialFunctions.gamma] @@ -15,4 +16,18 @@ end + # Test safe @turbo + x = Float32.(1:0.1:10) + y = similar(x) + truth = similar(x) + + @turbo safe=true for i in indices(x) + y[i] = SpecialFunctions.gamma(x[i]) + end + for i in indices(x) + truth[i] = SpecialFunctions.gamma(x[i]) + end + + @test y ≈ truth + end From ec3f6a03052587458b732ee053e739355c649c0d Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 01:18:07 -0400 Subject: [PATCH 05/29] Remove debugging statement --- src/condense_loopset.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index 643dd7b0c..46d76458b 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -1006,7 +1006,6 @@ function setup_call( warncheckarg > 0 && push!(warning.args, :(maxlog = $warncheckarg)) argfailure = Expr(:block, warning, argfailure) end - println(check_avx_safe(ls)) pushprepreamble!(ls, Expr(:if, Expr(:&&, check_args_call(ls), Expr(:||, !safe, check_avx_safe(ls))), call, argfailure)) prepend_lnns!(ls.prepreamble, lnns) return ls.prepreamble From 02919d8d518bbc6340626c90eaaf1137bef67458 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 01:30:08 -0400 Subject: [PATCH 06/29] Clean up preamble generation --- src/condense_loopset.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index 46d76458b..ce18fa9f6 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -1006,7 +1006,12 @@ function setup_call( warncheckarg > 0 && push!(warning.args, :(maxlog = $warncheckarg)) argfailure = Expr(:block, warning, argfailure) end - pushprepreamble!(ls, Expr(:if, Expr(:&&, check_args_call(ls), Expr(:||, !safe, check_avx_safe(ls))), call, argfailure)) + call_check = if safe + Expr(:&&, check_args_call(ls), check_avx_safe(ls)) + else + check_args_call(ls) + end + pushprepreamble!(ls, Expr(:if, call_check, call, argfailure)) prepend_lnns!(ls.prepreamble, lnns) return ls.prepreamble end From f60c1f54beff10ec0ef7e113c44f2dac7cf09dd4 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 01:49:06 -0400 Subject: [PATCH 07/29] Set `safe=false` for `@turbo` by default --- src/constructors.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/constructors.jl b/src/constructors.jl index be2414808..fbf2eccb6 100644 --- a/src/constructors.jl +++ b/src/constructors.jl @@ -154,7 +154,7 @@ function process_args( v::Int8 = zero(Int8), threads::Int = 1, warncheckarg::Int = 1, - safe::Bool = true, + safe::Bool = false, ) for arg ∈ args inline, check_empty, u₁, u₂, v, threads, warncheckarg, safe = From 51153511d4a8d6164f732a4148a4c5aa5a87ade2 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 14:31:08 -0400 Subject: [PATCH 08/29] Switch to more generic `can_turbo` function for safe `@turbo` --- src/condense_loopset.jl | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index ce18fa9f6..e7977bab6 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -886,17 +886,30 @@ function check_args_call(ls::LoopSet) end """ - check_avx_safe(ls::LoopSet) + can_turbo(f::Function, ::Val{NARGS}) -Returns an expression of the form `true && can_avx(op1) && can_avx(op2) && ...` +Check whether a given function with a specified number of arguments +can be used inside a `@turbo` loop. """ -function check_avx_safe(ls::LoopSet) +function can_turbo(f::F, ::Val{NARGS})::Bool where {F,NARGS} + promoted_op = Base.promote_op(f, ntuple(Returns(Vec{2,Int}), Val(NARGS))...) + return promoted_op !== Union{} +end + +""" + check_turbo_safe(ls::LoopSet) + +Returns an expression of the form `true && can_turbo(op1) && can_turbo(op2) && ...` +""" +function check_turbo_safe(ls::LoopSet) q = Expr(:&&, true) last = q for op in operations(ls) iscompute(op) || continue c = callexpr(op.instruction) - pushfirst!(c.args, ArrayInterface.can_avx) + nargs = length(op.dependencies) + push!(c.args, Val(nargs)) + pushfirst!(c.args, can_turbo) new_last = Expr(:&&, c) push!(last.args, new_last) last = new_last @@ -1007,7 +1020,7 @@ function setup_call( argfailure = Expr(:block, warning, argfailure) end call_check = if safe - Expr(:&&, check_args_call(ls), check_avx_safe(ls)) + Expr(:&&, check_args_call(ls), check_turbo_safe(ls)) else check_args_call(ls) end From 40c425af88b5e739817b744e4f4b167b408f8eed Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 14:38:44 -0400 Subject: [PATCH 09/29] Remove `@turbo safe=true` tests from `can_avx.jl` --- test/can_avx.jl | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/test/can_avx.jl b/test/can_avx.jl index 5db4cf749..3df517dc4 100644 --- a/test/can_avx.jl +++ b/test/can_avx.jl @@ -3,10 +3,8 @@ @testset "can_avx" begin - import SpecialFunctions - good_operators = [log, log1p, exp, +, -, Base.FastMath.add_fast, /, sqrt, tanh_fast, sigmoid_fast, LoopVectorization.relu] - bad_operators = [clenshaw, println, SpecialFunctions.gamma] + bad_operators = [clenshaw, println] for op in good_operators @test LoopVectorization.can_avx(op) @@ -15,19 +13,4 @@ @test !LoopVectorization.can_avx(op) end - - # Test safe @turbo - x = Float32.(1:0.1:10) - y = similar(x) - truth = similar(x) - - @turbo safe=true for i in indices(x) - y[i] = SpecialFunctions.gamma(x[i]) - end - for i in indices(x) - truth[i] = SpecialFunctions.gamma(x[i]) - end - - @test y ≈ truth - end From 2dff297d86260b35cde1c6ff42fe33408a69974c Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 14:39:28 -0400 Subject: [PATCH 10/29] Create file to test `@turbo safe=true` and `can_turbo` --- test/grouptests.jl | 2 ++ test/safe_turbo.jl | 55 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 test/safe_turbo.jl diff --git a/test/grouptests.jl b/test/grouptests.jl index 8cac8d34c..430b5999b 100644 --- a/test/grouptests.jl +++ b/test/grouptests.jl @@ -22,6 +22,8 @@ const START_TIME = time() @time include("can_avx.jl") + @time include("safe_turbo.jl") + @time include("fallback.jl") @time include("utils.jl") diff --git a/test/safe_turbo.jl b/test/safe_turbo.jl new file mode 100644 index 000000000..0a9ddbcf3 --- /dev/null +++ b/test/safe_turbo.jl @@ -0,0 +1,55 @@ + +@testset "Safe @turbo" begin + + @testset "Test `can_turbo`" begin + import SpecialFunctions + + # All methods, both `can_avx` and `can_turbo`, should recognize that + # `gamma` is not AVX-able + f(x) = SpecialFunctions.gamma(x) + + @test !LoopVectorization.can_avx(SpecialFunctions.gamma) + @test !LoopVectorization.can_turbo(SpecialFunctions.gamma, Val(1)) + @test !LoopVectorization.can_turbo(f, Val(1)) + + # `can_avx` is not able to detect that a function `f` which is just + # `gamma` can be AVX'd, but `can_turbo` can: + f(x) = exp(x) + + @test !LoopVectorization.can_avx(f) + @test LoopVectorization.can_turbo(exp, Val(1)) + @test LoopVectorization.can_turbo(f, Val(1)) + + # Next, we test with multiple arguments: + g(x, y) = x + SpecialFunctions.gamma(y) + @test !LoopVectorization.can_turbo(g, Val(2)) + g(x, y) = x + exp(y) + @test LoopVectorization.can_turbo(g, Val(2)) + end + + @testset "Test `@turbo` with `safe=true`" begin + import SpecialFunctions + + x = Float32.(1.05:0.1:10) + y = Float32.(0.55:0.1:10.5) + z = similar(x) + truth = similar(x) + + @turbo safe=true for i in indices(x) + z[i] = SpecialFunctions.gamma(x[i]) + end + for i in indices(x) + truth[i] = SpecialFunctions.gamma(x[i]) + end + @test z ≈ truth + + f(x, y) = x + SpecialFunctions.gamma(y) + @turbo safe=true for i in indices(x) + z[i] = f(x[i], y[i]) + end + for i in indices(x) + truth[i] = f(x[i], y[i]) + end + @test z ≈ truth + end +end From 7136114a60342905304ca280e939f2cce845df08 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 14:42:02 -0400 Subject: [PATCH 11/29] Compute `nargs` of instruction properly --- src/condense_loopset.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index e7977bab6..ce2105102 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -907,7 +907,7 @@ function check_turbo_safe(ls::LoopSet) for op in operations(ls) iscompute(op) || continue c = callexpr(op.instruction) - nargs = length(op.dependencies) + nargs = length(parents(op)) push!(c.args, Val(nargs)) pushfirst!(c.args, can_turbo) new_last = Expr(:&&, c) From 0df1606260f42965086ca4af21313612914afec4 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 15:49:12 -0400 Subject: [PATCH 12/29] Add missing `safe` kwarg in `vmaterialize!` --- src/broadcast.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index eb06474c3..6f428617e 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -548,7 +548,7 @@ end # we have an N dimensional loop. # need to construct the LoopSet ls = LoopSet(Mod) - inline, u₁, u₂, v, isbroadcast, _, rs, rc, cls, threads, warncheckarg = UNROLL + inline, u₁, u₂, v, isbroadcast, _, rs, rc, cls, threads, warncheckarg, safe = UNROLL set_hw!(ls, rs, rc, cls) ls.isbroadcast = isbroadcast # maybe set `false` in a DiffEq-like `@..` macro loopsyms = [gensym!(ls, "n") for _ ∈ 1:N] @@ -571,6 +571,7 @@ end v, threads % Int, warncheckarg, + safe, ) Expr(:block, Expr(:meta, :inline), sc, :dest) end @@ -584,7 +585,7 @@ end # we have an N dimensional loop. # need to construct the LoopSet ls = LoopSet(Mod) - inline, u₁, u₂, v, isbroadcast, _, rs, rc, cls, threads, warncheckarg = UNROLL + inline, u₁, u₂, v, isbroadcast, _, rs, rc, cls, threads, warncheckarg, safe = UNROLL set_hw!(ls, rs, rc, cls) ls.isbroadcast = isbroadcast # maybe set `false` in a DiffEq-like `@..` macro loopsyms = [gensym!(ls, "n") for _ ∈ 1:N] @@ -614,6 +615,7 @@ end v, threads % Int, warncheckarg, + safe, ), :dest′, ) From 066e3498113558f529e66662312abbcc342e8744 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 18:21:58 -0400 Subject: [PATCH 13/29] Also unpack `warncheckarg` and `safe` from UNROLL --- src/broadcast.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index 6f428617e..d295ceb10 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -628,7 +628,7 @@ end ::Val{UNROLL}, ::Val{dontbc} ) where {T<:NativeTypes,N,T2<:Number,Mod,UNROLL,dontbc} - inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, threads = UNROLL + inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, threads, warncheckarg, safe = UNROLL quote $(Expr(:meta, :inline)) arg = T(first(bc.args)) @@ -648,7 +648,7 @@ end ::Val{UNROLL}, ::Val{dontbc} ) where {T<:NativeTypes,N,A<:AbstractArray{T,N},T2<:Number,Mod,UNROLL,dontbc} - inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, threads = UNROLL + inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, threads, warncheckarg, safe = UNROLL quote $(Expr(:meta, :inline)) arg = T(first(bc.args)) From b7b947098c3469668421cbe54f82fbee13401407 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 18:40:08 -0400 Subject: [PATCH 14/29] Ensure warncheckarg and safe passed everywhere for consistency --- src/condense_loopset.jl | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index ce2105102..417a337cf 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -605,6 +605,8 @@ function split_ifelse!( k::Int, inlineu₁u₂::Tuple{Bool,Int8,Int8,Int8}, thread::UInt, + warncheckarg::Int, + safe::Bool, debug::Bool, ) roots[k] = false @@ -662,6 +664,8 @@ function split_ifelse!( copy(extra_args), inlineu₁u₂, thread, + warncheckarg, + safe, debug, )) else @@ -673,6 +677,8 @@ function split_ifelse!( extra_args, inlineu₁u₂, thread, + warncheckarg, + safe, debug, )) end @@ -685,6 +691,8 @@ function generate_call( ls::LoopSet, inlineu₁u₂::Tuple{Bool,Int8,Int8,Int8}, thread::UInt, + warncheckarg::Int, + safe::Bool, debug::Bool, ) extra_args = Expr(:tuple) @@ -698,6 +706,8 @@ function generate_call( extra_args, inlineu₁u₂, thread, + warncheckarg, + safe, debug, ) end @@ -709,6 +719,8 @@ function generate_call_split( extra_args::Expr, inlineu₁u₂::Tuple{Bool,Int8,Int8,Int8}, thread::UInt, + warncheckarg::Int, + safe::Bool, debug::Bool, ) for (k, op) ∈ enumerate(operations(ls)) @@ -725,6 +737,8 @@ function generate_call_split( k, inlineu₁u₂, thread, + warncheckarg, + safe, debug, ) end @@ -737,6 +751,8 @@ function generate_call_split( extra_args, inlineu₁u₂, thread, + warncheckarg, + safe, debug, ) end @@ -750,6 +766,8 @@ function generate_call_types( extra_args::Expr, (inline, u₁, u₂, v)::Tuple{Bool,Int8,Int8,Int8}, thread::UInt, + warncheckarg::Int, + safe::Bool, debug::Bool, ) # good place to check for split @@ -782,7 +800,7 @@ function generate_call_types( loop_syms = tuple_expr(QuoteNode, ls.loopsymbols) func = debug ? lv(:_turbo_loopset_debug) : lv(:_turbo_!) lbarg = debug ? Expr(:call, :typeof, loop_bounds) : loop_bounds - configarg = (inline, u₁, u₂, v, ls.isbroadcast, thread) + configarg = (inline, u₁, u₂, v, ls.isbroadcast, thread, warncheckarg, safe) unroll_param_tup = Expr(:call, lv(:avx_config_val), :(Val{$configarg}()), VECTORWIDTHSYMBOL) q = Expr( @@ -988,7 +1006,7 @@ function setup_call_final(ls::LoopSet, q::Expr) return ls.preamble end function setup_call_debug(ls::LoopSet) - generate_call(ls, (false, zero(Int8), zero(Int8), zero(Int8)), zero(UInt), true) + generate_call(ls, (false, zero(Int8), zero(Int8), zero(Int8)), zero(UInt), 1, true, true) end function setup_call( ls::LoopSet, @@ -1010,7 +1028,7 @@ function setup_call( # inlining the generated function into the loop preamble. lnns = extract_all_lnns(q) pushfirst!(lnns, source) - call = generate_call(ls, (inline, u₁, u₂, v), thread % UInt, false) + call = generate_call(ls, (inline, u₁, u₂, v), thread % UInt, 1, true, false) call = check_empty ? check_if_empty(ls, call) : call argfailure = make_crashy(make_fast(q)) if warncheckarg ≠ 0 From 4c57fdee45a88fcaf18ff9122c8a4fee301289ba Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 18:45:10 -0400 Subject: [PATCH 15/29] Consistency in `UNROLL` name --- src/broadcast.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index d295ceb10..19807a63f 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -662,8 +662,8 @@ end dest′ end end -@inline function vmaterialize!(dest, bc, ::Val{Mod}, ::Val{Unroll}) where {Mod,Unroll} - vmaterialize!(dest, bc, Val{Mod}(), Val{Unroll}(), Val(_dontbc(bc))) +@inline function vmaterialize!(dest, bc, ::Val{Mod}, ::Val{UNROLL}) where {Mod,UNROLL} + vmaterialize!(dest, bc, Val{Mod}(), Val{UNROLL}(), Val(_dontbc(bc))) end @inline function vmaterialize( From bd2fc43ed0c175e905cdcd1bb41e2f2876871ee7 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 18:49:39 -0400 Subject: [PATCH 16/29] Add packages required for testing to `[extras]` and `[targets]` --- Project.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Project.toml b/Project.toml index 5ed13297f..41fd47bd7 100644 --- a/Project.toml +++ b/Project.toml @@ -54,3 +54,11 @@ ThreadingUtilities = "0.5" UnPack = "1" VectorizationBase = "0.21.21" julia = "1.6" + +[extras] +SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" +SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["SpecialFunctions", "Test", "SafeTestsets"] From 73f60ab395e24fd14ed7ae38143897622d3e329d Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Sun, 18 Sep 2022 18:59:53 -0400 Subject: [PATCH 17/29] Add `safe` and `warncheckarg` throughout library --- src/codegen/lower_threads.jl | 6 +++--- src/condense_loopset.jl | 4 ++-- src/reconstruct_loopset.jl | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/codegen/lower_threads.jl b/src/codegen/lower_threads.jl index 873c01cc4..4152061e7 100644 --- a/src/codegen/lower_threads.jl +++ b/src/codegen/lower_threads.jl @@ -420,7 +420,7 @@ function thread_one_loops_expr( valid_thread_loop::Vector{Bool}, ntmax::UInt, c::Float64, - UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt}, + UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt,Int,Bool}, OPS::Expr, ARF::Expr, AM::Expr, @@ -615,7 +615,7 @@ function thread_two_loops_expr( valid_thread_loop::Vector{Bool}, ntmax::UInt, c::Float64, - UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt}, + UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt,Int,Bool}, OPS::Expr, ARF::Expr, AM::Expr, @@ -877,7 +877,7 @@ function valid_thread_loops(ls::LoopSet) end function avx_threads_expr( ls::LoopSet, - UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt}, + UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt,Int,Bool}, nt::UInt, OPS::Expr, ARF::Expr, diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index 417a337cf..d4246b508 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -558,9 +558,9 @@ end ::StaticInt{NT}, ::StaticInt{CLS}, ) where {CNFARG,W,RS,AR,CLS,NT} - inline, u₁, u₂, v, BROADCAST, thread = CNFARG + inline, u₁, u₂, v, BROADCAST, thread, warncheckarg, safe = CNFARG nt = min(thread % UInt, NT % UInt) - t = Expr(:tuple, inline, u₁, u₂, v, BROADCAST, W, RS, AR, CLS, nt) + t = Expr(:tuple, inline, u₁, u₂, v, BROADCAST, W, RS, AR, CLS, nt, warncheckarg, safe) length(CNFARG) == 7 && push!(t.args, CNFARG[7]) Expr(:call, Expr(:curly, :Val, t)) end diff --git a/src/reconstruct_loopset.jl b/src/reconstruct_loopset.jl index fab8f53ed..224cf372d 100644 --- a/src/reconstruct_loopset.jl +++ b/src/reconstruct_loopset.jl @@ -874,7 +874,7 @@ function avx_loopset!( end function avx_body( ls::LoopSet, - UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt}, + UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt,Int,Bool}, ) inline, u₁, u₂, v, isbroadcast, W, rs, rc, cls, nt = UNROLL q = @@ -916,7 +916,7 @@ function _turbo_loopset( @nospecialize(LPSYMsv), LBsv::Core.SimpleVector, vargs::Core.SimpleVector, - UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt}, + UNROLL::Tuple{Bool,Int8,Int8,Int8,Bool,Int,Int,Int,Int,UInt,Int,Bool}, ) nops = length(OPSsv) ÷ 3 instr = Instruction[Instruction(OPSsv[3i+1], OPSsv[3i+2]) for i ∈ 0:nops-1] From 3d399d000e6e96d82a023ea4dc80aab0ff23d8e2 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 19 Sep 2022 00:03:46 -0400 Subject: [PATCH 18/29] Remove edits to Project --- Project.toml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Project.toml b/Project.toml index 41fd47bd7..5ed13297f 100644 --- a/Project.toml +++ b/Project.toml @@ -54,11 +54,3 @@ ThreadingUtilities = "0.5" UnPack = "1" VectorizationBase = "0.21.21" julia = "1.6" - -[extras] -SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" -SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["SpecialFunctions", "Test", "SafeTestsets"] From 181e10af0f0c5aa7f5f98f0de0c5f90ed7233ea8 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 19 Sep 2022 00:04:27 -0400 Subject: [PATCH 19/29] Add missing imports in save `@turbo` tests --- test/can_avx.jl | 2 ++ test/safe_turbo.jl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/test/can_avx.jl b/test/can_avx.jl index 3df517dc4..c115ed14e 100644 --- a/test/can_avx.jl +++ b/test/can_avx.jl @@ -3,6 +3,8 @@ @testset "can_avx" begin + using LoopVectorization + good_operators = [log, log1p, exp, +, -, Base.FastMath.add_fast, /, sqrt, tanh_fast, sigmoid_fast, LoopVectorization.relu] bad_operators = [clenshaw, println] diff --git a/test/safe_turbo.jl b/test/safe_turbo.jl index 0a9ddbcf3..cbe62c841 100644 --- a/test/safe_turbo.jl +++ b/test/safe_turbo.jl @@ -3,6 +3,7 @@ @testset "Test `can_turbo`" begin import SpecialFunctions + using LoopVectorization # All methods, both `can_avx` and `can_turbo`, should recognize that # `gamma` is not AVX-able @@ -29,6 +30,7 @@ @testset "Test `@turbo` with `safe=true`" begin import SpecialFunctions + using LoopVectorization x = Float32.(1.05:0.1:10) y = Float32.(0.55:0.1:10.5) From 809dbf2ad66ecf0b6918da9ac80bda82db55fd2b Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 19 Sep 2022 00:12:56 -0400 Subject: [PATCH 20/29] Fix call to `can_avx` --- test/can_avx.jl | 4 ++-- test/safe_turbo.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/can_avx.jl b/test/can_avx.jl index c115ed14e..1b1289453 100644 --- a/test/can_avx.jl +++ b/test/can_avx.jl @@ -9,10 +9,10 @@ bad_operators = [clenshaw, println] for op in good_operators - @test LoopVectorization.can_avx(op) + @test LoopVectorization.ArrayInterface.can_avx(op) end for op in bad_operators - @test !LoopVectorization.can_avx(op) + @test !LoopVectorization.ArrayInterface.can_avx(op) end end diff --git a/test/safe_turbo.jl b/test/safe_turbo.jl index cbe62c841..bf7765676 100644 --- a/test/safe_turbo.jl +++ b/test/safe_turbo.jl @@ -9,7 +9,7 @@ # `gamma` is not AVX-able f(x) = SpecialFunctions.gamma(x) - @test !LoopVectorization.can_avx(SpecialFunctions.gamma) + @test !LoopVectorization.ArrayInterface.can_avx(SpecialFunctions.gamma) @test !LoopVectorization.can_turbo(SpecialFunctions.gamma, Val(1)) @test !LoopVectorization.can_turbo(f, Val(1)) @@ -17,7 +17,7 @@ # `gamma` can be AVX'd, but `can_turbo` can: f(x) = exp(x) - @test !LoopVectorization.can_avx(f) + @test !LoopVectorization.ArrayInterface.can_avx(f) @test LoopVectorization.can_turbo(exp, Val(1)) @test LoopVectorization.can_turbo(f, Val(1)) From da44c7443d8309cb1708c3f1cf19f27659b28e59 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 19 Sep 2022 00:21:53 -0400 Subject: [PATCH 21/29] Remove nested `testset` Seems to be breaking imports. --- test/safe_turbo.jl | 96 ++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 51 deletions(-) diff --git a/test/safe_turbo.jl b/test/safe_turbo.jl index bf7765676..39f513b82 100644 --- a/test/safe_turbo.jl +++ b/test/safe_turbo.jl @@ -1,57 +1,51 @@ @testset "Safe @turbo" begin - @testset "Test `can_turbo`" begin - import SpecialFunctions - using LoopVectorization - - # All methods, both `can_avx` and `can_turbo`, should recognize that - # `gamma` is not AVX-able - f(x) = SpecialFunctions.gamma(x) - - @test !LoopVectorization.ArrayInterface.can_avx(SpecialFunctions.gamma) - @test !LoopVectorization.can_turbo(SpecialFunctions.gamma, Val(1)) - @test !LoopVectorization.can_turbo(f, Val(1)) - - # `can_avx` is not able to detect that a function `f` which is just - # `gamma` can be AVX'd, but `can_turbo` can: - f(x) = exp(x) - - @test !LoopVectorization.ArrayInterface.can_avx(f) - @test LoopVectorization.can_turbo(exp, Val(1)) - @test LoopVectorization.can_turbo(f, Val(1)) - - # Next, we test with multiple arguments: - g(x, y) = x + SpecialFunctions.gamma(y) - @test !LoopVectorization.can_turbo(g, Val(2)) - g(x, y) = x + exp(y) - @test LoopVectorization.can_turbo(g, Val(2)) + import SpecialFunctions + using LoopVectorization + + # All methods, both `can_avx` and `can_turbo`, should recognize that + # `gamma` is not AVX-able + f(x) = SpecialFunctions.gamma(x) + + @test !LoopVectorization.ArrayInterface.can_avx(SpecialFunctions.gamma) + @test !LoopVectorization.can_turbo(SpecialFunctions.gamma, Val(1)) + @test !LoopVectorization.can_turbo(f, Val(1)) + + # `can_avx` is not able to detect that a function `f` which is just + # `gamma` can be AVX'd, but `can_turbo` can: + f(x) = exp(x) + + @test !LoopVectorization.ArrayInterface.can_avx(f) + @test LoopVectorization.can_turbo(exp, Val(1)) + @test LoopVectorization.can_turbo(f, Val(1)) + + # Next, we test with multiple arguments: + g(x, y) = x + SpecialFunctions.gamma(y) + @test !LoopVectorization.can_turbo(g, Val(2)) + g(x, y) = x + exp(y) + @test LoopVectorization.can_turbo(g, Val(2)) + + x = Float32.(1.05:0.1:10) + y = Float32.(0.55:0.1:10.5) + z = similar(x) + truth = similar(x) + + @turbo safe=true for i in indices(x) + z[i] = SpecialFunctions.gamma(x[i]) end - - @testset "Test `@turbo` with `safe=true`" begin - import SpecialFunctions - using LoopVectorization - - x = Float32.(1.05:0.1:10) - y = Float32.(0.55:0.1:10.5) - z = similar(x) - truth = similar(x) - - @turbo safe=true for i in indices(x) - z[i] = SpecialFunctions.gamma(x[i]) - end - for i in indices(x) - truth[i] = SpecialFunctions.gamma(x[i]) - end - @test z ≈ truth - - f(x, y) = x + SpecialFunctions.gamma(y) - @turbo safe=true for i in indices(x) - z[i] = f(x[i], y[i]) - end - for i in indices(x) - truth[i] = f(x[i], y[i]) - end - @test z ≈ truth + for i in indices(x) + truth[i] = SpecialFunctions.gamma(x[i]) + end + @test z ≈ truth + + f(x, y) = x + SpecialFunctions.gamma(y) + @turbo safe=true for i in indices(x) + z[i] = f(x[i], y[i]) end + for i in indices(x) + truth[i] = f(x[i], y[i]) + end + @test z ≈ truth + end From 5ef2edc9791bdc2ef23ecafa2ebcd4d498473a64 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 19 Sep 2022 00:23:54 -0400 Subject: [PATCH 22/29] Test that `can_avx` validates `exp` by itself --- test/safe_turbo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/safe_turbo.jl b/test/safe_turbo.jl index 39f513b82..ed2bd6ac5 100644 --- a/test/safe_turbo.jl +++ b/test/safe_turbo.jl @@ -16,6 +16,7 @@ # `gamma` can be AVX'd, but `can_turbo` can: f(x) = exp(x) + @test LoopVectorization.ArrayInterface.can_avx(exp) @test !LoopVectorization.ArrayInterface.can_avx(f) @test LoopVectorization.can_turbo(exp, Val(1)) @test LoopVectorization.can_turbo(f, Val(1)) From 2fddc430ff0b46789fe35a44668285c2ffb092f0 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 19 Sep 2022 02:26:03 -0400 Subject: [PATCH 23/29] Add SpecialFunctions.jl to test --- test/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/Project.toml b/test/Project.toml index e7c69465d..e57e06bb3 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -8,6 +8,7 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SnoopCompileCore = "e2b509da-e806-4183-be48-004708413034" SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c" +SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 02a29be2287e7300799fb0bd56a0773faa7d47b2 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 19 Sep 2022 12:48:07 -0400 Subject: [PATCH 24/29] Clean up test set --- test/safe_turbo.jl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/test/safe_turbo.jl b/test/safe_turbo.jl index ed2bd6ac5..eaf531b4f 100644 --- a/test/safe_turbo.jl +++ b/test/safe_turbo.jl @@ -2,30 +2,29 @@ @testset "Safe @turbo" begin import SpecialFunctions - using LoopVectorization # All methods, both `can_avx` and `can_turbo`, should recognize that # `gamma` is not AVX-able - f(x) = SpecialFunctions.gamma(x) + f1(x) = SpecialFunctions.gamma(x) @test !LoopVectorization.ArrayInterface.can_avx(SpecialFunctions.gamma) @test !LoopVectorization.can_turbo(SpecialFunctions.gamma, Val(1)) - @test !LoopVectorization.can_turbo(f, Val(1)) + @test !LoopVectorization.can_turbo(f1, Val(1)) # `can_avx` is not able to detect that a function `f` which is just # `gamma` can be AVX'd, but `can_turbo` can: - f(x) = exp(x) + f2(x) = exp(x) @test LoopVectorization.ArrayInterface.can_avx(exp) - @test !LoopVectorization.ArrayInterface.can_avx(f) + @test !LoopVectorization.ArrayInterface.can_avx(f2) @test LoopVectorization.can_turbo(exp, Val(1)) - @test LoopVectorization.can_turbo(f, Val(1)) + @test LoopVectorization.can_turbo(f2, Val(1)) # Next, we test with multiple arguments: - g(x, y) = x + SpecialFunctions.gamma(y) - @test !LoopVectorization.can_turbo(g, Val(2)) - g(x, y) = x + exp(y) - @test LoopVectorization.can_turbo(g, Val(2)) + g1(x, y) = x + SpecialFunctions.gamma(y) + @test !LoopVectorization.can_turbo(g1, Val(2)) + g2(x, y) = x + exp(y) + @test LoopVectorization.can_turbo(g2, Val(2)) x = Float32.(1.05:0.1:10) y = Float32.(0.55:0.1:10.5) @@ -40,12 +39,13 @@ end @test z ≈ truth - f(x, y) = x + SpecialFunctions.gamma(y) + f3(x, y) = x + SpecialFunctions.gamma(y) @turbo safe=true for i in indices(x) - z[i] = f(x[i], y[i]) + z[i] = f3(x[i], y[i]) end + f4(x, y) = x + SpecialFunctions.gamma(y) for i in indices(x) - truth[i] = f(x[i], y[i]) + truth[i] = f4(x[i], y[i]) end @test z ≈ truth From cbed1d38469af9036cedd5e540ef9a4699a358d6 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 19 Sep 2022 12:49:07 -0400 Subject: [PATCH 25/29] Ping test --- test/safe_turbo.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/safe_turbo.jl b/test/safe_turbo.jl index eaf531b4f..b01d7492c 100644 --- a/test/safe_turbo.jl +++ b/test/safe_turbo.jl @@ -1,4 +1,3 @@ - @testset "Safe @turbo" begin import SpecialFunctions From 9568ba9c3d1e11ac6123916a59eb83e7fc0de4d7 Mon Sep 17 00:00:00 2001 From: MilesCranmer Date: Mon, 19 Sep 2022 13:25:00 -0400 Subject: [PATCH 26/29] Ensure that function names in safe test are unique --- test/safe_turbo.jl | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/test/safe_turbo.jl b/test/safe_turbo.jl index b01d7492c..0848d7b8a 100644 --- a/test/safe_turbo.jl +++ b/test/safe_turbo.jl @@ -1,36 +1,41 @@ +_f1(a) = SpecialFunctions.gamma(a) +_f2(a) = exp(a) +_f3(a, b) = a + SpecialFunctions.gamma(b) +_f4(a, b) = a + exp(b) +_f5(a, b) = a + SpecialFunctions.gamma(b) +_f6(a, b) = a + SpecialFunctions.gamma(b) + @testset "Safe @turbo" begin + using LoopVectorization + using Test import SpecialFunctions # All methods, both `can_avx` and `can_turbo`, should recognize that # `gamma` is not AVX-able - f1(x) = SpecialFunctions.gamma(x) @test !LoopVectorization.ArrayInterface.can_avx(SpecialFunctions.gamma) @test !LoopVectorization.can_turbo(SpecialFunctions.gamma, Val(1)) - @test !LoopVectorization.can_turbo(f1, Val(1)) + @test !LoopVectorization.can_turbo(_f1, Val(1)) # `can_avx` is not able to detect that a function `f` which is just # `gamma` can be AVX'd, but `can_turbo` can: - f2(x) = exp(x) @test LoopVectorization.ArrayInterface.can_avx(exp) - @test !LoopVectorization.ArrayInterface.can_avx(f2) + @test !LoopVectorization.ArrayInterface.can_avx(_f2) @test LoopVectorization.can_turbo(exp, Val(1)) - @test LoopVectorization.can_turbo(f2, Val(1)) + @test LoopVectorization.can_turbo(_f2, Val(1)) # Next, we test with multiple arguments: - g1(x, y) = x + SpecialFunctions.gamma(y) - @test !LoopVectorization.can_turbo(g1, Val(2)) - g2(x, y) = x + exp(y) - @test LoopVectorization.can_turbo(g2, Val(2)) + @test !LoopVectorization.can_turbo(_f3, Val(2)) + @test LoopVectorization.can_turbo(_f4, Val(2)) x = Float32.(1.05:0.1:10) y = Float32.(0.55:0.1:10.5) z = similar(x) truth = similar(x) - @turbo safe=true for i in indices(x) + LoopVectorization.@turbo safe=true for i in indices(x) z[i] = SpecialFunctions.gamma(x[i]) end for i in indices(x) @@ -38,14 +43,13 @@ end @test z ≈ truth - f3(x, y) = x + SpecialFunctions.gamma(y) - @turbo safe=true for i in indices(x) - z[i] = f3(x[i], y[i]) + LoopVectorization.@turbo safe=true for i in indices(x) + z[i] = _f5(x[i], y[i]) end - f4(x, y) = x + SpecialFunctions.gamma(y) for i in indices(x) - truth[i] = f4(x[i], y[i]) + truth[i] = _f6(x[i], y[i]) end @test z ≈ truth end + From a93f1ad38b18f0dacf53eb93e0e719e502cda73d Mon Sep 17 00:00:00 2001 From: Miles Cranmer Date: Mon, 19 Sep 2022 15:23:20 -0400 Subject: [PATCH 27/29] Add `RetVec2Int` for julia <1.6 as `Returns()` Co-authored-by: Chris Elrod --- src/condense_loopset.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index d4246b508..c2878047e 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -902,7 +902,8 @@ function check_args_call(ls::LoopSet) end q end - +struct RetVec2Int end +(::RetVec2Int)(_) = Vec{2,Int} """ can_turbo(f::Function, ::Val{NARGS}) From e126032bcce114af78b3b9b955ec5468f7ba444e Mon Sep 17 00:00:00 2001 From: Miles Cranmer Date: Mon, 19 Sep 2022 15:23:43 -0400 Subject: [PATCH 28/29] Use `RetVec2Int()` instead of `Returns(Vec{2,Int})` Co-authored-by: Chris Elrod --- src/condense_loopset.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/condense_loopset.jl b/src/condense_loopset.jl index c2878047e..18f5d04fe 100644 --- a/src/condense_loopset.jl +++ b/src/condense_loopset.jl @@ -911,7 +911,7 @@ Check whether a given function with a specified number of arguments can be used inside a `@turbo` loop. """ function can_turbo(f::F, ::Val{NARGS})::Bool where {F,NARGS} - promoted_op = Base.promote_op(f, ntuple(Returns(Vec{2,Int}), Val(NARGS))...) + promoted_op = Base.promote_op(f, ntuple(RetVec2Int(), Val(NARGS))...) return promoted_op !== Union{} end From 4efdb90b7009b0c00a45177010cec0eb9373b62a Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Tue, 27 Sep 2022 17:10:21 -0400 Subject: [PATCH 29/29] push functions into prepre --- Project.toml | 2 +- src/modeling/graphs.jl | 4 ++-- test/safe_turbo.jl | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Project.toml b/Project.toml index d938f0d1e..f8d6d7864 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "LoopVectorization" uuid = "bdcacae8-1622-11e9-2a5c-532679323890" authors = ["Chris Elrod "] -version = "0.12.128" +version = "0.12.129" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" diff --git a/src/modeling/graphs.jl b/src/modeling/graphs.jl index 92e5aea42..4cd73de94 100644 --- a/src/modeling/graphs.jl +++ b/src/modeling/graphs.jl @@ -1283,7 +1283,7 @@ function instruction!(ls::LoopSet, x::Expr) instr ∈ keys(COST) && return Instruction(:LoopVectorization, instr) # end instr = gensym!(ls, "f") - pushpreamble!(ls, Expr(:(=), instr, x)) + pushprepreamble!(ls, Expr(:(=), instr, x)) Instruction(Symbol(""), instr) end instruction!(ls::LoopSet, x::Symbol) = instruction(x) @@ -1481,7 +1481,7 @@ function add_operation!( add_comparison!(ls, LHS_sym, RHS, elementbytes, position) else throw(LoopError("Expression not recognized.", RHS)) - end + end end function prepare_rhs_for_storage!( diff --git a/test/safe_turbo.jl b/test/safe_turbo.jl index 0848d7b8a..84868d224 100644 --- a/test/safe_turbo.jl +++ b/test/safe_turbo.jl @@ -1,3 +1,7 @@ +using LoopVectorization +using Test +import SpecialFunctions + _f1(a) = SpecialFunctions.gamma(a) _f2(a) = exp(a) _f3(a, b) = a + SpecialFunctions.gamma(b) @@ -7,9 +11,6 @@ _f6(a, b) = a + SpecialFunctions.gamma(b) @testset "Safe @turbo" begin - using LoopVectorization - using Test - import SpecialFunctions # All methods, both `can_avx` and `can_turbo`, should recognize that # `gamma` is not AVX-able