Skip to content

Commit

Permalink
WIP: evaluation of multi splines at the same time.
Browse files Browse the repository at this point in the history
  • Loading branch information
albop committed Jan 11, 2016
1 parent 115d3e8 commit c675127
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 8 deletions.
19 changes: 19 additions & 0 deletions src/csplines.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ function eval_UC_spline_G(a, b, orders, C, S)

end

function eval_UC_multi_spline(a, b, orders, C, S)

d = size(S,2)
N = size(S,1)
K = size(C,1) # number of splines to evaluate
vals = zeros(K,N)
eval_UC_multi_spline!(a, b, orders, C, S, vals, Ad, dAd)
return vals

end

# problem with this approach: the functions don't get cached.

# fun = (create_function(1,"natural"))
Expand Down Expand Up @@ -77,3 +88,11 @@ end
fun = (create_function_with_gradient(d,"natural"))
return fun.args[2].args[2]
end

@generated function eval_UC_multi_spline!(a, b, orders, C, S, V, Ad, dAd)
d = C.parameters[2]-1 # first dimension of C indexes the splines
# the speed penalty of extrapolating points when iterating over point
# seems very small so this is the default
fun = (create_function_multi_spline(d,"natural"))
return fun.args[2].args[2]
end
45 changes: 38 additions & 7 deletions src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,14 @@ function create_Phi(d, extrap, diff)
return lines
end

function tensor_prod(symbs, inds)
function tensor_prod(symbs, inds, add_index=false)
if length(symbs)==0
subscripts = [:($(U("i",i))+$(inds[i])) for i=1:length(inds)]
return :(C[$(subscripts...)])
if add_index
return :(C[k,$(subscripts...)])
else
return :(C[$(subscripts...)])
end
else
h = symbs[1]
if length(symbs)>1
Expand All @@ -50,14 +54,16 @@ function tensor_prod(symbs, inds)
end
exprs = []
for i = 1:4
e = parse( string(h,"_",i,"*(",tensor_prod(q,cat(1,inds,[i])),")") )
e = parse( string(h,"_",i,"*(",tensor_prod(q,cat(1,inds,[i]),add_index),")") )
push!(exprs,e)
end
return :(+($(exprs...)))
end
end

tensor_prod([], Int64[1,2,3,4]) # C[1,2,3,4]
tensor_prod([], Int64[1,2,3,4],) # C[i1+1,i2+2,i3+3,i4+4]
tensor_prod([], Int64[1,2,3,4], true) # C[k,i1+1,i2+2,i3+3,i4+4]

tensor_prod(["Phi_1"], Int64[]) # Phi_1_1 * C[i1 + 1] + Phi_1_2 * C[i1 + 2] + Phi_1_3 * C[i1 + 3] + Phi_1_4 * C[i1 + 4]
tensor_prod(["Phi_1", "Phi_2"], Int64[]) # Phi_1_1 * (Phi_2_1 * C[i1 + 1,i2 + 1] + Phi_2_2 * C[i1 + 1,i2 + 2] + Phi_2_3 * C[i1 + 1,i2 + 3] + Phi_2_4 * C[i1 + 1,i2 + 4]) + Phi_1_2 * (Phi_2_1 * C[i1 + 2,i2 + 1] + Phi_2_2 * C[i1 + 2,i2 + 2] + Phi_2_3 * C[i1 + 2,i2 + 3] + Phi_2_4 * C[i1 + 2,i2 + 4]) + Phi_1_3 * (Phi_2_1 * C[i1 + 3,i2 + 1] + Phi_2_2 * C[i1 + 3,i2 + 2] + Phi_2_3 * C[i1 + 3,i2 + 3] + Phi_2_4 * C[i1 + 3,i2 + 4]) + Phi_1_4 * (Phi_2_1 * C[i1 + 4,i2 + 1] + Phi_2_2 * C[i1 + 4,i2 + 2] + Phi_2_3 * C[i1 + 4,i2 + 3] + Phi_2_4 * C[i1 + 4,i2 + 4])
tensor_prod(["Phi_1", "dPhi_2"], Int64[]) # Phi_1_1 * (dPhi_2_1 * C[i1 + 1,i2 + 1] + dPhi_2_2 * C[i1 + 1,i2 + 2] + dPhi_2_3 * C[i1 + 1,i2 + 3] + dPhi_2_4 * C[i1 + 1,i2 + 4]) + Phi_1_2 * (dPhi_2_1 * C[i1 + 2,i2 + 1] + dPhi_2_2 * C[i1 + 2,i2 + 2] + dPhi_2_3 * C[i1 + 2,i2 + 3] + dPhi_2_4 * C[i1 + 2,i2 + 4]) + Phi_1_3 * (dPhi_2_1 * C[i1 + 3,i2 + 1] + dPhi_2_2 * C[i1 + 3,i2 + 2] + dPhi_2_3 * C[i1 + 3,i2 + 3] + dPhi_2_4 * C[i1 + 3,i2 + 4]) + Phi_1_4 * (dPhi_2_1 * C[i1 + 4,i2 + 1] + dPhi_2_2 * C[i1 + 4,i2 + 2] + dPhi_2_3 * C[i1 + 4,i2 + 3] + dPhi_2_4 * C[i1 + 4,i2 + 4])
Expand Down Expand Up @@ -104,13 +110,13 @@ function create_function(d,extrap="natural")
function $(Symbol(string("eval_UC_spline_",d,"d")))( a, b, orders, C, S, V, Ad, dAd)
$(create_parameters(d)...)
N = size(S,1)
# @simd( # doesn't seem to make any difference
@fastmath @inbounds @simd( # doesn't seem to make any difference

This comment has been minimized.

Copy link
@sglyon

sglyon Jan 11, 2016

Member

Is this @simd over a codegen loop? If so, it shouldn't do anything and isn't surprising that you don't see any benefits

The @simd loop should go on the inner most loop that does the arithmetic

This comment has been minimized.

Copy link
@albop

albop Jan 11, 2016

Author Member

I don't get what you mean by codegen loop. In the generated code, it appears in front of the inner most loop:

    function eval_UC_spline_1d(a,b,orders,C,S,V,Ad,dAd) # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 111: # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 76:
        M1 = orders[1] # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 77:
        start1 = a[1] # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 78:
        dinv1 = (orders[1] - 1.0) / (b[1] - a[1]) # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 112:
        N = size(S,1) # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 113:
        @fastmath @inbounds(@simd(for n = 1:N # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 115: # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 91:
                        x1 = S[n,1] # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 92:
                        u1 = (x1 - start1) * dinv1 # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 93:
                        i1 = floor(Int,u1) # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 94:
                        i1 = max(min(i1,M1 - 2),0) # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 95:
                        t1 = u1 - i1 # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 96:
                        tp1_1 = t1 * t1 * t1 # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 97:
                        tp1_2 = t1 * t1 # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 98:
                        tp1_3 = t1 # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 99:
                        tp1_4 = 1.0 # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 116: # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 19:
                        if t1 < 0 # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 20:
                            Phi_1_1 = dAd[1,4] * tp1_3 + Ad[1,4]
                            Phi_1_2 = dAd[2,4] * tp1_3 + Ad[2,4]
                            Phi_1_3 = dAd[3,4] * tp1_3 + Ad[3,4]
                            Phi_1_4 = dAd[4,4] * tp1_3 + Ad[4,4]
                        else  # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 21:
                            if t1 > 1 # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 22:
                                Phi_1_1 = (3 * Ad[1,1] + 2 * Ad[1,2] + Ad[1,3]) * (tp1_3 - 1) + (Ad[1,1] + Ad[1,2] + Ad[1,3] + Ad[1,4])
                                Phi_1_2 = (3 * Ad[2,1] + 2 * Ad[2,2] + Ad[2,3]) * (tp1_3 - 1) + (Ad[2,1] + Ad[2,2] + Ad[2,3] + Ad[2,4])
                                Phi_1_3 = (3 * Ad[3,1] + 2 * Ad[3,2] + Ad[3,3]) * (tp1_3 - 1) + (Ad[3,1] + Ad[3,2] + Ad[3,3] + Ad[3,4])
                                Phi_1_4 = (3 * Ad[4,1] + 2 * Ad[4,2] + Ad[4,3]) * (tp1_3 - 1) + (Ad[4,1] + Ad[4,2] + Ad[4,3] + Ad[4,4])
                            else  # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 24:
                                Phi_1_1 = Ad[1,1] * tp1_1 + Ad[1,2] * tp1_2 + Ad[1,3] * tp1_3 + Ad[1,4] * tp1_4
                                Phi_1_2 = Ad[2,1] * tp1_1 + Ad[2,2] * tp1_2 + Ad[2,3] * tp1_3 + Ad[2,4] * tp1_4
                                Phi_1_3 = Ad[3,1] * tp1_1 + Ad[3,2] * tp1_2 + Ad[3,3] * tp1_3 + Ad[3,4] * tp1_4
                                Phi_1_4 = Ad[4,1] * tp1_1 + Ad[4,2] * tp1_2 + Ad[4,3] * tp1_3 + Ad[4,4] * tp1_4
                            end
                        end # C:\Users\pablo\.julia\v0.4\splines\src\macros.jl, line 117:
                        V[n] = Phi_1_1 * C[i1 + 1] + Phi_1_2 * C[i1 + 2] + Phi_1_3 * C[i1 + 3] + Phi_1_4 * C[i1 + 4]
                    end))
    end
end

This comment has been minimized.

Copy link
@sglyon

sglyon Jan 11, 2016

Member

Ahh good point, I misread what it was doing. Sorry about that.

The docs on @simd specify this as a condition:

The loop body must be straight-line code. This is why @inbounds is currently needed for all array accesses. The compiler can sometimes turn short &&, ||, and ?: expressions into straight-line code, if it is safe to evaluate all operands unconditionally. Consider using ifelse() instead of ?: in the loop if it is safe to do so.

It looks like the two if, else there will violate this condition...

This comment has been minimized.

Copy link
@albop

albop via email Jan 11, 2016

Author Member

This comment has been minimized.

Copy link
@albop

albop Jan 12, 2016

Author Member

Actually, after some experiments, I suspect that the floor instruction is the one which does not vectorize. The following example is not vectorized on my computer:

function vecfun(a,b)
    N = length(a)
     @inbounds @simd for n=1:N
        b[n] = floor(Int64,a[n])
    end
end
@code_llvm vecfun(Float64[],Int64[])

This comment has been minimized.

Copy link
@albop

albop Jan 12, 2016

Author Member

Apparently, this is a known issue, the test for floor in Julia is commented: JuliaLang/julia#13692

for n=1:N
$(create_local_parameters(d)...)
$(create_Phi(d,extrap,false)...)
V[n] = $( tensor_prod([string("Phi_",i) for i=1:d], Int64[]) )
end
# )
)
end
end
return expr
Expand All @@ -131,15 +137,40 @@ function create_function_with_gradient(d,extrap="natural")
function $(Symbol(string("eval_UC_spline_",d,"d")))( a, b, orders, C, S, V, dV, Ad, dAd)
$(create_parameters(d)...)
N = size(S,1)
# @simd( # doesn't seem to make any difference
@fastmath @inbounds @simd( # doesn't seem to make any difference
for n=1:N
$(create_local_parameters(d)...)
$(create_Phi(d,extrap,true)...)
V[n] = $(tensor_prod([string("Phi_",i) for i=1:d], Int64[]))
$([ :( dV[n,$j] = $(tensor_prod(grad_allocs[j], Int64[])) ) for j=1:d]...)
end
)
end
end
return expr
end

function create_function_multi_spline(d,extrap="natural")
expr = quote
function $(Symbol(string("eval_UC_multi_spline_",d,"d")))( a, b, orders, C, S, V, Ad, dAd)
K = size(C,1)
$(create_parameters(d)...)
N = size(S,1)
# @fastmath @inbounds @simd( # doesn't seem to make any difference
for n=1:N
$(create_local_parameters(d)...)
$(create_Phi(d,extrap,false)...)
for k=1:K
V[k,n] = $( tensor_prod([string("Phi_",i) for i=1:d], Int64[], true) )
end

end
# )
end
end
return expr
end



create_function_multi_spline(1)
2 changes: 1 addition & 1 deletion src/splines.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module splines

export filter_coeffs, interpolant_cspline
export eval_UC_spline, eval_UC_spline_G
export eval_UC_spline, eval_UC_spline_G, eval_UC_multi_spline
include("csplines.jl")
include("splines_filter.jl")

Expand Down
8 changes: 8 additions & 0 deletions test/test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ true_values = f(s)
println("Maximum interpolation error : ", norm(true_values-interp_values,Inf))
toc()


# Test multi splines:

CC = zeros(2,size(coefs,1))
CC[1,:] = coefs # first spline
CC[2,:] = coefs/2 # second spline
interp_multi_values = eval_UC_multi_spline(smin,smax,orders,CC,s)

#using PyPlot
#plot(s,sol)
#plot(s,true_values)
Expand Down

0 comments on commit c675127

Please sign in to comment.