Skip to content

Commit

Permalink
Merge branch 'master' of github.com:yebai/Turing.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
yebai committed Aug 28, 2017
2 parents 219b9f4 + 18f4d18 commit cb5b054
Show file tree
Hide file tree
Showing 19 changed files with 158 additions and 71 deletions.
59 changes: 13 additions & 46 deletions benchmarks/benchmark.jl
Original file line number Diff line number Diff line change
@@ -1,51 +1,18 @@
using Distributions
using Turing
using Stan
include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")

# NOTE: put Stan models before Turing ones if you want to compare them in print_log
CONFIG = Dict(
"model-list" => [
"gdemo-geweke",
#"normal-loc",
"normal-mixture",
"gdemo",
"gauss",
"bernoulli",
#"negative-binomial",
"school8",
"binormal",
"kid"
],

"test-level" => 2 # 1 = model lang, 2 = whole interface
)

if CONFIG["test-level"] == 1

println("Turing compiler test started.")

for model in CONFIG["model-list"]
println("Tesing `$model` ... ")
include("$(model).run.jl")
println("")
end

println("Turing compiler test passed.")

elseif CONFIG["test-level"] == 2

println("Turing benchmarking started.")

for model in CONFIG["model-list"]
println("Benchmarking `$model` ... ")
job = `julia -e " cd(\"$(pwd())\");include(dirname(\"$(@__FILE__)\")*\"/benchmarkhelper.jl\");
CMDSTAN_HOME = \"$CMDSTAN_HOME\";
using Turing, Distributions, Stan;
include(dirname(\"$(@__FILE__)\")*\"/$(model).run.jl\") "`
println(job); run(job)
println("`$model` ✓")
end

println("Turing benchmarking completed.")

end
model_list = ["gdemo-geweke",
#"normal-loc",
"normal-mixture",
"gdemo",
"gauss",
"bernoulli",
#"negative-binomial",
"school8",
"binormal",
"kid"]

benchmakr_turing(model_list)
56 changes: 54 additions & 2 deletions benchmarks/benchmarkhelper.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,16 @@ end
# Print the text that `log2str` produces for `logd` and `monitor`.
function print_log(logd::Dict, monitor=[])
    rendered = log2str(logd, monitor)
    return print(rendered)
end

# Stamp `logd` with the current Turing commit hash (key "commit") and a
# creation time (key "created"), then POST it as JSON to the benchmark log
# collection. Returns whatever `post` returns.
#
# `monitor` stays in the signature so existing callers keep working; it is not
# used by this function.
# NOTE(review): the API key is embedded in the URL below — consider moving it
# to configuration.
send_log(logd::Dict, monitor=[]) = begin
    # The do-block form of `cd` restores the caller's working directory even
    # when the git call throws, so a failed lookup cannot leave the process
    # sitting inside the package directory.
    commit_str = cd(Pkg.dir("Turing")) do
        # `rev-parse HEAD` prints only the full commit hash, so no `grep` and
        # no parsing of the `git show` layout is needed.
        String(chomp(readstring(`git rev-parse HEAD`)))
    end
    time_str = Dates.format(now(), "dd-u-yyyy-HH-MM-SS")
    logd["created"] = time_str
    logd["commit"] = commit_str
    post("https://api.mlab.com/api/1/databases/benchmark/collections/log?apiKey=Hak1H9--KFJz7aAx2rAbNNgub1KEylgN"; json=logd)
end

send_str(str::String, fname::String) = begin
Expand All @@ -95,3 +103,47 @@ send_str(str::String, fname::String) = begin
time_str = "$(Dates.format(now(), "dd-u-yyyy-HH-MM-SS"))"
post("http://80.85.86.210:1110"; files = [FileParam(str, "text","upfile","benchmark-$time_str-$commit_str-$fname.txt")])
end



# using Requests
# import Requests: get, post, put, delete, options, FileParam
# import JSON

# Build a Markdown table comparing Turing and Stan run times for one commit.
#
# Fetches the benchmark log collection with a `q={"commit": <commit>}` filter
# and emits one table row per returned entry: the entry's "name", its "time"
# (Turing), its "time_stan" (Stan) and the ratio time / time_stan, each rounded
# to two decimals. Returns the whole table as a `String`.
# NOTE(review): the API key is embedded in the URL below — consider moving it
# to configuration.
gen_mkd_table_for_commit(commit) = begin
    api_url = "https://api.mlab.com/api/1/databases/benchmark/collections/log?q={%22commit%22:%22$commit%22}&apiKey=Hak1H9--KFJz7aAx2rAbNNgub1KEylgN"
    entries = JSON.parse(readstring(get(api_url)))

    # Collect the lines and join them once, instead of growing one string with
    # `*=` per row (which re-copies the whole table on every iteration).
    lines = String["| Model | Turing | Stan | Ratio |\n",
                   "| ----- | ------ | ---- | ----- |\n"]
    for entry in entries
        model_name = entry["name"]
        t_turing, t_stan = entry["time"], entry["time_stan"]
        ratio = t_turing / t_stan  # ratio is taken before rounding the times
        t_turing, t_stan, ratio = round(t_turing, 2), round(t_stan, 2), round(ratio, 2)
        push!(lines, "|$model_name|$t_turing|$t_stan|$ratio|\n")
    end

    join(lines)
end

# Run every benchmark named in `model_list`, each in a fresh `julia` process.
#
# Each child process changes into the caller's working directory, includes
# `benchmarkhelper.jl` from this file's directory, sets `CMDSTAN_HOME` to the
# parent's value, loads Turing, Distributions and Stan, and finally includes
# `<model>.run.jl` from this file's directory.
#
# Paths and `CMDSTAN_HOME` are embedded with `repr`, which yields a correctly
# escaped Julia string literal, so values containing quotes, backslashes or `$`
# cannot corrupt the generated script.
benchmark_turing(model_list) = begin
    println("Turing benchmarking started.")

    bench_dir = dirname(@__FILE__)

    # Statements shared by every child process (loop-invariant).
    setup = [
        string("cd(", repr(pwd()), ")"),
        string("include(", repr(joinpath(bench_dir, "benchmarkhelper.jl")), ")"),
        string("CMDSTAN_HOME = ", repr(string(CMDSTAN_HOME))),
        "using Turing, Distributions, Stan"
    ]

    for model in model_list
        println("Benchmarking `$model` ... ")
        run_file = joinpath(bench_dir, "$(model).run.jl")
        script = join(vcat(setup, string("include(", repr(run_file), ")")), ";\n")
        # Interpolating `script` passes it to `-e` as one argument, with no
        # shell-style quoting to get wrong.
        job = `julia -e $script`
        println(job); run(job)
        println("`$model` ✓")
    end

    println("Turing benchmarking completed.")
end

# Backward-compatible alias: the function was originally published under this
# misspelled name, and existing scripts call it.
benchmakr_turing(model_list) = benchmark_turing(model_list)
2 changes: 2 additions & 0 deletions benchmarks/bernoulli.run.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli-stan.data.jl")
include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli.model.jl")

tbenchmark("HMC(10, 0.25, 5)", "bermodel", "data=berstandata[1]")

bench_res = tbenchmark("HMC(1000, 0.25, 5)", "bermodel", "data=berstandata[1]")
logd = build_logd("Bernoulli Model", bench_res...)

Expand Down
2 changes: 2 additions & 0 deletions benchmarks/binormal.run.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ using Turing
include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
include(Pkg.dir("Turing")*"/example-models/stan-models/binormal.model.jl")

tbenchmark("HMC(20, 0.5, 5)", "binormal", "")

bench_res = tbenchmark("HMC(2000, 0.5, 5)", "binormal", "")
# chn = sample(binormal(), HMC(2000,0.5,5))

Expand Down
2 changes: 2 additions & 0 deletions benchmarks/gauss.run.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
include(Pkg.dir("Turing")*"/example-models/benchmarks/gdemo-stan.data.jl")
include(Pkg.dir("Turing")*"/example-models/benchmarks/gdemo.model.jl")

tbenchmark("HMC(20, 0.1, 3)", "simplegaussmodel", "data=simplegaussstandata[1]")

bench_res = tbenchmark("HMC(2000, 0.1, 3)", "simplegaussmodel", "data=simplegaussstandata[1]")
logd = build_logd("Simple Gaussian Model", bench_res...)
logd["analytic"] = Dict("s" => 49/24, "m" => 7/6)
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/gdemo.run.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
include(Pkg.dir("Turing")*"/example-models/benchmarks/gauss.data.jl")
include(Pkg.dir("Turing")*"/example-models/benchmarks/gauss.model.jl")

tbenchmark("PG(20, 20)", "gaussmodel", "gaussdata")

bench_res = tbenchmark("PG(20, 2000)", "gaussmodel", "gaussdata")
logd = build_logd("Gaussian Model", bench_res...)
print_log(logd)
Expand Down
1 change: 1 addition & 0 deletions benchmarks/kid.run.jl
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ using Turing
end

# chn = sample(kid_turing(data=kiddata[1]), HMC(2000, 0.0025, 10))
tbenchmark("HMC(20, 0.0025, 10)", "kid_turing", "data=kiddata[1]")
bench_res = tbenchmark("HMC(2000, 0.0025, 10)", "kid_turing", "data=kiddata[1]")
chn = bench_res[4]
logd = build_logd("Kid", bench_res...)
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/normal-mixture.run.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ include(Pkg.dir("Turing")*"/example-models/stan-models/normal-mixture-stan.data.
include(Pkg.dir("Turing")*"/example-models/stan-models/normal-mixture.model.jl")

# NOTE: I only run a sub-set of the data as running the whole is quite slow
tbenchmark("Gibbs(10, HMC(1, 0.05, 1, :theta), PG(50, 1, :k), HMC(1, 0.2, 3, :mu))", "nmmodel", "simplenormalmixturestandata[1][\"y\"][1:100]")

bench_res = tbenchmark("Gibbs(1000, HMC(1, 0.05, 1, :theta), PG(50, 1, :k), HMC(1, 0.2, 3, :mu))", "nmmodel", "simplenormalmixturestandata[1][\"y\"][1:100]")
logd = build_logd("Simple Gaussian Mixture Model", bench_res...)

Expand Down
2 changes: 2 additions & 0 deletions benchmarks/school8.run.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ delete!(data, "tau")

# chn = sample(school8(data=data), HMC(2000, 0.75, 5))

tbenchmark("HMC(20, 0.75, 5)", "school8", "data=data")

bench_res = tbenchmark("HMC(2000, 0.75, 5)", "school8", "data=data")
# bench_res[4].names = ["phi[1]", "phi[2]", "phi[3]", "phi[4]"]
logd = build_logd("School 8", bench_res...)
Expand Down
37 changes: 21 additions & 16 deletions src/core/compiler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -207,27 +207,32 @@ macro model(fexpr)

# Modify fbody, so that we always return VarInfo
fbody_inner = deepcopy(fbody)
return_ex = fbody.args[end] # get last statement of defined model

return_ex = fbody.args[end] # get last statement of defined model
if typeof(return_ex) == Symbol
pop!(fbody_inner.args)
vstr = string(return_ex)
push!(fbody_inner.args, :(vn = Turing.VarName(:ret, Symbol($vstr*"_ret"), "", 1)))
# push!(fbody_inner.args, :(haskey(vi, vn) ? Turing.setval!(vi, $return_ex, vn) :
# push!(vi, vn, $return_ex, Distributions.Uniform(-Inf,+Inf), -1)))
# NOTE: code below is commented out to disable explicit return
# vstr = string(return_ex)
# push!(fbody_inner.args, :(vn = Turing.VarName(:ret, Symbol($vstr*"_ret"), "", 1)))
# NOTE: code above is commented out to disable explicit return
elseif return_ex.head == :return || return_ex.head == :tuple
if return_ex.head == :return && typeof(return_ex.args[1])!=Symbol && return_ex.args[1].head == :tuple
return_ex = return_ex.args[1]
end
pop!(fbody_inner.args)
for v = return_ex.args
@assert typeof(v) == Symbol "Returned variable ($v) name must be a symbol."
vstr = string(v)
push!(fbody_inner.args, :(vn = Turing.VarName(:ret, Symbol($vstr*"_ret"), "", 1)))
# push!(fbody_inner.args, :(haskey(vi, vn) ? Turing.setval!(vi, $v, vn) :
# push!(vi, vn, $v, Distributions.Uniform(-Inf,+Inf), -1)))
end
# NOTE: code below is commented out to disable explicit return
# # Turn statement from return to tuple
# if return_ex.head == :return && typeof(return_ex.args[1]) != Symbol && return_ex.args[1].head == :tuple
# return_ex = return_ex.args[1]
# end
#
# # Replace :return or :tuple statement with corresponding operations on vi
# for v = return_ex.args
# @assert typeof(v) == Symbol "Returned variable ($v) name must be a symbol."
# push!(fbody_inner.args, :(if sampler != nothing vi.pred[Symbol($(string(v)))] = Turing.realpart($v) end))
# end
# NOTE: code above is commented out to disable explicit return
end
push!(fbody_inner.args, Expr(:return, :vi))

push!(fbody_inner.args, Expr(:return, :vi)) # always return vi in the end of function body

dprintln(1, fbody_inner)

fname_inner = Symbol("$(fname)_model")
Expand Down
2 changes: 1 addition & 1 deletion src/core/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ end
type NotImplementedException <: Exception end

# Numerically stable sum of values represented in log domain.
function logsum(xs::Vector{Float64})
logsum{T<:Real}(xs::Vector{T}) = begin
largest = maximum(xs)
ys = map(x -> exp(x - largest), xs)

Expand Down
5 changes: 4 additions & 1 deletion src/core/varinfo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,22 @@ copybyindex(vn::VarName, indexing::String) = VarName(vn.csym, vn.sym, indexing,
###########

type VarInfo
idcs :: Dict{VarName, Int}
idcs :: Dict{VarName,Int}
vns :: Vector{VarName}
ranges :: Vector{UnitRange{Int}}
vals :: Vector{Vector{Real}}
dists :: Vector{Distributions.Distribution}
gids :: Vector{Int}
trans :: Vector{Vector{Bool}}
logp :: Vector{Real}
pred :: Dict{Symbol,Any}
index :: Int # index of current randomness
num_produce :: Int # num of produce calls from trace, each produce corresponds to an observe.
VarInfo() = begin
vals = Vector{Vector{Real}}(); push!(vals, Vector{Real}())
trans = Vector{Vector{Real}}(); push!(trans, Vector{Real}())
logp = Vector{Real}(); push!(logp, zero(Real))
pred = Dict{Symbol,Any}()

new(
Dict{VarName, Int}(),
Expand All @@ -53,6 +55,7 @@ type VarInfo
Vector{Distributions.Distribution}(),
Vector{Int}(),
trans, logp,
pred,
0,
0
)
Expand Down
1 change: 1 addition & 0 deletions src/samplers/gibbs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ sample(model::Function, alg::Gibbs;
lp = nothing; epsilon = nothing; lf_num = nothing

for local_spl in spl.info[:samplers]
last_spl = local_spl
# if PROGRESS && haskey(spl.info, :progress) local_spl.info[:progress] = spl.info[:progress] end

dprintln(2, "$(typeof(local_spl)) stepping...")
Expand Down
2 changes: 2 additions & 0 deletions src/samplers/hmc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ Sampler(alg::Hamiltonian) = begin

# For caching gradient
info[:grad_cache] = Dict{UInt64,Vector}()

Sampler(alg, info)
end

Expand Down Expand Up @@ -122,6 +123,7 @@ function sample{T<:Hamiltonian}(model::Function, alg::T;
end
samples[i].value[:elapsed] = time_elapsed
samples[i].value[:lf_eps] = spl.info[:wum][][end]

if PROGRESS ProgressMeter.next!(spl.info[:progress]) end
end

Expand Down
3 changes: 2 additions & 1 deletion src/samplers/pgibbs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ sample(model::Function, alg::PG;
time_elapsed = @elapsed vi = step(model, spl, vi)
push!(samples, Sample(vi))
samples[i].value[:elapsed] = time_elapsed

time_total += time_elapsed

if PROGRESS && spl.alg.gid == 0
Expand Down Expand Up @@ -165,7 +166,7 @@ assume{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, vn::VarName, _::Va
end
end

assume{T<:Union{PG,SMC}}(spl::Void, dists::Vector{T}, vn::VarName, var::Any, vi::VarInfo) =
assume{A<:Union{PG,SMC},D<:Distribution}(spl::Sampler{A}, dists::Vector{D}, vn::VarName, var::Any, vi::VarInfo) =
error("[Turing] PG and SMC doesn't support vectorizing assume statement")

observe{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, value, vi) =
Expand Down
5 changes: 3 additions & 2 deletions src/samplers/smc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ function sample(model::Function, alg::SMC)
while consume(particles) != Val{:done}
ess = effectiveSampleSize(particles)
if ess <= spl.alg.resampler_threshold * length(particles)
resample!(particles,use_replay=spl.alg.use_replay)
resample!(particles,spl.alg.resampler,use_replay=spl.alg.use_replay)
end
end
res = Chain(getsample(particles)...)
w, samples = getsample(particles)
res = Chain(w, samples)

end
22 changes: 21 additions & 1 deletion src/samplers/support/helper.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
@inline realpart(d::ForwardDiff.Dual) = d.value
@inline realpart(ds::Union{Vector,SubArray}) = Float64[realpart(d) for d in ds] # NOTE: the function below is assumed to return a Vector now
@inline realpart!(arr::Union{Array,SubArray}, ds::Union{Array,SubArray}) = for i = 1:length(ds) arr[i] = realpart(ds[i]) end
@inline realpart(ds::Matrix) = Float64[realpart(col) for col in ds]
@inline realpart{T<:Real}(ds::Matrix{T}) = Float64[realpart(col) for col in ds]
@inline realpart(ds::Matrix{Any}) = [realpart(col) for col in ds]
# Generic array fallback: apply `realpart` to every element of `ds`.
@inline realpart(ds::Array) = map(realpart, ds) # NOTE: this function is not optimized
@inline realpart(ds::TArray) = realpart(Array(ds))

@inline dualpart(d::ForwardDiff.Dual) = d.partials.values
# Apply `dualpart` to every element of an array or of a view into one.
@inline dualpart(ds::Union{Array,SubArray}) = map(dualpart, ds)
Expand Down Expand Up @@ -56,9 +58,27 @@ end
for vn in keys(vi)
value[sym(vn)] = realpart(vi[vn])
end

# NOTE: do we need to check if lp is 0?
value[:lp] = realpart(getlogp(vi))



if ~isempty(vi.pred)
for sym in keys(vi.pred)
# if ~haskey(sample.value, sym)
value[sym] = vi.pred[sym]
# end
end
# TODO: check why 1. 2. cause errors
# TODO: which one is faster?
# 1. Using empty!
# empty!(vi.pred)
# 2. Reassign an empty dict
# vi.pred = Dict{Symbol,Any}()
# 3. Do nothing?
end

Sample(0.0, value)
end

Expand Down
21 changes: 21 additions & 0 deletions test/compiler.jl/explicit_ret.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using Turing
using Distributions
using Base.Test

# Model whose last statement is an explicit tuple of values (`x, y, z`).
# `x` is shifted by -1 and `z` is derived from `y` after the `~` statements,
# so the checks that follow expect means of about 9 (x), 5 (y) and 6 (z).
@model test_ex_rt() = begin
x ~ Normal(10, 1)
y ~ Normal(x / 2, 1)
# Deterministic quantities computed from the sampled values
z = y + 1
x = x - 1
# Values listed here are the ones the test looks up in the chain
x, y, z
end

mf = test_ex_rt()

# Every sampler below is expected to recover the same posterior means for the
# values listed at the end of the model.
algs = [HMC(2000, 0.2, 3), PG(20, 2000), SMC(10000), IS(10000),
        Gibbs(2000, PG(20, 1, :x), HMC(1, 0.2, 3, :y))]

for alg in algs
    println("[explicit_ret.jl] testing $alg")
    chn = sample(mf, alg)
    # `@test_approx_eq_eps` is deprecated; `isapprox` with an absolute
    # tolerance and `rtol=0` performs the same |a - b| <= 0.2 check.
    @test isapprox(mean(chn[:x]), 9.0; atol=0.2, rtol=0)
    @test isapprox(mean(chn[:y]), 5.0; atol=0.2, rtol=0)
    @test isapprox(mean(chn[:z]), 6.0; atol=0.2, rtol=0)
end
Loading

0 comments on commit cb5b054

Please sign in to comment.