Merge branch 'master' of github.com:yebai/Turing.jl

TuringLang · Aug 28, 2017 · cb5b054 · cb5b054
2 parents 219b9f4 + 18f4d18
commit cb5b054
Show file tree

Hide file tree

Showing 19 changed files with 158 additions and 71 deletions.
diff --git a/benchmarks/benchmark.jl b/benchmarks/benchmark.jl
@@ -1,51 +1,18 @@
 using Distributions
 using Turing
 using Stan
+include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
 
 # NOTE: put Stan models before Turing ones if you want to compare them in print_log
-CONFIG = Dict(
-  "model-list" => [
-    "gdemo-geweke",
-    #"normal-loc",
-    "normal-mixture",
-    "gdemo",
-    "gauss",
-    "bernoulli",
-    #"negative-binomial",
-    "school8",
-    "binormal",
-    "kid"
-  ],
-
-  "test-level" => 2   # 1 = model lang, 2 = whole interface
-)
-
-if CONFIG["test-level"] == 1
-
-  println("Turing compiler test started.")
-
-  for model in CONFIG["model-list"]
-    println("Tesing `$model` ... ")
-    include("$(model).run.jl")
-    println("✓")
-  end
-
-  println("Turing compiler test passed.")
-
-elseif CONFIG["test-level"] == 2
-
-  println("Turing benchmarking started.")
-
-  for model in CONFIG["model-list"]
-    println("Benchmarking `$model` ... ")
-    job = `julia -e " cd(\"$(pwd())\");include(dirname(\"$(@__FILE__)\")*\"/benchmarkhelper.jl\");
-                         CMDSTAN_HOME = \"$CMDSTAN_HOME\";
-                         using Turing, Distributions, Stan;
-                         include(dirname(\"$(@__FILE__)\")*\"/$(model).run.jl\") "`
-    println(job); run(job)
-    println("`$model` ✓")
-  end
-
-  println("Turing benchmarking completed.")
-
-end
+model_list = ["gdemo-geweke",
+              #"normal-loc",
+              "normal-mixture",
+              "gdemo",
+              "gauss",
+              "bernoulli",
+              #"negative-binomial",
+              "school8",
+              "binormal",
+              "kid"]
+# `benchmakr_turing` is defined in the benchmarkhelper.jl included above; commented-out names are skipped.
+benchmakr_turing(model_list)
diff --git a/benchmarks/benchmarkhelper.jl b/benchmarks/benchmarkhelper.jl
@@ -83,8 +83,16 @@ end
 print_log(logd::Dict, monitor=[]) = print(log2str(logd, monitor))
 
 send_log(logd::Dict, monitor=[]) = begin
-  log_str = log2str(logd, monitor)
-  send_str(log_str, logd["name"])
+  # Stamp `logd` in place with a creation time and the current Turing commit hash,
+  # then POST it as JSON to the mLab benchmark log; `monitor` is not used here.
+  # `cd(f, dir)` restores the caller's working directory even if the git call throws.
+  commit_str = cd(Pkg.dir("Turing")) do
+    chomp(readstring(`git rev-parse HEAD`))
+  end
+  logd["created"] = "$(Dates.format(now(), "dd-u-yyyy-HH-MM-SS"))"
+  logd["commit"] = commit_str
+  # NOTE(review): the API key is hard-coded in this URL — consider reading it from ENV.
+  post("https://api.mlab.com/api/1/databases/benchmark/collections/log?apiKey=Hak1H9--KFJz7aAx2rAbNNgub1KEylgN"; json=logd)
 end
 
 send_str(str::String, fname::String) = begin
@@ -95,3 +103,47 @@ send_str(str::String, fname::String) = begin
   time_str = "$(Dates.format(now(), "dd-u-yyyy-HH-MM-SS"))"
   post("http://80.85.86.210:1110"; files = [FileParam(str, "text","upfile","benchmark-$time_str-$commit_str-$fname.txt")])
 end
+
+
+
+# using Requests
+# import Requests: get, post, put, delete, options, FileParam
+# import JSON
+
+# Query the mLab benchmark log for every entry recorded under `commit` and
+# render the entries as a Markdown table: model name, Turing time, Stan time
+# and the Turing/Stan ratio, each number rounded to two decimals.
+# Relies on `get` and `JSON` being in scope where this helper is loaded.
+gen_mkd_table_for_commit(commit) = begin
+  query_url = "https://api.mlab.com/api/1/databases/benchmark/collections/log?q={%22commit%22:%22$commit%22}&apiKey=Hak1H9--KFJz7aAx2rAbNNgub1KEylgN"
+  entries = JSON.parse(readstring(get(query_url)))
+
+  # Header and separator rows; one body row is appended per log entry.
+  table = "| Model | Turing | Stan | Ratio |\n" *
+          "| ----- | ------ | ---- | ----- |\n"
+  for entry in entries
+    name = entry["name"]
+    t_turing = entry["time"]
+    t_stan = entry["time_stan"]
+    ratio = round(t_turing / t_stan, 2)  # ratio uses the unrounded timings
+    table *= "|$name|$(round(t_turing, 2))|$(round(t_stan, 2))|$ratio|\n"
+  end
+
+  table
+end
+
+benchmakr_turing(model_list) = begin
+  println("Turing benchmarking started.")
+  # Each model's `<model>.run.jl` (in this file's directory) runs in its own `julia` process.
+  for model in model_list
+    println("Benchmarking `$model` ... ")
+    job = `julia -e " cd(\"$(pwd())\");include(dirname(\"$(@__FILE__)\")*\"/benchmarkhelper.jl\");
+                         CMDSTAN_HOME = \"$CMDSTAN_HOME\";
+                         using Turing, Distributions, Stan;
+                         include(dirname(\"$(@__FILE__)\")*\"/$(model).run.jl\") "`
+    println(job); run(job)
+    println("`$model` ✓")
+  end
+  # `run` throws on a non-zero exit status, so reaching this line means every job succeeded.
+  println("Turing benchmarking completed.")
+end
diff --git a/benchmarks/bernoulli.run.jl b/benchmarks/bernoulli.run.jl
@@ -6,6 +6,8 @@ include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
 include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli-stan.data.jl")
 include(Pkg.dir("Turing")*"/example-models/stan-models/bernoulli.model.jl")
 
+tbenchmark("HMC(10, 0.25, 5)", "bermodel", "data=berstandata[1]")
+
 bench_res = tbenchmark("HMC(1000, 0.25, 5)", "bermodel", "data=berstandata[1]")
 logd = build_logd("Bernoulli Model", bench_res...)
 

diff --git a/benchmarks/binormal.run.jl b/benchmarks/binormal.run.jl
@@ -5,6 +5,8 @@ using Turing
 include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
 include(Pkg.dir("Turing")*"/example-models/stan-models/binormal.model.jl")
 
+tbenchmark("HMC(20, 0.5, 5)", "binormal", "")
+
 bench_res = tbenchmark("HMC(2000, 0.5, 5)", "binormal", "")
 # chn = sample(binormal(), HMC(2000,0.5,5))
 

diff --git a/benchmarks/gauss.run.jl b/benchmarks/gauss.run.jl
@@ -6,6 +6,8 @@ include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
 include(Pkg.dir("Turing")*"/example-models/benchmarks/gdemo-stan.data.jl")
 include(Pkg.dir("Turing")*"/example-models/benchmarks/gdemo.model.jl")
 
+tbenchmark("HMC(20, 0.1, 3)", "simplegaussmodel", "data=simplegaussstandata[1]")
+
 bench_res = tbenchmark("HMC(2000, 0.1, 3)", "simplegaussmodel", "data=simplegaussstandata[1]")
 logd = build_logd("Simple Gaussian Model", bench_res...)
 logd["analytic"] = Dict("s" => 49/24, "m" => 7/6)

diff --git a/benchmarks/gdemo.run.jl b/benchmarks/gdemo.run.jl
@@ -6,6 +6,8 @@ include(Pkg.dir("Turing")*"/benchmarks/benchmarkhelper.jl")
 include(Pkg.dir("Turing")*"/example-models/benchmarks/gauss.data.jl")
 include(Pkg.dir("Turing")*"/example-models/benchmarks/gauss.model.jl")
 
+tbenchmark("PG(20, 20)", "gaussmodel", "gaussdata")
+
 bench_res = tbenchmark("PG(20, 2000)", "gaussmodel", "gaussdata")
 logd = build_logd("Gaussian Model", bench_res...)
 print_log(logd)

diff --git a/benchmarks/kid.run.jl b/benchmarks/kid.run.jl
@@ -200,6 +200,7 @@ using Turing
 end
 
 # chn = sample(kid_turing(data=kiddata[1]), HMC(2000, 0.0025, 10))
+tbenchmark("HMC(20, 0.0025, 10)", "kid_turing", "data=kiddata[1]")
 bench_res = tbenchmark("HMC(2000, 0.0025, 10)", "kid_turing", "data=kiddata[1]")
 chn = bench_res[4]
 logd = build_logd("Kid", bench_res...)

diff --git a/benchmarks/normal-mixture.run.jl b/benchmarks/normal-mixture.run.jl
@@ -7,6 +7,8 @@ include(Pkg.dir("Turing")*"/example-models/stan-models/normal-mixture-stan.data.
 include(Pkg.dir("Turing")*"/example-models/stan-models/normal-mixture.model.jl")
 
 # NOTE: I only run a sub-set of the data as running the whole is quite slow
+tbenchmark("Gibbs(10, HMC(1, 0.05, 1, :theta), PG(50, 1, :k), HMC(1, 0.2, 3, :mu))", "nmmodel", "simplenormalmixturestandata[1][\"y\"][1:100]")
+
 bench_res = tbenchmark("Gibbs(1000, HMC(1, 0.05, 1, :theta), PG(50, 1, :k), HMC(1, 0.2, 3, :mu))", "nmmodel", "simplenormalmixturestandata[1][\"y\"][1:100]")
 logd = build_logd("Simple Gaussian Mixture Model", bench_res...)
 

diff --git a/benchmarks/school8.run.jl b/benchmarks/school8.run.jl
@@ -10,6 +10,8 @@ delete!(data, "tau")
 
 # chn = sample(school8(data=data), HMC(2000, 0.75, 5))
 
+tbenchmark("HMC(20, 0.75, 5)", "school8", "data=data")
+
 bench_res = tbenchmark("HMC(2000, 0.75, 5)", "school8", "data=data")
 # bench_res[4].names = ["phi[1]", "phi[2]", "phi[3]", "phi[4]"]
 logd = build_logd("School 8", bench_res...)

diff --git a/src/core/compiler.jl b/src/core/compiler.jl
@@ -207,27 +207,32 @@ macro model(fexpr)
 
   # Modify fbody, so that we always return VarInfo
   fbody_inner = deepcopy(fbody)
-  return_ex = fbody.args[end]   # get last statement of defined model
+
+  return_ex = fbody.args[end] # get last statement of defined model
   if typeof(return_ex) == Symbol
     pop!(fbody_inner.args)
-    vstr = string(return_ex)
-    push!(fbody_inner.args, :(vn = Turing.VarName(:ret, Symbol($vstr*"_ret"), "", 1)))
-    # push!(fbody_inner.args, :(haskey(vi, vn) ? Turing.setval!(vi, $return_ex, vn) :
-    #  push!(vi, vn, $return_ex, Distributions.Uniform(-Inf,+Inf), -1)))
+    # NOTE: code below is commented out to disable explicit return
+    # vstr = string(return_ex)
+    # push!(fbody_inner.args, :(vn = Turing.VarName(:ret, Symbol($vstr*"_ret"), "", 1)))
+    # NOTE: code above is commented out to disable explicit return
   elseif return_ex.head == :return || return_ex.head == :tuple
-    if return_ex.head == :return && typeof(return_ex.args[1])!=Symbol && return_ex.args[1].head == :tuple
-      return_ex = return_ex.args[1]
-    end
     pop!(fbody_inner.args)
-    for v = return_ex.args
-      @assert typeof(v) == Symbol "Returned variable ($v) name must be a symbol."
-      vstr = string(v)
-      push!(fbody_inner.args, :(vn = Turing.VarName(:ret, Symbol($vstr*"_ret"), "", 1)))
-      # push!(fbody_inner.args, :(haskey(vi, vn) ? Turing.setval!(vi, $v, vn) :
-      #  push!(vi, vn, $v, Distributions.Uniform(-Inf,+Inf), -1)))
-    end
+    # NOTE: code below is commented out to disable explicit return
+    # # Turn statement from return to tuple
+    # if return_ex.head == :return && typeof(return_ex.args[1]) != Symbol && return_ex.args[1].head == :tuple
+    #   return_ex = return_ex.args[1]
+    # end
+    #
+    # # Replace :return or :tuple statement with corresponding operations on vi
+    # for v = return_ex.args
+    #   @assert typeof(v) == Symbol "Returned variable ($v) name must be a symbol."
+    #   push!(fbody_inner.args, :(if sampler != nothing vi.pred[Symbol($(string(v)))] = Turing.realpart($v) end))
+    # end
+    # NOTE: code above is commented out to disable explicit return
   end
-  push!(fbody_inner.args, Expr(:return, :vi))
+
+  push!(fbody_inner.args, Expr(:return, :vi)) # always return vi in the end of function body
+
   dprintln(1, fbody_inner)
 
   fname_inner = Symbol("$(fname)_model")

diff --git a/src/core/util.jl b/src/core/util.jl
@@ -20,7 +20,7 @@ end
 type NotImplementedException <: Exception end
 
 # Numerically stable sum of values represented in log domain.
-function logsum(xs::Vector{Float64})
+logsum{T<:Real}(xs::Vector{T}) = begin
   largest = maximum(xs)
   ys = map(x -> exp(x - largest), xs)
 

diff --git a/src/core/varinfo.jl b/src/core/varinfo.jl
@@ -30,20 +30,22 @@ copybyindex(vn::VarName, indexing::String) = VarName(vn.csym, vn.sym, indexing,
 ###########
 
 type VarInfo
-  idcs        ::    Dict{VarName, Int}
+  idcs        ::    Dict{VarName,Int}
   vns         ::    Vector{VarName}
   ranges      ::    Vector{UnitRange{Int}}
   vals        ::    Vector{Vector{Real}}
   dists       ::    Vector{Distributions.Distribution}
   gids        ::    Vector{Int}
   trans       ::    Vector{Vector{Bool}}
   logp        ::    Vector{Real}
+  pred        ::    Dict{Symbol,Any}
   index       ::    Int           # index of current randomness
   num_produce ::    Int           # num of produce calls from trace, each produce corresponds to an observe.
   VarInfo() = begin
     vals = Vector{Vector{Real}}(); push!(vals, Vector{Real}())
     trans = Vector{Vector{Real}}(); push!(trans, Vector{Real}())
     logp = Vector{Real}(); push!(logp, zero(Real))
+    pred = Dict{Symbol,Any}()
 
     new(
       Dict{VarName, Int}(),
@@ -53,6 +55,7 @@ type VarInfo
       Vector{Distributions.Distribution}(),
       Vector{Int}(),
       trans, logp,
+      pred,
       0,
       0
     )

diff --git a/src/samplers/gibbs.jl b/src/samplers/gibbs.jl
@@ -101,6 +101,7 @@ sample(model::Function, alg::Gibbs;
     lp = nothing; epsilon = nothing; lf_num = nothing
 
     for local_spl in spl.info[:samplers]
+      last_spl = local_spl
       # if PROGRESS && haskey(spl.info, :progress) local_spl.info[:progress] = spl.info[:progress] end
 
       dprintln(2, "$(typeof(local_spl)) stepping...")

diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl
@@ -62,6 +62,7 @@ Sampler(alg::Hamiltonian) = begin
 
   # For caching gradient
   info[:grad_cache] = Dict{UInt64,Vector}()
+
   Sampler(alg, info)
 end
 
@@ -122,6 +123,7 @@ function sample{T<:Hamiltonian}(model::Function, alg::T;
     end
     samples[i].value[:elapsed] = time_elapsed
     samples[i].value[:lf_eps] = spl.info[:wum][:ϵ][end]
+
     if PROGRESS ProgressMeter.next!(spl.info[:progress]) end
   end
 

diff --git a/src/samplers/pgibbs.jl b/src/samplers/pgibbs.jl
@@ -114,6 +114,7 @@ sample(model::Function, alg::PG;
     time_elapsed = @elapsed vi = step(model, spl, vi)
     push!(samples, Sample(vi))
     samples[i].value[:elapsed] = time_elapsed
+
     time_total += time_elapsed
 
     if PROGRESS  && spl.alg.gid == 0
@@ -165,7 +166,7 @@ assume{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, vn::VarName, _::Va
   end
 end
 
-assume{T<:Union{PG,SMC}}(spl::Void, dists::Vector{T}, vn::VarName, var::Any, vi::VarInfo) =
+assume{A<:Union{PG,SMC},D<:Distribution}(spl::Sampler{A}, dists::Vector{D}, vn::VarName, var::Any, vi::VarInfo) =
   error("[Turing] PG and SMC doesn't support vectorizing assume statement")
 
 observe{T<:Union{PG,SMC}}(spl::Sampler{T}, dist::Distribution, value, vi) =

diff --git a/src/samplers/smc.jl b/src/samplers/smc.jl
@@ -52,9 +52,10 @@ function sample(model::Function, alg::SMC)
   while consume(particles) != Val{:done}
     ess = effectiveSampleSize(particles)
     if ess <= spl.alg.resampler_threshold * length(particles)
-      resample!(particles,use_replay=spl.alg.use_replay)
+      resample!(particles,spl.alg.resampler,use_replay=spl.alg.use_replay)
     end
   end
-  res = Chain(getsample(particles)...)
+  w, samples = getsample(particles)
+  res = Chain(w, samples)
 
 end
diff --git a/src/samplers/support/helper.jl b/src/samplers/support/helper.jl
@@ -6,8 +6,10 @@
 @inline realpart(d::ForwardDiff.Dual) = d.value
 @inline realpart(ds::Union{Vector,SubArray}) = Float64[realpart(d) for d in ds] # NOTE: the function below is assumed to return a Vector now
 @inline realpart!(arr::Union{Array,SubArray}, ds::Union{Array,SubArray}) = for i = 1:length(ds) arr[i] = realpart(ds[i]) end
-@inline realpart(ds::Matrix) = Float64[realpart(col) for col in ds]
+@inline realpart{T<:Real}(ds::Matrix{T}) = Float64[realpart(col) for col in ds]
+@inline realpart(ds::Matrix{Any}) = [realpart(col) for col in ds]
 @inline realpart(ds::Array)  = map(d -> realpart(d), ds)  # NOTE: this function is not optimized
+@inline realpart(ds::TArray) = realpart(Array(ds))
 
 @inline dualpart(d::ForwardDiff.Dual)       = d.partials.values
 @inline dualpart(ds::Union{Array,SubArray}) = map(d -> dualpart(d), ds)
@@ -56,9 +58,27 @@ end
   for vn in keys(vi)
     value[sym(vn)] = realpart(vi[vn])
   end
+
   # NOTE: do we need to check if lp is 0?
   value[:lp] = realpart(getlogp(vi))
 
+
+
+  if ~isempty(vi.pred)
+    for sym in keys(vi.pred)
+      # if ~haskey(sample.value, sym)
+        value[sym] = vi.pred[sym]
+      # end
+    end
+    # TODO: check why 1. 2. cause errors
+    # TODO: which one is faster?
+    # 1. Using empty!
+    # empty!(vi.pred)
+    # 2. Reassign an empty dict
+    # vi.pred = Dict{Symbol,Any}()
+    # 3. Do nothing?
+  end
+
   Sample(0.0, value)
 end
 

diff --git a/test/compiler.jl/explicit_ret.jl b/test/compiler.jl/explicit_ret.jl
@@ -0,0 +1,21 @@
+using Turing
+using Distributions
+using Base.Test
+# Model ending in a bare tuple: `x` is reassigned after being drawn and `z` is derived from `y`.
+@model test_ex_rt() = begin
+  x ~ Normal(10, 1)
+  y ~ Normal(x / 2, 1)
+  z = y + 1
+  x = x - 1
+  x, y, z
+end
+# Instantiate the model once; the same instance is sampled by every algorithm below.
+mf = test_ex_rt()
+# Expected means are those of the final values: x - 1 ≈ 9, y ≈ 5, z = y + 1 ≈ 6 (tolerance 0.2).
+for alg = [HMC(2000, 0.2, 3), PG(20, 2000), SMC(10000), IS(10000), Gibbs(2000, PG(20, 1, :x), HMC(1, 0.2, 3, :y))]
+  println("[explicit_ret.jl] testing $alg")
+  chn = sample(mf, alg)
+  @test_approx_eq_eps mean(chn[:x]) 9.0 0.2
+  @test_approx_eq_eps mean(chn[:y]) 5.0 0.2
+  @test_approx_eq_eps mean(chn[:z]) 6.0 0.2
+end