Skip to content

Commit

Permalink
Changes for Julia 0.7/1.0 (JuliaStats#45)
Browse files Browse the repository at this point in the history
* Modified code to support Julia 0.7 - 1.0

* added dependencies

* Removed Iterators

* Added version check for initial imports

* Fixed deprecation warning (Iterators)

* Addressed comments raised by ararslan

* Removed VERSION checks and code for < v"0.7.0"

* Update .travis.yml

Co-authored-by: Anand Bisen <[email protected]>
  • Loading branch information
2 people authored and ararslan committed Aug 31, 2018
1 parent 669b69b commit 6221277
Show file tree
Hide file tree
Showing 13 changed files with 90 additions and 90 deletions.
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ os:
- linux
- osx
julia:
- 0.6
- 0.7
- 1.0
- nightly
notifications:
email: false
Expand All @@ -12,4 +13,4 @@ notifications:
# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
# - julia -e 'Pkg.clone(pwd()); Pkg.build("MLBase"); Pkg.test("MLBase")'
after_success:
- julia -e 'cd(Pkg.dir("MLBase")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
- julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
2 changes: 1 addition & 1 deletion REQUIRE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
julia 0.6
julia 0.7
Reexport
StatsBase 0.6.9-
IterTools
5 changes: 3 additions & 2 deletions src/MLBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@ module MLBase

using Reexport
using IterTools
using Random
@reexport using StatsBase

import Base: length, show, keys, precision, length, getindex
import Base: start, next, done
import Base: iterate
import Base.Order: lt, Ordering, ForwardOrdering, ReverseOrdering, Forward, Reverse
import StatsBase: RealVector, IntegerVector, RealMatrix, IntegerMatrix, RealArray
import IterTools: product

export

Expand Down Expand Up @@ -77,4 +79,3 @@ module MLBase

include("deprecates.jl")
end

26 changes: 13 additions & 13 deletions src/classification.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ end

classify!(r::IntegerVector, x::RealMatrix) = classify!(r, x, Forward)

# Classify every column of `x` under ordering `ord`, returning a freshly
# allocated vector with one integer label per column (`size(x, 2)` entries).
# The result buffer is allocated with `undef`; the pre-0.7 form `Array{Int}(n)`
# throws a deprecation and was dropped.
classify(x::RealMatrix, ord::Ordering) = classify!(Vector{Int}(undef, size(x, 2)), x, ord)
classify(x::RealMatrix) = classify(x, Forward)

# classify with score(s)
Expand Down Expand Up @@ -65,13 +66,13 @@ function classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix, or
return (r, s)
end

classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix) =
classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix) =
classify_withscores!(r, s, x, Forward)

# Classify every column of `x` under ordering `ord` and return whatever
# `classify_withscores!` returns for freshly allocated output buffers.
#
# Two uninitialised vectors of length `size(x, 2)` are allocated: `r`
# (element type `Int`) and `s` (element type `T`, the element type of `x`);
# both are handed to `classify_withscores!` to be filled in.
function classify_withscores(x::RealMatrix{T}, ord::Ordering) where T<:Real
    n = size(x, 2)
    # `undef` constructors only: the old `Array{Int}(n)` form no longer exists
    # on Julia 1.0 and must not run before these.
    r = Vector{Int}(undef, n)
    s = Vector{T}(undef, n)
    return classify_withscores!(r, s, x, ord)
end

Expand All @@ -80,7 +81,7 @@ classify_withscores(x::RealMatrix{T}) where {T<:Real} = classify_withscores(x, F

# classify with threshold

classify(x::RealVector, t::Real, ord::Ordering) =
classify(x::RealVector, t::Real, ord::Ordering) =
((k, v) = classify_withscore(x, ord); ifelse(lt(ord, v, t), 0, k))

classify(x::RealVector, t::Real) = classify(x, t, Forward)
Expand All @@ -97,8 +98,8 @@ end

classify!(r::IntegerVector, x::RealMatrix, t::Real) = classify!(r, x, t, Forward)

# Thresholded classification of every column of `x`: allocates an `Int` vector
# with one slot per column (`size(x, 2)`) and delegates to `classify!` with
# threshold `t` and ordering `ord`.
classify(x::RealMatrix, t::Real, ord::Ordering) =
    classify!(Vector{Int}(undef, size(x, 2)), x, t, ord)

# Same as above, using the `Forward` ordering.
classify(x::RealMatrix, t::Real) = classify(x, t, Forward)


## label map
Expand All @@ -109,7 +110,7 @@ struct LabelMap{K}

function LabelMap{K}(vs, v2i) where K
length(vs) == length(v2i) || throw(DimensionMismatch("lengths of vs and v2i mismatch"))
new(vs,v2i)
new(vs,v2i)
end
end

Expand Down Expand Up @@ -143,18 +144,18 @@ end

# use a map to encode discrete values into labels
labelencode(lmap::LabelMap{T}, x) where {T} = lmap.v2i[convert(T, x)]
labelencode(lmap::LabelMap{T}, xs::AbstractArray{T}) where {T} =
labelencode(lmap::LabelMap{T}, xs::AbstractArray{T}) where {T} =
reshape(Int[labelencode(lmap, x) for x in xs], size(xs))

# decode the label to the associated discrete value
labeldecode(lmap::LabelMap{T}, y::Int) where {T} = lmap.vs[y]
labeldecode(lmap::LabelMap{T}, ys::AbstractArray{Int}) where {T} =
labeldecode(lmap::LabelMap{T}, ys::AbstractArray{Int}) where {T} =
reshape(T[labeldecode(lmap, y) for y in ys], size(ys))

## group labels

function groupindices(k::Int, xs::IntegerVector; warning::Bool=true)
gs = Array{Vector{Int}}(k)
gs = Array{Vector{Int}}(undef, k)
for i = 1:k
gs[i] = Int[]
end
Expand All @@ -176,7 +177,7 @@ end

function groupindices(lmap::LabelMap{T}, xs::AbstractArray{T}) where T
k = length(lmap)
gs = Array{Vector{Int}}(k)
gs = Array{Vector{Int}}(undef, k)
for i = 1:k
gs[i] = Int[]
end
Expand All @@ -187,4 +188,3 @@ function groupindices(lmap::LabelMap{T}, xs::AbstractArray{T}) where T
end
return gs
end

59 changes: 30 additions & 29 deletions src/crossval.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

abstract type CrossValGenerator end

# K-fold
# K-fold

struct Kfold <: CrossValGenerator
permseq::Vector{Int}
Expand All @@ -20,15 +20,20 @@ end
length(c::Kfold) = c.k

# Iteration state used by `iterate(::Kfold, ::KfoldState)`.
# Each field is declared exactly once (a repeated field name is a definition error).
struct KfoldState
    i::Int    # the i-th of the subset
    s::Int    # starting index
    e::Int    # ending index
end

start(c::Kfold) = KfoldState(1, 1, round.(Integer,c.coeff))
next(c::Kfold, s::KfoldState) =
(i = s.i+1; (setdiff(1:length(c.permseq), c.permseq[s.s:s.e]), KfoldState(i, s.e+1, round.(Integer,c.coeff * i))))
done(c::Kfold, s::KfoldState) = (s.i > c.k)
# Iteration protocol for `Kfold` (the `iterate` interface of Julia 0.7+).
#
# `state` holds the current fold number `i` and the start/end positions
# `s`/`e` of that fold's slice of `c.permseq`. The default state is fold 1,
# starting at position 1 and ending at `c.coeff` rounded to an integer.
#
# Returns `nothing` once the fold number exceeds `c.k`; otherwise returns
# `(indices, next_state)`, where `indices` are the elements of
# `1:length(c.permseq)` that are not in `c.permseq[s:e]`.
function Base.iterate(c::Kfold, state::KfoldState=KfoldState(1, 1, round.(Integer, c.coeff)))
    i, s, e = state.i, state.s, state.e
    i > c.k && return nothing
    i += 1
    sd = setdiff(1:length(c.permseq), c.permseq[s:e])
    # The next slice starts right after this one and ends at the rounded
    # boundary `c.coeff * i`, with `i` already advanced.
    kst = KfoldState(i, e + 1, round.(Integer, c.coeff * i))
    return (sd, kst)
end

# Stratified K-fold

Expand All @@ -52,22 +57,21 @@ end

length(c::StratifiedKfold) = c.k

start(c::StratifiedKfold) = 1
function next(c::StratifiedKfold, s::Int)
# Iteration protocol for `StratifiedKfold`; the state `s` is the fold number,
# starting at 1.
#
# Returns `nothing` once `s` exceeds `c.k`. Otherwise, for each
# `(permseq, coeff)` pair from `c.permseqs` / `c.coeffs`, the slice
# `permseq[a+1:b]` (with `a`, `b` the rounded values of `(s-1) * coeff` and
# `s * coeff`) is collected into `r`, and the result is
# `(setdiff(1:c.n, r), s + 1)`.
function Base.iterate(c::StratifiedKfold, s::Int=1)
    s > c.k && return nothing
    r = Int[]
    for (permseq, coeff) in zip(c.permseqs, c.coeffs)
        # Tuple broadcast gives the same two rounded bounds without a temporary array.
        a, b = round.(Integer, (s - 1, s) .* coeff)
        append!(r, view(permseq, a+1:b))
    end
    # Single setdiff: the result is computed once, directly in the return.
    return setdiff(1:c.n, r), s + 1
end
done(c::StratifiedKfold, s::Int) = (s > c.k)

# LOOCV (Leave-one-out cross-validation)

function leave_one_out(n::Int, i::Int)
@assert 1 <= i <= n
x = Array{Int}(n-1)
x = Array{Int}(undef, n - 1)
for j = 1:i-1
x[j] = j
end
Expand All @@ -83,10 +87,10 @@ end

length(c::LOOCV) = c.n

start(c::LOOCV) = 1
next(c::LOOCV, s::Int) = (leave_one_out(c.n, s), s+1)
done(c::LOOCV, s::Int) = (s > c.n)

# Iteration protocol for `LOOCV`; the state `s` counts from 1 up to `c.n`.
# Each step yields `leave_one_out(c.n, s)` together with the next state, and
# iteration ends (returns `nothing`) once `s` exceeds `c.n`.
function iterate(c::LOOCV, s::Int=1)
    if s <= c.n
        subset = leave_one_out(c.n, s)
        return (subset, s + 1)
    end
    return nothing
end

# Repeated random sub-sampling

Expand All @@ -98,9 +102,10 @@ end

length(c::RandomSub) = c.k

start(c::RandomSub) = 1
next(c::RandomSub, s::Int) = (sort!(sample(1:c.n, c.sn; replace=false)), s+1)
done(c::RandomSub, s::Int) = (s > c.k)
# Iteration protocol for `RandomSub`; the state `s` counts the draws made so
# far, starting at 1, and iteration ends once it exceeds `c.k`.
# Each step draws `c.sn` values from `1:c.n` without replacement and yields
# them sorted, together with the next state.
function iterate(c::RandomSub, s::Int=1)
    s <= c.k || return nothing
    drawn = sample(1:c.n, c.sn; replace=false)
    sort!(drawn)
    return (drawn, s + 1)
end

# Stratified repeated random sub-sampling

Expand Down Expand Up @@ -134,24 +139,23 @@ end

length(c::StratifiedRandomSub) = c.k

start(c::StratifiedRandomSub) = 1
function next(c::StratifiedRandomSub, s::Int)
idxs = Array{Int}(0)
# Iteration protocol for `StratifiedRandomSub`; the state `s` counts the draws
# made so far, starting at 1, and iteration ends once it exceeds `c.k`.
#
# For each `(stratum_sn, stratum_idxs)` pair from `c.sns` / `c.idxs`, draws
# `stratum_sn` entries of `stratum_idxs` without replacement. The combined
# draw is returned sorted, together with the next state.
function iterate(c::StratifiedRandomSub, s::Int=1)
    s > c.k && return nothing
    idxs = Int[]
    sizehint!(idxs, c.sn)    # reserve room for `c.sn` entries up front
    for (stratum_sn, stratum_idxs) in zip(c.sns, c.idxs)
        append!(idxs, sample(stratum_idxs, stratum_sn, replace=false))
    end
    # Sort once, as part of the return value.
    return (sort!(idxs), s + 1)
end
done(c::StratifiedRandomSub, s::Int) = (s > c.k)

## Cross validation algorithm
#
# estfun: model estimation function
#
# model = estfun(train_inds)
#
# it takes as input the indices of
# it takes as input the indices of
# the samples for training, and returns
# a trained model.
#
Expand All @@ -172,7 +176,7 @@ done(c::StratifiedRandomSub, s::Int) = (s > c.k)
#
function cross_validate(estfun::Function, evalfun::Function, n::Int, gen)
best_model = nothing
best_score = NaN
best_score = NaN
best_inds = Int[]
first = true

Expand All @@ -186,8 +190,5 @@ function cross_validate(estfun::Function, evalfun::Function, n::Int, gen)
return scores
end

cross_validate(estfun::Function, evalfun::Function, n::Integer, gen) =
cross_validate(estfun::Function, evalfun::Function, n::Integer, gen) =
cross_validate(estfun, evalfun, n, gen, Forward)



2 changes: 1 addition & 1 deletion src/modeltune.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function gridtune(estfun::Function, # model estimation
t = 0
first = true
local best_score, best_model, best_cfg
for cf in product(pvals...)
for cf in Iterators.product(pvals...)
t += 1
m = estfun(cf...)
if m == nothing
Expand Down
27 changes: 13 additions & 14 deletions src/perfeval.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ function counthits(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector)
end


hitrate(gt::IntegerVector, rklst::IntegerMatrix, k::Integer) =
hitrate(gt::IntegerVector, rklst::IntegerMatrix, k::Integer) =
(counthits(gt, rklst, k) / length(gt))::Float64

function hitrates(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector)
n = length(gt)
h = counthits(gt, rklst, ks)
nk = length(ks)
r = Array{Float64}(nk)
r = Array{Float64}(undef, nk)
for i = 1:nk
r[i] = h[i] / n
end
Expand Down Expand Up @@ -178,7 +178,7 @@ length(v::BinaryThresPredVec) = length(v.scores)
getindex(v::BinaryThresPredVec, i::Integer) = !lt(v.ord, v.scores[i], v.thres)

# compute roc numbers based on scores & threshold
roc(gt::IntegerVector, scores::RealVector, t::Real, ord::Ordering) =
roc(gt::IntegerVector, scores::RealVector, t::Real, ord::Ordering) =
_roc(gt, BinaryThresPredVec(scores, t, ord))

roc(gt::IntegerVector, scores::RealVector, thres::Real) =
Expand Down Expand Up @@ -211,7 +211,7 @@ length(v::ThresPredVec) = length(v.preds)
getindex(v::ThresPredVec, i::Integer) = ifelse(lt(v.ord, v.scores[i], v.thres), 0, v.preds[i])

# compute roc numbers based on predictions & scores & threshold
roc(gt::IntegerVector, preds::Tuple{PV,SV}, t::Real, ord::Ordering) where {PV<:IntegerVector,SV<:RealVector} =
roc(gt::IntegerVector, preds::Tuple{PV,SV}, t::Real, ord::Ordering) where {PV<:IntegerVector,SV<:RealVector} =
_roc(gt, ThresPredVec(preds..., t, ord))

roc(gt::IntegerVector, preds::Tuple{PV,SV}, thres::Real) where {PV<:IntegerVector,SV<:RealVector} =
Expand Down Expand Up @@ -246,10 +246,10 @@ end

find_thresbin(x::Real, thresholds::RealVector) = find_thresbin(x, thresholds, Forward)

lin_thresholds(scores::RealArray, n::Integer, ord::ForwardOrdering) =
lin_thresholds(scores::RealArray, n::Integer, ord::ForwardOrdering) =
((s0, s1) = extrema(scores); intv = (s1 - s0) / (n-1); s0:intv:s1)

lin_thresholds(scores::RealArray, n::Integer, ord::ReverseOrdering{ForwardOrdering}) =
lin_thresholds(scores::RealArray, n::Integer, ord::ReverseOrdering{ForwardOrdering}) =
((s0, s1) = extrema(scores); intv = (s0 - s1) / (n-1); s1:intv:s0)

# roc for binary predictions
Expand Down Expand Up @@ -278,7 +278,7 @@ function roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector, ord:
end

# produce results
r = Array{ROCNums{Int}}(nt)
r = Array{ROCNums{Int}}(undef, nt)
fn = 0
tn = 0
@inbounds for i = 1:nt
Expand All @@ -293,7 +293,7 @@ end

roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector) = roc(gt, scores, thresholds, Forward)

roc(gt::IntegerVector, scores::RealVector, n::Integer, ord::Ordering) =
roc(gt::IntegerVector, scores::RealVector, n::Integer, ord::Ordering) =
roc(gt, scores, lin_thresholds(scores, n, ord), ord)

roc(gt::IntegerVector, scores::RealVector, n::Integer) = roc(gt, scores, n, Forward)
Expand Down Expand Up @@ -341,7 +341,7 @@ function roc(
end

# produce results
r = Array{ROCNums{Int}}(nt)
r = Array{ROCNums{Int}}(undef, nt)
fn = 0
tn = 0
@inbounds for i = 1:nt
Expand All @@ -357,15 +357,14 @@ end
roc(gt::IntegerVector, preds::Tuple{PV,SV}, thresholds::RealVector) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt, preds, thresholds, Forward)

roc(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer, ord::Ordering) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer, ord::Ordering) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt, preds, lin_thresholds(preds[2],n,ord), ord)

roc(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt, preds, n, Forward)

roc(gt::IntegerVector, preds::Tuple{PV,SV}, ord::Ordering) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::IntegerVector, preds::Tuple{PV,SV}, ord::Ordering) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt, preds, 100, ord)

roc(gt::IntegerVector, preds::Tuple{PV,SV}) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt::IntegerVector, preds::Tuple{PV,SV}) where {PV<:IntegerVector, SV<:RealVector} =
roc(gt, preds, Forward)

Loading

0 comments on commit 6221277

Please sign in to comment.