From efdda0d0e1ccf00373c63c319dcb35c220258e1f Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Wed, 28 Jun 2017 19:01:03 +0200 Subject: [PATCH 1/3] Enable AppVeyor --- README.md | 1 + appveyor.yml | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 appveyor.yml diff --git a/README.md b/README.md index 26333cb5e..e8aa69bc5 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ [![StatsBase](http://pkg.julialang.org/badges/StatsBase_0.6.svg)](http://pkg.julialang.org/?pkg=StatsBase) - **Build & Testing Status:** [![Build Status](https://travis-ci.org/JuliaStats/StatsBase.jl.svg?branch=master)](https://travis-ci.org/JuliaStats/StatsBase.jl) + [![Build status](https://ci.appveyor.com/api/projects/status/fsut3j3onulvws1w?svg=true)](https://ci.appveyor.com/project/nalimilan/statsbase-jl) [![Coverage Status](https://coveralls.io/repos/JuliaStats/StatsBase.jl/badge.svg?branch=master)](https://coveralls.io/r/JuliaStats/StatsBase.jl?branch=master) - **Documentation**: [![][docs-stable-img]][docs-stable-url] [![][docs-latest-img]][docs-latest-url] diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 000000000..ac5568383 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,42 @@ +environment: + matrix: + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" + - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" + - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" + - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" + +branches: + only: + - master + - /release-.*/ + +notifications: + - provider: Email + on_build_success: false + on_build_failure: false + 
on_build_status_changed: false + +install: + - ps: "[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12" +# If there's a newer build queued for the same PR, cancel this one + - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod ` + https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | ` + Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { ` + throw "There are newer queued builds for this pull request, failing early." } +# Download most recent Julia Windows binary + - ps: (new-object net.webclient).DownloadFile( + $env:JULIA_URL, + "C:\projects\julia-binary.exe") +# Run installer silently, output to C:\projects\julia + - C:\projects\julia-binary.exe /S /D=C:\projects\julia + +build_script: +# Need to convert from shallow to complete for Pkg.clone to work + - IF EXIST .git\shallow (git fetch --unshallow) + - C:\projects\julia\bin\julia -e "versioninfo(); + Pkg.clone(pwd(), \"StatsBase\"); Pkg.build(\"StatsBase\")" + +test_script: + - C:\projects\julia\bin\julia -e "Pkg.test(\"StatsBase\")" From 8c5c9a4aad4fba76bd100f08af6de93ba4409455 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Wed, 28 Jun 2017 19:40:42 +0200 Subject: [PATCH 2/3] Adjust expected values in sampling tests on 32-bit --- test/sampling.jl | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/test/sampling.jl b/test/sampling.jl index 3e7d75182..7453e81ae 100644 --- a/test/sampling.jl +++ b/test/sampling.jl @@ -96,11 +96,19 @@ test_rng_use(sample, 1:10, 10) srand(1); -@test samplepair(2) === (1, 2) -@test samplepair(10) === (7, 3) - -@test samplepair([3, 4, 2, 6, 8]) === (4, 3) -@test samplepair([1, 2]) === (1, 2) +if Int === Int64 + @test samplepair(2) === (1, 2) + @test samplepair(10) === (7, 3) + + @test samplepair([3, 4, 2, 6, 8]) === (4, 3) + @test samplepair([1, 2]) 
=== (1, 2) +else + @test samplepair(2) === (1, 2) + @test samplepair(10) === (10, 6) + + @test samplepair([3, 4, 2, 6, 8]) === (6, 2) + @test samplepair([1, 2]) === (1, 2) +end test_rng_use(samplepair, 1000) From 9d541015d534f4505b918ca8dc94c5c4818b0f21 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Wed, 28 Jun 2017 22:08:20 +0200 Subject: [PATCH 3/3] Use rand() and Base.Random.RangeGenerator instead of randi() and RandIntSampler This fixes the inconsistency between 32-bit and 64-bit. rand() is somewhat slower than randi() when getting a single random number, but we should improve the implementation in Base if needed. Add deprecations since some packages used these internal methods. --- src/StatsBase.jl | 1 - src/deprecates.jl | 34 ++++++++++++++++++++++++++++++++++ src/rand.jl | 24 ------------------------ src/sampling.jl | 22 ++++++++++++---------- test/sampling.jl | 34 +++++----------------------------- 5 files changed, 51 insertions(+), 64 deletions(-) delete mode 100644 src/rand.jl diff --git a/src/StatsBase.jl b/src/StatsBase.jl index 5304f7df1..bfc57d4fb 100644 --- a/src/StatsBase.jl +++ b/src/StatsBase.jl @@ -192,7 +192,6 @@ module StatsBase include("toeplitzsolvers.jl") include("rankcorr.jl") include("signalcorr.jl") - include("rand.jl") include("empirical.jl") include("hist.jl") include("misc.jl") diff --git a/src/deprecates.jl b/src/deprecates.jl index 03ba0e22d..1b3365268 100644 --- a/src/deprecates.jl +++ b/src/deprecates.jl @@ -48,3 +48,37 @@ findat(a::AbstractArray, b::AbstractArray) = findat!(Array{Int}(size(b)), a, b) @deprecate df_residual(obj::StatisticalModel) dof_residual(obj) @deprecate_binding WeightVec Weights + +immutable RandIntSampler # for generating Int samples in [1, K] (one-arg) or [a, b] (two-arg) + a::Int + Ku::UInt + U::UInt + + function RandIntSampler(K::Int) + Base.depwarn("RandIntSampler is deprecated, use Base.Random.RangeGenerator instead", + :RandIntSampler) + Ku = UInt(K) + new(1, Ku, div(typemax(UInt), Ku) * Ku) + end + function 
RandIntSampler(a::Int, b::Int) + Base.depwarn("RandIntSampler is deprecated, use Base.Random.RangeGenerator instead", + :RandIntSampler) + Ku = UInt(b-a+1) + new(a, Ku, div(typemax(UInt), Ku) * Ku) + end +end + +function rand(rng::AbstractRNG, s::RandIntSampler) + x = rand(rng, UInt) + while x >= s.U + x = rand(rng, UInt) + end + s.a + Int(rem(x, s.Ku)) +end +rand(s::RandIntSampler) = rand(Base.GLOBAL_RNG, s) + +@deprecate randi(rng::AbstractRNG, K::Int) rand(rng, 1:K) +@deprecate randi(K::Int) rand(1:K) +@deprecate randi(rng::AbstractRNG, a::Int, b::Int) rand(rng, a:b) +@deprecate randi(a::Int, b::Int) rand(a:b) + diff --git a/src/rand.jl b/src/rand.jl deleted file mode 100644 index 7de341216..000000000 --- a/src/rand.jl +++ /dev/null @@ -1,24 +0,0 @@ -# Internal facilities for fast random number generation - -immutable RandIntSampler # for generating Int samples in [0, K-1] - a::Int - Ku::UInt - U::UInt - - @compat RandIntSampler(K::Int) = (Ku = UInt(K); new(1, Ku, div(typemax(UInt), Ku) * Ku)) - @compat RandIntSampler(a::Int, b::Int) = (Ku = UInt(b-a+1); new(a, Ku, div(typemax(UInt), Ku) * Ku)) -end - -function rand(rng::AbstractRNG, s::RandIntSampler) - x = rand(rng, UInt) - while x >= s.U - x = rand(rng, UInt) - end - @compat s.a + Int(rem(x, s.Ku)) -end -rand(s::RandIntSampler) = rand(Base.GLOBAL_RNG, s) - -randi(rng::AbstractRNG, K::Int) = rand(rng, RandIntSampler(K)) -randi(K::Int) = randi(Base.GLOBAL_RNG, K) -randi(rng::AbstractRNG, a::Int, b::Int) = rand(rng, RandIntSampler(a, b)) -randi(a::Int, b::Int) = randi(Base.GLOBAL_RNG, a, b) diff --git a/src/sampling.jl b/src/sampling.jl index ea28e4d98..21b1483ca 100644 --- a/src/sampling.jl +++ b/src/sampling.jl @@ -5,10 +5,12 @@ # ########################################################### +using Base.Random: RangeGenerator + ### Algorithms for sampling with replacement function direct_sample!(rng::AbstractRNG, a::UnitRange, x::AbstractArray) - s = RandIntSampler(length(a)) + s = RangeGenerator(1:length(a)) b 
= a[1] - 1 if b == 0 for i = 1:length(x) @@ -32,7 +34,7 @@ and set `x[j] = a[i]`, with `n=length(a)` and `k=length(x)`. This algorithm consumes `k` random numbers. """ function direct_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray) - s = RandIntSampler(length(a)) + s = RangeGenerator(1:length(a)) for i = 1:length(x) @inbounds x[i] = a[rand(rng, s)] end @@ -51,8 +53,8 @@ Optionally specify a random number generator `rng` as the first argument (defaults to `Base.GLOBAL_RNG`). """ function samplepair(rng::AbstractRNG, n::Int) - i1 = randi(rng, n) - i2 = randi(rng, n-1) + i1 = rand(rng, 1:n) + i2 = rand(rng, 1:n-1) return (i1, ifelse(i2 == i1, n, i2)) end samplepair(n::Int) = samplepair(Base.GLOBAL_RNG, n) @@ -95,7 +97,7 @@ function knuths_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray; end if initshuffle @inbounds for j = 1:k - l = randi(rng, j, k) + l = rand(rng, j:k) if l != j t = x[j] x[j] = x[l] @@ -105,7 +107,7 @@ function knuths_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray; end # scan remaining - s = RandIntSampler(k) + s = RangeGenerator(1:k) for i = k+1:n if rand(rng) * i < k # keep it with probability k / i @inbounds x[rand(rng, s)] = a[i] @@ -151,7 +153,7 @@ function fisher_yates_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArr end @inbounds for i = 1:k - j = randi(rng, i, n) + j = rand(rng, i:n) t = inds[j] inds[j] = inds[i] inds[i] = t @@ -183,7 +185,7 @@ function self_avoid_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray s = Set{Int}() sizehint!(s, k) - rgen = RandIntSampler(n) + rgen = RangeGenerator(1:n) # first one idx = rand(rng, rgen) @@ -297,7 +299,7 @@ the weights given in `wv`, if provided. Optionally specify a random number generator `rng` as the first argument (defaults to `Base.GLOBAL_RNG`). 
""" -sample(rng::AbstractRNG, a::AbstractArray) = a[randi(rng, length(a))] +sample(rng::AbstractRNG, a::AbstractArray) = a[rand(rng, 1:length(a))] sample(a::AbstractArray) = sample(Base.GLOBAL_RNG, a) @@ -531,7 +533,7 @@ function alias_sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, make_alias_table!(values(wv), sum(wv), ap, alias) # sampling - s = RandIntSampler(n) + s = RangeGenerator(1:n) for i = 1:length(x) j = rand(rng, s) x[i] = rand(rng) < ap[j] ? a[j] : a[alias[j]] diff --git a/test/sampling.jl b/test/sampling.jl index 7453e81ae..9c898b451 100644 --- a/test/sampling.jl +++ b/test/sampling.jl @@ -1,7 +1,7 @@ using StatsBase using Base.Test import Base: maxabs -import StatsBase: norepeat, randi +import StatsBase: norepeat srand(1234) @@ -25,22 +25,6 @@ function test_rng_use(func, non_rng_args...) @test x == y end -#### randi - -n = 10^5 - -x = [randi(10) for i = 1:n] -@test isa(x, Vector{Int}) -@test extrema(x) == (1, 10) -@test isapprox(proportions(x, 1:10), fill(0.1, 10), atol=5.0e-3) -test_rng_use(randi, 1000) - - -x = [randi(3, 12) for i = 1:n] -@test isa(x, Vector{Int}) -@test extrema(x) == (3, 12) -@test isapprox(proportions(x, 3:12), fill(0.1, 10), atol=5.0e-3) - #### sample with replacement function check_sample_wrep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=false) @@ -96,19 +80,11 @@ test_rng_use(sample, 1:10, 10) srand(1); -if Int === Int64 - @test samplepair(2) === (1, 2) - @test samplepair(10) === (7, 3) - - @test samplepair([3, 4, 2, 6, 8]) === (4, 3) - @test samplepair([1, 2]) === (1, 2) -else - @test samplepair(2) === (1, 2) - @test samplepair(10) === (10, 6) +@test samplepair(2) === (1, 2) +@test samplepair(10) === (10, 6) - @test samplepair([3, 4, 2, 6, 8]) === (6, 2) - @test samplepair([1, 2]) === (1, 2) -end +@test samplepair([3, 4, 2, 6, 8]) === (6, 2) +@test samplepair([1, 2]) === (1, 2) test_rng_use(samplepair, 1000)