From 1cdf046c7750856fc917b56fbc39b575883a6411 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Mon, 27 Apr 2020 11:40:33 -0400 Subject: [PATCH 01/19] Initial commit, `collects` everywhere --- Project.toml | 1 - src/Statistics.jl | 59 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 12c96773..21bd1852 100644 --- a/Project.toml +++ b/Project.toml @@ -1,5 +1,4 @@ name = "Statistics" -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/src/Statistics.jl b/src/Statistics.jl index 977ae8a6..d9726e5f 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -494,7 +494,7 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) = (vardim == 1 ? *(transpose(x), _conj(y)) : *(x, adjoint(y))) # covzm (with centered data) - +covzm(itr::Any; corrected::Bool = true) = covzm(collect(itr); corrected = corrected) covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected)) function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true) C = unscaled_covzm(x, vardim) @@ -504,6 +504,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true) A .= A .* b return A end +covzm(x::Any, y::Any; corrected::Bool = true) = covzm(collect(x), collect(y); corrected = corrected) covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = unscaled_covzm(x, y) / (length(x) - Int(corrected)) function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true) @@ -518,16 +519,31 @@ end # covm (with provided mean) ## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector} ## which can't be handled by broadcast +covm(itr::Any, itrmean; corrected::Bool = true) = covm(collect(itr), itrmean) covm(x::AbstractVector, xmean; corrected::Bool=true) = covzm(map(t -> t - xmean, x); corrected=corrected) covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) = covzm(x .- xmean, vardim; corrected=corrected) +covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) = + covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) = covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) = covzm(x .- xmean, y .- ymean, vardim; corrected=corrected) # cov (API) +""" + cov(itr::Any; corrected::Bool=true) + +Compute the variance of the iterator `itr`. If `corrected` is `true` (the default) then the sum +is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where +`n = length(collect(itr))`. +""" +function cov(itr::Any; corrected::Bool = true) + x = collect(itr) + covm(x, mean(x); corrected = corrected) +end + """ cov(x::AbstractVector; corrected::Bool=true) @@ -546,6 +562,22 @@ if `corrected` is `false` where `n = size(X, dims)`. cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) = covm(X, _vmean(X, dims), dims; corrected=corrected) + +""" + cov(x::Any, y::Any; corrected::Bool=true) + +Compute the covariance between the iterators `x` and `y`. If `corrected` is `true` (the +default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where +``*`` denotes the complex conjugate and `n = length(collect(x)) = length(collect(y))`. If `corrected` is +`false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``. +""" +function cov(x::Any, y::Any; corrected::Bool = true) + cx = collect(x) + cy = collect(y) + + covm(cx, mean(cx), cy, mean(cy); corrected = corrected) +end + """ cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) @@ -630,7 +662,7 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray) end # corzm (non-exported, with centered data) - +corzm(x::Any) = corzm(collect(x)) corzm(x::AbstractVector{T}) where {T} = one(real(T)) function corzm(x::AbstractMatrix, vardim::Int=1) c = unscaled_covzm(x, vardim) @@ -644,9 +676,10 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) = cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim))) # corm - +corm(x::Any, xmean) = corzm(collect(x), xmean) corm(x::AbstractVector{T}, xmean) where {T} = one(real(T)) corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim) +corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my) function corm(x::AbstractVector, mx, y::AbstractVector, my) require_one_based_indexing(x, y) n = length(x) @@ -674,6 +707,14 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) = corzm(x .- xmean, y .- ymean, vardim) # cor +""" + cor(itr::Any) + +Return the number one. +""" +cor(itr::Any) = one(real(eltype(collect(x)))) + + """ cor(x::AbstractVector) @@ -688,6 +729,18 @@ Compute the Pearson correlation matrix of the matrix `X` along the dimension `di """ cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims) +""" + cor(x::AbstractVector, y::AbstractVector) + +Compute the Pearson correlation between the vectors `x` and `y`. +""" +function cor(x::Any, y::Any) + cx = collect(x) + cy = collect(y) + + corm(cx, mean(cx), cy, mean(cy)) +end + """ cor(x::AbstractVector, y::AbstractVector) From f3e9641f928b4e024f0fdb3f6af7a741bb114472 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Mon, 27 Apr 2020 12:08:57 -0400 Subject: [PATCH 02/19] Add tests --- src/Statistics.jl | 15 ++++++++------- test/runtests.jl | 17 ++++++++++++++--- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index d9726e5f..480bf5f0 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -519,7 +519,8 @@ end # covm (with provided mean) ## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector} ## which can't be handled by broadcast -covm(itr::Any, itrmean; corrected::Bool = true) = covm(collect(itr), itrmean) +covm(itr::Any, itrmean; corrected::Bool=true) = + @show covm(collect(itr), itrmean; corrected=corrected) covm(x::AbstractVector, xmean; corrected::Bool=true) = covzm(map(t -> t - xmean, x); corrected=corrected) covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) = @@ -539,9 +540,9 @@ Compute the variance of the iterator `itr`. If `corrected` is `true` (the defaul is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n = length(collect(itr))`. """ -function cov(itr::Any; corrected::Bool = true) +function cov(itr::Any; corrected::Bool=true) x = collect(itr) - covm(x, mean(x); corrected = corrected) + covm(x, mean(x); corrected=corrected) end """ @@ -571,11 +572,11 @@ default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` ``*`` denotes the complex conjugate and `n = length(collect(x)) = length(collect(y))`. If `corrected` is `false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``. """ -function cov(x::Any, y::Any; corrected::Bool = true) +function cov(x::Any, y::Any; corrected::Bool=true) cx = collect(x) cy = collect(y) - covm(cx, mean(cx), cy, mean(cy); corrected = corrected) + covm(cx, mean(cx), cy, mean(cy); corrected=corrected) end """ @@ -676,7 +677,7 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) = cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim))) # corm -corm(x::Any, xmean) = corzm(collect(x), xmean) +corm(x::Any, xmean) = corm(collect(x), xmean) corm(x::AbstractVector{T}, xmean) where {T} = one(real(T)) corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim) corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my) @@ -712,7 +713,7 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) = Return the number one. """ -cor(itr::Any) = one(real(eltype(collect(x)))) +cor(itr::Any) = one(real(eltype(collect(itr)))) """ diff --git a/test/runtests.jl b/test/runtests.jl index bc33cf57..b8aff9a3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -339,11 +339,16 @@ Y = [6.0 2.0; x1 = vec(X[1,:]) y1 = vec(Y[1,:]) end + @show x1 + x1_itr = (x1i for x1i in x1) + y1_itr = skipmissing(y1) c = zm ? Statistics.covm(x1, 0, corrected=cr) : cov(x1, corrected=cr) + c_itr = zm ? Statistics.covm(x1_itr, 0, corrected=cr) : + cov(x1_itr, corrected=cr) @test isa(c, Float64) - @test c ≈ Cxx[1,1] + @test c ≈ c_itr ≈ Cxx[1,1] @inferred cov(x1, corrected=cr) @test cov(X) == Statistics.covm(X, mean(X, dims=1)) @@ -356,6 +361,8 @@ Y = [6.0 2.0; @test cov(x1, y1) == Statistics.covm(x1, mean(x1), y1, mean(y1)) c = zm ? Statistics.covm(x1, 0, y1, 0, corrected=cr) : cov(x1, y1, corrected=cr) + c_itr = zm ? Statistics.covm(x1_itr, 0, y1_itr, 0, corrected=cr) : + cov(x1_itr, y1_itr, corrected=cr) @test isa(c, Float64) @test c ≈ Cxy[1,1] @inferred cov(x1, y1, corrected=cr) @@ -426,10 +433,13 @@ end x1 = vec(X[1,:]) y1 = vec(Y[1,:]) end + x1_itr = (x1i for x1i in x1) + y1_itr = skipmissing(y1) c = zm ? Statistics.corm(x1, 0) : cor(x1) + c_itr = zm ? Statistics.corm(x1_itr, 0) : cor(x1_itr) @test isa(c, Float64) - @test c ≈ Cxx[1,1] + @test c ≈ c_itr ≈ Cxx[1,1] @inferred cor(x1) @test cor(X) == Statistics.corm(X, mean(X, dims=1)) @@ -440,8 +450,9 @@ end @test cor(x1, y1) == Statistics.corm(x1, mean(x1), y1, mean(y1)) c = zm ? Statistics.corm(x1, 0, y1, 0) : cor(x1, y1) + c_itr = zm ? Statistics.corm(x1_itr, 0, y1_itr, 0) : cor(x1_itr, y1_itr) @test isa(c, Float64) - @test c ≈ Cxy[1,1] + @test c ≈ c_itr ≈ Cxy[1,1] @inferred cor(x1, y1) if vd == 1 From 2f9c4f8f28f0a5d989ca8a5174301b70922a3a12 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Tue, 28 Apr 2020 11:20:04 -0400 Subject: [PATCH 03/19] Respond to comments --- src/Statistics.jl | 57 +++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 480bf5f0..6f1e0dee 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -520,7 +520,7 @@ end ## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector} ## which can't be handled by broadcast covm(itr::Any, itrmean; corrected::Bool=true) = - @show covm(collect(itr), itrmean; corrected=corrected) + covm(collect(itr), itrmean; corrected=corrected) covm(x::AbstractVector, xmean; corrected::Bool=true) = covzm(map(t -> t - xmean, x); corrected=corrected) covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) = @@ -538,19 +538,13 @@ covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corr Compute the variance of the iterator `itr`. If `corrected` is `true` (the default) then the sum is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where -`n = length(collect(itr))`. +``n`` is the number of elements. """ function cov(itr::Any; corrected::Bool=true) x = collect(itr) - covm(x, mean(x); corrected=corrected) + meanx = mean(x) + covzm(map!(t -> t - meanx, x, x); corrected=corrected) end - -""" - cov(x::AbstractVector; corrected::Bool=true) - -Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum -is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n = length(x)`. -""" cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected=corrected) """ @@ -569,24 +563,18 @@ cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) = Compute the covariance between the iterators `x` and `y`. If `corrected` is `true` (the default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where -``*`` denotes the complex conjugate and `n = length(collect(x)) = length(collect(y))`. If `corrected` is +``*`` denotes the complex conjugate and ``n`` the number of elements. If `corrected` is `false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``. """ function cov(x::Any, y::Any; corrected::Bool=true) cx = collect(x) cy = collect(y) - - covm(cx, mean(cx), cy, mean(cy); corrected=corrected) + meanx = mean(cx) + meany = mean(cy) + dx = map!(t -> t - meanx, cx, cx) + dy = map!(t -> t - meany, cy, cy) + covzm(dx, dy; corrected=corrected) end - -""" - cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) - -Compute the covariance between the vectors `x` and `y`. If `corrected` is `true` (the -default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where -``*`` denotes the complex conjugate and `n = length(x) = length(y)`. If `corrected` is -`false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``. -""" cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = covm(x, mean(x), y, mean(y); corrected=corrected) @@ -663,7 +651,13 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray) end # corzm (non-exported, with centered data) -corzm(x::Any) = corzm(collect(x)) +function corzm(itr::Any) + if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr)) + return one(real(eltype(itr))) + else + return one(real(eltype(collect(itr)))) + end +end corzm(x::AbstractVector{T}) where {T} = one(real(T)) function corzm(x::AbstractMatrix, vardim::Int=1) c = unscaled_covzm(x, vardim) @@ -713,15 +707,14 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) = Return the number one. """ -cor(itr::Any) = one(real(eltype(collect(itr)))) - - -""" - cor(x::AbstractVector) - -Return the number one. -""" -cor(x::AbstractVector) = one(real(eltype(x))) +function cor(itr::Any) + if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr)) + return one(real(eltype(itr))) + else + return one(real(eltype(collect(itr)))) + end +end +cor(x::AbstractVector{T}) where {T} = one(real(T)) """ cor(X::AbstractMatrix; dims::Int=1) From 52c18ea3b71f20090c608c11b732dae41d298599 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Tue, 28 Apr 2020 13:30:53 -0400 Subject: [PATCH 04/19] Apply suggestions from code review Co-Authored-By: Milan Bouchet-Valat --- src/Statistics.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 6f1e0dee..56578b64 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -504,7 +504,8 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true) A .= A .* b return A end -covzm(x::Any, y::Any; corrected::Bool = true) = covzm(collect(x), collect(y); corrected = corrected) +covzm(x::Any, y::Any; corrected::Bool = true) = + covzm(collect(x), collect(y); corrected = corrected) covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = unscaled_covzm(x, y) / (length(x) - Int(corrected)) function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true) @@ -714,7 +715,7 @@ function cor(itr::Any) return one(real(eltype(collect(itr)))) end end -cor(x::AbstractVector{T}) where {T} = one(real(T)) +cor(x::AbstractVector) = one(real(eltype(x))) """ cor(X::AbstractMatrix; dims::Int=1) From 4620247ed19c0664129683ab95f9bbd849e3fcc9 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Tue, 28 Apr 2020 13:36:32 -0400 Subject: [PATCH 05/19] more comments -- ready for review --- src/Statistics.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 56578b64..721ee3ca 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -672,7 +672,13 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) = cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim))) # corm -corm(x::Any, xmean) = corm(collect(x), xmean) +function corm(itr::Any, itrmean) + if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr)) + return one(real(eltype(itr))) + else + return one(real(eltype(collect(itr)))) + end +end corm(x::AbstractVector{T}, xmean) where {T} = one(real(T)) corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim) corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my) From b86ddba78e224d146f7b0d4c050fb4d930462967 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Tue, 28 Apr 2020 13:38:43 -0400 Subject: [PATCH 06/19] fix deleted line --- src/Statistics.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Statistics.jl b/src/Statistics.jl index 721ee3ca..c828272e 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -494,6 +494,7 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) = (vardim == 1 ? *(transpose(x), _conj(y)) : *(x, adjoint(y))) # covzm (with centered data) + covzm(itr::Any; corrected::Bool = true) = covzm(collect(itr); corrected = corrected) covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected)) function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true) From 0221557c66f70fe714047339bc4354020c21e951 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Wed, 29 Apr 2020 11:07:00 -0400 Subject: [PATCH 07/19] many more tests --- src/Statistics.jl | 12 ++++++------ test/runtests.jl | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index c828272e..4955ac26 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -528,7 +528,7 @@ covm(x::AbstractVector, xmean; corrected::Bool=true) = covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) = covzm(x .- xmean, vardim; corrected=corrected) covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) = - covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) + covzm(x .- xmean, y .- ymean; corrected=corrected) covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) = covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) = @@ -571,10 +571,10 @@ default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` function cov(x::Any, y::Any; corrected::Bool=true) cx = collect(x) cy = collect(y) - meanx = mean(cx) - meany = mean(cy) - dx = map!(t -> t - meanx, cx, cx) - dy = map!(t -> t - meany, cy, cy) + meanx = _vmean(cx, 1) + meany = _vmean(cy, 1) + dx = x .- meanx + dy = y .- meany covzm(dx, dy; corrected=corrected) end cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = @@ -740,7 +740,7 @@ function cor(x::Any, y::Any) cx = collect(x) cy = collect(y) - corm(cx, mean(cx), cy, mean(cy)) + corm(cx, _vmean(cx, 1), cy, _vmean(cy, 1)) end """ diff --git a/test/runtests.jl b/test/runtests.jl index b8aff9a3..98b5fbf8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -339,7 +339,6 @@ Y = [6.0 2.0; x1 = vec(X[1,:]) y1 = vec(Y[1,:]) end - @show x1 x1_itr = (x1i for x1i in x1) y1_itr = skipmissing(y1) @@ -348,7 +347,7 @@ Y = [6.0 2.0; c_itr = zm ? Statistics.covm(x1_itr, 0, corrected=cr) : cov(x1_itr, corrected=cr) @test isa(c, Float64) - @test c ≈ c_itr ≈ Cxx[1,1] + @test c == c_itr == Cxx[1,1] @inferred cov(x1, corrected=cr) @test cov(X) == Statistics.covm(X, mean(X, dims=1)) @@ -363,21 +362,29 @@ Y = [6.0 2.0; cov(x1, y1, corrected=cr) c_itr = zm ? Statistics.covm(x1_itr, 0, y1_itr, 0, corrected=cr) : cov(x1_itr, y1_itr, corrected=cr) + c_itrx = zm ? Statistics.covm(x1_itr, 0, y1, 0, corrected=cr) : + cov(x1_itr, y1, corrected=cr) + c_itry = zm ? Statistics.covm(x1, 0, y1_itr, 0, corrected=cr) : + cov(x1, y1_itr, corrected=cr) @test isa(c, Float64) - @test c ≈ Cxy[1,1] + @test c == c_itr == c_itrx == c_itry == Cxy[1,1] @inferred cov(x1, y1, corrected=cr) if vd == 1 - @test cov(x1, Y) == Statistics.covm(x1, mean(x1), Y, mean(Y, dims=1)) + C = cov(x1, Y) + C_itr = cov(x1_itr, Y) + @test C == C_itr == Statistics.covm(x1, mean(x1), Y, mean(Y, dims=1)) end C = zm ? Statistics.covm(x1, 0, Y, 0, vd, corrected=cr) : - cov(x1, Y, dims=vd, corrected=cr) + cov(x1, Y, dims=vd, corrected=cr) @test size(C) == (1, k) @test vec(C) ≈ Cxy[1,:] @inferred cov(x1, Y, dims=vd, corrected=cr) if vd == 1 - @test cov(X, y1) == Statistics.covm(X, mean(X, dims=1), y1, mean(y1)) + C = cov(X, y1) + C_itr = cov(X, y1_itr) + @test C == C_itr == Statistics.covm(X, mean(X, dims=1), y1, mean(y1)) end C = zm ? Statistics.covm(X, 0, y1, 0, vd, corrected=cr) : cov(X, y1, dims=vd, corrected=cr) @@ -451,12 +458,17 @@ end @test cor(x1, y1) == Statistics.corm(x1, mean(x1), y1, mean(y1)) c = zm ? Statistics.corm(x1, 0, y1, 0) : cor(x1, y1) c_itr = zm ? Statistics.corm(x1_itr, 0, y1_itr, 0) : cor(x1_itr, y1_itr) + c_itrx = zm ? Statistics.corm(x1_itr, 0, y1, 0) : cor(x1_itr, y1) + c_itry = zm ? Statistics.corm(x1, 0, y1_itr, 0) : cor(x1, y1_itr) + @test isa(c, Float64) - @test c ≈ c_itr ≈ Cxy[1,1] + @test c == c_itr == c_itrx == c_itry ≈ Cxy[1,1] @inferred cor(x1, y1) if vd == 1 - @test cor(x1, Y) == Statistics.corm(x1, mean(x1), Y, mean(Y, dims=1)) + C = cor(x1, Y) + C_itr = Statistics.corm(x1_itr, mean(x1), Y, mean(Y, dims=1)) + @test C == C_itr == Statistics.corm(x1, mean(x1), Y, mean(Y, dims=1)) end C = zm ? Statistics.corm(x1, 0, Y, 0, vd) : cor(x1, Y, dims=vd) @test size(C) == (1, k) @@ -464,11 +476,19 @@ end @inferred cor(x1, Y, dims=vd) if vd == 1 - @test cor(X, y1) == Statistics.corm(X, mean(X, dims=1), y1, mean(y1)) + C = cor(X, y1) + C_itr = cor(X, y1_itr) + @test C == C_itr == Statistics.corm(X, mean(X, dims=1), y1, mean(y1)) end + println("zm = $zm") C = zm ? Statistics.corm(X, 0, y1, 0, vd) : cor(X, y1, dims=vd) + @test size(C) == (k, 1) @test vec(C) ≈ Cxy[:,1] + if vd == 1 + C_itr = zm ? Statistics.corm(X, 0, y1_itr, 0) : cor(X, y1_itr) + @test C_itr == C + end @inferred cor(X, y1, dims=vd) @test cor(X, Y) == Statistics.corm(X, mean(X, dims=1), Y, mean(Y, dims=1)) From e3bc3cc80001445e5c00cb4198bb8f6aa048f760 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Wed, 29 Apr 2020 12:56:21 -0400 Subject: [PATCH 08/19] Apply suggestions from code review Co-Authored-By: Milan Bouchet-Valat --- src/Statistics.jl | 7 ++++--- test/runtests.jl | 3 +-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 4955ac26..0d17a528 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -559,7 +559,6 @@ if `corrected` is `false` where `n = size(X, dims)`. cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) = covm(X, _vmean(X, dims), dims; corrected=corrected) - """ cov(x::Any, y::Any; corrected::Bool=true) @@ -653,6 +652,7 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray) end # corzm (non-exported, with centered data) + function corzm(itr::Any) if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr)) return one(real(eltype(itr))) @@ -673,6 +673,7 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) = cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim))) # corm + function corm(itr::Any, itrmean) if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr)) return one(real(eltype(itr))) @@ -732,9 +733,9 @@ Compute the Pearson correlation matrix of the matrix `X` along the dimension `di cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims) """ - cor(x::AbstractVector, y::AbstractVector) + cor(x::Any, y::Any) -Compute the Pearson correlation between the vectors `x` and `y`. +Compute the Pearson correlation between iterators `x` and `y`. """ function cor(x::Any, y::Any) cx = collect(x) diff --git a/test/runtests.jl b/test/runtests.jl index 98b5fbf8..aacec023 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -376,7 +376,7 @@ Y = [6.0 2.0; @test C == C_itr == Statistics.covm(x1, mean(x1), Y, mean(Y, dims=1)) end C = zm ? Statistics.covm(x1, 0, Y, 0, vd, corrected=cr) : - cov(x1, Y, dims=vd, corrected=cr) + cov(x1, Y, dims=vd, corrected=cr) @test size(C) == (1, k) @test vec(C) ≈ Cxy[1,:] @inferred cov(x1, Y, dims=vd, corrected=cr) @@ -480,7 +480,6 @@ end C_itr = cor(X, y1_itr) @test C == C_itr == Statistics.corm(X, mean(X, dims=1), y1, mean(y1)) end - println("zm = $zm") C = zm ? Statistics.corm(X, 0, y1, 0, vd) : cor(X, y1, dims=vd) @test size(C) == (k, 1) From 3493ed20fbb37ed16a8ca2473a56561bce972f39 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Wed, 29 Apr 2020 13:09:22 -0400 Subject: [PATCH 09/19] Polish up tests --- Project.toml | 1 + test/runtests.jl | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 21bd1852..12c96773 100644 --- a/Project.toml +++ b/Project.toml @@ -1,4 +1,5 @@ name = "Statistics" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/test/runtests.jl b/test/runtests.jl index 98b5fbf8..c036420b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -349,6 +349,7 @@ Y = [6.0 2.0; @test isa(c, Float64) @test c == c_itr == Cxx[1,1] @inferred cov(x1, corrected=cr) + @inferred cov(x1_itr, corrected=cr) @test cov(X) == Statistics.covm(X, mean(X, dims=1)) C = zm ? Statistics.covm(X, 0, vd, corrected=cr) : @@ -369,6 +370,7 @@ Y = [6.0 2.0; @test isa(c, Float64) @test c == c_itr == c_itrx == c_itry == Cxy[1,1] @inferred cov(x1, y1, corrected=cr) + @inferred cov(x1_itr, y1_itr, corrected=cr) if vd == 1 C = cov(x1, Y) @@ -380,6 +382,9 @@ Y = [6.0 2.0; @test size(C) == (1, k) @test vec(C) ≈ Cxy[1,:] @inferred cov(x1, Y, dims=vd, corrected=cr) + if vd == 1 + @inferred cov(x1_itr, Y, corrected=cr) + end if vd == 1 C = cov(X, y1) @@ -391,7 +396,9 @@ Y = [6.0 2.0; @test size(C) == (k, 1) @test vec(C) ≈ Cxy[:,1] @inferred cov(X, y1, dims=vd, corrected=cr) - + if vd == 1 + @inferred cov(X, y1_itr, corrected=cr) + end @test cov(X, Y) == Statistics.covm(X, mean(X, dims=1), Y, mean(Y, dims=1)) C = zm ? Statistics.covm(X, 0, Y, 0, vd, corrected=cr) : cov(X, Y, dims=vd, corrected=cr) @@ -448,6 +455,7 @@ end @test isa(c, Float64) @test c ≈ c_itr ≈ Cxx[1,1] @inferred cor(x1) + @inferred cor(x1_itr) @test cor(X) == Statistics.corm(X, mean(X, dims=1)) C = zm ? Statistics.corm(X, 0, vd) : cor(X, dims=vd) @@ -464,6 +472,7 @@ end @test isa(c, Float64) @test c == c_itr == c_itrx == c_itry ≈ Cxy[1,1] @inferred cor(x1, y1) + @inferred cor(x1_itr, y1_itr) if vd == 1 C = cor(x1, Y) @@ -474,6 +483,9 @@ end @test size(C) == (1, k) @test vec(C) ≈ Cxy[1,:] @inferred cor(x1, Y, dims=vd) + if vd == 1 + @inferred cor(x1, Y) + end if vd == 1 C = cor(X, y1) @@ -490,6 +502,9 @@ end @test C_itr == C end @inferred cor(X, y1, dims=vd) + if vd == 1 + @inferred cor(X, y1_itr) + end @test cor(X, Y) == Statistics.corm(X, mean(X, dims=1), Y, mean(Y, dims=1)) C = zm ? Statistics.corm(X, 0, Y, 0, vd) : cor(X, Y, dims=vd) From 8b497454ac3934dacd74b8bac61353b3fcfb745f Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 30 Apr 2020 09:41:33 -0400 Subject: [PATCH 10/19] Errors with matrices --- Project.toml | 1 - src/Statistics.jl | 47 +++++++++++++++++++++++++++++++++++------ test/runtests.jl | 54 ++++++++++++++++++++++------------------------- 3 files changed, 65 insertions(+), 37 deletions(-) diff --git a/Project.toml b/Project.toml index 12c96773..21bd1852 100644 --- a/Project.toml +++ b/Project.toml @@ -1,5 +1,4 @@ name = "Statistics" -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/src/Statistics.jl b/src/Statistics.jl index 0d17a528..5603b6c2 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -479,6 +479,30 @@ end _vmean(x::AbstractVector, vardim::Int) = mean(x) _vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim) +function _matrix_error(x, y, fun) + if x isa AbstractMatrix + s = "$(fun)(x::AbstractMatrix, y::Any) is currently not allowed. " * + "Use $(fun)(x, collect(y)) instead" + throw(ArgumentError(s)) + elseif y isa AbstractMatrix + s = "$(fun)(x::Any, y::AbstractMatrix) is currently not allowed. " * + "Use $(fun)(collect(x), y) instead" + throw(ArgumentError(s)) + end +end + +function _matrix_error(x, mx, y, my, fun) + if x isa AbstractMatrix || y isa AbstractMatrix + s = "$(fun)(x::$(typeof(x)), mx, y::Any, my) is currently not allowed. " * + "Use $(fun)(x, mx, collect(y), my) instead" + throw(ArgumentError(s)) + elseif y isa AbstractMatrix + s = "$(fun)(x::Any, mx, y::$(typeof(y)), my) is currently not allowed. " * + "Use $(fun)(collect(x), mx, y, my) inistead." + throw(ArgumentError(s)) + end +end + # core functions unscaled_covzm(x::AbstractVector{<:Number}) = sum(abs2, x) @@ -505,8 +529,10 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true) A .= A .* b return A end -covzm(x::Any, y::Any; corrected::Bool = true) = +function covzm(x::Any, y::Any; corrected::Bool = true) + _matrix_error(x, y, covzm) covzm(collect(x), collect(y); corrected = corrected) +end covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = unscaled_covzm(x, y) / (length(x) - Int(corrected)) function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true) @@ -527,8 +553,10 @@ covm(x::AbstractVector, xmean; corrected::Bool=true) = covzm(map(t -> t - xmean, x); corrected=corrected) covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) = covzm(x .- xmean, vardim; corrected=corrected) -covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) = - covzm(x .- xmean, y .- ymean; corrected=corrected) +function covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) + _matrix_error(x, xmean, y, ymean, covm) + covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) +end covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) = covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) = @@ -568,12 +596,13 @@ default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` `false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``. """ function cov(x::Any, y::Any; corrected::Bool=true) + _matrix_error(x, y, cov) cx = collect(x) cy = collect(y) meanx = _vmean(cx, 1) meany = _vmean(cy, 1) - dx = x .- meanx - dy = y .- meany + dx = map!(t -> t - meanx, cx, cx) + dy = map!(t -> t - meany, cy, cy) covzm(dx, dy; corrected=corrected) end cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = @@ -683,7 +712,10 @@ function corm(itr::Any, itrmean) end corm(x::AbstractVector{T}, xmean) where {T} = one(real(T)) corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim) -corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my) +function corm(x::Any, mx, y::Any, my) + _matrix_error(x, mx, y, my, corm) + corm(collect(x), mx, collect(y), my) +end function corm(x::AbstractVector, mx, y::AbstractVector, my) require_one_based_indexing(x, y) n = length(x) @@ -738,10 +770,11 @@ cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims) Compute the Pearson correlation between iterators `x` and `y`. """ function cor(x::Any, y::Any) + _matrix_error(x, y, cor) cx = collect(x) cy = collect(y) - corm(cx, _vmean(cx, 1), cy, _vmean(cy, 1)) + corm(cx, mean(cx), cy, mean(cy)) end """ diff --git a/test/runtests.jl b/test/runtests.jl index 31a7773a..19d24288 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -373,32 +373,23 @@ Y = [6.0 2.0; @inferred cov(x1_itr, y1_itr, corrected=cr) if vd == 1 - C = cov(x1, Y) - C_itr = cov(x1_itr, Y) - @test C == C_itr == Statistics.covm(x1, mean(x1), Y, mean(Y, dims=1)) + @test cov(x1, Y) == Statistics.covm(x1, mean(x1), Y, mean(Y, dims=1)) end C = zm ? Statistics.covm(x1, 0, Y, 0, vd, corrected=cr) : cov(x1, Y, dims=vd, corrected=cr) @test size(C) == (1, k) @test vec(C) ≈ Cxy[1,:] @inferred cov(x1, Y, dims=vd, corrected=cr) - if vd == 1 - @inferred cov(x1_itr, Y, corrected=cr) - end if vd == 1 - C = cov(X, y1) - C_itr = cov(X, y1_itr) - @test C == C_itr == Statistics.covm(X, mean(X, dims=1), y1, mean(y1)) + @test cov(X, y1) == Statistics.covm(X, mean(X, dims=1), y1, mean(y1)) end C = zm ? Statistics.covm(X, 0, y1, 0, vd, corrected=cr) : cov(X, y1, dims=vd, corrected=cr) @test size(C) == (k, 1) @test vec(C) ≈ Cxy[:,1] @inferred cov(X, y1, dims=vd, corrected=cr) - if vd == 1 - @inferred cov(X, y1_itr, corrected=cr) - end + @test cov(X, Y) == Statistics.covm(X, mean(X, dims=1), Y, mean(Y, dims=1)) C = zm ? Statistics.covm(X, 0, Y, 0, vd, corrected=cr) : cov(X, Y, dims=vd, corrected=cr) @@ -407,6 +398,17 @@ Y = [6.0 2.0; @inferred cov(X, Y, dims=vd, corrected=cr) end + @testset "errors for `cov` with non-array iterators and matrices" begin + x1_itr = (xi for xi in X[:, 1]) + y1_itr = skipmissing(Y[:, 1]) + @test_throws ArgumentError Statistics.covzm(X, y1_itr) + @test_throws ArgumentError Statistics.covzm(x1_itr, Y) + @test_throws ArgumentError Statistics.covm(X, mean(X, dims = 1), y1_itr, mean(y1_itr)) + @test_throws ArgumentError Statistics.covm(x1_itr, mean(x1_itr), Y, mean(Y, dims = 1)) + @test_throws ArgumentError cov(X, y1_itr) + @test_throws ArgumentError cov(x1_itr, Y) + end + @testset "floating point accuracy for `cov` of large numbers" begin A = [4.0, 7.0, 13.0, 16.0] C = A .+ 1.0e10 @@ -475,35 +477,20 @@ end @inferred cor(x1_itr, y1_itr) if vd == 1 - C = cor(x1, Y) - C_itr = Statistics.corm(x1_itr, mean(x1), Y, mean(Y, dims=1)) - @test C == C_itr == Statistics.corm(x1, mean(x1), Y, mean(Y, dims=1)) + @test cor(x1, Y) == Statistics.corm(x1, mean(x1), Y, mean(Y, dims=1)) end C = zm ? Statistics.corm(x1, 0, Y, 0, vd) : cor(x1, Y, dims=vd) @test size(C) == (1, k) @test vec(C) ≈ Cxy[1,:] @inferred cor(x1, Y, dims=vd) - if vd == 1 - @inferred cor(x1, Y) - end if vd == 1 - C = cor(X, y1) - C_itr = cor(X, y1_itr) - @test C == C_itr == Statistics.corm(X, mean(X, dims=1), y1, mean(y1)) + @test cor(X, y1) == Statistics.corm(X, mean(X, dims=1), y1, mean(y1)) end C = zm ? Statistics.corm(X, 0, y1, 0, vd) : cor(X, y1, dims=vd) - @test size(C) == (k, 1) @test vec(C) ≈ Cxy[:,1] - if vd == 1 - C_itr = zm ? Statistics.corm(X, 0, y1_itr, 0) : cor(X, y1_itr) - @test C_itr == C - end @inferred cor(X, y1, dims=vd) - if vd == 1 - @inferred cor(X, y1_itr) - end @test cor(X, Y) == Statistics.corm(X, mean(X, dims=1), Y, mean(Y, dims=1)) C = zm ? Statistics.corm(X, 0, Y, 0, vd) : cor(X, Y, dims=vd) @@ -522,6 +509,15 @@ end @test cor(tmp, tmp) <= 1.0 @test cor(tmp, tmp2) <= 1.0 end + + @testset "errors for `cor` with non-array iterators and matrices" begin + x1_itr = (xi for xi in X[:, 1]) + y1_itr = skipmissing(Y[:, 1]) + @test_throws ArgumentError Statistics.corm(X, mean(X, dims = 1), y1_itr, mean(y1_itr)) + @test_throws ArgumentError Statistics.corm(x1_itr, mean(x1_itr), Y, mean(Y, dims = 1)) + @test_throws ArgumentError cor(X, y1_itr) + @test_throws ArgumentError cor(x1_itr, Y) + end end @testset "quantile" begin From 2b289087cac3a053ab144af9243125bd440d57eb Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 30 Apr 2020 09:45:38 -0400 Subject: [PATCH 11/19] Add _return_one method for DRY --- src/Statistics.jl | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 5603b6c2..8bb51017 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -680,15 +680,19 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray) return C end -# corzm (non-exported, with centered data) - -function corzm(itr::Any) +function _return_one(itr) if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr)) return one(real(eltype(itr))) else return one(real(eltype(collect(itr)))) end end + +# corzm (non-exported, with centered data) + +function corzm(itr::Any) + _return_one(itr) +end corzm(x::AbstractVector{T}) where {T} = one(real(T)) function corzm(x::AbstractMatrix, vardim::Int=1) c = unscaled_covzm(x, vardim) @@ -704,11 +708,7 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) = # corm function corm(itr::Any, itrmean) - if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr)) - return one(real(eltype(itr))) - else - return one(real(eltype(collect(itr)))) - end + _return_one(itr) end corm(x::AbstractVector{T}, xmean) where {T} = one(real(T)) corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim) @@ -749,11 +749,7 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) = Return the number one. """ function cor(itr::Any) - if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr)) - return one(real(eltype(itr))) - else - return one(real(eltype(collect(itr)))) - end + _return_one(itr) end cor(x::AbstractVector) = one(real(eltype(x))) From e42c0b0229ede7babe19361c87ccc0903e762ea7 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 30 Apr 2020 09:46:29 -0400 Subject: [PATCH 12/19] Put pack uuid --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 21bd1852..12c96773 100644 --- a/Project.toml +++ b/Project.toml @@ -1,4 +1,5 @@ name = "Statistics" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" From cb3020cd5532b4c31f380235ac43cff15b5d6868 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 30 Apr 2020 10:17:24 -0400 Subject: [PATCH 13/19] _lazycollect solution --- Project.toml | 1 - src/Statistics.jl | 11 +++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index 12c96773..21bd1852 100644 --- a/Project.toml +++ b/Project.toml @@ -1,5 +1,4 @@ name = "Statistics" -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/src/Statistics.jl b/src/Statistics.jl index 8bb51017..c47ed4ec 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -479,6 +479,9 @@ end _vmean(x::AbstractVector, vardim::Int) = mean(x) _vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim) +_lazycollect(x::Any) = collect(x) +_lazycollect(x::AbstractVector) = x + function _matrix_error(x, y, fun) if x isa AbstractMatrix s = "$(fun)(x::AbstractMatrix, y::Any) is currently not allowed. " * @@ -531,7 +534,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true) end function covzm(x::Any, y::Any; corrected::Bool = true) _matrix_error(x, y, covzm) - covzm(collect(x), collect(y); corrected = corrected) + covzm(_lazycollect(x), _lazycollect(y); corrected = corrected) end covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = unscaled_covzm(x, y) / (length(x) - Int(corrected)) @@ -714,7 +717,7 @@ corm(x::AbstractVector{T}, xmean) where {T} = one(real(T)) corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim) function corm(x::Any, mx, y::Any, my) _matrix_error(x, mx, y, my, corm) - corm(collect(x), mx, collect(y), my) + corm(_lazycollect(x), mx, _lazycollect(y), my) end function corm(x::AbstractVector, mx, y::AbstractVector, my) require_one_based_indexing(x, y) @@ -767,8 +770,8 @@ Compute the Pearson correlation between iterators `x` and `y`. """ function cor(x::Any, y::Any) _matrix_error(x, y, cor) - cx = collect(x) - cy = collect(y) + cx = _lazycollect(x) + cy = _lazycollect(y) corm(cx, mean(cx), cy, mean(cy)) end From 36734bf4cec23ccd4fcc6856d56f3026034d5743 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Thu, 30 Apr 2020 11:49:54 -0400 Subject: [PATCH 14/19] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/Statistics.jl | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index c47ed4ec..20173900 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -693,9 +693,7 @@ end # corzm (non-exported, with centered data) -function corzm(itr::Any) - _return_one(itr) -end +corzm(itr::Any) = _return_one(itr) corzm(x::AbstractVector{T}) where {T} = one(real(T)) function corzm(x::AbstractMatrix, vardim::Int=1) c = unscaled_covzm(x, vardim) @@ -710,9 +708,7 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) = # corm -function corm(itr::Any, itrmean) - _return_one(itr) -end +corm(itr::Any, itrmean) = _return_one(itr) corm(x::AbstractVector{T}, xmean) where {T} = one(real(T)) corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim) function corm(x::Any, mx, y::Any, my) @@ -751,9 +747,7 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) = Return the number one. """ -function cor(itr::Any) - _return_one(itr) -end +cor(itr::Any) = _return_one(itr) cor(x::AbstractVector) = one(real(eltype(x))) """ From 2f1c4041bf6c4c2f774fd3f2cbcb6de231f2b179 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 30 Apr 2020 12:07:00 -0400 Subject: [PATCH 15/19] change name of lazy_collect --- src/Statistics.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 20173900..6a54fa21 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -479,8 +479,8 @@ end _vmean(x::AbstractVector, vardim::Int) = mean(x) _vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim) -_lazycollect(x::Any) = collect(x) -_lazycollect(x::AbstractVector) = x +_collect_if_itr(x::Any) = collect(x) +_collect_if_itr(x::AbstractVector) = x function _matrix_error(x, y, fun) if x isa AbstractMatrix @@ -534,7 +534,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true) end function covzm(x::Any, y::Any; corrected::Bool = true) _matrix_error(x, y, covzm) - covzm(_lazycollect(x), _lazycollect(y); corrected = corrected) + covzm(_collect_if_itr(x), _collect_if_itr(y); corrected = corrected) end covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = unscaled_covzm(x, y) / (length(x) - Int(corrected)) @@ -713,7 +713,7 @@ corm(x::AbstractVector{T}, xmean) where {T} = one(real(T)) corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim) function corm(x::Any, mx, y::Any, my) _matrix_error(x, mx, y, my, corm) - corm(_lazycollect(x), mx, _lazycollect(y), my) + corm(_collect_if_itr(x), mx, _collect_if_itr(y), my) end function corm(x::AbstractVector, mx, y::AbstractVector, my) require_one_based_indexing(x, y) @@ -764,8 +764,8 @@ Compute the Pearson correlation between iterators `x` and `y`. """ function cor(x::Any, y::Any) _matrix_error(x, y, cor) - cx = _lazycollect(x) - cy = _lazycollect(y) + cx = _collect_if_itr(x) + cy = _collect_if_itr(y) corm(cx, mean(cx), cy, mean(cy)) end From 4279703d04d26c803634c88ff35e3ce89f76adb2 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 30 Apr 2020 12:08:22 -0400 Subject: [PATCH 16/19] get rid of unnecesary _vmean --- src/Statistics.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Statistics.jl b/src/Statistics.jl index 6a54fa21..67aa5559 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -602,8 +602,8 @@ function cov(x::Any, y::Any; corrected::Bool=true) _matrix_error(x, y, cov) cx = collect(x) cy = collect(y) - meanx = _vmean(cx, 1) - meany = _vmean(cy, 1) + meanx = mean(cx) + meany = mean(cy) dx = map!(t -> t - meanx, cx, cx) dy = map!(t -> t - meany, cy, cy) covzm(dx, dy; corrected=corrected) From b9f8f96fcf0a6fe3e04566c834d51773b950459d Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 30 Apr 2020 12:33:43 -0400 Subject: [PATCH 17/19] simplify error, add back uuid --- Project.toml | 1 + src/Statistics.jl | 36 ++++++++++++------------------------ 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/Project.toml b/Project.toml index 21bd1852..12c96773 100644 --- a/Project.toml +++ b/Project.toml @@ -1,4 +1,5 @@ name = "Statistics" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/src/Statistics.jl b/src/Statistics.jl index 67aa5559..ea68e89b 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -482,27 +482,15 @@ _vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim) _collect_if_itr(x::Any) = collect(x) _collect_if_itr(x::AbstractVector) = x -function _matrix_error(x, y, fun) - if x isa AbstractMatrix - s = "$(fun)(x::AbstractMatrix, y::Any) is currently not allowed. " * - "Use $(fun)(x, collect(y)) instead" +function _matrix_error(x, y) + if x isa AbstractVector || y isa AbstractVector + + elseif x isa AbstractArray || y isa AbstractArray + s = "Covariance and correlation between a non-vector array and a non-vector iterator" * + "is currently disallowed. `collect` one of the arguments." throw(ArgumentError(s)) - elseif y isa AbstractMatrix - s = "$(fun)(x::Any, y::AbstractMatrix) is currently not allowed. " * - "Use $(fun)(collect(x), y) instead" - throw(ArgumentError(s)) - end -end + else -function _matrix_error(x, mx, y, my, fun) - if x isa AbstractMatrix || y isa AbstractMatrix - s = "$(fun)(x::$(typeof(x)), mx, y::Any, my) is currently not allowed. " * - "Use $(fun)(x, mx, collect(y), my) instead" - throw(ArgumentError(s)) - elseif y isa AbstractMatrix - s = "$(fun)(x::Any, mx, y::$(typeof(y)), my) is currently not allowed. " * - "Use $(fun)(collect(x), mx, y, my) inistead." - throw(ArgumentError(s)) end end @@ -533,7 +521,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true) return A end function covzm(x::Any, y::Any; corrected::Bool = true) - _matrix_error(x, y, covzm) + _matrix_error(x, y) covzm(_collect_if_itr(x), _collect_if_itr(y); corrected = corrected) end covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) = @@ -557,7 +545,7 @@ covm(x::AbstractVector, xmean; corrected::Bool=true) = covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) = covzm(x .- xmean, vardim; corrected=corrected) function covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) - _matrix_error(x, xmean, y, ymean, covm) + _matrix_error(x, y) covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected) end covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) = @@ -599,7 +587,7 @@ default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` `false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``. """ function cov(x::Any, y::Any; corrected::Bool=true) - _matrix_error(x, y, cov) + _matrix_error(x, y) cx = collect(x) cy = collect(y) meanx = mean(cx) @@ -712,7 +700,7 @@ corm(itr::Any, itrmean) = _return_one(itr) corm(x::AbstractVector{T}, xmean) where {T} = one(real(T)) corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim) function corm(x::Any, mx, y::Any, my) - _matrix_error(x, mx, y, my, corm) + _matrix_error(x, y) corm(_collect_if_itr(x), mx, _collect_if_itr(y), my) end function corm(x::AbstractVector, mx, y::AbstractVector, my) @@ -763,7 +751,7 @@ cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims) Compute the Pearson correlation between iterators `x` and `y`. """ function cor(x::Any, y::Any) - _matrix_error(x, y, cor) + _matrix_error(x, y) cx = _collect_if_itr(x) cy = _collect_if_itr(y) From 14c570160fcae73a7db3f3b3a321b7ae81a82cdf Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 30 Apr 2020 12:36:14 -0400 Subject: [PATCH 18/19] Futher simplify error --- Project.toml | 1 - src/Statistics.jl | 8 ++------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index 12c96773..21bd1852 100644 --- a/Project.toml +++ b/Project.toml @@ -1,5 +1,4 @@ name = "Statistics" -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/src/Statistics.jl b/src/Statistics.jl index ea68e89b..e2cb52ba 100644 --- a/src/Statistics.jl +++ b/src/Statistics.jl @@ -483,15 +483,11 @@ _collect_if_itr(x::Any) = collect(x) _collect_if_itr(x::AbstractVector) = x function _matrix_error(x, y) - if x isa AbstractVector || y isa AbstractVector - - elseif x isa AbstractArray || y isa AbstractArray + if !(x isa AbstractVector || y isa AbstractVector) && (x isa AbstractArray || y isa AbstractArray) s = "Covariance and correlation between a non-vector array and a non-vector iterator" * "is currently disallowed. `collect` one of the arguments." throw(ArgumentError(s)) - else - - end + end end # core functions From 11bd8f5fa801512d7e70344a61cacf93398424e7 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Thu, 30 Apr 2020 12:36:27 -0400 Subject: [PATCH 19/19] add back uuid --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 21bd1852..12c96773 100644 --- a/Project.toml +++ b/Project.toml @@ -1,4 +1,5 @@ name = "Statistics" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"