From 1cdf046c7750856fc917b56fbc39b575883a6411 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Mon, 27 Apr 2020 11:40:33 -0400
Subject: [PATCH 01/19] Initial commit, `collects` everywhere
---
Project.toml | 1 -
src/Statistics.jl | 59 ++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 56 insertions(+), 4 deletions(-)
diff --git a/Project.toml b/Project.toml
index 12c96773..21bd1852 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,5 +1,4 @@
name = "Statistics"
-uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 977ae8a6..d9726e5f 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -494,7 +494,7 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(x, adjoint(y)))
# covzm (with centered data)
-
+covzm(itr::Any; corrected::Bool = true) = covzm(collect(itr); corrected = corrected)
covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
C = unscaled_covzm(x, vardim)
@@ -504,6 +504,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
A .= A .* b
return A
end
+covzm(x::Any, y::Any; corrected::Bool = true) = covzm(collect(x), collect(y); corrected = corrected)
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true)
@@ -518,16 +519,31 @@ end
# covm (with provided mean)
## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector}
## which can't be handled by broadcast
+covm(itr::Any, itrmean; corrected::Bool = true) = covm(collect(itr), itrmean)
covm(x::AbstractVector, xmean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x); corrected=corrected)
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, vardim; corrected=corrected)
+covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) =
+ covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, y .- ymean, vardim; corrected=corrected)
# cov (API)
+"""
+ cov(itr::Any; corrected::Bool=true)
+
+Compute the variance of the iterator `itr`. If `corrected` is `true` (the default) then the sum
+is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where
+`n = length(collect(itr))`.
+"""
+function cov(itr::Any; corrected::Bool = true)
+ x = collect(itr)
+ covm(x, mean(x); corrected = corrected)
+end
+
"""
cov(x::AbstractVector; corrected::Bool=true)
@@ -546,6 +562,22 @@ if `corrected` is `false` where `n = size(X, dims)`.
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) =
covm(X, _vmean(X, dims), dims; corrected=corrected)
+
+"""
+ cov(x::Any, y::Any; corrected::Bool=true)
+
+Compute the covariance between the iterators `x` and `y`. If `corrected` is `true` (the
+default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where
+``*`` denotes the complex conjugate and `n = length(collect(x)) = length(collect(y))`. If `corrected` is
+`false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``.
+"""
+function cov(x::Any, y::Any; corrected::Bool = true)
+ cx = collect(x)
+ cy = collect(y)
+
+ covm(cx, mean(cx), cy, mean(cy); corrected = corrected)
+end
+
"""
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true)
@@ -630,7 +662,7 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray)
end
# corzm (non-exported, with centered data)
-
+corzm(x::Any) = corzm(collect(x))
corzm(x::AbstractVector{T}) where {T} = one(real(T))
function corzm(x::AbstractMatrix, vardim::Int=1)
c = unscaled_covzm(x, vardim)
@@ -644,9 +676,10 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim)))
# corm
-
+corm(x::Any, xmean) = corzm(collect(x), xmean)
corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
+corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my)
function corm(x::AbstractVector, mx, y::AbstractVector, my)
require_one_based_indexing(x, y)
n = length(x)
@@ -674,6 +707,14 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
corzm(x .- xmean, y .- ymean, vardim)
# cor
+"""
+ cor(itr::Any)
+
+Return the number one.
+"""
+cor(itr::Any) = one(real(eltype(collect(x))))
+
+
"""
cor(x::AbstractVector)
@@ -688,6 +729,18 @@ Compute the Pearson correlation matrix of the matrix `X` along the dimension `di
"""
cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims)
+"""
+ cor(x::AbstractVector, y::AbstractVector)
+
+Compute the Pearson correlation between the vectors `x` and `y`.
+"""
+function cor(x::Any, y::Any)
+ cx = collect(x)
+ cy = collect(y)
+
+ corm(cx, mean(cx), cy, mean(cy))
+end
+
"""
cor(x::AbstractVector, y::AbstractVector)
From f3e9641f928b4e024f0fdb3f6af7a741bb114472 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Mon, 27 Apr 2020 12:08:57 -0400
Subject: [PATCH 02/19] Add tests
---
src/Statistics.jl | 15 ++++++++-------
test/runtests.jl | 17 ++++++++++++++---
2 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index d9726e5f..480bf5f0 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -519,7 +519,8 @@ end
# covm (with provided mean)
## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector}
## which can't be handled by broadcast
-covm(itr::Any, itrmean; corrected::Bool = true) = covm(collect(itr), itrmean)
+covm(itr::Any, itrmean; corrected::Bool=true) =
+ @show covm(collect(itr), itrmean; corrected=corrected)
covm(x::AbstractVector, xmean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x); corrected=corrected)
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
@@ -539,9 +540,9 @@ Compute the variance of the iterator `itr`. If `corrected` is `true` (the defaul
is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where
`n = length(collect(itr))`.
"""
-function cov(itr::Any; corrected::Bool = true)
+function cov(itr::Any; corrected::Bool=true)
x = collect(itr)
- covm(x, mean(x); corrected = corrected)
+ covm(x, mean(x); corrected=corrected)
end
"""
@@ -571,11 +572,11 @@ default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``
``*`` denotes the complex conjugate and `n = length(collect(x)) = length(collect(y))`. If `corrected` is
`false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``.
"""
-function cov(x::Any, y::Any; corrected::Bool = true)
+function cov(x::Any, y::Any; corrected::Bool=true)
cx = collect(x)
cy = collect(y)
- covm(cx, mean(cx), cy, mean(cy); corrected = corrected)
+ covm(cx, mean(cx), cy, mean(cy); corrected=corrected)
end
"""
@@ -676,7 +677,7 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim)))
# corm
-corm(x::Any, xmean) = corzm(collect(x), xmean)
+corm(x::Any, xmean) = corm(collect(x), xmean)
corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my)
@@ -712,7 +713,7 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
Return the number one.
"""
-cor(itr::Any) = one(real(eltype(collect(x))))
+cor(itr::Any) = one(real(eltype(collect(itr))))
"""
diff --git a/test/runtests.jl b/test/runtests.jl
index bc33cf57..b8aff9a3 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -339,11 +339,16 @@ Y = [6.0 2.0;
x1 = vec(X[1,:])
y1 = vec(Y[1,:])
end
+ @show x1
+ x1_itr = (x1i for x1i in x1)
+ y1_itr = skipmissing(y1)
c = zm ? Statistics.covm(x1, 0, corrected=cr) :
cov(x1, corrected=cr)
+ c_itr = zm ? Statistics.covm(x1_itr, 0, corrected=cr) :
+ cov(x1_itr, corrected=cr)
@test isa(c, Float64)
- @test c ≈ Cxx[1,1]
+ @test c ≈ c_itr ≈ Cxx[1,1]
@inferred cov(x1, corrected=cr)
@test cov(X) == Statistics.covm(X, mean(X, dims=1))
@@ -356,6 +361,8 @@ Y = [6.0 2.0;
@test cov(x1, y1) == Statistics.covm(x1, mean(x1), y1, mean(y1))
c = zm ? Statistics.covm(x1, 0, y1, 0, corrected=cr) :
cov(x1, y1, corrected=cr)
+ c_itr = zm ? Statistics.covm(x1_itr, 0, y1_itr, 0, corrected=cr) :
+ cov(x1_itr, y1_itr, corrected=cr)
@test isa(c, Float64)
@test c ≈ Cxy[1,1]
@inferred cov(x1, y1, corrected=cr)
@@ -426,10 +433,13 @@ end
x1 = vec(X[1,:])
y1 = vec(Y[1,:])
end
+ x1_itr = (x1i for x1i in x1)
+ y1_itr = skipmissing(y1)
c = zm ? Statistics.corm(x1, 0) : cor(x1)
+ c_itr = zm ? Statistics.corm(x1_itr, 0) : cor(x1_itr)
@test isa(c, Float64)
- @test c ≈ Cxx[1,1]
+ @test c ≈ c_itr ≈ Cxx[1,1]
@inferred cor(x1)
@test cor(X) == Statistics.corm(X, mean(X, dims=1))
@@ -440,8 +450,9 @@ end
@test cor(x1, y1) == Statistics.corm(x1, mean(x1), y1, mean(y1))
c = zm ? Statistics.corm(x1, 0, y1, 0) : cor(x1, y1)
+ c_itr = zm ? Statistics.corm(x1_itr, 0, y1_itr, 0) : cor(x1_itr, y1_itr)
@test isa(c, Float64)
- @test c ≈ Cxy[1,1]
+ @test c ≈ c_itr ≈ Cxy[1,1]
@inferred cor(x1, y1)
if vd == 1
From 2f9c4f8f28f0a5d989ca8a5174301b70922a3a12 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Tue, 28 Apr 2020 11:20:04 -0400
Subject: [PATCH 03/19] Respond to comments
---
src/Statistics.jl | 57 +++++++++++++++++++++--------------------------
1 file changed, 25 insertions(+), 32 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 480bf5f0..6f1e0dee 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -520,7 +520,7 @@ end
## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector}
## which can't be handled by broadcast
covm(itr::Any, itrmean; corrected::Bool=true) =
- @show covm(collect(itr), itrmean; corrected=corrected)
+ covm(collect(itr), itrmean; corrected=corrected)
covm(x::AbstractVector, xmean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x); corrected=corrected)
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
@@ -538,19 +538,13 @@ covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corr
Compute the variance of the iterator `itr`. If `corrected` is `true` (the default) then the sum
is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where
-`n = length(collect(itr))`.
+``n`` is the number of elements.
"""
function cov(itr::Any; corrected::Bool=true)
x = collect(itr)
- covm(x, mean(x); corrected=corrected)
+ meanx = mean(x)  # may be wider than eltype(x), e.g. Float64 for Int data
+ covzm(map(t -> t - meanx, x); corrected=corrected)  # map, not map!: deviations may not fit in x
end
-
-"""
- cov(x::AbstractVector; corrected::Bool=true)
-
-Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum
-is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n = length(x)`.
-"""
cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected=corrected)
"""
@@ -569,24 +563,18 @@ cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) =
Compute the covariance between the iterators `x` and `y`. If `corrected` is `true` (the
default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where
-``*`` denotes the complex conjugate and `n = length(collect(x)) = length(collect(y))`. If `corrected` is
+``*`` denotes the complex conjugate and ``n`` the number of elements. If `corrected` is
`false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``.
"""
function cov(x::Any, y::Any; corrected::Bool=true)
cx = collect(x)
cy = collect(y)
-
- covm(cx, mean(cx), cy, mean(cy); corrected=corrected)
+ meanx = mean(cx)
+ meany = mean(cy)
+ dx = map!(t -> t - meanx, cx, cx)
+ dy = map!(t -> t - meany, cy, cy)
+ covzm(dx, dy; corrected=corrected)
end
-
-"""
- cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true)
-
-Compute the covariance between the vectors `x` and `y`. If `corrected` is `true` (the
-default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where
-``*`` denotes the complex conjugate and `n = length(x) = length(y)`. If `corrected` is
-`false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``.
-"""
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
covm(x, mean(x), y, mean(y); corrected=corrected)
@@ -663,7 +651,13 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray)
end
# corzm (non-exported, with centered data)
-corzm(x::Any) = corzm(collect(x))
+function corzm(itr::Any)
+ if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr))
+ return one(real(eltype(itr)))
+ else
+ return one(real(eltype(collect(itr))))
+ end
+end
corzm(x::AbstractVector{T}) where {T} = one(real(T))
function corzm(x::AbstractMatrix, vardim::Int=1)
c = unscaled_covzm(x, vardim)
@@ -713,15 +707,14 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
Return the number one.
"""
-cor(itr::Any) = one(real(eltype(collect(itr))))
-
-
-"""
- cor(x::AbstractVector)
-
-Return the number one.
-"""
-cor(x::AbstractVector) = one(real(eltype(x)))
+function cor(itr::Any)
+ if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr))
+ return one(real(eltype(itr)))
+ else
+ return one(real(eltype(collect(itr))))
+ end
+end
+cor(x::AbstractVector{T}) where {T} = one(real(T))
"""
cor(X::AbstractMatrix; dims::Int=1)
From 52c18ea3b71f20090c608c11b732dae41d298599 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Tue, 28 Apr 2020 13:30:53 -0400
Subject: [PATCH 04/19] Apply suggestions from code review
Co-Authored-By: Milan Bouchet-Valat
---
src/Statistics.jl | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 6f1e0dee..56578b64 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -504,7 +504,8 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
A .= A .* b
return A
end
-covzm(x::Any, y::Any; corrected::Bool = true) = covzm(collect(x), collect(y); corrected = corrected)
+covzm(x::Any, y::Any; corrected::Bool = true) =
+ covzm(collect(x), collect(y); corrected = corrected)
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true)
@@ -714,7 +715,7 @@ function cor(itr::Any)
return one(real(eltype(collect(itr))))
end
end
-cor(x::AbstractVector{T}) where {T} = one(real(T))
+cor(x::AbstractVector) = one(real(eltype(x)))
"""
cor(X::AbstractMatrix; dims::Int=1)
From 4620247ed19c0664129683ab95f9bbd849e3fcc9 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Tue, 28 Apr 2020 13:36:32 -0400
Subject: [PATCH 05/19] more comments -- ready for review
---
src/Statistics.jl | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 56578b64..721ee3ca 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -672,7 +672,13 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim)))
# corm
-corm(x::Any, xmean) = corm(collect(x), xmean)
+function corm(itr::Any, itrmean)
+ if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr))
+ return one(real(eltype(itr)))
+ else
+ return one(real(eltype(collect(itr))))
+ end
+end
corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my)
From b86ddba78e224d146f7b0d4c050fb4d930462967 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Tue, 28 Apr 2020 13:38:43 -0400
Subject: [PATCH 06/19] fix deleted line
---
src/Statistics.jl | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 721ee3ca..c828272e 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -494,6 +494,7 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(x, adjoint(y)))
# covzm (with centered data)
+
covzm(itr::Any; corrected::Bool = true) = covzm(collect(itr); corrected = corrected)
covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
From 0221557c66f70fe714047339bc4354020c21e951 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Wed, 29 Apr 2020 11:07:00 -0400
Subject: [PATCH 07/19] many more tests
---
src/Statistics.jl | 12 ++++++------
test/runtests.jl | 38 +++++++++++++++++++++++++++++---------
2 files changed, 35 insertions(+), 15 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index c828272e..4955ac26 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -528,7 +528,7 @@ covm(x::AbstractVector, xmean; corrected::Bool=true) =
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, vardim; corrected=corrected)
covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) =
- covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
+ covzm(x .- xmean, y .- ymean; corrected=corrected)
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) =
@@ -571,10 +571,10 @@ default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``
function cov(x::Any, y::Any; corrected::Bool=true)
cx = collect(x)
cy = collect(y)
- meanx = mean(cx)
- meany = mean(cy)
- dx = map!(t -> t - meanx, cx, cx)
- dy = map!(t -> t - meany, cy, cy)
+ meanx = _vmean(cx, 1)
+ meany = _vmean(cy, 1)
+ dx = x .- meanx
+ dy = y .- meany
covzm(dx, dy; corrected=corrected)
end
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
@@ -740,7 +740,7 @@ function cor(x::Any, y::Any)
cx = collect(x)
cy = collect(y)
- corm(cx, mean(cx), cy, mean(cy))
+ corm(cx, _vmean(cx, 1), cy, _vmean(cy, 1))
end
"""
diff --git a/test/runtests.jl b/test/runtests.jl
index b8aff9a3..98b5fbf8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -339,7 +339,6 @@ Y = [6.0 2.0;
x1 = vec(X[1,:])
y1 = vec(Y[1,:])
end
- @show x1
x1_itr = (x1i for x1i in x1)
y1_itr = skipmissing(y1)
@@ -348,7 +347,7 @@ Y = [6.0 2.0;
c_itr = zm ? Statistics.covm(x1_itr, 0, corrected=cr) :
cov(x1_itr, corrected=cr)
@test isa(c, Float64)
- @test c ≈ c_itr ≈ Cxx[1,1]
+ @test c == c_itr == Cxx[1,1]
@inferred cov(x1, corrected=cr)
@test cov(X) == Statistics.covm(X, mean(X, dims=1))
@@ -363,21 +362,29 @@ Y = [6.0 2.0;
cov(x1, y1, corrected=cr)
c_itr = zm ? Statistics.covm(x1_itr, 0, y1_itr, 0, corrected=cr) :
cov(x1_itr, y1_itr, corrected=cr)
+ c_itrx = zm ? Statistics.covm(x1_itr, 0, y1, 0, corrected=cr) :
+ cov(x1_itr, y1, corrected=cr)
+ c_itry = zm ? Statistics.covm(x1, 0, y1_itr, 0, corrected=cr) :
+ cov(x1, y1_itr, corrected=cr)
@test isa(c, Float64)
- @test c ≈ Cxy[1,1]
+ @test c == c_itr == c_itrx == c_itry == Cxy[1,1]
@inferred cov(x1, y1, corrected=cr)
if vd == 1
- @test cov(x1, Y) == Statistics.covm(x1, mean(x1), Y, mean(Y, dims=1))
+ C = cov(x1, Y)
+ C_itr = cov(x1_itr, Y)
+ @test C == C_itr == Statistics.covm(x1, mean(x1), Y, mean(Y, dims=1))
end
C = zm ? Statistics.covm(x1, 0, Y, 0, vd, corrected=cr) :
- cov(x1, Y, dims=vd, corrected=cr)
+ cov(x1, Y, dims=vd, corrected=cr)
@test size(C) == (1, k)
@test vec(C) ≈ Cxy[1,:]
@inferred cov(x1, Y, dims=vd, corrected=cr)
if vd == 1
- @test cov(X, y1) == Statistics.covm(X, mean(X, dims=1), y1, mean(y1))
+ C = cov(X, y1)
+ C_itr = cov(X, y1_itr)
+ @test C == C_itr == Statistics.covm(X, mean(X, dims=1), y1, mean(y1))
end
C = zm ? Statistics.covm(X, 0, y1, 0, vd, corrected=cr) :
cov(X, y1, dims=vd, corrected=cr)
@@ -451,12 +458,17 @@ end
@test cor(x1, y1) == Statistics.corm(x1, mean(x1), y1, mean(y1))
c = zm ? Statistics.corm(x1, 0, y1, 0) : cor(x1, y1)
c_itr = zm ? Statistics.corm(x1_itr, 0, y1_itr, 0) : cor(x1_itr, y1_itr)
+ c_itrx = zm ? Statistics.corm(x1_itr, 0, y1, 0) : cor(x1_itr, y1)
+ c_itry = zm ? Statistics.corm(x1, 0, y1_itr, 0) : cor(x1, y1_itr)
+
@test isa(c, Float64)
- @test c ≈ c_itr ≈ Cxy[1,1]
+ @test c == c_itr == c_itrx == c_itry ≈ Cxy[1,1]
@inferred cor(x1, y1)
if vd == 1
- @test cor(x1, Y) == Statistics.corm(x1, mean(x1), Y, mean(Y, dims=1))
+ C = cor(x1, Y)
+ C_itr = Statistics.corm(x1_itr, mean(x1), Y, mean(Y, dims=1))
+ @test C == C_itr == Statistics.corm(x1, mean(x1), Y, mean(Y, dims=1))
end
C = zm ? Statistics.corm(x1, 0, Y, 0, vd) : cor(x1, Y, dims=vd)
@test size(C) == (1, k)
@@ -464,11 +476,19 @@ end
@inferred cor(x1, Y, dims=vd)
if vd == 1
- @test cor(X, y1) == Statistics.corm(X, mean(X, dims=1), y1, mean(y1))
+ C = cor(X, y1)
+ C_itr = cor(X, y1_itr)
+ @test C == C_itr == Statistics.corm(X, mean(X, dims=1), y1, mean(y1))
end
+ println("zm = $zm")
C = zm ? Statistics.corm(X, 0, y1, 0, vd) : cor(X, y1, dims=vd)
+
@test size(C) == (k, 1)
@test vec(C) ≈ Cxy[:,1]
+ if vd == 1
+ C_itr = zm ? Statistics.corm(X, 0, y1_itr, 0) : cor(X, y1_itr)
+ @test C_itr == C
+ end
@inferred cor(X, y1, dims=vd)
@test cor(X, Y) == Statistics.corm(X, mean(X, dims=1), Y, mean(Y, dims=1))
From e3bc3cc80001445e5c00cb4198bb8f6aa048f760 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Wed, 29 Apr 2020 12:56:21 -0400
Subject: [PATCH 08/19] Apply suggestions from code review
Co-Authored-By: Milan Bouchet-Valat
---
src/Statistics.jl | 7 ++++---
test/runtests.jl | 3 +--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 4955ac26..0d17a528 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -559,7 +559,6 @@ if `corrected` is `false` where `n = size(X, dims)`.
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) =
covm(X, _vmean(X, dims), dims; corrected=corrected)
-
"""
cov(x::Any, y::Any; corrected::Bool=true)
@@ -653,6 +652,7 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray)
end
# corzm (non-exported, with centered data)
+
function corzm(itr::Any)
if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr))
return one(real(eltype(itr)))
@@ -673,6 +673,7 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim)))
# corm
+
function corm(itr::Any, itrmean)
if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr))
return one(real(eltype(itr)))
@@ -732,9 +733,9 @@ Compute the Pearson correlation matrix of the matrix `X` along the dimension `di
cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims)
"""
- cor(x::AbstractVector, y::AbstractVector)
+ cor(x::Any, y::Any)
-Compute the Pearson correlation between the vectors `x` and `y`.
+Compute the Pearson correlation between iterators `x` and `y`.
"""
function cor(x::Any, y::Any)
cx = collect(x)
diff --git a/test/runtests.jl b/test/runtests.jl
index 98b5fbf8..aacec023 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -376,7 +376,7 @@ Y = [6.0 2.0;
@test C == C_itr == Statistics.covm(x1, mean(x1), Y, mean(Y, dims=1))
end
C = zm ? Statistics.covm(x1, 0, Y, 0, vd, corrected=cr) :
- cov(x1, Y, dims=vd, corrected=cr)
+ cov(x1, Y, dims=vd, corrected=cr)
@test size(C) == (1, k)
@test vec(C) ≈ Cxy[1,:]
@inferred cov(x1, Y, dims=vd, corrected=cr)
@@ -480,7 +480,6 @@ end
C_itr = cor(X, y1_itr)
@test C == C_itr == Statistics.corm(X, mean(X, dims=1), y1, mean(y1))
end
- println("zm = $zm")
C = zm ? Statistics.corm(X, 0, y1, 0, vd) : cor(X, y1, dims=vd)
@test size(C) == (k, 1)
From 3493ed20fbb37ed16a8ca2473a56561bce972f39 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Wed, 29 Apr 2020 13:09:22 -0400
Subject: [PATCH 09/19] Polish up tests
---
Project.toml | 1 +
test/runtests.jl | 17 ++++++++++++++++-
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/Project.toml b/Project.toml
index 21bd1852..12c96773 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,4 +1,5 @@
name = "Statistics"
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/test/runtests.jl b/test/runtests.jl
index 98b5fbf8..c036420b 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -349,6 +349,7 @@ Y = [6.0 2.0;
@test isa(c, Float64)
@test c == c_itr == Cxx[1,1]
@inferred cov(x1, corrected=cr)
+ @inferred cov(x1_itr, corrected=cr)
@test cov(X) == Statistics.covm(X, mean(X, dims=1))
C = zm ? Statistics.covm(X, 0, vd, corrected=cr) :
@@ -369,6 +370,7 @@ Y = [6.0 2.0;
@test isa(c, Float64)
@test c == c_itr == c_itrx == c_itry == Cxy[1,1]
@inferred cov(x1, y1, corrected=cr)
+ @inferred cov(x1_itr, y1_itr, corrected=cr)
if vd == 1
C = cov(x1, Y)
@@ -380,6 +382,9 @@ Y = [6.0 2.0;
@test size(C) == (1, k)
@test vec(C) ≈ Cxy[1,:]
@inferred cov(x1, Y, dims=vd, corrected=cr)
+ if vd == 1
+ @inferred cov(x1_itr, Y, corrected=cr)
+ end
if vd == 1
C = cov(X, y1)
@@ -391,7 +396,9 @@ Y = [6.0 2.0;
@test size(C) == (k, 1)
@test vec(C) ≈ Cxy[:,1]
@inferred cov(X, y1, dims=vd, corrected=cr)
-
+ if vd == 1
+ @inferred cov(X, y1_itr, corrected=cr)
+ end
@test cov(X, Y) == Statistics.covm(X, mean(X, dims=1), Y, mean(Y, dims=1))
C = zm ? Statistics.covm(X, 0, Y, 0, vd, corrected=cr) :
cov(X, Y, dims=vd, corrected=cr)
@@ -448,6 +455,7 @@ end
@test isa(c, Float64)
@test c ≈ c_itr ≈ Cxx[1,1]
@inferred cor(x1)
+ @inferred cor(x1_itr)
@test cor(X) == Statistics.corm(X, mean(X, dims=1))
C = zm ? Statistics.corm(X, 0, vd) : cor(X, dims=vd)
@@ -464,6 +472,7 @@ end
@test isa(c, Float64)
@test c == c_itr == c_itrx == c_itry ≈ Cxy[1,1]
@inferred cor(x1, y1)
+ @inferred cor(x1_itr, y1_itr)
if vd == 1
C = cor(x1, Y)
@@ -474,6 +483,9 @@ end
@test size(C) == (1, k)
@test vec(C) ≈ Cxy[1,:]
@inferred cor(x1, Y, dims=vd)
+ if vd == 1
+ @inferred cor(x1, Y)
+ end
if vd == 1
C = cor(X, y1)
@@ -490,6 +502,9 @@ end
@test C_itr == C
end
@inferred cor(X, y1, dims=vd)
+ if vd == 1
+ @inferred cor(X, y1_itr)
+ end
@test cor(X, Y) == Statistics.corm(X, mean(X, dims=1), Y, mean(Y, dims=1))
C = zm ? Statistics.corm(X, 0, Y, 0, vd) : cor(X, Y, dims=vd)
From 8b497454ac3934dacd74b8bac61353b3fcfb745f Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 30 Apr 2020 09:41:33 -0400
Subject: [PATCH 10/19] Errors with matrices
---
Project.toml | 1 -
src/Statistics.jl | 47 +++++++++++++++++++++++++++++++++++------
test/runtests.jl | 54 ++++++++++++++++++++++-------------------------
3 files changed, 65 insertions(+), 37 deletions(-)
diff --git a/Project.toml b/Project.toml
index 12c96773..21bd1852 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,5 +1,4 @@
name = "Statistics"
-uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 0d17a528..5603b6c2 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -479,6 +479,30 @@ end
_vmean(x::AbstractVector, vardim::Int) = mean(x)
_vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim)
+function _matrix_error(x, y, fun)
+ if x isa AbstractMatrix
+ s = "$(fun)(x::AbstractMatrix, y::Any) is currently not allowed. " *
+ "Use $(fun)(x, collect(y)) instead"
+ throw(ArgumentError(s))
+ elseif y isa AbstractMatrix
+ s = "$(fun)(x::Any, y::AbstractMatrix) is currently not allowed. " *
+ "Use $(fun)(collect(x), y) instead"
+ throw(ArgumentError(s))
+ end
+end
+
+function _matrix_error(x, mx, y, my, fun)  # guard for the with-mean methods: reject matrix/iterator mixes
+ if x isa AbstractMatrix  # matrix `x`: the non-array iterator is `y`
+ s = "$(fun)(x::$(typeof(x)), mx, y::Any, my) is currently not allowed. " *
+ "Use $(fun)(x, mx, collect(y), my) instead"
+ throw(ArgumentError(s))
+ elseif y isa AbstractMatrix  # matrix `y`: the non-array iterator is `x`
+ s = "$(fun)(x::Any, mx, y::$(typeof(y)), my) is currently not allowed. " *
+ "Use $(fun)(collect(x), mx, y, my) instead"
+ throw(ArgumentError(s))
+ end
+end
+
# core functions
unscaled_covzm(x::AbstractVector{<:Number}) = sum(abs2, x)
@@ -505,8 +529,10 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
A .= A .* b
return A
end
-covzm(x::Any, y::Any; corrected::Bool = true) =
+function covzm(x::Any, y::Any; corrected::Bool = true)
+ _matrix_error(x, y, covzm)
covzm(collect(x), collect(y); corrected = corrected)
+end
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true)
@@ -527,8 +553,10 @@ covm(x::AbstractVector, xmean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x); corrected=corrected)
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, vardim; corrected=corrected)
-covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) =
- covzm(x .- xmean, y .- ymean; corrected=corrected)
+function covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true)
+ _matrix_error(x, xmean, y, ymean, covm)
+ covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
+end
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) =
@@ -568,12 +596,13 @@ default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``
`false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``.
"""
function cov(x::Any, y::Any; corrected::Bool=true)
+ _matrix_error(x, y, cov)
cx = collect(x)
cy = collect(y)
meanx = _vmean(cx, 1)
meany = _vmean(cy, 1)
- dx = x .- meanx
- dy = y .- meany
+ dx = map(t -> t - meanx, cx)  # map, not map!: an Int-eltype cx cannot hold Float64 deviations
+ dy = map(t -> t - meany, cy)  # same for cy; avoids InexactError on integer iterators
covzm(dx, dy; corrected=corrected)
end
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
@@ -683,7 +712,10 @@ function corm(itr::Any, itrmean)
end
corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
-corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my)
+function corm(x::Any, mx, y::Any, my)
+ _matrix_error(x, mx, y, my, corm)
+ corm(collect(x), mx, collect(y), my)
+end
function corm(x::AbstractVector, mx, y::AbstractVector, my)
require_one_based_indexing(x, y)
n = length(x)
@@ -738,10 +770,11 @@ cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims)
Compute the Pearson correlation between iterators `x` and `y`.
"""
function cor(x::Any, y::Any)
+ _matrix_error(x, y, cor)
cx = collect(x)
cy = collect(y)
- corm(cx, _vmean(cx, 1), cy, _vmean(cy, 1))
+ corm(cx, mean(cx), cy, mean(cy))
end
"""
diff --git a/test/runtests.jl b/test/runtests.jl
index 31a7773a..19d24288 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -373,32 +373,23 @@ Y = [6.0 2.0;
@inferred cov(x1_itr, y1_itr, corrected=cr)
if vd == 1
- C = cov(x1, Y)
- C_itr = cov(x1_itr, Y)
- @test C == C_itr == Statistics.covm(x1, mean(x1), Y, mean(Y, dims=1))
+ @test cov(x1, Y) == Statistics.covm(x1, mean(x1), Y, mean(Y, dims=1))
end
C = zm ? Statistics.covm(x1, 0, Y, 0, vd, corrected=cr) :
cov(x1, Y, dims=vd, corrected=cr)
@test size(C) == (1, k)
@test vec(C) ≈ Cxy[1,:]
@inferred cov(x1, Y, dims=vd, corrected=cr)
- if vd == 1
- @inferred cov(x1_itr, Y, corrected=cr)
- end
if vd == 1
- C = cov(X, y1)
- C_itr = cov(X, y1_itr)
- @test C == C_itr == Statistics.covm(X, mean(X, dims=1), y1, mean(y1))
+ @test cov(X, y1) == Statistics.covm(X, mean(X, dims=1), y1, mean(y1))
end
C = zm ? Statistics.covm(X, 0, y1, 0, vd, corrected=cr) :
cov(X, y1, dims=vd, corrected=cr)
@test size(C) == (k, 1)
@test vec(C) ≈ Cxy[:,1]
@inferred cov(X, y1, dims=vd, corrected=cr)
- if vd == 1
- @inferred cov(X, y1_itr, corrected=cr)
- end
+
@test cov(X, Y) == Statistics.covm(X, mean(X, dims=1), Y, mean(Y, dims=1))
C = zm ? Statistics.covm(X, 0, Y, 0, vd, corrected=cr) :
cov(X, Y, dims=vd, corrected=cr)
@@ -407,6 +398,17 @@ Y = [6.0 2.0;
@inferred cov(X, Y, dims=vd, corrected=cr)
end
+ @testset "errors for `cov` with non-array iterators and matrices" begin
+ x1_itr = (xi for xi in X[:, 1])
+ y1_itr = skipmissing(Y[:, 1])
+ @test_throws ArgumentError Statistics.covzm(X, y1_itr)
+ @test_throws ArgumentError Statistics.covzm(x1_itr, Y)
+ @test_throws ArgumentError Statistics.covm(X, mean(X, dims = 1), y1_itr, mean(y1_itr))
+ @test_throws ArgumentError Statistics.covm(x1_itr, mean(x1_itr), Y, mean(Y, dims = 1))
+ @test_throws ArgumentError cov(X, y1_itr)
+ @test_throws ArgumentError cov(x1_itr, Y)
+ end
+
@testset "floating point accuracy for `cov` of large numbers" begin
A = [4.0, 7.0, 13.0, 16.0]
C = A .+ 1.0e10
@@ -475,35 +477,20 @@ end
@inferred cor(x1_itr, y1_itr)
if vd == 1
- C = cor(x1, Y)
- C_itr = Statistics.corm(x1_itr, mean(x1), Y, mean(Y, dims=1))
- @test C == C_itr == Statistics.corm(x1, mean(x1), Y, mean(Y, dims=1))
+ @test cor(x1, Y) == Statistics.corm(x1, mean(x1), Y, mean(Y, dims=1))
end
C = zm ? Statistics.corm(x1, 0, Y, 0, vd) : cor(x1, Y, dims=vd)
@test size(C) == (1, k)
@test vec(C) ≈ Cxy[1,:]
@inferred cor(x1, Y, dims=vd)
- if vd == 1
- @inferred cor(x1, Y)
- end
if vd == 1
- C = cor(X, y1)
- C_itr = cor(X, y1_itr)
- @test C == C_itr == Statistics.corm(X, mean(X, dims=1), y1, mean(y1))
+ @test cor(X, y1) == Statistics.corm(X, mean(X, dims=1), y1, mean(y1))
end
C = zm ? Statistics.corm(X, 0, y1, 0, vd) : cor(X, y1, dims=vd)
-
@test size(C) == (k, 1)
@test vec(C) ≈ Cxy[:,1]
- if vd == 1
- C_itr = zm ? Statistics.corm(X, 0, y1_itr, 0) : cor(X, y1_itr)
- @test C_itr == C
- end
@inferred cor(X, y1, dims=vd)
- if vd == 1
- @inferred cor(X, y1_itr)
- end
@test cor(X, Y) == Statistics.corm(X, mean(X, dims=1), Y, mean(Y, dims=1))
C = zm ? Statistics.corm(X, 0, Y, 0, vd) : cor(X, Y, dims=vd)
@@ -522,6 +509,15 @@ end
@test cor(tmp, tmp) <= 1.0
@test cor(tmp, tmp2) <= 1.0
end
+
+ @testset "errors for `cor` with non-array iterators and matrices" begin
+ x1_itr = (xi for xi in X[:, 1])
+ y1_itr = skipmissing(Y[:, 1])
+ @test_throws ArgumentError Statistics.corm(X, mean(X, dims = 1), y1_itr, mean(y1_itr))
+ @test_throws ArgumentError Statistics.corm(x1_itr, mean(x1_itr), Y, mean(Y, dims = 1))
+ @test_throws ArgumentError cor(X, y1_itr)
+ @test_throws ArgumentError cor(x1_itr, Y)
+ end
end
@testset "quantile" begin
From 2b289087cac3a053ab144af9243125bd440d57eb Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 30 Apr 2020 09:45:38 -0400
Subject: [PATCH 11/19] Add _return_one method for DRY
---
src/Statistics.jl | 22 +++++++++-------------
1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 5603b6c2..8bb51017 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -680,15 +680,19 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray)
return C
end
-# corzm (non-exported, with centered data)
-
-function corzm(itr::Any)
+function _return_one(itr)
if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr))
return one(real(eltype(itr)))
else
return one(real(eltype(collect(itr))))
end
end
+
+# corzm (non-exported, with centered data)
+
+function corzm(itr::Any)
+ _return_one(itr)
+end
corzm(x::AbstractVector{T}) where {T} = one(real(T))
function corzm(x::AbstractMatrix, vardim::Int=1)
c = unscaled_covzm(x, vardim)
@@ -704,11 +708,7 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
# corm
function corm(itr::Any, itrmean)
- if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr))
- return one(real(eltype(itr)))
- else
- return one(real(eltype(collect(itr))))
- end
+ _return_one(itr)
end
corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
@@ -749,11 +749,7 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
Return the number one.
"""
function cor(itr::Any)
- if Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr))
- return one(real(eltype(itr)))
- else
- return one(real(eltype(collect(itr))))
- end
+ _return_one(itr)
end
cor(x::AbstractVector) = one(real(eltype(x)))
From e42c0b0229ede7babe19361c87ccc0903e762ea7 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 30 Apr 2020 09:46:29 -0400
Subject: [PATCH 12/19] Put back uuid
---
Project.toml | 1 +
1 file changed, 1 insertion(+)
diff --git a/Project.toml b/Project.toml
index 21bd1852..12c96773 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,4 +1,5 @@
name = "Statistics"
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
From cb3020cd5532b4c31f380235ac43cff15b5d6868 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 30 Apr 2020 10:17:24 -0400
Subject: [PATCH 13/19] _lazycollect solution
---
Project.toml | 1 -
src/Statistics.jl | 11 +++++++----
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/Project.toml b/Project.toml
index 12c96773..21bd1852 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,5 +1,4 @@
name = "Statistics"
-uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 8bb51017..c47ed4ec 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -479,6 +479,9 @@ end
_vmean(x::AbstractVector, vardim::Int) = mean(x)
_vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim)
+_lazycollect(x::Any) = collect(x)
+_lazycollect(x::AbstractVector) = x
+
function _matrix_error(x, y, fun)
if x isa AbstractMatrix
s = "$(fun)(x::AbstractMatrix, y::Any) is currently not allowed. " *
@@ -531,7 +534,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
end
function covzm(x::Any, y::Any; corrected::Bool = true)
_matrix_error(x, y, covzm)
- covzm(collect(x), collect(y); corrected = corrected)
+ covzm(_lazycollect(x), _lazycollect(y); corrected = corrected)
end
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
@@ -714,7 +717,7 @@ corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
function corm(x::Any, mx, y::Any, my)
_matrix_error(x, mx, y, my, corm)
- corm(collect(x), mx, collect(y), my)
+ corm(_lazycollect(x), mx, _lazycollect(y), my)
end
function corm(x::AbstractVector, mx, y::AbstractVector, my)
require_one_based_indexing(x, y)
@@ -767,8 +770,8 @@ Compute the Pearson correlation between iterators `x` and `y`.
"""
function cor(x::Any, y::Any)
_matrix_error(x, y, cor)
- cx = collect(x)
- cy = collect(y)
+ cx = _lazycollect(x)
+ cy = _lazycollect(y)
corm(cx, mean(cx), cy, mean(cy))
end
From 36734bf4cec23ccd4fcc6856d56f3026034d5743 Mon Sep 17 00:00:00 2001
From: pdeffebach <23196228+pdeffebach@users.noreply.github.com>
Date: Thu, 30 Apr 2020 11:49:54 -0400
Subject: [PATCH 14/19] Apply suggestions from code review
Co-authored-by: Milan Bouchet-Valat
---
src/Statistics.jl | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index c47ed4ec..20173900 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -693,9 +693,7 @@ end
# corzm (non-exported, with centered data)
-function corzm(itr::Any)
- _return_one(itr)
-end
+corzm(itr::Any) = _return_one(itr)
corzm(x::AbstractVector{T}) where {T} = one(real(T))
function corzm(x::AbstractMatrix, vardim::Int=1)
c = unscaled_covzm(x, vardim)
@@ -710,9 +708,7 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
# corm
-function corm(itr::Any, itrmean)
- _return_one(itr)
-end
+corm(itr::Any, itrmean) = _return_one(itr)
corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
function corm(x::Any, mx, y::Any, my)
@@ -751,9 +747,7 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
Return the number one.
"""
-function cor(itr::Any)
- _return_one(itr)
-end
+cor(itr::Any) = _return_one(itr)
cor(x::AbstractVector) = one(real(eltype(x)))
"""
From 2f1c4041bf6c4c2f774fd3f2cbcb6de231f2b179 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 30 Apr 2020 12:07:00 -0400
Subject: [PATCH 15/19] change name of lazy_collect
---
src/Statistics.jl | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 20173900..6a54fa21 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -479,8 +479,8 @@ end
_vmean(x::AbstractVector, vardim::Int) = mean(x)
_vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim)
-_lazycollect(x::Any) = collect(x)
-_lazycollect(x::AbstractVector) = x
+_collect_if_itr(x::Any) = collect(x)
+_collect_if_itr(x::AbstractVector) = x
function _matrix_error(x, y, fun)
if x isa AbstractMatrix
@@ -534,7 +534,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
end
function covzm(x::Any, y::Any; corrected::Bool = true)
_matrix_error(x, y, covzm)
- covzm(_lazycollect(x), _lazycollect(y); corrected = corrected)
+ covzm(_collect_if_itr(x), _collect_if_itr(y); corrected = corrected)
end
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
@@ -713,7 +713,7 @@ corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
function corm(x::Any, mx, y::Any, my)
_matrix_error(x, mx, y, my, corm)
- corm(_lazycollect(x), mx, _lazycollect(y), my)
+ corm(_collect_if_itr(x), mx, _collect_if_itr(y), my)
end
function corm(x::AbstractVector, mx, y::AbstractVector, my)
require_one_based_indexing(x, y)
@@ -764,8 +764,8 @@ Compute the Pearson correlation between iterators `x` and `y`.
"""
function cor(x::Any, y::Any)
_matrix_error(x, y, cor)
- cx = _lazycollect(x)
- cy = _lazycollect(y)
+ cx = _collect_if_itr(x)
+ cy = _collect_if_itr(y)
corm(cx, mean(cx), cy, mean(cy))
end
From 4279703d04d26c803634c88ff35e3ce89f76adb2 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 30 Apr 2020 12:08:22 -0400
Subject: [PATCH 16/19] get rid of unnecessary _vmean
---
src/Statistics.jl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 6a54fa21..67aa5559 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -602,8 +602,8 @@ function cov(x::Any, y::Any; corrected::Bool=true)
_matrix_error(x, y, cov)
cx = collect(x)
cy = collect(y)
- meanx = _vmean(cx, 1)
- meany = _vmean(cy, 1)
+ meanx = mean(cx)
+ meany = mean(cy)
dx = map!(t -> t - meanx, cx, cx)
dy = map!(t -> t - meany, cy, cy)
covzm(dx, dy; corrected=corrected)
From b9f8f96fcf0a6fe3e04566c834d51773b950459d Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 30 Apr 2020 12:33:43 -0400
Subject: [PATCH 17/19] simplify error, add back uuid
---
Project.toml | 1 +
src/Statistics.jl | 36 ++++++++++++------------------------
2 files changed, 13 insertions(+), 24 deletions(-)
diff --git a/Project.toml b/Project.toml
index 21bd1852..12c96773 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,4 +1,5 @@
name = "Statistics"
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 67aa5559..ea68e89b 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -482,27 +482,15 @@ _vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim)
_collect_if_itr(x::Any) = collect(x)
_collect_if_itr(x::AbstractVector) = x
-function _matrix_error(x, y, fun)
- if x isa AbstractMatrix
- s = "$(fun)(x::AbstractMatrix, y::Any) is currently not allowed. " *
- "Use $(fun)(x, collect(y)) instead"
+function _matrix_error(x, y)
+ if x isa AbstractVector || y isa AbstractVector
+
+ elseif x isa AbstractArray || y isa AbstractArray
+ s = "Covariance and correlation between a non-vector array and a non-vector iterator" *
+ "is currently disallowed. `collect` one of the arguments."
throw(ArgumentError(s))
- elseif y isa AbstractMatrix
- s = "$(fun)(x::Any, y::AbstractMatrix) is currently not allowed. " *
- "Use $(fun)(collect(x), y) instead"
- throw(ArgumentError(s))
- end
-end
+ else
-function _matrix_error(x, mx, y, my, fun)
- if x isa AbstractMatrix || y isa AbstractMatrix
- s = "$(fun)(x::$(typeof(x)), mx, y::Any, my) is currently not allowed. " *
- "Use $(fun)(x, mx, collect(y), my) instead"
- throw(ArgumentError(s))
- elseif y isa AbstractMatrix
- s = "$(fun)(x::Any, mx, y::$(typeof(y)), my) is currently not allowed. " *
- "Use $(fun)(collect(x), mx, y, my) inistead."
- throw(ArgumentError(s))
end
end
@@ -533,7 +521,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
return A
end
function covzm(x::Any, y::Any; corrected::Bool = true)
- _matrix_error(x, y, covzm)
+ _matrix_error(x, y)
covzm(_collect_if_itr(x), _collect_if_itr(y); corrected = corrected)
end
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
@@ -557,7 +545,7 @@ covm(x::AbstractVector, xmean; corrected::Bool=true) =
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, vardim; corrected=corrected)
function covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true)
- _matrix_error(x, xmean, y, ymean, covm)
+ _matrix_error(x, y)
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
end
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
@@ -599,7 +587,7 @@ default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``
`false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``.
"""
function cov(x::Any, y::Any; corrected::Bool=true)
- _matrix_error(x, y, cov)
+ _matrix_error(x, y)
cx = collect(x)
cy = collect(y)
meanx = mean(cx)
@@ -712,7 +700,7 @@ corm(itr::Any, itrmean) = _return_one(itr)
corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
function corm(x::Any, mx, y::Any, my)
- _matrix_error(x, mx, y, my, corm)
+ _matrix_error(x, y)
corm(_collect_if_itr(x), mx, _collect_if_itr(y), my)
end
function corm(x::AbstractVector, mx, y::AbstractVector, my)
@@ -763,7 +751,7 @@ cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims)
Compute the Pearson correlation between iterators `x` and `y`.
"""
function cor(x::Any, y::Any)
- _matrix_error(x, y, cor)
+ _matrix_error(x, y)
cx = _collect_if_itr(x)
cy = _collect_if_itr(y)
From 14c570160fcae73a7db3f3b3a321b7ae81a82cdf Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 30 Apr 2020 12:36:14 -0400
Subject: [PATCH 18/19] Further simplify error
---
Project.toml | 1 -
src/Statistics.jl | 8 ++------
2 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/Project.toml b/Project.toml
index 12c96773..21bd1852 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,5 +1,4 @@
name = "Statistics"
-uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/src/Statistics.jl b/src/Statistics.jl
index ea68e89b..e2cb52ba 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -483,15 +483,11 @@ _collect_if_itr(x::Any) = collect(x)
_collect_if_itr(x::AbstractVector) = x
function _matrix_error(x, y)
- if x isa AbstractVector || y isa AbstractVector
-
- elseif x isa AbstractArray || y isa AbstractArray
+ if !(x isa AbstractVector || y isa AbstractVector) && (x isa AbstractArray || y isa AbstractArray)
s = "Covariance and correlation between a non-vector array and a non-vector iterator" *
"is currently disallowed. `collect` one of the arguments."
throw(ArgumentError(s))
- else
-
- end
+ end
end
# core functions
From 11bd8f5fa801512d7e70344a61cacf93398424e7 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 30 Apr 2020 12:36:27 -0400
Subject: [PATCH 19/19] add back uuid
---
Project.toml | 1 +
1 file changed, 1 insertion(+)
diff --git a/Project.toml b/Project.toml
index 21bd1852..12c96773 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,4 +1,5 @@
name = "Statistics"
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"