Skip to content

Commit

Permalink
De-lv other summary statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
brenhinkeller committed Jun 13, 2024
1 parent ea59d26 commit 8ac5d8d
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 556 deletions.
50 changes: 7 additions & 43 deletions src/ArrayStats/nancov.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ function _nancov(x::AbstractVector, y::AbstractVector, corrected::Bool, μᵪ::N
# Calculate covariance
σᵪᵧ == zero(promote_type(typeof(μᵪ), typeof(μᵧ), Int))
n = 0
@turbo check_empty=true for i eachindex(x,y)
@inbounds @simd ivdep for i eachindex(x,y)
δᵪ = x[i] - μᵪ
δᵧ = y[i] - μᵧ
δ² = δᵪ * δᵧ
Expand All @@ -19,7 +19,7 @@ function _nancov(x::AbstractVector, y::AbstractVector, corrected::Bool)
n = 0
Σᵪ = ∅ᵪ = zero(eltype(x))
Σᵧ = ∅ᵧ = zero(eltype(y))
@turbo check_empty=true for i eachindex(x,y)
@inbounds @simd ivdep for i eachindex(x,y)
xᵢ, yᵢ = x[i], y[i]
notnan = (xᵢ==xᵢ) & (yᵢ==yᵢ)
n += notnan
Expand Down Expand Up @@ -103,49 +103,13 @@ function nancor(x::AbstractVector, y::AbstractVector; corrected::Bool=true)
@assert eachindex(x) == eachindex(y)
return _nancor(x, y, corrected)
end

# Pair-wise nan-covariance
function _nancor(x::StridedVector{T}, y::StridedVector{T}, corrected::Bool) where T<:PrimitiveNumber
function _nancor(x::AbstractVector{Tx}, y::AbstractVector{Ty}, corrected::Bool) where {Tx, Ty}
# Parwise nan-means
n = 0
Σᵪ = ∅ᵪ = zero(T)
Σᵧ = ∅ᵧ = zero(T)
@turbo check_empty=true for i eachindex(x,y)
xᵢ, yᵢ = x[i], y[i]
notnan = (xᵢ==xᵢ) & (yᵢ==yᵢ)
n += notnan
Σᵪ += ifelse(notnan, xᵢ, ∅ᵪ)
Σᵧ += ifelse(notnan, yᵢ, ∅ᵧ)
end
μᵪ = Σᵪ/n
μᵧ = Σᵧ/n
n == 0 && return (∅ᵪ+∅ᵧ)/0 # Return appropriate NaN if no data

# Pairwise nan-variances
σ²ᵪ = ∅ᵪ = zero(typeof(μᵪ))
σ²ᵧ = ∅ᵧ = zero(typeof(μᵧ))
@turbo check_empty=true for i eachindex(x,y)
δᵪ = x[i] - μᵪ
δᵧ = y[i] - μᵧ
notnan = (δᵪ==δᵪ) & (δᵧ==δᵧ)
σ²ᵪ += ifelse(notnan, δᵪ * δᵪ, ∅ᵪ)
σ²ᵧ += ifelse(notnan, δᵧ * δᵧ, ∅ᵧ)
end
σᵪ = sqrt(σ²ᵪ / max(n-corrected, 0))
σᵧ = sqrt(σ²ᵧ / max(n-corrected, 0))

# Covariance and correlation
σᵪᵧ = _nancov(x, y, corrected, μᵪ, μᵧ)
ρᵪᵧ = σᵪᵧ / (σᵪ * σᵧ)

return ρᵪᵧ
end
function _nancor(x::AbstractVector, y::AbstractVector, corrected::Bool)
# Parwise nan-means
n = 0
Σᵪ = ∅ᵪ = zero(eltype(x))
Σᵧ = ∅ᵧ = zero(eltype(y))
@inbounds for i eachindex(x,y)
Σᵪ = ∅ᵪ = zero(Tx)
Σᵧ = ∅ᵧ = zero(Ty)
@inbounds @simd ivdep for i eachindex(x,y)
xᵢ, yᵢ = x[i], y[i]
notnan = (xᵢ==xᵢ) & (yᵢ==yᵢ)
n += notnan
Expand All @@ -159,7 +123,7 @@ function _nancor(x::AbstractVector, y::AbstractVector, corrected::Bool)
# Pairwise nan-variances
σ²ᵪ = ∅ᵪ = zero(typeof(μᵪ))
σ²ᵧ = ∅ᵧ = zero(typeof(μᵧ))
@inbounds for i eachindex(x,y)
@inbounds @simd ivdep for i eachindex(x,y)
δᵪ = x[i] - μᵪ
δᵧ = y[i] - μᵧ
notnan = (δᵪ==δᵪ) & (δᵧ==δᵧ)
Expand Down
134 changes: 12 additions & 122 deletions src/ArrayStats/nanmean.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,45 +50,32 @@ function _nanmean(A::AbstractArray{T,N}, dims::Tuple) where {T,N}
end

# Reduce all the dims!
function _nanmean(A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat
Tₒ = Base.promote_op(/, T, Int)
function _nanmean(A, ::Colon)
Tₒ = Base.promote_op(/, eltype(A), Int)
n = 0
Σ == zero(Tₒ)
@turbo check_empty=true for i eachindex(A)
@inbounds @simd ivdep for i eachindex(A)
Aᵢ = A[i]
notnan = Aᵢ==Aᵢ
n += notnan
Σ += ifelse(notnan, A[i], ∅)
end
return Σ / n
end
function _nanmean(A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger
function _nanmean(A::AbstractArray{T}, ::Colon) where T<:Integer
Tₒ = Base.promote_op(/, T, Int)
Σ = zero(Tₒ)
@turbo check_empty=true for i eachindex(A)
@inbounds @simd ivdep for i eachindex(A)
Σ += A[i]
end
return Σ / length(A)
end
# Fallback method for non-StridedArrays
function _nanmean(A, ::Colon)
Tₒ = Base.promote_op(/, eltype(A), Int)
n = 0
Σ == zero(Tₒ)
@inbounds for i eachindex(A)
Aᵢ = A[i]
notnan = Aᵢ==Aᵢ
n += notnan
Σ += ifelse(notnan, A[i], ∅)
end
return Σ / n
end


# Metaprogramming magic adapted from Chris Elrod example:
# Generate customized set of loops for a given ndims and a vector
# `static_dims` of dimensions to reduce over
function staticdim_nanmean_quote_turbo(static_dims::Vector{Int}, N::Int)
function staticdim_nanmean_quote(static_dims::Vector{Int}, N::Int)
M = length(static_dims)
# `static_dims` now contains every dim we're taking the mean over.
Bᵥ = Expr(:call, :view, :B)
Expand Down Expand Up @@ -129,111 +116,11 @@ function staticdim_nanmean_quote_turbo(static_dims::Vector{Int}, N::Int)
# Build the reduction loop
for n reduct_inds
newblock = Expr(:block)
push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
block = newblock
end
# Push more things here if you want them in the innermost loop
push!(block.args, :(Aᵢ = $Aind))
push!(block.args, :(notnan = Aᵢ==Aᵢ))
push!(block.args, :(n += notnan))
push!(block.args, :(Σ += ifelse(notnan, Aᵢ, ∅)))
# Push more things here if you want them at the end of the reduction loop
push!(rblock.args, :($Bind = Σ * inv(n)))
# Put it all together
quote
= zero(eltype(B))
Bᵥ = $Bᵥ
@turbo $loops
return B
end
end

# Chris Elrod metaprogramming magic:
# Turn non-static integers in `dims` tuple into `StaticInt`s
# so we can construct `static_dims` vector within @generated code
function branches_nanmean_quote_turbo(N::Int, M::Int, D)
static_dims = Int[]
for m 1:M
param = D.parameters[m]
if param <: StaticInt
new_dim = _dim(param)::Int
@assert new_dim static_dims
push!(static_dims, new_dim)
if n==last(reduct_inds)
push!(block.args, Expr(:macrocall, Symbol("@simd"), :ivdep, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)))
else
t = Expr(:tuple)
for n static_dims
push!(t.args, :(StaticInt{$n}()))
end
q = Expr(:block, :(dimm = dims[$m]))
qold = q
ifsym = :if
for n 1:N
n static_dims && continue
tc = copy(t)
push!(tc.args, :(StaticInt{$n}()))
qnew = Expr(ifsym, :(dimm == $n), :(return _nanmean!(B, A, $tc)))
for r m+1:M
push!(tc.args, :(dims[$r]))
end
push!(qold.args, qnew)
qold = qnew
ifsym = :elseif
end
push!(qold.args, Expr(:block, :(throw("Dimension `$dimm` not found."))))
return q
end
end
staticdim_nanmean_quote_turbo(static_dims, N)
end

# Efficient @generated in-place mean
@generated function _nanmean!(B::StridedArray{Tₒ,N}, A::StridedArray{T,N}, dims::D) where {Tₒ<:PrimitiveNumber,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}}
N == M && return :(B[1] = _nanmean(A, :); B)
branches_nanmean_quote_turbo(N, M, D)
end

function staticdim_nanmean_quote(static_dims::Vector{Int}, N::Int)
M = length(static_dims)
# `static_dims` now contains every dim we're taking the mean over.
Bᵥ = Expr(:call, :view, :B)
reduct_inds = Int[]
nonreduct_inds = Int[]
# Firstly, build our expressions for indexing each array
Aind = :(A[])
Bind = :(Bᵥ[])
inds = Vector{Symbol}(undef, N)
for n 1:N
ind = Symbol(:i_,n)
inds[n] = ind
push!(Aind.args, ind)
if n static_dims
push!(reduct_inds, n)
push!(Bᵥ.args, :(firstindex(B,$n)))
else
push!(nonreduct_inds, n)
push!(Bᵥ.args, :)
push!(Bind.args, ind)
end
end
firstn = first(nonreduct_inds)
# Secondly, build up our set of loops
block = Expr(:block)
loops = Expr(:for, :($(inds[firstn]) = indices((A,B),$firstn)), block)
if length(nonreduct_inds) > 1
for n @view(nonreduct_inds[2:end])
newblock = Expr(:block)
push!(block.args, Expr(:for, :($(inds[n]) = indices((A,B),$n)), newblock))
block = newblock
push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
end
end
rblock = block
# Push more things here if you want them at the beginning of the reduction loop
push!(rblock.args, :(n = 0))
push!(rblock.args, :(Σ = ∅))
# Build the reduction loop
for n reduct_inds
newblock = Expr(:block)
push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
block = newblock
end
# Push more things here if you want them in the innermost loop
Expand All @@ -252,6 +139,8 @@ function staticdim_nanmean_quote(static_dims::Vector{Int}, N::Int)
end
end

# Turn non-static integers in `dims` tuple into `StaticInt`s
# so we can construct `static_dims` vector within @generated code
function branches_nanmean_quote(N::Int, M::Int, D)
static_dims = Int[]
for m 1:M
Expand Down Expand Up @@ -287,6 +176,7 @@ function branches_nanmean_quote(N::Int, M::Int, D)
staticdim_nanmean_quote(static_dims, N)
end

# Efficient @generated in-place mean
@generated function _nanmean!(B::AbstractArray{Tₒ,N}, A::AbstractArray{T,N}, dims::D) where {Tₒ,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}}
N == M && return :(B[1] = _nanmean(A, :); B)
branches_nanmean_quote(N, M, D)
Expand Down
Loading

0 comments on commit 8ac5d8d

Please sign in to comment.