Skip to content

Commit

Permalink
Add nanlogsumexp and nanlogcumsumexp
Browse files Browse the repository at this point in the history
  • Loading branch information
brenhinkeller committed Jul 1, 2024
1 parent 3853027 commit 7d14749
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "NaNStatistics"
uuid = "b946abbf-3ea7-4610-9019-9858bfdeaf2d"
authors = ["C. Brenhin Keller"]
version = "0.6.38"
version = "0.6.39"

[deps]
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Expand Down
19 changes: 15 additions & 4 deletions src/ArrayStats/ArrayStats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,17 @@

## --- Summary statistics of arrays with NaNs

uinit(T::Type) = uinit(float(T))
uinit(T::Type{<:Integer}) = typemax(T)
uinit(::Type{Float64}) = NaN
uinit(::Type{Float32}) = NaN32
uinit(::Type{Float16}) = NaN16
linit(T::Type) = linit(float(T))
linit(T::Type{<:Integer}) = typemin(T)
linit(::Type{Float64}) = NaN
linit(::Type{Float32}) = NaN32
linit(::Type{Float16}) = NaN16

"""
```julia
nanminimum(A; dims)
Expand All @@ -171,8 +182,8 @@
__nanminimum(A, ::Colon, ::Colon) = _nanminimum(A, :)
__nanminimum(A, region, ::Colon) = _nanminimum(A, region)
__nanminimum(A, ::Colon, region) = reducedims(_nanminimum(A, region), region)
_nanminimum(A, region) = reduce(nanmin, A, dims=region, init=float(eltype(A))(NaN))
_nanminimum(A, ::Colon) = reduce(nanmin, A, init=float(eltype(A))(NaN))
_nanminimum(A, region) = reduce(nanmin, A, dims=region, init=uinit(eltype(A)))
_nanminimum(A, ::Colon) = reduce(nanmin, A, init=uinit(eltype(A)))
export nanminimum


Expand All @@ -191,8 +202,8 @@
__nanmaximum(A, ::Colon, ::Colon) = _nanmaximum(A, :)
__nanmaximum(A, region, ::Colon) = _nanmaximum(A, region)
__nanmaximum(A, ::Colon, region) = reducedims(_nanmaximum(A, region), region)
_nanmaximum(A, region) = reduce(nanmax, A, dims=region, init=float(eltype(A))(NaN))
_nanmaximum(A, ::Colon) = reduce(nanmax, A, init=float(eltype(A))(NaN))
_nanmaximum(A, region) = reduce(nanmax, A, dims=region, init=linit(eltype(A)))
_nanmaximum(A, ::Colon) = reduce(nanmax, A, init=linit(eltype(A)))
export nanmaximum

"""
Expand Down
85 changes: 85 additions & 0 deletions src/ArrayStats/nanlogsumexp.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@

"""
```julia
nanlogsumexp(A)
```
Return the logarithm of the sum of `exp.(A)` -- i.e., `log(sum(exp.(A)))`, but ignoring `NaN`s and avoiding numerical over/underflow.
As `nancumsum`, but operating on logarithms; as `nanlogsumexp`, but returning a array of cumulative sums, rather than a single value.
## Examples
```julia
```
"""
nanlogsumexp(A) = _nanlogsumexp(A, nanmaximum(A))
export nanlogsumexp

function _nanlogsumexp(A, c)
Σ == zero(Base.promote_op(exp, eltype(A)))
@inbounds @simd ivdep for i eachindex(A)
Aᵢ = exp(A[i] - c)
Σ += ifelse(isnan(Aᵢ), ∅, Aᵢ)
end
return log(Σ) + c
end


"""
```julia
nanlogcumsumexp(A)
```
Return the logarithm of the cumulative sum of `exp.(A)` -- i.e., `log.(cumsum.(exp.(A)))`, but ignoring `NaN`s and avoiding numerical over/underflow.
As `nancumsum`, but operating on logarithms; as `nanlogsumexp`, but returning a array of cumulative sums, rather than a single value.
## Examples
```julia
```
"""
nanlogcumsumexp(A; reverse=false) = _nanlogcumsumexp(A, static(reverse))
export nanlogcumsumexp

function _nanlogcumsumexp(A, ::False)
Tᵣ = Base.promote_op(exp, eltype(A))
Σ == zero(Tᵣ)
= fill!(similar(A, Tᵣ), ∅)
c = linit(eltype(A))
i₀ = firstindex(A)
@inbounds for i eachindex(A)
isnan(c) && (c = A[i])
if A[i] > c
c = A[i]
Σ =
@simd ivdep for j i₀:i
Aᵢ = exp(A[j] - c)
Σ += ifelse(isnan(Aᵢ), ∅, Aᵢ)
end
lΣ[i] = log(Σ) + c
else
Aᵢ = exp(A[i] - c)
Σ += ifelse(isnan(Aᵢ), ∅, Aᵢ)
lΣ[i] = log(Σ) + c
end
end
return
end
function _nanlogcumsumexp(A, ::True)
Tᵣ = Base.promote_op(exp, eltype(A))
Σ == zero(Tᵣ)
= fill!(similar(A, Tᵣ), ∅)
c = linit(eltype(A))
iₗ = lastindex(A)
@inbounds for i reverse(eachindex(A))
isnan(c) && (c = A[i])
if A[i] > c
c = A[i]
Σ =
@simd ivdep for j i:iₗ
Aᵢ = exp(A[j] - c)
Σ += ifelse(isnan(Aᵢ), ∅, Aᵢ)
end
lΣ[i] = log(Σ) + c
else
Aᵢ = exp(A[i] - c)
Σ += ifelse(isnan(Aᵢ), ∅, Aᵢ)
lΣ[i] = log(Σ) + c
end
end
return
end
1 change: 1 addition & 0 deletions src/NaNStatistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ module NaNStatistics
include("ArrayStats/nanmean.jl")
include("ArrayStats/nansum.jl")
include("ArrayStats/nancumsum.jl")
include("ArrayStats/nanlogsumexp.jl")
include("ArrayStats/nanvar.jl")
include("ArrayStats/nanstd.jl")
include("ArrayStats/nansem.jl")
Expand Down
42 changes: 41 additions & 1 deletion test/testArrayStats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
A = [1:10.0..., NaN]
@test nansum(A) === 55.0
@test nancumsum(A) == [1.0, 3.0, 6.0, 10.0, 15.0, 21.0, 28.0, 36.0, 45.0, 55.0, 55.0]
@test nanlogsumexp(A) 10.45862974442671
@test nanlogcumsumexp(A) [1.0, 2.313261687518223, 3.4076059644443806, 4.440189698561196, 5.451914395937593, 6.456193316018123, 7.457762847404243, 8.458339626479004, 9.458551727967379, 10.45862974442671, 10.45862974442671]
@test isequal(nanlogcumsumexp(A, reverse=true), [10.45862974442671, 10.458551727967379, 10.458339626479004, 10.457762847404243, 10.456193316018123, 10.451914395937594, 10.440189698561195, 10.40760596444438, 10.313261687518223, 10.0, NaN])
@test nanmean(A) === 5.5
@test nanmean(A, ones(11)) === 5.5 # weighted
@test nanrange(A) === 9.0
Expand All @@ -49,17 +52,42 @@
@test nanmin(1.,2.) === 1.0
@test nanmax(1.,2.) === 2.0

## --- Non-monotonic arrays

A = [1:5.0..., NaN, 5:-1:1...]
@test nancumsum(A) [1.0, 3.0, 6.0, 10.0, 15.0, 15.0, 20.0, 24.0, 27.0, 29.0, 30.0]
@test nanlogcumsumexp(A) [1.0, 2.313261687518223, 3.4076059644443806, 4.440189698561196, 5.451914395937593, 5.451914395937593, 5.944418386887494, 6.078136371527456, 6.123152745316754, 6.139216411276987, 6.145061576497539]
@test nanlogcumsumexp(A, reverse=true) [6.145061576497539, 6.139216411276987, 6.123152745316754, 6.078136371527456, 5.944418386887494, 5.451914395937593, 5.451914395937593, 4.440189698561196, 3.4076059644443806, 2.313261687518223, 1.0]

A = [1:10.0..., NaN, 10:-1:1...]
@test nansum(A) === 110.0
@test nanlogsumexp(A) 11.151776924986656
@test nanrange(A) === 9.0
@test nanminimum(A) === 1.0
@test nanmaximum(A) === 10.0
@test nanextrema(A) === (1.0, 10.0)
@test nanvar(A) 8.68421052631579
@test nanstd(A) 2.946898458772509
@test nansem(A) 2.946898458772509/sqrt(20)
@test nanmad(A) 2.5
@test nanaad(A) 2.5
@test nanmedian(A) 5.5
@test nanpctile(A,50) 5.5

## --- Arrays containing only NaNs should yield NaN (or 0 for sums)

A = fill(NaN,10)
@test nansum(A) == 0
@test nancumsum(A) == zeros(10)
@test isnan(nanlogsumexp(A))
@test all(isnan, nanlogcumsumexp(A))
@test all(isnan, nanlogcumsumexp(A, reverse=true))
@test isnan(nanmean(A))
@test isnan(nanmean(A, ones(10))) # weighted
@test isnan(nanrange(A))
@test isnan(nanminimum(A))
@test isnan(nanmaximum(A))
@test all(isnan.(nanextrema(A)))
@test all(isnan, nanextrema(A))
@test isnan(nanvar(A))
@test isnan(nanvar(A, mean=NaN))
@test isnan(nanstd(A))
Expand All @@ -78,6 +106,9 @@
A = Float64[]
@test nansum(A) == 0
@test nancumsum(A) == Float64[]
@test isnan(nanlogsumexp(A))
@test nanlogcumsumexp(A) == Float64[]
@test nanlogcumsumexp(A, reverse=true) == Float64[]
@test isnan(nanmean(A))
@test isnan(nanmean(A, copy(A))) # weighted
@test isnan(nanvar(A))
Expand All @@ -99,6 +130,9 @@
A = collect(1:10)
@test nansum(A) === 55
@test nancumsum(A) == [1, 3, 6, 10, 15, 21, 28, 36, 45, 55]
@test nanlogsumexp(A) 10.45862974442671
@test nanlogcumsumexp(A) [1.0, 2.313261687518223, 3.4076059644443806, 4.440189698561196, 5.451914395937593, 6.456193316018123, 7.457762847404243, 8.458339626479004, 9.458551727967379, 10.45862974442671]
@test nanlogcumsumexp(A, reverse=true) [10.45862974442671, 10.458551727967379, 10.458339626479004, 10.457762847404243, 10.456193316018123, 10.451914395937594, 10.440189698561195, 10.40760596444438, 10.313261687518223, 10.0]
@test nanmean(A) === 5.5
@test nanmean(A, ones(10)) === 5.5 # weighted
@test nanrange(A) === 9
Expand All @@ -124,6 +158,9 @@
A = 1:10
@test nansum(A) === 55
@test nancumsum(A) == [1, 3, 6, 10, 15, 21, 28, 36, 45, 55]
@test nanlogsumexp(A) 10.45862974442671
@test nanlogcumsumexp(A) [1.0, 2.313261687518223, 3.4076059644443806, 4.440189698561196, 5.451914395937593, 6.456193316018123, 7.457762847404243, 8.458339626479004, 9.458551727967379, 10.45862974442671]
@test nanlogcumsumexp(A, reverse=true) [10.45862974442671, 10.458551727967379, 10.458339626479004, 10.457762847404243, 10.456193316018123, 10.451914395937594, 10.440189698561195, 10.40760596444438, 10.313261687518223, 10.0]
@test nanmean(A) === 5.5
@test nanmean(A, ones(10)) === 5.5 # weighted
@test nanrange(A) === 9
Expand All @@ -145,6 +182,9 @@
A = 1:10.
@test nansum(A) === 55.0
@test nancumsum(A) == [1.0, 3.0, 6.0, 10.0, 15.0, 21.0, 28.0, 36.0, 45.0, 55.0]
@test nanlogsumexp(A) 10.45862974442671
@test nanlogcumsumexp(A) [1.0, 2.313261687518223, 3.4076059644443806, 4.440189698561196, 5.451914395937593, 6.456193316018123, 7.457762847404243, 8.458339626479004, 9.458551727967379, 10.45862974442671]
@test nanlogcumsumexp(A, reverse=true) [10.45862974442671, 10.458551727967379, 10.458339626479004, 10.457762847404243, 10.456193316018123, 10.451914395937594, 10.440189698561195, 10.40760596444438, 10.313261687518223, 10.0]
@test nanmean(A) === 5.5
@test nanmean(A, ones(10)) === 5.5 # weighted
@test nanrange(A) === 9.0
Expand Down

2 comments on commit 7d14749

@brenhinkeller
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register
Release notes:

  • Add nanlogsumexp and nanlogcumsumexp

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/110138

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.6.39 -m "<description of version>" 7d14749cff0258f512e0afd1b161153b4cbc4a39
git push origin v0.6.39

Please sign in to comment.