Skip to content

Commit

Permalink
Add histskewness and histkurtosis
Browse files Browse the repository at this point in the history
  • Loading branch information
brenhinkeller committed Jul 1, 2024
1 parent 7d14749 commit 6bf9ff9
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "NaNStatistics"
uuid = "b946abbf-3ea7-4610-9019-9858bfdeaf2d"
authors = ["C. Brenhin Keller"]
version = "0.6.39"
version = "0.6.40"

[deps]
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Expand Down
107 changes: 107 additions & 0 deletions src/Histograms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,113 @@ end
export histvar


"""
```julia
histskewness(counts, bincenters; corrected::Bool=true)
```
Estimate the skewness of the data represented by a histogram,
specified as `counts` in equally spaced bins centered at `bincenters`.
If `counts` have been normalized, or represent an analytical estimate of a PDF
rather than a histogram representing counts of a dataset, Bessel's correction
to the variance should likely not be performed - i.e., set the
`corrected` keyword argument to `false`.
## Examples
```julia
julia> binedges = -10:0.01:10;
julia> counts = histcounts(randn(10000), binedges);
julia> bincenters = (binedges[1:end-1] + binedges[2:end])/2
-9.995:0.01:9.995
t
julia> histskewness(counts, bincenters)
0.02609968854343211
```
"""
function histskewness(counts, bincenters; corrected::Bool=true)
N = ∅ₙ = zero(eltype(counts))
Σ == zero(Base.promote_op(*, eltype(counts), eltype(bincenters)))
@inbounds @simd ivdep for i in eachindex(counts, bincenters)
pᵢ = counts[i] * bincenters[i]
Σ += ifelse(isnan(pᵢ), ∅, pᵢ)
N += ifelse(isnan(pᵢ), ∅ₙ, counts[i])
end
μ = Σ/N
Σ =
@inbounds @simd ivdep for i in eachindex(counts, bincenters)
dᵢ = bincenters[i] - μ
pᵢ = counts[i] * dᵢ * dᵢ
Σ += ifelse(isnan(pᵢ), ∅, pᵢ)
end
σ = sqrt/ max(N-corrected, ∅ₙ))
Σ =
@inbounds @simd ivdep for i in eachindex(counts, bincenters)
dᵢ = bincenters[i] - μ
pᵢ = counts[i] * dᵢ^3
Σ += ifelse(isnan(pᵢ), ∅, pᵢ)
end
return/ N)/σ^3
end
export histskewness

"""
```julia
histkurtosis(counts, bincenters; corrected::Bool=true)
```
Estimate the excess kurtosis [1] of the data represented by a histogram,
specified as `counts` in equally spaced bins centered at `bincenters`.
If `counts` have been normalized, or represent an analytical estimate of a PDF
rather than a histogram representing counts of a dataset, Bessel's correction
to the variance should likely not be performed - i.e., set the
`corrected` keyword argument to `false`.
[1] We follow Distributions.jl in returning excess kurtosis rather than raw kurtosis.
Excess kurtosis is defined as as kurtosis - 3, such that a Normal distribution
has zero excess kurtosis.
## Examples
```julia
julia> binedges = -10:0.01:10;
julia> counts = histcounts(randn(10000), binedges);
julia> bincenters = (binedges[1:end-1] + binedges[2:end])/2
-9.995:0.01:9.995
t
julia> histkurtosis(counts, bincenters)
0.028863400305099596
```
"""
function histkurtosis(counts, bincenters; corrected::Bool=true)
N = ∅ₙ = zero(eltype(counts))
Σ == zero(Base.promote_op(*, eltype(counts), eltype(bincenters)))
@inbounds @simd ivdep for i in eachindex(counts, bincenters)
pᵢ = counts[i] * bincenters[i]
Σ += ifelse(isnan(pᵢ), ∅, pᵢ)
N += ifelse(isnan(pᵢ), ∅ₙ, counts[i])
end
μ = Σ/N
Σ =
@inbounds @simd ivdep for i in eachindex(counts, bincenters)
dᵢ = bincenters[i] - μ
pᵢ = counts[i] * dᵢ * dᵢ
Σ += ifelse(isnan(pᵢ), ∅, pᵢ)
end
σ = sqrt/ max(N-corrected, ∅ₙ))
Σ =
@inbounds @simd ivdep for i in eachindex(counts, bincenters)
dᵢ = bincenters[i] - μ
pᵢ = counts[i] * dᵢ^4
Σ += ifelse(isnan(pᵢ), ∅, pᵢ)
end
return/ N)/σ^4 - 3
end
export histkurtosis


"""
```julia
histstd(counts, bincenters; corrected::Bool=true)
Expand Down
4 changes: 4 additions & 0 deletions test/testHistograms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
@test histmean(h, bincenters) nanmean(a) atol = 0.02
@test histvar(h, bincenters) nanvar(a) atol = 0.02
@test histstd(h, bincenters) nanstd(a) atol = 0.02
@test histskewness(h, bincenters) 0 atol = 0.2
@test histkurtosis(h, bincenters) 0 atol = 0.2

n = pdf.(Normal(0,1), bincenters)
@test histmean(n, bincenters) 0 atol = 1e-6
Expand All @@ -54,5 +56,7 @@
@test histmean(n, bincenters) 1 atol = 1e-6
@test histvar(n, bincenters, corrected=false) 4 atol = 1e-3
@test histstd(n, bincenters, corrected=false) 2 atol = 2e-6
@test histskewness(n, bincenters, corrected=false) 0 atol = 2e-6
@test histkurtosis(n, bincenters, corrected=false) 0 atol = 2e-6

## --- End of File

2 comments on commit 6bf9ff9

@brenhinkeller
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register
Release notes:

  • Add histskewness and histkurtosis

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/110168

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.6.40 -m "<description of version>" 6bf9ff9ea65ee81a2a9da222183041a1bfa78d0e
git push origin v0.6.40

Please sign in to comment.