Skip to content

Commit

Permalink
Add histmean and histstd
Browse files Browse the repository at this point in the history
  • Loading branch information
brenhinkeller committed Jun 28, 2024
1 parent a2ab1d7 commit ba90c2d
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "NaNStatistics"
uuid = "b946abbf-3ea7-4610-9019-9858bfdeaf2d"
authors = ["C. Brenhin Keller"]
version = "0.6.36"
version = "0.6.37"

[deps]
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Expand Down
77 changes: 77 additions & 0 deletions src/Histograms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,83 @@ function histcounts(x, xedges::AbstractRange; T=Int64)
end
histcounts(x, xmin::Number, xmax::Number, nbins::Integer; T=Int64) = histcounts(x, range(xmin, xmax, length=nbins+1); T=T)

"""
```julia
histmean(counts, bincenters)
```
Estimate the mean of the data represented by a histogram,
specified as `counts` in equally spaced bins centered at `bincenters`.
## Examples
```julia
julia> binedges = -10:0.01:10;
julia> counts = histcounts(randn(10000), binedges);
julia> bincenters = (binedges[1:end-1] + binedges[2:end])/2
-9.995:0.01:9.995
julia> histmean(counts, bincenters)
0.0039890000000003135
```
"""
function histmean(counts, bincenters)
N = ∅ₙ = zero(eltype(counts))
Σ == zero(Base.promote_op(*, eltype(counts), eltype(bincenters)))
@inbounds @simd ivdep for i in eachindex(counts, bincenters)
pᵢ = counts[i] * bincenters[i]
Σ += ifelse(isnan(pᵢ), ∅, pᵢ)
N += ifelse(isnan(pᵢ), ∅ₙ, counts[i])
end
return Σ / N
end
export histmean

"""
```julia
histstd(counts, bincenters; corrected::Bool=true)
```
Estimate the standard deviation of the data represented by a histogram,
specified as `counts` in equally spaced bins centered at `bincenters`.
If `counts` have been normalized, or represent an analytical estimate of a PDF
rather than a histogram representing counts of a dataset, Bessel's correction
to the standard deviation should likely not be performed - i.e., set the
`corrected` keyword argument to `false`.
## Examples
```julia
julia> binedges = -10:0.01:10;
julia> counts = histcounts(randn(10000), binedges);
julia> bincenters = (binedges[1:end-1] + binedges[2:end])/2
-9.995:0.01:9.995
t
julia> histstd(counts, bincenters)
0.9991854064196424
```
"""
function histstd(counts, bincenters; corrected::Bool=true)
N = ∅ₙ = zero(eltype(counts))
Σ == zero(Base.promote_op(*, eltype(counts), eltype(bincenters)))
@inbounds @simd ivdep for i in eachindex(counts, bincenters)
pᵢ = counts[i] * bincenters[i]
Σ += ifelse(isnan(pᵢ), ∅, pᵢ)
N += ifelse(isnan(pᵢ), ∅ₙ, counts[i])
end
μ = Σ/N
Σ =
@inbounds @simd ivdep for i in eachindex(counts, bincenters)
dᵢ = bincenters[i] - μ
pᵢ = counts[i] * dᵢ * dᵢ
Σ += ifelse(isnan(pᵢ), ∅, pᵢ)
end
return Σ / max(N-corrected, ∅ₙ)
end
export histstd



"""
```julia
Expand Down
1 change: 1 addition & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand Down
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
using NaNStatistics
using Test, StatsBase, Statistics, LinearAlgebra
using Test, StatsBase, Statistics, LinearAlgebra, Distributions

@testset "ArrayStats" begin include("testArrayStats.jl") end
@testset "Covariance and Correlation" begin include("testCovCor.jl") end
Expand Down
12 changes: 12 additions & 0 deletions test/testHistograms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,17 @@
w = "size(N,2) < nxbins; any x bins beyond size(N,2) will not be filled"
@test (@test_logs (:warn, w) histcounts!(zeros(10,5), x, y, xedges, yedges)) == I(10)[:,1:5]

## --- Statistics on histograms

a = randn(10000)
binedges = -10:0.1:10
bincenters = (binedges[1:end-1] + binedges[2:end])/2
h = histcounts(a, binedges)
@test histmean(h, bincenters) nanmean(a) atol = 0.1
@test histstd(h, bincenters) nanstd(a) atol = 0.1

n = pdf.(Normal(0,1), bincenters)
@test histmean(n, bincenters) 0 atol = 1e-6
@test histstd(n, bincenters, corrected=false) 1 atol = 1e-6

## --- End of File

2 comments on commit ba90c2d

@brenhinkeller
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register
Release notes:

  • Add histmean and histstd functions, to estimate the mean and standard deviation of the data represented by a histogram. As usual for this package, any NaNs will be ignored.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/109981

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.6.37 -m "<description of version>" ba90c2df9b0ed04224ca26ba253c66ada4a2f091
git push origin v0.6.37

Please sign in to comment.