Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deprecate histogram functionality #6842

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -290,3 +290,147 @@ end
# 8898
# `precision(x)` on `DateTime` and `Date` values is deprecated; callers are
# redirected to `eps(x)`.
@deprecate precision(x::DateTime) eps(x)
@deprecate precision(x::Date) eps(x)

# Histogram: moved to StatsBase (#6842)
# Compute "nice" histogram bin edges for the floating-point array `v`, aiming
# for approximately `n` bins.
#
# Returns a range of edges. The step is 2, 5 or 10 times a power of 10, or 1.0
# when every element of `v` is equal. An empty `v` yields `0.0:1.0:0.0`.
# Throws `ArgumentError` when `n < 0` for an empty `v`, or `n < 1` for a
# non-empty `v`.
function histrange{T<:FloatingPoint,N}(v::AbstractArray{T,N}, n::Integer)
    if length(v) == 0
        n < 0 && throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
        return 0.0:1.0:0.0
    end
    n < 1 && throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))

    lo, hi = extrema(v)
    if hi == lo
        # degenerate data: fall back to a unit-width bin
        width = 1.0
    else
        raw = (hi - lo) / n                 # ideal (un-rounded) bin width
        mag = 10.0^floor(log10(raw))        # largest power of 10 not above raw
        ratio = raw / mag
        # round the width up to the next "nice" multiple of the magnitude
        width = ratio <= 2 ? 2*mag : (ratio <= 5 ? 5*mag : 10*mag)
    end

    # first edge is a multiple of the width lying strictly below lo
    first_edge = width*(ceil(lo/width) - 1)
    nsteps = ceil(Int, (hi - first_edge)/width)
    first_edge:width:(first_edge + nsteps*width)
end

# Compute "nice" histogram bin edges for the integer array `v`, aiming for
# approximately `n` bins.
#
# Returns a range of edges. The step is an integer: 1, 2, 5 or 10 times a
# non-negative power of 10, or 1 when every element of `v` is equal. An empty
# `v` yields `0:1:0`.
# Throws `ArgumentError` when `n < 0` for an empty `v`, or `n < 1` for a
# non-empty `v`.
function histrange{T<:Integer,N}(v::AbstractArray{T,N}, n::Integer)
    if length(v) == 0
        n < 0 && throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
        return 0:1:0
    end
    n < 1 && throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))

    lo, hi = extrema(v)
    if hi == lo
        # degenerate data: fall back to a unit-width bin
        width = 1
    else
        raw = (hi - lo) / n                          # ideal (un-rounded) bin width
        mag = 10^max(0, floor(Int, log10(raw)))      # integer magnitude, never below 1
        ratio = raw / mag
        # round the width up to the next "nice" multiple of the magnitude
        width = 10*mag
        for mult in (1, 2, 5)
            if ratio <= mult
                width = mult*mag
                break
            end
        end
    end

    # first edge is a multiple of the width lying strictly below lo
    first_edge = width*(ceil(lo/width) - 1)
    nsteps = ceil(Int, (hi - first_edge)/width)
    first_edge:width:(first_edge + nsteps*width)
end

## midpoints of intervals
# Deprecated: midpoints of the intervals delimited by the edges `r`.
# For a range the result is the leading `length(r)-1` edges shifted by half a
# step. `depwarn` needs the calling function's symbol as its second argument
# so the warning can be attributed to the caller.
function midpoints(r::Range)
    depwarn("midpoints(x) is deprecated. Method now in StatsBase.jl", :midpoints)
    r[1:length(r)-1] + 0.5*step(r)
end

# Deprecated: midpoints of the intervals delimited by the edge vector `v`,
# i.e. the mean of each pair of consecutive elements.
function midpoints(v::AbstractVector)
    depwarn("midpoints(x) is deprecated. Method now in StatsBase.jl", :midpoints)
    [0.5*(v[i] + v[i+1]) for i in 1:length(v)-1]
end

## hist ##
# Sturges' formula: suggested number of histogram bins for `n` observations,
# `ceil(log2(n)) + 1`. Returns `one(n)` when `n` is zero.
function sturges(n)
    if n == 0
        return one(n)
    end
    nbins = ceil(Int, log2(n))
    return nbins + 1
end

# Deprecated: accumulate the histogram of `v` into the pre-allocated `h`,
# using `edg` as the bin edges.
#
# Bin `i` counts the elements `x` with `edg[i] < x <= edg[i+1]` (for sorted
# `edg`); elements outside the edges are ignored. When `init` is true, `h` is
# zeroed first; pass `init=false` to add to existing counts.
# Returns the tuple `(edg, h)`.
# Throws `DimensionMismatch` unless `length(h) == length(edg) - 1`.
function hist!{HT}(h::AbstractArray{HT}, v::AbstractVector, edg::AbstractVector; init::Bool=true)
    # depwarn requires the symbol of the deprecated function as second argument
    depwarn("hist(...) and hist!(...) are deprecated. Use fit(Histogram,...) in StatsBase.jl instead.", :hist!)
    n = length(edg) - 1
    length(h) == n || throw(DimensionMismatch("length(histogram) must equal length(edges) - 1"))
    if init
        fill!(h, zero(HT))
    end
    for x in v
        # index of the first edge >= x, minus one, is the bin to the left of it
        i = searchsortedfirst(edg, x)-1
        if 1 <= i <= n
            h[i] += 1
        end
    end
    edg, h
end

# Histogram of the vector `v` with explicit bin edges `edg`; allocates an Int
# count vector and delegates to `hist!`.
function hist(v::AbstractVector, edg::AbstractVector)
    counts = Array(Int, length(edg)-1)
    hist!(counts, v, edg)
end

# Histogram of `v` using approximately `n` bins chosen by `histrange`.
function hist(v::AbstractVector, n::Integer)
    edges = histrange(v, n)
    hist(v, edges)
end

# Histogram of `v` with the bin count chosen by Sturges' formula.
function hist(v::AbstractVector)
    nbins = sturges(length(v))
    hist(v, nbins)
end

# Deprecated: column-wise histogram of the matrix `A` into the pre-allocated
# matrix `H`, using `edg` as the shared bin edges. Column `j` of `H` receives
# the counts for column `j` of `A`.
#
# When `init` is true, `H` is zeroed first; with `init=false` the counts are
# added to the existing contents of `H`.
# Returns the tuple `(edg, H)`.
# Throws `DimensionMismatch` unless `size(H) == (length(edg)-1, size(A,2))`.
function hist!{HT}(H::AbstractArray{HT,2}, A::AbstractMatrix, edg::AbstractVector; init::Bool=true)
    # depwarn requires the symbol of the deprecated function as second argument
    depwarn("hist(...) and hist!(...) are deprecated. Use fit(Histogram,...) in StatsBase.jl instead.", :hist!)
    n = size(A, 2)
    sH = size(H)
    sE = (length(edg)-1,n)
    sH == sE || throw(DimensionMismatch("incorrect size of histogram"))
    if init
        fill!(H, zero(HT))
    end
    for j = 1:n
        # Initialisation was handled above; the per-column call must not zero
        # the column again, otherwise `init=false` would be silently ignored.
        hist!(sub(H, :, j), sub(A, :, j), edg; init=false)
    end
    edg, H
end

# Column-wise histogram of the matrix `A` with explicit bin edges `edg`;
# allocates an Int count matrix with one column per column of `A` and
# delegates to `hist!`.
function hist(A::AbstractMatrix, edg::AbstractVector)
    counts = Array(Int, length(edg)-1, size(A,2))
    hist!(counts, A, edg)
end

# Column-wise histogram of `A` using approximately `n` bins chosen by
# `histrange` over all of `A`.
function hist(A::AbstractMatrix, n::Integer)
    edges = histrange(A, n)
    hist(A, edges)
end

# Column-wise histogram of `A` with the bin count chosen by Sturges' formula
# applied to the number of rows.
function hist(A::AbstractMatrix)
    nbins = sturges(size(A,1))
    hist(A, nbins)
end


## hist2d
# Deprecated: accumulate a 2-d histogram of the points in `v` (one point per
# row, exactly two columns) into the pre-allocated matrix `H`.
#
# `edg1` and `edg2` are the bin edges for the first and second column of `v`.
# A point is counted in `H[x,y]` when both coordinates fall inside the edges;
# other points are ignored. When `init` is true, `H` is zeroed first.
# Returns the tuple `(edg1, edg2, H)`.
# Throws `DimensionMismatch` if `v` does not have two columns or if
# `size(H) != (length(edg1)-1, length(edg2)-1)`.
function hist2d!{HT}(H::AbstractArray{HT,2}, v::AbstractMatrix,
                     edg1::AbstractVector, edg2::AbstractVector; init::Bool=true)
    # depwarn requires the symbol of the deprecated function as second argument
    depwarn("hist2d(...) is deprecated. Use fit(Histogram,...) in StatsBase.jl instead.", :hist2d!)
    size(v,2) == 2 || throw(DimensionMismatch("hist2d requires an Nx2 matrix"))
    n = length(edg1) - 1
    m = length(edg2) - 1
    size(H) == (n, m) || throw(DimensionMismatch("incorrect size of histogram"))
    if init
        fill!(H, zero(HT))
    end
    for i = 1:size(v,1)
        x = searchsortedfirst(edg1, v[i,1]) - 1
        y = searchsortedfirst(edg2, v[i,2]) - 1
        if 1 <= x <= n && 1 <= y <= m
            # x and y were just checked against the verified size of H
            @inbounds H[x,y] += 1
        end
    end
    edg1, edg2, H
end

# 2-d histogram of the rows of `v` with explicit edges per dimension;
# allocates an Int count matrix and delegates to `hist2d!`.
function hist2d(v::AbstractMatrix, edg1::AbstractVector, edg2::AbstractVector)
    counts = Array(Int, length(edg1)-1, length(edg2)-1)
    hist2d!(counts, v, edg1, edg2)
end

# Same edges for both dimensions.
function hist2d(v::AbstractMatrix, edg::AbstractVector)
    hist2d(v, edg, edg)
end

# Approximately `n1` and `n2` bins for the first and second column of `v`,
# with the edges chosen by `histrange` on each column.
function hist2d(v::AbstractMatrix, n1::Integer, n2::Integer)
    e1 = histrange(sub(v,:,1), n1)
    e2 = histrange(sub(v,:,2), n2)
    hist2d(v, e1, e2)
end

# Approximately `n` bins in both dimensions.
function hist2d(v::AbstractMatrix, n::Integer)
    hist2d(v, n, n)
end

# Bin count chosen by Sturges' formula applied to the number of rows.
function hist2d(v::AbstractMatrix)
    nbins = sturges(size(v,1))
    hist2d(v, nbins)
end
147 changes: 0 additions & 147 deletions base/statistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -506,150 +506,3 @@ function bound_quantiles(qs::AbstractVector)
end
[min(1,max(0,q)) for q = qs]
end



##### histogram #####

## nice-valued ranges for histograms

# Compute "nice" histogram bin edges for the floating-point array `v`, aiming
# for approximately `n` bins.
#
# Returns a range of edges. The step is 2, 5 or 10 times a power of 10, or 1.0
# when every element of `v` is equal. An empty `v` yields `0.0:1.0:0.0`.
# Throws `ArgumentError` when `n < 0` for an empty `v`, or `n < 1` for a
# non-empty `v`.
function histrange{T<:FloatingPoint,N}(v::AbstractArray{T,N}, n::Integer)
    if length(v) == 0
        n < 0 && throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
        return 0.0:1.0:0.0
    end
    n < 1 && throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))

    lo, hi = extrema(v)
    if hi == lo
        # degenerate data: fall back to a unit-width bin
        width = 1.0
    else
        raw = (hi - lo) / n                 # ideal (un-rounded) bin width
        mag = 10.0^floor(log10(raw))        # largest power of 10 not above raw
        ratio = raw / mag
        # round the width up to the next "nice" multiple of the magnitude
        width = ratio <= 2 ? 2*mag : (ratio <= 5 ? 5*mag : 10*mag)
    end

    # first edge is a multiple of the width lying strictly below lo
    first_edge = width*(ceil(lo/width) - 1)
    nsteps = ceil(Int, (hi - first_edge)/width)
    first_edge:width:(first_edge + nsteps*width)
end

# Compute "nice" histogram bin edges for the integer array `v`, aiming for
# approximately `n` bins.
#
# Returns a range of edges. The step is an integer: 1, 2, 5 or 10 times a
# non-negative power of 10, or 1 when every element of `v` is equal. An empty
# `v` yields `0:1:0`.
# Throws `ArgumentError` when `n < 0` for an empty `v`, or `n < 1` for a
# non-empty `v`.
function histrange{T<:Integer,N}(v::AbstractArray{T,N}, n::Integer)
    if length(v) == 0
        n < 0 && throw(ArgumentError("number of bins must be ≥ 0 for an empty array, got $n"))
        return 0:1:0
    end
    n < 1 && throw(ArgumentError("number of bins must be ≥ 1 for a non-empty array, got $n"))

    lo, hi = extrema(v)
    if hi == lo
        # degenerate data: fall back to a unit-width bin
        width = 1
    else
        raw = (hi - lo) / n                          # ideal (un-rounded) bin width
        mag = 10^max(0, floor(Int, log10(raw)))      # integer magnitude, never below 1
        ratio = raw / mag
        # round the width up to the next "nice" multiple of the magnitude
        width = 10*mag
        for mult in (1, 2, 5)
            if ratio <= mult
                width = mult*mag
                break
            end
        end
    end

    # first edge is a multiple of the width lying strictly below lo
    first_edge = width*(ceil(lo/width) - 1)
    nsteps = ceil(Int, (hi - first_edge)/width)
    first_edge:width:(first_edge + nsteps*width)
end

## midpoints of intervals
# Midpoints of the intervals delimited by the range of edges `r`: the leading
# `length(r)-1` edges shifted by half a step.
function midpoints(r::Range)
    halfstep = 0.5*step(r)
    r[1:length(r)-1] + halfstep
end

# Midpoints of the intervals delimited by the edge vector `v`: the mean of
# each pair of consecutive elements.
function midpoints(v::AbstractVector)
    [0.5*(v[k] + v[k+1]) for k in 1:length(v)-1]
end

## hist ##
# Sturges' formula: suggested number of histogram bins for `n` observations,
# `ceil(log2(n)) + 1`. Returns `one(n)` when `n` is zero.
function sturges(n)
    if n == 0
        return one(n)
    end
    nbins = ceil(Int, log2(n))
    return nbins + 1
end

# Accumulate the histogram of `v` into the pre-allocated `h`, using `edg` as
# the bin edges.
#
# Bin `i` counts the elements `x` with `edg[i] < x <= edg[i+1]` (for sorted
# `edg`); elements outside the edges are ignored. When `init` is true, `h` is
# zeroed first; pass `init=false` to add to existing counts.
# Returns the tuple `(edg, h)`.
# Throws `DimensionMismatch` unless `length(h) == length(edg) - 1`.
function hist!{HT}(h::AbstractArray{HT}, v::AbstractVector, edg::AbstractVector; init::Bool=true)
    nbins = length(edg) - 1
    if length(h) != nbins
        throw(DimensionMismatch("length(histogram) must equal length(edges) - 1"))
    end
    init && fill!(h, zero(HT))
    for x in v
        # index of the first edge >= x, minus one, is the bin to the left of it
        bin = searchsortedfirst(edg, x) - 1
        if bin >= 1 && bin <= nbins
            h[bin] += 1
        end
    end
    edg, h
end

# Histogram of the vector `v` with explicit bin edges `edg`; allocates an Int
# count vector and delegates to `hist!`.
function hist(v::AbstractVector, edg::AbstractVector)
    counts = Array(Int, length(edg)-1)
    hist!(counts, v, edg)
end

# Histogram of `v` using approximately `n` bins chosen by `histrange`.
function hist(v::AbstractVector, n::Integer)
    edges = histrange(v, n)
    hist(v, edges)
end

# Histogram of `v` with the bin count chosen by Sturges' formula.
function hist(v::AbstractVector)
    nbins = sturges(length(v))
    hist(v, nbins)
end

# Column-wise histogram of the matrix `A` into the pre-allocated matrix `H`,
# using `edg` as the shared bin edges. Column `j` of `H` receives the counts
# for column `j` of `A`.
#
# When `init` is true, `H` is zeroed first; with `init=false` the counts are
# added to the existing contents of `H`.
# Returns the tuple `(edg, H)`.
# Throws `DimensionMismatch` unless `size(H) == (length(edg)-1, size(A,2))`.
function hist!{HT}(H::AbstractArray{HT,2}, A::AbstractMatrix, edg::AbstractVector; init::Bool=true)
    n = size(A, 2)
    sH = size(H)
    sE = (length(edg)-1,n)
    sH == sE || throw(DimensionMismatch("incorrect size of histogram"))
    if init
        fill!(H, zero(HT))
    end
    for j = 1:n
        # Initialisation was handled above; the per-column call must not zero
        # the column again, otherwise `init=false` would be silently ignored.
        hist!(sub(H, :, j), sub(A, :, j), edg; init=false)
    end
    edg, H
end

# Column-wise histogram of the matrix `A` with explicit bin edges `edg`;
# allocates an Int count matrix with one column per column of `A` and
# delegates to `hist!`.
function hist(A::AbstractMatrix, edg::AbstractVector)
    counts = Array(Int, length(edg)-1, size(A,2))
    hist!(counts, A, edg)
end

# Column-wise histogram of `A` using approximately `n` bins chosen by
# `histrange` over all of `A`.
function hist(A::AbstractMatrix, n::Integer)
    edges = histrange(A, n)
    hist(A, edges)
end

# Column-wise histogram of `A` with the bin count chosen by Sturges' formula
# applied to the number of rows.
function hist(A::AbstractMatrix)
    nbins = sturges(size(A,1))
    hist(A, nbins)
end


## hist2d
# Accumulate a 2-d histogram of the points in `v` (one point per row, exactly
# two columns) into the pre-allocated matrix `H`.
#
# `edg1` and `edg2` are the bin edges for the first and second column of `v`.
# A point is counted in `H[x,y]` when both coordinates fall inside the edges;
# other points are ignored. When `init` is true, `H` is zeroed first.
# Returns the tuple `(edg1, edg2, H)`.
# Throws `DimensionMismatch` if `v` does not have two columns or if
# `size(H) != (length(edg1)-1, length(edg2)-1)`.
function hist2d!{HT}(H::AbstractArray{HT,2}, v::AbstractMatrix,
                     edg1::AbstractVector, edg2::AbstractVector; init::Bool=true)
    if size(v,2) != 2
        throw(DimensionMismatch("hist2d requires an Nx2 matrix"))
    end
    nx = length(edg1) - 1
    ny = length(edg2) - 1
    if size(H) != (nx, ny)
        throw(DimensionMismatch("incorrect size of histogram"))
    end
    init && fill!(H, zero(HT))
    for row = 1:size(v,1)
        bx = searchsortedfirst(edg1, v[row,1]) - 1
        by = searchsortedfirst(edg2, v[row,2]) - 1
        if bx >= 1 && bx <= nx && by >= 1 && by <= ny
            # bx and by were just checked against the verified size of H
            @inbounds H[bx,by] += 1
        end
    end
    edg1, edg2, H
end

# 2-d histogram of the rows of `v` with explicit edges per dimension;
# allocates an Int count matrix and delegates to `hist2d!`.
function hist2d(v::AbstractMatrix, edg1::AbstractVector, edg2::AbstractVector)
    counts = Array(Int, length(edg1)-1, length(edg2)-1)
    hist2d!(counts, v, edg1, edg2)
end

# Same edges for both dimensions.
function hist2d(v::AbstractMatrix, edg::AbstractVector)
    hist2d(v, edg, edg)
end

# Approximately `n1` and `n2` bins for the first and second column of `v`,
# with the edges chosen by `histrange` on each column.
function hist2d(v::AbstractMatrix, n1::Integer, n2::Integer)
    e1 = histrange(sub(v,:,1), n1)
    e2 = histrange(sub(v,:,2), n2)
    hist2d(v, e1, e2)
end

# Approximately `n` bins in both dimensions.
function hist2d(v::AbstractMatrix, n::Integer)
    hist2d(v, n, n)
end

# Bin count chosen by Sturges' formula applied to the number of rows.
function hist2d(v::AbstractMatrix)
    nbins = sturges(size(v,1))
    hist2d(v, nbins)
end

49 changes: 0 additions & 49 deletions doc/stdlib/math.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1239,55 +1239,6 @@ Statistics

Like ``median``, but may overwrite the input vector.

.. function:: hist(v[, n]) -> e, counts

Compute the histogram of ``v``, optionally using approximately ``n``
bins. The return values are a range ``e``, which corresponds to the
edges of the bins, and ``counts``, which contains the number of elements of
``v`` in each bin.
Note: Julia does not ignore ``NaN`` values in the computation.

.. function:: hist(v, e) -> e, counts

Compute the histogram of ``v`` using a vector/range ``e`` as the edges for
the bins. The resulting ``counts`` will be a vector of length ``length(e) - 1``, such that the
element at location ``i`` equals ``sum(e[i] .< v .<= e[i+1])``.
Note: Julia does not ignore ``NaN`` values in the computation.

.. function:: hist!(counts, v, e) -> e, counts

Compute the histogram of ``v``, using a vector/range ``e`` as the edges for the bins.
This function writes the resultant counts to a pre-allocated array ``counts``.

.. function:: hist2d(M, e1, e2) -> (edge1, edge2, counts)

Compute a "2d histogram" of a set of N points specified by N-by-2 matrix ``M``.
Arguments ``e1`` and ``e2`` are bins for each dimension, specified either as
integer bin counts or vectors of bin edges. The result is a tuple of
``edge1`` (the bin edges used in the first dimension), ``edge2`` (the bin edges
used in the second dimension), and ``counts``, a histogram matrix of size
``(length(edge1)-1, length(edge2)-1)``.
Note: Julia does not ignore ``NaN`` values in the computation.

.. function:: hist2d!(counts, M, e1, e2) -> (e1, e2, counts)

Compute a "2d histogram" with respect to the bins delimited by the edges given
in ``e1`` and ``e2``. This function writes the results to a pre-allocated
array ``counts``.

.. function:: histrange(v, n)

Compute *nice* bin ranges for the edges of a histogram of ``v``, using
approximately ``n`` bins. The resulting step sizes will be 1, 2 or 5
multiplied by a power of 10.
Note: Julia does not ignore ``NaN`` values in the computation.

.. function:: midpoints(e)

Compute the midpoints of the bins with edges ``e``. The result is a
vector/range of length ``length(e) - 1``.
Note: Julia does not ignore ``NaN`` values in the computation.

.. function:: quantile(v, p)

Compute the quantiles of a vector ``v`` at a specified set of probability values ``p``.
Expand Down
4 changes: 0 additions & 4 deletions test/arrayops.jl
Original file line number Diff line number Diff line change
Expand Up @@ -613,13 +613,9 @@ B = cat(3, 1, 2, 3)
begin
local a,h,i
a = rand(5,5)
h = mapslices(v -> hist(v,0:0.1:1)[2], a, 1)
H = mapslices(v -> hist(v,0:0.1:1)[2], a, 2)
s = mapslices(sort, a, [1])
S = mapslices(sort, a, [2])
for i = 1:5
@test h[:,i] == hist(a[:,i],0:0.1:1)[2]
@test vec(H[i,:]) == hist(vec(a[i,:]),0:0.1:1)[2]
@test s[:,i] == sort(a[:,i])
@test vec(S[i,:]) == sort(vec(a[i,:]))
end
Expand Down
3 changes: 2 additions & 1 deletion test/parallel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ map!(x->1, d)

# Test @parallel load balancing - all processors should get either M or M+1
# iterations out of the loop range for some M.
workloads = hist(@parallel((a,b)->[a;b], for i=1:7; myid(); end), nprocs())[2]
# Concatenate the id of the process that ran each of the 7 iterations, then
# count how many iterations each process id in 1:nprocs() handled.
ids = @parallel((a,b)->[a;b], for i=1:7; myid(); end)
workloads = Int[sum(ids .== i) for i in 1:nprocs()]
@test maximum(workloads) - minimum(workloads) <= 1

# @parallel reduction should work even with very short ranges
Expand Down
Loading