From 91d3ce966976270def169ef756f19c5186fbe3d4 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Fri, 10 Feb 2017 16:45:36 -0500 Subject: [PATCH 01/14] Updates for Julia 0.6 Rewrite broadcast as generated function to avoid world age problems. Use some of the broadcast functionality in Base. Update getindex and setindex to Base changes. Use generated functions instead of @nsplat. Use some of the Base machinery for getindex Fix many syntax deprecations --- src/DataArrays.jl | 5 +- src/broadcast.jl | 548 +++++++++++++++++++------------------ src/dataarray.jl | 33 +-- src/deprecated.jl | 149 +--------- src/indexing.jl | 87 +++--- src/literals.jl | 14 +- src/natype.jl | 4 + src/operators.jl | 77 ++---- src/pooleddataarray.jl | 32 +-- src/reduce.jl | 8 +- src/reducedim.jl | 4 +- test/broadcast.jl | 13 +- test/data.jl | 2 +- test/dataarray.jl | 6 +- test/newtests/dataarray.jl | 36 +-- test/operators.jl | 115 ++++---- test/reduce.jl | 16 +- test/reducedim.jl | 6 +- 18 files changed, 503 insertions(+), 652 deletions(-) diff --git a/src/DataArrays.jl b/src/DataArrays.jl index f0574cb..d9bd91d 100644 --- a/src/DataArrays.jl +++ b/src/DataArrays.jl @@ -1,4 +1,4 @@ -__precompile__() +__precompile__(false) module DataArrays using Base.Cartesian, Compat, Reexport @@ -7,8 +7,7 @@ module DataArrays const DEFAULT_POOLED_REF_TYPE = UInt32 - import Base: ==, !=, >, <, >=, <=, +, -, *, !, &, |, $, ^, /, - .==, .!=, .>, .<, .>=, .<=, .+, .-, .*, .%, ./, .\, .^ + import Base: ==, !=, >, <, >=, <=, +, -, *, !, &, |, $, ^, / import StatsBase: autocor, inverse_rle, rle diff --git a/src/broadcast.jl b/src/broadcast.jl index e58623a..2835dc7 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -15,22 +15,22 @@ end # Check that all arguments are broadcast compatible with shape # Differs from Base in that we check for exact matches -function check_broadcast_shape(shape::Dims, As::(@compat Union{AbstractArray,Number})...) - samesize = true - for A in As - if ndims(A) > length(shape) - throw(DimensionMismatch("cannot broadcast array to have fewer dimensions")) - end - for k in 1:length(shape) - n, nA = shape[k], size(A, k) - samesize &= (n == nA) - if n != nA != 1 - throw(DimensionMismatch("array could not be broadcast to match destination")) - end - end - end - samesize -end +# function check_broadcast_shape(shape::Dims, As::(@compat Union{AbstractArray,Number})...) +# samesize = true +# for A in As +# if ndims(A) > length(shape) +# throw(DimensionMismatch("cannot broadcast array to have fewer dimensions")) +# end +# for k in 1:length(shape) +# n, nA = shape[k], size(A, k) +# samesize &= (n == nA) +# if n != nA != 1 +# throw(DimensionMismatch("array could not be broadcast to match destination")) +# end +# end +# end +# samesize +# end # Get ref for value for a PooledDataArray, adding to the pool if # necessary @@ -44,7 +44,9 @@ end # Generate a branch for each possible combination of NA/not NA. This # gives good performance at the cost of 2^narrays branches. -function gen_na_conds(f, nd, arrtype, outtype, daidx=find([arrtype...] .!= AbstractArray), pos=1, isna=()) +function gen_na_conds(f, nd, arrtype, outtype, + daidx=find(t -> t <: DataArray || t <: PooledDataArray, arrtype), pos=1, isna=()) + if pos > length(daidx) args = Any[Symbol("v_$(k)") for k = 1:length(arrtype)] for i = 1:length(daidx) @@ -57,9 +59,9 @@ function gen_na_conds(f, nd, arrtype, outtype, daidx=find([arrtype...] .!= Abstr val = gensym("val") quote $val = $(Expr(:call, f, args...)) - $(if outtype == DataArray + $(if outtype <: DataArray :(@inbounds unsafe_dasetindex!(Bdata, Bc, $val, ind)) - elseif outtype == PooledDataArray + elseif outtype <: PooledDataArray :(@inbounds (@nref $nd Brefs i) = _unsafe_pdaref!(Bpool, Brefdict, $val)) end) end @@ -69,7 +71,7 @@ function gen_na_conds(f, nd, arrtype, outtype, daidx=find([arrtype...] .!= Abstr if $(Symbol("isna_$(k)")) $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(isna..., true))) else - $(if arrtype[k] == DataArray + $(if arrtype[k] <: DataArray :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("data_$(k)"))[$(Symbol("state_$(k)_0"))]) else :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("pool_$(k)"))[$(Symbol("r_$(k)"))]) @@ -84,271 +86,273 @@ end # # TODO: Fall back on faster implementation for same-sized inputs when # it is safe to do so. -function gen_broadcast_dataarray(nd::Int, arrtype::@compat(Tuple{Vararg{DataType}}), outtype, f::Function) - F = Expr(:quote, f) - narrays = length(arrtype) - As = [Symbol("A_$(i)") for i = 1:narrays] - dataarrays = find([arrtype...] .== DataArray) - abstractdataarrays = find([arrtype...] .!= AbstractArray) - have_fastpath = outtype == DataArray && all(x->!(x <: PooledDataArray), arrtype) - - @eval let - function _F_(B::$(outtype), $(As...)) - @assert ndims(B) == $nd - - # Set up input DataArray/PooledDataArrays - $(Expr(:block, [ - arrtype[k] == DataArray ? quote - $(Symbol("na_$(k)")) = $(Symbol("A_$(k)")).na.chunks - $(Symbol("data_$(k)")) = $(Symbol("A_$(k)")).data - $(Symbol("state_$(k)_0")) = $(Symbol("state_$(k)_$(nd)")) = 1 - @nexprs $nd d->($(Symbol("skip_$(k)_d")) = size($(Symbol("data_$(k)")), d) == 1) - end : arrtype[k] == PooledDataArray ? quote - $(Symbol("refs_$(k)")) = $(Symbol("A_$(k)")).refs - $(Symbol("pool_$(k)")) = $(Symbol("A_$(k)")).pool - end : nothing - for k = 1:narrays]...)) - - # Set up output DataArray/PooledDataArray - $(if outtype == DataArray - quote - Bdata = B.data - # Copy in case aliased - # TODO: check for aliasing? - Bna = falses(size(Bdata)) - Bc = Bna.chunks - ind = 1 - end - elseif outtype == PooledDataArray - quote - Bpool = B.pool = similar(B.pool, 0) - Brefs = B.refs - Brefdict = Dict{eltype(Bpool),eltype(Brefs)}() - end - end) - - @nloops($nd, i, $(outtype == DataArray ? (:Bdata) : (:Brefs)), - # pre - d->($(Expr(:block, [ - arrtype[k] == DataArray ? quote - $(Symbol("state_$(k)_")){d-1} = $(Symbol("state_$(k)_d")); - $(Symbol("j_$(k)_d")) = $(Symbol("skip_$(k)_d")) ? 1 : i_d - end : quote - $(Symbol("j_$(k)_d")) = size($(Symbol("A_$(k)")), d) == 1 ? 1 : i_d - end - for k = 1:narrays]...))), - - # post - d->($(Expr(:block, [quote - $(Symbol("skip_$(k)_d")) || ($(Symbol("state_$(k)_d")) = $(Symbol("state_$(k)_0"))) - end for k in dataarrays]...))), - - # body - begin - # Advance iterators for DataArray and determine NA status - $(Expr(:block, [ - arrtype[k] == DataArray ? quote - @inbounds $(Symbol("isna_$(k)")) = Base.unsafe_bitgetindex($(Symbol("na_$(k)")), $(Symbol("state_$(k)_0"))) - end : arrtype[k] == PooledDataArray ? quote - @inbounds $(Symbol("r_$(k)")) = @nref $nd $(Symbol("refs_$(k)")) d->$(Symbol("j_$(k)_d")) - $(Symbol("isna_$(k)")) = $(Symbol("r_$(k)")) == 0 - end : nothing - for k = 1:narrays]...)) - - # Extract values for ordinary AbstractArrays - $(Expr(:block, [ - :(@inbounds $(Symbol("v_$(k)")) = @nref $nd $(Symbol("A_$(k)")) d->$(Symbol("j_$(k)_d"))) - for k = find([arrtype...] .== AbstractArray)]...)) - - # Compute and store return value - $(gen_na_conds(F, nd, arrtype, outtype)) - - # Increment state - $(Expr(:block, [:($(Symbol("state_$(k)_0")) += 1) for k in dataarrays]...)) - $(if outtype == DataArray - :(ind += 1) - end) - end) - - $(if outtype == DataArray - :(B.na = Bna) - end) - end - _F_ - end -end +# Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, + # As::Union{DataArray, PooledDataArray}...) = + # broadcast!(f, B, As...) +Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, A0::AbstractArray, As::AbstractArray...) = + broadcast!(f, B, A0, As...) +Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, A0, As...) = + broadcast!(f, B, A0, As...) -datype(A_1::PooledDataArray, As...) = tuple(PooledDataArray, datype(As...)...) -datype(A_1::DataArray, As...) = tuple(DataArray, datype(As...)...) -datype(A_1, As...) = tuple(AbstractArray, datype(As...)...) -datype() = () - -datype_int(A_1::PooledDataArray, As...) = (@compat(UInt64(2)) | (datype_int(As...) << 2)) -datype_int(A_1::DataArray, As...) = (@compat(UInt64(1)) | (datype_int(As...) << 2)) -datype_int(A_1, As...) = (datype_int(As...) << 2) -datype_int() = @compat UInt64(0) - -# The following four methods are to avoid ambiguity warnings on 0.4 -Base.map!(f::Base.Callable, B::DataArray) = - invoke(map!, Tuple{Base.Callable, AbstractArray}, f, B) -Base.map!(f::Base.Callable, B::PooledDataArray) = - invoke(map!, Tuple{Base.Callable, AbstractArray}, f, B) -Base.broadcast!(f::Base.Function, B::DataArray) = - invoke(map!, Tuple{Base.Callable, AbstractArray}, f, B) -Base.broadcast!(f::Base.Function, B::PooledDataArray) = - invoke(map!, Tuple{Base.Callable, AbstractArray}, f, B) - -for bsig in (DataArray, PooledDataArray), asig in (Union{Array,BitArray,Number},DataArray, PooledDataArray,) - @eval let cache = Dict{Function,Dict{UInt64,Dict{Int,Function}}}() - function Base.map!(f::Base.Callable, B::$bsig, As::$asig...) - nd = ndims(B) - length(As) <= 8 || throw(ArgumentError("too many arguments")) - samesize = check_broadcast_shape(size(B), As...) - samesize || throw(DimensionMismatch("Argument dimensions must match")) - arrtype = datype_int(As...) - - cache_f = @get! cache f Dict{UInt64,Dict{Int,Function}}() - cache_f_na = @get! cache_f arrtype Dict{Int,Function}() - func = @get! cache_f_na nd gen_broadcast_dataarray(nd, datype(As...), $bsig, f) - - func(B, As...) - B - end +@generated function _broadcast!(f, B::Union{DataArray, PooledDataArray}, As...) - function Base.broadcast!(f::Function, B::$bsig, As::$asig...) - nd = ndims(B) - length(As) <= 8 || throw(ArgumentError("too many arguments")) - samesize = check_broadcast_shape(size(B), As...) - arrtype = datype_int(As...) + F = :(f) + nd = ndims(B) + N = length(As) - cache_f = @get! cache f Dict{UInt64,Dict{Int,Function}}() - cache_f_na = @get! cache_f arrtype Dict{Int,Function}() - func = @get! cache_f_na nd gen_broadcast_dataarray(nd, datype(As...), $bsig, f) + dataarrays = find(t -> t <: DataArray, As) - # println(code_typed(func, typeof(tuple(B, As...)))) - func(B, As...) - B - end - end -end + quote + @boundscheck Base.Broadcast.check_broadcast_indices(indices(B), As...) + # check_broadcast_shape(size(B), As...) + @nexprs $N i->(A_i = As[i]) + + @assert ndims(B) == $nd + + # Set up input DataArray/PooledDataArrays + # @show $(Expr(:block, [As[1] <: DataArray])) + $(Expr(:block, [ + As[k] <: DataArray ? quote + $(Symbol("na_$(k)")) = $(Symbol("A_$(k)")).na.chunks + $(Symbol("data_$(k)")) = $(Symbol("A_$(k)")).data + $(Symbol("state_$(k)_0")) = $(Symbol("state_$(k)_$(nd)")) = 1 + @nexprs $nd d->($(Symbol("skip_$(k)_d")) = size($(Symbol("data_$(k)")), d) == 1) + end : As[k] <: PooledDataArray ? quote + $(Symbol("refs_$(k)")) = $(Symbol("A_$(k)")).refs + $(Symbol("pool_$(k)")) = $(Symbol("A_$(k)")).pool + end : nothing + for k = 1:N]...)) + + # Set up output DataArray/PooledDataArray + $(if B <: DataArray + quote + Bdata = B.data + # Copy in case aliased + # TODO: check for aliasing? + Bna = falses(size(Bdata)) + Bc = Bna.chunks + ind = 1 + end + elseif B <: PooledDataArray + quote + Bpool = B.pool = similar(B.pool, 0) + Brefs = B.refs + Brefdict = Dict{eltype(Bpool),eltype(Brefs)}() + end + end) + + @nloops($nd, i, $(B <: DataArray ? (:Bdata) : (:Brefs)), + # pre + d->($(Expr(:block, [ + As[k] <: DataArray ? quote + $(Symbol("state_$(k)_")){d-1} = $(Symbol("state_$(k)_d")); + $(Symbol("j_$(k)_d")) = $(Symbol("skip_$(k)_d")) ? 1 : i_d + end : quote + $(Symbol("j_$(k)_d")) = size($(Symbol("A_$(k)")), d) == 1 ? 1 : i_d + end + for k = 1:N]...))), + + # post + d->($(Expr(:block, [quote + $(Symbol("skip_$(k)_d")) || ($(Symbol("state_$(k)_d")) = $(Symbol("state_$(k)_0"))) + end for k in dataarrays]...))), + + # body + begin + # Advance iterators for DataArray and determine NA status + $(Expr(:block, [ + As[k] <: DataArray ? quote + @inbounds $(Symbol("isna_$(k)")) = Base.unsafe_bitgetindex($(Symbol("na_$(k)")), $(Symbol("state_$(k)_0"))) + end : As[k] <: PooledDataArray ? quote + @inbounds $(Symbol("r_$(k)")) = @nref $nd $(Symbol("refs_$(k)")) d->$(Symbol("j_$(k)_d")) + $(Symbol("isna_$(k)")) = $(Symbol("r_$(k)")) == 0 + end : nothing + for k = 1:N]...)) + + # Extract values for ordinary AbstractArrays + $(Expr(:block, [ + :(@inbounds $(Symbol("v_$(k)")) = @nref $nd $(Symbol("A_$(k)")) d->$(Symbol("j_$(k)_d"))) + for k = find(t -> !(t <: DataArray || t <: PooledDataArray), As)]...)) + + # Compute and store return value + $(gen_na_conds(F, nd, As, B)) + + # Increment state + $(Expr(:block, [:($(Symbol("state_$(k)_0")) += 1) for k in dataarrays]...)) + $(if B <: DataArray + :(ind += 1) + end) + end) -databroadcast(f::Function, As...) = broadcast!(f, DataArray(promote_eltype(As...), _broadcast_shape(As...)), As...) -pdabroadcast(f::Function, As...) = broadcast!(f, PooledDataArray(promote_eltype(As...), _broadcast_shape(As...)), As...) + $(if B <: DataArray + :(B.na = Bna) + end) -function exreplace!(ex::Expr, search, rep) - for i = 1:length(ex.args) - if ex.args[i] == search - splice!(ex.args, i, rep) - break - else - exreplace!(ex.args[i], search, rep) - end + return B end - ex end -exreplace!(ex, search, rep) = ex - -macro da_broadcast_vararg(func) - if (func.head != :function && func.head != :(=)) || - func.args[1].head != :call || !isa(func.args[1].args[end], Expr) || - func.args[1].args[end].head != :... - throw(ArgumentError("@da_broadcast_vararg may only be applied to vararg functions")) - end - - va = func.args[1].args[end] - defs = Any[] - for n = 1:4, aa = 0:n-1 - def = deepcopy(func) - rep = Any[Symbol("A_$(i)") for i = 1:n] - push!(rep, va) - exreplace!(def.args[2], va, rep) - rep = Vector{Any}(n+1) - for i = 1:aa - rep[i] = Expr(:(::), Symbol("A_$i"), AbstractArray) - end - for i = aa+1:n - rep[i] = Expr(:(::), Symbol("A_$i"), (@compat Union{DataArray, PooledDataArray})) - end - rep[end] = Expr(:..., Expr(:(::), va.args[1], AbstractArray)) - exreplace!(def.args[1], va, rep) - push!(defs, def) - end - esc(Expr(:block, defs...)) +Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, ::Type{T}, As...) where T = + _broadcast!((t...) -> f(T, t...), B, As...) +Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, A0::Number, As::Number...) = + _broadcast!(f, B, A0, As...) +Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, A0, As...) = + _broadcast!(f, B, A0, As...) + +Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{DataArray}) = DataArray +Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{PooledDataArray}) = PooledDataArray +Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{Array}) = DataArray +Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{Array}) = PooledDataArray +Base.Broadcast.promote_containertype(::Type{Array}, ::Type{DataArray}) = DataArray +Base.Broadcast.promote_containertype(::Type{Array}, ::Type{PooledDataArray}) = PooledDataArray +Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{PooledDataArray}) = DataArray +Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{DataArray}) = DataArray +Base.Broadcast.promote_containertype(::Type{DataArray}, ct) = DataArray +Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ct) = PooledDataArray +Base.Broadcast.promote_containertype(ct, ::Type{DataArray}) = DataArray +Base.Broadcast.promote_containertype(ct, ::Type{PooledDataArray}) = PooledDataArray +Base.Broadcast._containertype(::Type{T}) where T<:DataArray = DataArray +Base.Broadcast._containertype(::Type{T}) where T<:PooledDataArray = PooledDataArray +Base.Broadcast.broadcast_indices(::Type{T}, A) where T<:AbstractDataArray = indices(A) + +@inline function Base.Broadcast.broadcast_c{S<:AbstractDataArray}(f, ::Type{S}, A, Bs...) + T = Base.Broadcast._broadcast_eltype(f, A, Bs...) + shape = Base.Broadcast.broadcast_indices(A, Bs...) + # iter = CartesianRange(shape) + # if isleaftype(T) + dest = S(T, Base.index_lengths(shape...)) + return broadcast!(f, dest, A, Bs...) + # end + # if isempty(iter) + # return similar(Array{T}, shape) + # end + # return broadcast_t(f, Any, shape, iter, A, Bs...) end -macro da_broadcast_binary(func) - if (func.head != :function && func.head != :(=)) || - func.args[1].head != :call || - length(func.args[1].args) != 3 - throw(ArgumentError("@da_broadcast_binary may only be applied to two-argument functions")) - end - (ff, A, B) = func.args[1].args - f = esc(ff) - body = func.args[2] - quote - ($f)($A::(@compat Union{DataArray, PooledDataArray}), $B::(@compat Union{DataArray, PooledDataArray})) = $(body) - ($f)($A::(@compat Union{DataArray, PooledDataArray}), $B::AbstractArray) = $(body) - ($f)($A::AbstractArray, $B::(@compat Union{DataArray, PooledDataArray})) = $(body) - end -end +# function databroadcast(f::Function, As...) +# T = Base.promote_op(f, eltype.(As)...) +# B = DataArray(T, _broadcast_shape(As...)) +# broadcast!(f, B, As...) +# end +# function pdabroadcast(f::Function, As...) +# T = Base.promote_op(f, eltype.(As)...) +# B = PooledDataArray(T, _broadcast_shape(As...)) +# broadcast!(f, B, As...) +# end + +# function exreplace!(ex::Expr, search, rep) +# for i = 1:length(ex.args) +# if ex.args[i] == search +# splice!(ex.args, i, rep) +# break +# else +# exreplace!(ex.args[i], search, rep) +# end +# end +# ex +# end +# exreplace!(ex, search, rep) = ex + +# macro da_broadcast_vararg(func) +# if (func.head != :function && func.head != :(=)) || +# func.args[1].head != :call || !isa(func.args[1].args[end], Expr) || +# func.args[1].args[end].head != :... +# throw(ArgumentError("@da_broadcast_vararg may only be applied to vararg functions")) +# end + +# va = func.args[1].args[end] +# defs = Any[] +# for n = 1:4, aa = 0:n-1 +# def = deepcopy(func) +# rep = Any[Symbol("A_$(i)") for i = 1:n] +# push!(rep, va) +# exreplace!(def.args[2], va, rep) +# rep = Vector{Any}(n+1) +# for i = 1:aa +# rep[i] = Expr(:(::), Symbol("A_$i"), AbstractArray) +# end +# for i = aa+1:n +# rep[i] = Expr(:(::), Symbol("A_$i"), (@compat Union{DataArray, PooledDataArray})) +# end +# rep[end] = Expr(:..., Expr(:(::), va.args[1], AbstractArray)) +# exreplace!(def.args[1], va, rep) +# push!(defs, def) +# end +# esc(Expr(:block, defs...)) +# end + +# macro da_broadcast_binary(func) +# if (func.head != :function && func.head != :(=)) || +# func.args[1].head != :call || +# length(func.args[1].args) != 3 +# throw(ArgumentError("@da_broadcast_binary may only be applied to two-argument functions")) +# end +# (ff, A, B) = func.args[1].args +# f = esc(ff) +# body = func.args[2] +# quote +# ($f)($A::(@compat Union{DataArray, PooledDataArray}), $B::(@compat Union{DataArray, PooledDataArray})) = $(body) +# ($f)($A::(@compat Union{DataArray, PooledDataArray}), $B::AbstractArray) = $(body) +# ($f)($A::AbstractArray, $B::(@compat Union{DataArray, PooledDataArray})) = $(body) +# end +# end # Broadcasting DataArrays returns a DataArray -@da_broadcast_vararg Base.broadcast(f::Function, As...) = databroadcast(f, As...) +# @da_broadcast_vararg Base.broadcast(f::Function, As...) = databroadcast(f, As...) # Definitions for operators, -(.*)(A::BitArray, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(*, A, B) -(.*)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::BitArray) = databroadcast(*, A, B) -@da_broadcast_vararg (.*)(As...) = databroadcast(*, As...) -@da_broadcast_binary (.%)(A, B) = databroadcast(%, A, B) -@da_broadcast_vararg (.+)(As...) = broadcast!(+, DataArray(promote_eltype_op(@functorize(+), As...), _broadcast_shape(As...)), As...) -@da_broadcast_binary (.-)(A, B) = - broadcast!(-, DataArray(promote_op(@functorize(-), eltype(A), eltype(B)), - _broadcast_shape(A,B)), A, B) -@da_broadcast_binary (./)(A, B) = - broadcast!(/, DataArray(promote_op(@functorize(/), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) -@da_broadcast_binary (.\)(A, B) = - broadcast!(\, DataArray(promote_op(@functorize(\), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) -(.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) -(.^)(A::BitArray, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) -(.^)(A::AbstractArray{Bool}, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) -(.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::BitArray) = databroadcast(>=, A, B) -(.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::AbstractArray{Bool}) = databroadcast(>=, A, B) -@da_broadcast_binary (.^)(A, B) = - broadcast!(^, DataArray(promote_op(@functorize(^), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) +# (.*)(A::BitArray, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(*, A, B) +# (.*)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::BitArray) = databroadcast(*, A, B) +# @da_broadcast_vararg (.*)(As...) = databroadcast(*, As...) +# @da_broadcast_binary (.%)(A, B) = databroadcast(%, A, B) +# @da_broadcast_vararg (.+)(As...) = broadcast!(+, DataArray(promote_eltype_op(@functorize(+), As...), _broadcast_shape(As...)), As...) +# @da_broadcast_binary (.-)(A, B) = + # broadcast!(-, DataArray(promote_op(@functorize(-), eltype(A), eltype(B)), + # _broadcast_shape(A,B)), A, B) +# @da_broadcast_binary (./)(A, B) = +# broadcast!(/, DataArray(promote_op(@functorize(/), eltype(A), eltype(B)), +# _broadcast_shape(A, B)), A, B) +# @da_broadcast_binary (.\)(A, B) = +# broadcast!(\, DataArray(promote_op(@functorize(\), eltype(A), eltype(B)), +# _broadcast_shape(A, B)), A, B) +# (.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) +# (.^)(A::BitArray, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) +# (.^)(A::AbstractArray{Bool}, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) +# (.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::BitArray) = databroadcast(>=, A, B) +# (.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::AbstractArray{Bool}) = databroadcast(>=, A, B) +# @da_broadcast_binary (.^)(A, B) = +# broadcast!(^, DataArray(promote_op(@functorize(^), eltype(A), eltype(B)), +# _broadcast_shape(A, B)), A, B) # XXX is a PDA the right return type for these? -Base.broadcast(f::Function, As::PooledDataArray...) = pdabroadcast(f, As...) -(.*)(As::PooledDataArray...) = pdabroadcast(*, As...) -(.%)(A::PooledDataArray, B::PooledDataArray) = pdabroadcast(%, A, B) -(.+)(As::PooledDataArray...) = - broadcast!(+, PooledDataArray(promote_eltype_op(@functorize(+), As...), _broadcast_shape(As...)), As...) -(.-)(A::PooledDataArray, B::PooledDataArray) = - broadcast!(-, PooledDataArray(promote_op(@functorize(-), eltype(A), eltype(B)), - _broadcast_shape(A,B)), A, B) -(./)(A::PooledDataArray, B::PooledDataArray) = - broadcast!(/, PooledDataArray(promote_op(@functorize(/), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) -(.\)(A::PooledDataArray, B::PooledDataArray) = - broadcast!(\, PooledDataArray(promote_op(@functorize(\), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) -(.^)(A::PooledDataArray{Bool}, B::PooledDataArray{Bool}) = databroadcast(>=, A, B) -(.^)(A::PooledDataArray, B::PooledDataArray) = - broadcast!(^, PooledDataArray(promote_op(@functorize(^), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) - -for (sf, vf) in zip(scalar_comparison_operators, array_comparison_operators) - @eval begin - # ambiguity - $(vf)(A::(@compat Union{PooledDataArray{Bool},DataArray{Bool}}), B::(@compat Union{PooledDataArray{Bool},DataArray{Bool}})) = - broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) - $(vf)(A::(@compat Union{PooledDataArray{Bool},DataArray{Bool}}), B::AbstractArray{Bool}) = - broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) - $(vf)(A::AbstractArray{Bool}, B::(@compat Union{PooledDataArray{Bool},DataArray{Bool}})) = - broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) - - @da_broadcast_binary $(vf)(A, B) = broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) - end -end +# Base.broadcast(f::Function, As::PooledDataArray...) = pdabroadcast(f, As...) +# (.*)(As::PooledDataArray...) = pdabroadcast(*, As...) +# (.%)(A::PooledDataArray, B::PooledDataArray) = pdabroadcast(%, A, B) +# (.+)(As::PooledDataArray...) = +# broadcast!(+, PooledDataArray(promote_eltype_op(@functorize(+), As...), _broadcast_shape(As...)), As...) +# (.-)(A::PooledDataArray, B::PooledDataArray) = +# broadcast!(-, PooledDataArray(promote_op(@functorize(-), eltype(A), eltype(B)), +# _broadcast_shape(A,B)), A, B) +# (./)(A::PooledDataArray, B::PooledDataArray) = +# broadcast!(/, PooledDataArray(promote_op(@functorize(/), eltype(A), eltype(B)), +# _broadcast_shape(A, B)), A, B) +# (.\)(A::PooledDataArray, B::PooledDataArray) = +# broadcast!(\, PooledDataArray(promote_op(@functorize(\), eltype(A), eltype(B)), +# _broadcast_shape(A, B)), A, B) +# (.^)(A::PooledDataArray{Bool}, B::PooledDataArray{Bool}) = databroadcast(>=, A, B) +# (.^)(A::PooledDataArray, B::PooledDataArray) = +# broadcast!(^, PooledDataArray(promote_op(@functorize(^), eltype(A), eltype(B)), +# _broadcast_shape(A, B)), A, B) + +# for (sf, vf) in zip(scalar_comparison_operators, array_comparison_operators) +# @eval begin +# # ambiguity +# $(vf)(A::(@compat Union{PooledDataArray{Bool},DataArray{Bool}}), B::(@compat Union{PooledDataArray{Bool},DataArray{Bool}})) = +# broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) +# $(vf)(A::(@compat Union{PooledDataArray{Bool},DataArray{Bool}}), B::AbstractArray{Bool}) = +# broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) +# $(vf)(A::AbstractArray{Bool}, B::(@compat Union{PooledDataArray{Bool},DataArray{Bool}})) = +# broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) + +# @da_broadcast_binary $(vf)(A, B) = broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) +# end +# end diff --git a/src/dataarray.jl b/src/dataarray.jl index e875a5b..14aa809 100644 --- a/src/dataarray.jl +++ b/src/dataarray.jl @@ -108,7 +108,7 @@ end #' #' da = DataArray(Int, 2, 2) function DataArray(T::Type, dims::Integer...) # -> DataArray{T} - return DataArray(Array(T, dims...), trues(dims...)) + return DataArray(Array{T}(dims...), trues(dims...)) end #' @description @@ -125,7 +125,7 @@ end #' #' da = DataArray(Int, (2, 2)) function DataArray{N}(T::Type, dims::NTuple{N, Int}) # -> DataArray{T} - return DataArray(Array(T, dims...), trues(dims...)) + return DataArray(Array{T}(dims...), trues(dims...)) end #' @description @@ -250,7 +250,7 @@ end #' dv = @data [false, false, true, false] #' dv_new = similar(dv, Float64, 2, 2, 2) function Base.similar(da::DataArray, T::Type, dims::Dims) #-> DataArray{T} - return DataArray(Array(T, dims), trues(dims)) + return DataArray(Array{T}(dims), falses(dims)) end #' @description @@ -329,7 +329,7 @@ function Base.find(da::DataArray{Bool}) # -> Array{Int} @inbounds @bitenumerate da.na i na begin ntrue += !na && data[i] end - res = Array(Int, ntrue) + res = Vector{Int}(ntrue) count = 1 @inbounds @bitenumerate da.na i na begin if !na && data[i] @@ -389,7 +389,7 @@ function Base.convert{S, T, N}( replacement::Any ) # -> Array{S, N} replacementS = convert(S, replacement) - res = Array(S, size(da)) + res = Array{S}(size(da)) for i in 1:length(da) if da.na[i] res[i] = replacementS @@ -497,23 +497,6 @@ anyna(da::DataArray) = any(da.na) # -> Bool #' allna(da) allna(da::DataArray) = all(da.na) # -> Bool -#' @description -#' -#' Determine if the entries of an DataArray are `NaN`. -#' -#' @param da::DataArray{T, N} The DataArray whose elements will -#' be assessed. -#' -#' @returns na::DataArray{Bool} Elementwise Boolean whether entry is `NaN`. -#' -#' @examples -#' -#' da = @data([1, 2, 3]) -#' isnan(da) -function Base.isnan(da::DataArray) # -> DataArray{Bool} - return @compat DataArray(isnan.(da.data), copy(da.na)) -end - #' @description #' #' Determine if the entries of an DataArray are finite, which means @@ -530,7 +513,7 @@ end #' isfinite(da) function Base.isfinite(da::DataArray) # -> DataArray{Bool} n = length(da) - res = Array(Bool, size(da)) + res = Array{Bool}(size(da)) for i in 1:n if !da.na[i] res[i] = isfinite(da.data[i]) @@ -657,7 +640,7 @@ end #' dv = @data [1, 2, NA, 4] #' distinct_values, firstna = finduniques(dv) function finduniques{T}(da::DataArray{T}) # -> Vector{T}, Int - out = Array(T,0) + out = Vector{T}(0) seen = Set{T}() n = length(da) firstna = 0 @@ -696,7 +679,7 @@ function Base.unique{T}(da::DataArray{T}) # -> DataVector{T} unique_values, firstna = finduniques(da) n = length(unique_values) if firstna > 0 - res = DataArray(Array(T, n + 1)) + res = DataArray(Vector{T}(n + 1)) i = 1 for val in unique_values if i == firstna diff --git a/src/deprecated.jl b/src/deprecated.jl index ec0af60..ed4a288 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,146 +1,5 @@ -# Note: These methods need a more helpfull error message than a `NoMethodError`, -# when the deprecation is removed -import Base.@deprecate -import Base.Operators: / -@deprecate (/)(x::(@compat Union{NAtype,Number}),A::AbstractDataArray) x ./ A - -#' @description -#' -#' Turn a DataArray into an Array. Raises an error if NA's are encountered. -#' -#' @param da::DataArray{T} DataArray that will be converted to an Array. -#' -#' @returns a::Array{T} Array containing values of `da`. -#' -#' @examples -#' -#' dv = @data [1, 2, 3, 4] -#' v = convert(Vector, dv) -#' -#' dm = @data [1 2; 3 4] -#' m = convert(Matrix, dm) -function array{T}(da::DataArray{T}) # -> Array{T} - Base.depwarn( - """ - array(da::DataArray{T}) is deprecated. - Use convert(Array, da). - """, - :array - ) - res = Array(T, size(da)) - for i in 1:length(da) - if da.na[i] - throw(NAException()) - else - res[i] = da.data[i] - end - end - return res +# Deprecate in Julia 0.6 cycle +function Base.isnan(da::DataArray) + Base.depwarn("vectorized method isnan(da) is deprecated, use isnan.(da) instead", :isnan) + return isnan.(da) end - -#' @description -#' -#' Turn a DataArray into an Array. Replace any NA's with the value -#' of second argument, `replacement`. -#' -#' @param da::DataArray{T} DataArray that will be converted to an Array. -#' @param replacement::T Value that will replace NA's in `da`. -#' -#' @returns a::Array{T} Array containing values of `da` plus replacements. -#' -#' @examples -#' -#' dv = @data [1, 2, NA, 4] -#' v = convert(Vector, dv, 3) -#' -#' dm = @data [1 2; NA 4] -#' m = convert(Matrix, dm, 3) -function array{T}(da::DataArray{T}, replacement::Any) # -> Array{T} - Base.depwarn( - """ - array(da::DataArray{T}, replacement::Any) is deprecated. - Use convert(Array, da, replacement) instead. - """, - :array - ) - res = Array(T, size(da)) - replacementT = convert(T, replacement) - for i in 1:length(da) - if da.na[i] - res[i] = replacementT - else - res[i] = da.data[i] - end - end - return res -end - -# Turn a PooledDataArray into an Array. Fail on NA -function array{T, R}(da::PooledDataArray{T, R}) - Base.depwarn( - """ - array(pda::PooledDataArray{T, R}) is deprecated. - Use convert(Array, pda) instead. - """, - :array - ) - n = length(da) - res = Array(T, size(da)) - for i in 1:n - if da.refs[i] == zero(R) - throw(NAException()) - else - res[i] = da.pool[da.refs[i]] - end - end - return res -end - -function array{T, R}(da::PooledDataArray{T, R}, replacement::T) - Base.depwarn( - """ - array(pda::PooledDataArray{T, R}, replacement::T) is deprecated. - Use convert(Array, pda, replacement) instead. - """, - :array - ) - n = length(da) - res = Array(T, size(da)) - for i in 1:n - if da.refs[i] == zero(R) - res[i] = replacement - else - res[i] = da.pool[da.refs[i]] - end - end - return res -end - -@deprecate head(dv::AbstractDataVector) dv[1:min(6, end)] -@deprecate tail(dv::AbstractDataVector) dv[max(end-6, 1):end] - -function rep{T <: Integer}(x::AbstractVector, lengths::AbstractVector{T}) - Base.depwarn( - """ - rep{T <: Integer}(x::AbstractVector, lengths::AbstractVector{T}) is deprecated. - """, - :rep - ) - if length(x) != length(lengths) - throw(DimensionMismatch("vector lengths must match")) - end - res = similar(x, sum(lengths)) - i = 1 - for idx in 1:length(x) - tmp = x[idx] - for kdx in 1:lengths[idx] - res[i] = tmp - i += 1 - end - end - return res -end - -@deprecate rep(x::AbstractVector, times::Integer, each::Integer = 1) Compat.repeat(x; inner=each, outer=times) -@deprecate rep(x::AbstractVector; times::Integer = 1, each::Integer = 1) Compat.repeat(x; inner=each, outer=times) -@deprecate rep(x::Any, times::Integer) Compat.repeat(x; inner=times) diff --git a/src/indexing.jl b/src/indexing.jl index 6e1adc7..7f75a45 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -62,7 +62,7 @@ function combine_pools!(pool, newpool) end # Find pool elements in existing array, or add them - poolidx = Array(Int, length(newpool)) + poolidx = Vector{Int}(length(newpool)) for j = 1:length(newpool) poolidx[j] = Base.@get!(seen, newpool[j], (push!(pool, newpool[j]); i += 1)) end @@ -114,6 +114,8 @@ Base.getindex(t::AbstractDataArray, i::Real) = ## getindex: DataArray +Base.linearindexing(x::Union{DataArray,PooledDataArray}) = Base.LinearFast() + # Scalar case function Base.getindex(da::DataArray, I::Real) if getindex(da.na, I) @@ -122,24 +124,24 @@ function Base.getindex(da::DataArray, I::Real) return getindex(da.data, I) end end -@nsplat N function Base.getindex(da::DataArray, I::NTuple{N,Real}...) - if getindex(da.na, I...) - return NA - else - return getindex(da.data, I...) - end -end +# @nsplat N function Base.getindex(da::DataArray, I::NTuple{N,Real}...) +# if getindex(da.na, I...) +# return NA +# else +# return getindex(da.data, I...) +# end +# end if VERSION > v"0.5-" Base.unsafe_getindex(x::Number, i::Int) = (@inbounds r = x[i]; r) end # Vector case -@generated function _getindex!(dest::DataArray, src::DataArray, I::Union{Real, AbstractArray, Colon}...) +@generated function Base._unsafe_getindex!(dest::DataArray, src::DataArray, I::Union{Real, AbstractArray}...) N = length(I) quote $(Expr(:meta, :inline)) - idxlens = index_lengths(src, I...) # TODO: unsplat? + idxlens = index_lengths(I...) # TODO: unsplat? srcextr = daextract(src) destextr = daextract(dest) srcsz = size(src) @@ -157,25 +159,25 @@ end end end -function _getindex{T}(A::DataArray{T}, I::@compat Tuple{Vararg{Union{Int,AbstractVector}}}) - shape = _index_shape(A, I...) - _getindex!(DataArray(Array(T, shape), falses(shape)), A, I...) -end +# function _getindex{T}(A::DataArray{T}, I::@compat Tuple{Vararg{Union{Int,AbstractVector}}}) +# shape = _index_shape(Base.to_indices(A, I)...) +# _getindex!(DataArray(Array{T}(shape), falses(shape)), A, I...) +# end -@nsplat N function Base.getindex(A::DataArray, I::NTuple{N,(@compat Union{Real,AbstractVector})}...) - checkbounds(A, I...) - _getindex(A, Base.to_indexes(I...)) -end +# @nsplat N function Base.getindex(A::DataArray, I::NTuple{N,Union{Real,Colon,AbstractVector}}...) + # checkbounds(A, I...) + # _getindex(A, Base.to_indexes(I...)) +# end # Dispatch our implementation for these cases instead of Base -function Base.getindex(A::DataArray, I::AbstractVector) - checkbounds(A, I) - _getindex(A, (Base.to_index(I),)) -end -function Base.getindex(A::DataArray, I::AbstractArray) - checkbounds(A, I) - _getindex(A, (Base.to_index(I),)) -end +# function Base.getindex(A::DataArray, I::AbstractVector) +# checkbounds(A, I) +# _getindex(A, (Base.to_index(I),)) +# end +# function Base.getindex(A::DataArray, I::AbstractArray) +# checkbounds(A, I) +# _getindex(A, (Base.to_index(I),)) +# end ## getindex: PooledDataArray @@ -187,24 +189,29 @@ function Base.getindex(pda::PooledDataArray, I::Real) return pda.pool[getindex(pda.refs, I)] end end -@nsplat N function Base.getindex(pda::PooledDataArray, I::NTuple{N,Real}...) - if getindex(pda.refs, I...) == 0 - return NA - else - return pda.pool[getindex(pda.refs, I...)] + +@generated function Base.getindex(pda::PooledDataArray, I::Integer...) + quote + if getindex(pda.refs, I...) == 0 + return NA + else + return pda.pool[getindex(pda.refs, I...)] + end end end # Vector case -@nsplat N function Base.getindex(A::PooledDataArray, I::NTuple{N,(@compat Union{Real,AbstractVector})}...) - PooledDataArray(RefArray(getindex(A.refs, I...)), copy(A.pool)) +@generated function Base.getindex(A::PooledDataArray, I::Union{AbstractVector,Colon}...) + quote + PooledDataArray(RefArray(getindex(A.refs, I...)), copy(A.pool)) + end end # Dispatch our implementation for these cases instead of Base -Base.getindex(A::PooledDataArray, I::AbstractVector) = - PooledDataArray(RefArray(getindex(A.refs, I)), copy(A.pool)) -Base.getindex(A::PooledDataArray, I::AbstractArray) = - PooledDataArray(RefArray(getindex(A.refs, I)), copy(A.pool)) +# Base.getindex(A::PooledDataArray, I::AbstractVector) = +# PooledDataArray(RefArray(getindex(A.refs, I)), copy(A.pool)) +# Base.getindex(A::PooledDataArray, I::AbstractArray) = +# PooledDataArray(RefArray(getindex(A.refs, I)), copy(A.pool)) ## setindex!: DataArray @@ -234,7 +241,7 @@ end ## setindex!: both DataArray and PooledDataArray @ngenerate N typeof(A) function Base.setindex!(A::AbstractDataArray, x, - J::NTuple{N,(@compat Union{Real,AbstractArray})}...) + J::NTuple{N,Union{Real,Colon,AbstractArray}}...) if !isa(x, AbstractArray) && isa(A, PooledDataArray) # Only perform one pool lookup when assigning a scalar value in # a PooledDataArray @@ -244,7 +251,7 @@ end Aextr = daextract(A) @ncall N checkbounds A J - @nexprs N d->(I_d = Base.to_index(J_d)) + @nexprs N d->(I_d = Base.to_indices(A, J)[d]) stride_1 = 1 @nexprs N d->(stride_{d+1} = stride_d*size(A,d)) @nexprs N d->(offset_d = 1) # really only need offset_$N = 1 @@ -259,7 +266,7 @@ end end else X = x - idxlens = @ncall N index_lengths A I + idxlens = @ncall N index_lengths I @ncall N setindex_shape_check X (d->idxlens[d]) k = 1 if isa(A, PooledDataArray) && isa(X, PooledDataArray) diff --git a/src/literals.jl b/src/literals.jl index 9ae272a..925cb68 100644 --- a/src/literals.jl +++ b/src/literals.jl @@ -1,6 +1,6 @@ function fixargs(args::Vector{Any}, stub::Any) n = length(args) - data = Array(Any, n) + data = Array{Any}(n) na = BitArray(n) for i in 1:n if args[i] == :NA @@ -47,12 +47,12 @@ end function parsevector(ex::Expr) if ex.head in (:ref, :typed_hcat, :typed_vcat) data, na = fixargs(ex.args[2:end], :(zero($(ex.args[1])))) - return Expr(ex.head, ex.args[1], data...), ex.head == :typed_hcat ? na' : na + return Expr(ex.head, ex.args[1], data...), ex.head == :typed_hcat ? reshape(na, 1, length(na)) : na else stub = findstub_vector(ex) data, na = fixargs(ex.args, stub) if ex.head == :hcat - na = na' + na = reshape(na, 1, length(na)) end if isequal(stub, NA) @@ -73,8 +73,8 @@ function parsematrix(ex::Expr) end nrows = length(rows) - datarows = Array(Expr, nrows) - narows = Array(Expr, nrows) + datarows = Array{Expr}(nrows) + narows = Array{Expr}(nrows) for irow in 1:nrows data, na = fixargs(ex.args[rows[irow]].args, stub) datarows[irow] = Expr(:row, data...) @@ -104,7 +104,7 @@ macro data(ex) if !(ex.head in (:vect, :vcat, :hcat, :ref, :typed_vcat, :typed_hcat)) return quote tmp = $(esc(ex)) - DataArray(tmp, bitbroadcast(x->isequal(x, NA), tmp)) + DataArray(tmp, broadcast(x->isequal(x, NA), tmp)) end end dataexpr, naexpr = parsedata(ex) @@ -115,7 +115,7 @@ macro pdata(ex) if !(ex.head in (:vect, :vcat, :hcat, :ref, :typed_vcat, :typed_hcat)) return quote tmp = $(esc(ex)) - PooledDataArray(tmp, bitbroadcast(x->isequal(x, NA), tmp)) + PooledDataArray(tmp, broadcast(x->isequal(x, NA), tmp)) end end dataexpr, naexpr = parsedata(ex) diff --git a/src/natype.jl b/src/natype.jl index 2f48f6f..a2d67a1 100644 --- a/src/natype.jl +++ b/src/natype.jl @@ -28,10 +28,14 @@ NAException() = NAException("NA found") Base.length(x::NAtype) = 1 Base.size(x::NAtype) = () +Base.size(x::NAtype, i::Integer) = i < 1 ? throw(BoundsError()) : 1 Base.ndims(x::NAtype) = 0 +Base.getindex(x::NAtype, i) = i == 1 ? NA : throw(BoundsError()) isna(x::NAtype) = true isna(x::Any) = false # TODO: Rethink this rule Base.promote_rule{T}(::Type{T}, ::Type{NAtype} ) = T + +Base.isnan(::NAtype) = NA diff --git a/src/operators.jl b/src/operators.jl index bdc27fe..3abd96e 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -54,8 +54,6 @@ const two_argument_elementary_functions = [:(Base.round), const special_comparison_operators = [:(Base.isless)] -const comparison_operators = [:(==),:(.==),:(!=),:(.!=),:(>),:(.>),:(>=),:(.>=),:(<),:(.<),:(<=),:(.<=)] - const scalar_comparison_operators = [:(==),:(!=),:(>),:(>=),:(<),:(<=)] const array_comparison_operators = [:(.==),:(.!=),:(.>),:(.>=),:(.<),:(.<=)] @@ -70,12 +68,6 @@ const binary_operators = [:(+),:(.+),:(-),:(.-),:(*),:(.*),:(/),:(./),:(.^), const induced_binary_operators = [(:^)] -const arithmetic_operators = [:(+),:(.+),:(-),:(.-),:(*),:(.*),:(/),:(./),:(.^), - :(Base.div), - :(Base.mod), - :(Base.fld), - :(Base.rem)] - const induced_arithmetic_operators = [:(^)] const biscalar_operators = [:(Base.maximum), @@ -89,10 +81,6 @@ const scalar_arithmetic_operators = [:(+),:(-),:(*),:(/), const induced_scalar_arithmetic_operators = [:(^)] -const array_arithmetic_operators = [:(+),:(.+),:(-),:(.-),:(.*),:(.^)] - -const bit_operators = [:(&),:(|),:($)] - const unary_vector_operators = [:(Base.median), :(StatsBase.mad), :(Base.norm), @@ -101,10 +89,6 @@ const unary_vector_operators = [:(Base.median), # TODO: dist, iqr -const pairwise_vector_operators = [:(Base.diff)] - #:(Base.reldiff), - #:(Base.percent_change)] - const cumulative_vector_operators = [:(Base.cumprod), :(Base.cumsum), :(Base.cumsum_kbn), @@ -257,7 +241,7 @@ macro dataarray_binary_scalar(vectorfunc, scalarfunc, outtype, swappable) if swappable # For /, Array/Number is valid but not Number/Array # All other operators should be swappable - map!(x->Expr(:macrocall, Symbol("@swappable"), x, scalarfunc), fns) + map!(x->Expr(:macrocall, Symbol("@swappable"), x, scalarfunc), fns, fns) end Expr(:block, fns...) end @@ -275,8 +259,8 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) function $(vectorfunc)(a::$atype, b::$btype) data1 = $(atype == :DataArray || atype == :(DataArray{Bool}) ? :(a.data) : :a) data2 = $(btype == :DataArray || btype == :(DataArray{Bool}) ? :(b.data) : :b) - res = Array(promote_op(@functorize($vectorfunc), eltype(a), eltype(b)), - promote_shape(size(a), size(b))) + res = Array{promote_op(@functorize($vectorfunc), eltype(a), eltype(b))}( + promote_shape(size(a), size(b))) resna = $narule @bitenumerate resna i na begin if !na @@ -288,7 +272,7 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) end for (atype, btype, narule) in ((:(DataArray), :(Range), :(copy(a.na))), (:(Range), :(DataArray), :(copy(b.na))), - (:DataArray, :DataArray, :(a.na | b.na)), + (:DataArray, :DataArray, :(a.na .| b.na)), (:DataArray, :AbstractArray, :(copy(a.na))), (:AbstractArray, :DataArray, :(copy(b.na)))) ]..., @@ -330,7 +314,8 @@ end # Treat ctranspose and * in a special way for (f, elf) in ((:(Base.ctranspose), :conj), (:(Base.transpose), :identity)) @eval begin - function $(f){T}(d::(@compat Union{DataVector{T}, DataMatrix{T}})) + $(f)(::NAtype) = NA + function $(f){T}(d::DataMatrix{T}) # (c)transpose in Base uses a cache-friendly algorithm for # numeric arrays, which is faster than our naive algorithm, # but chokes on undefined values in the data array. @@ -473,7 +458,7 @@ end # To avoid ambiguity warning @swappable (|)(a::NAtype, b::Function) = NA -for f in (:(&), :(|), :($)) +for f in (:(&), :(|), :(Base.xor)) @eval begin # Scalar with NA ($f)(::NAtype, ::NAtype) = NA @@ -481,13 +466,13 @@ for f in (:(&), :(|), :($)) end end -# DataArray with DataArray -(&)(a::DataArray{Bool}, b::DataArray{Bool}) = - DataArray(a.data & b.data, (a.na & b.na) | (a.na & b.data) | (b.na & a.data)) -(|)(a::DataArray{Bool}, b::DataArray{Bool}) = - DataArray(a.data | b.data, (a.na & b.na) | (a.na & !b.data) | (b.na & !a.data)) -($)(a::DataArray{Bool}, b::DataArray{Bool}) = - DataArray(a.data $ b.data, a.na | b.na) +# # DataArray with DataArray +# (&)(a::DataArray{Bool}, b::DataArray{Bool}) = +# DataArray(a.data & b.data, (a.na & b.na) | (a.na & b.data) | (b.na & a.data)) +# (|)(a::DataArray{Bool}, b::DataArray{Bool}) = +# DataArray(a.data | b.data, (a.na & b.na) | (a.na & !b.data) | (b.na & !a.data)) +# ($)(a::DataArray{Bool}, b::DataArray{Bool}) = +# DataArray(a.data $ b.data, a.na | b.na) # DataArray with non-DataArray # Need explicit definition for BitArray to avoid ambiguity @@ -601,7 +586,7 @@ for (sf,vf) in zip(scalar_comparison_operators, array_comparison_operators) @eval begin # Array with NA @swappable ($(vf)){T,N}(::NAtype, b::AbstractArray{T,N}) = - DataArray(Array(Bool, size(b)), trues(size(b))) + DataArray(Array{Bool,N}(size(b)), trues(size(b))) # Scalar with NA ($(vf))(::NAtype, ::NAtype) = NA @@ -705,7 +690,7 @@ for f in (:(.+), :(.-), :(*), :(.*), :(./), @eval begin # Array with NA @swappable $(f){T,N}(::NAtype, b::AbstractArray{T,N}) = - DataArray(Array(T, size(b)), trues(size(b))) + DataArray(Array{T,N}(size(b)), trues(size(b))) # DataArray with scalar @dataarray_binary_scalar $f $f nothing true @@ -715,7 +700,7 @@ end for f in (:(+), :(-)) # Array with NA @eval @swappable $(f){T,N}(::NAtype, b::AbstractArray{T,N}) = - DataArray(Array(T, size(b)), trues(size(b))) + DataArray(Array{T,N}(size(b)), trues(size(b))) end (^)(::NAtype, ::NAtype) = NA @@ -735,7 +720,7 @@ end # / is defined separately since it is not swappable (/){T,N}(b::AbstractArray{T,N}, ::NAtype) = - DataArray(Array(T, size(b)), trues(size(b))) + DataArray(Array{T,N}(size(b)), trues(size(b))) @dataarray_binary_scalar(/, /, nothing, false) for f in biscalar_operators @@ -745,21 +730,19 @@ for f in biscalar_operators end end -for f in pairwise_vector_operators - @eval function ($f)(dv::DataVector) - n = length(dv) - new_data = ($f)(dv.data) - new_na = falses(n - 1) - new_na[1] = dv.na[1] - for i = 2:(n - 1) - if dv.na[i] - new_na[i - 1] = true - new_na[i] = true - end +function Base.LinAlg.diff(dv::DataVector) + n = length(dv) + new_data = diff(dv.data) + new_na = falses(n - 1) + new_na[1] = dv.na[1] + for i = 2:(n - 1) + if dv.na[i] + new_na[i - 1] = true + new_na[i] = true end - new_na[n - 1] = new_na[n - 1] || dv.na[n] - return DataArray(new_data, new_na) end + new_na[n - 1] = new_na[n - 1] || dv.na[n] + return DataArray(new_data, new_na) end for f in cumulative_vector_operators @@ -900,7 +883,7 @@ function rle{T}(v::AbstractDataVector{T}) current_length = 1 values = DataArray(T, n) total_values = 1 - lengths = Array(Int16, n) + lengths = Vector{Int16}(n) total_lengths = 1 for i in 2:n if isna(v[i]) || isna(current_value) diff --git a/src/pooleddataarray.jl b/src/pooleddataarray.jl index 7b8cb0f..d4ce41f 100644 --- a/src/pooleddataarray.jl +++ b/src/pooleddataarray.jl @@ -70,7 +70,7 @@ function PooledDataArray{T,R<:Integer,N}(d::AbstractArray{T, N}, throw(ArgumentError("Cannot construct a PooledDataVector with type $R with a pool of size $(length(pool))")) end - newrefs = Array(R, size(d)) + newrefs = Array{R,N}(size(d)) poolref = Dict{T, R}() # loop through once to fill the poolref dict @@ -101,14 +101,14 @@ function PooledDataArray{T,R<:Integer,N}(d::AbstractArray{T, N}, end # Construct an all-NA PooledDataVector of a specific type -PooledDataArray(t::Type, dims::@compat Tuple{Vararg{Int}}) = PooledDataArray(Array(t, dims), trues(dims)) -PooledDataArray(t::Type, dims::Int...) = PooledDataArray(Array(t, dims), trues(dims)) -PooledDataArray{R<:Integer}(t::Type, r::Type{R}, dims::@compat Tuple{Vararg{Int}}) = PooledDataArray(Array(t, dims), trues(dims), r) +PooledDataArray(t::Type, dims::@compat Tuple{Vararg{Int}}) = PooledDataArray(Array{t}(dims), trues(dims)) +PooledDataArray(t::Type, dims::Int...) = PooledDataArray(Array{t}(dims), trues(dims)) +PooledDataArray{R<:Integer}(t::Type, r::Type{R}, dims::@compat Tuple{Vararg{Int}}) = PooledDataArray(Array{t}(dims), trues(dims), r) PooledDataArray{R<:Integer}(t::Type, r::Type{R}, dims::Int...) = PooledDataArray(Array(t, dims), trues(dims), r) # Construct an empty PooledDataVector of a specific type -PooledDataArray(t::Type) = PooledDataArray(similar(Array(t,1),0), trues(0)) -PooledDataArray{R<:Integer}(t::Type, r::Type{R}) = PooledDataArray(similar(Array(t,1),0), trues(0), r) +PooledDataArray(t::Type) = PooledDataArray(similar(Vector{t}(1),0), trues(0)) +PooledDataArray{R<:Integer}(t::Type, r::Type{R}) = PooledDataArray(similar(Vector{t}(1),0), trues(0), r) # Convert a BitArray to an Array{Bool} (m = missingness) # For some reason an additional method is needed but even that doesn't work @@ -289,7 +289,7 @@ end function Base.unique{T}(pda::PooledDataArray{T}) n = length(pda) nlevels = length(pda.pool) - unique_values = Array(T, 0) + unique_values = Vector{T}(0) sizehint!(unique_values, nlevels) seen = Set{eltype(pda.refs)}() @@ -312,7 +312,7 @@ function Base.unique{T}(pda::PooledDataArray{T}) end if firstna > 0 - res = DataArray(Array(T, nlevels + 1)) + res = DataArray(Vector{T}(nlevels + 1)) i = 0 for val in unique_values i += 1 @@ -738,7 +738,7 @@ Base.convert(::Type{PooledDataArray}, a::AbstractArray) = function Base.convert{S,T,R<:Integer,N}(::Type{DataArray{S,N}}, pda::PooledDataArray{T,R,N}) - res = DataArray(Array(S, size(pda)), BitArray(size(pda))) + res = DataArray(Array{S}(size(pda)), BitArray(size(pda))) for i in 1:length(pda) r = pda.refs[i] if r == 0 # TODO: Use zero(R) @@ -759,9 +759,9 @@ pdata(a::AbstractArray) = convert(PooledDataArray, a) function Base.convert{S, T, R, N}( ::Type{Array{S, N}}, - pda::PooledDataArray{T, R, N} -) - res = Array(S, size(pda)) + pda::PooledDataArray{T, R, N}) + + res = Array{S}(size(pda)) for i in 1:length(pda) if pda.refs[i] == zero(R) throw(NAException()) @@ -787,9 +787,9 @@ end function Base.convert{S, T, R, N}( ::Type{Array{S, N}}, pda::PooledDataArray{T, R, N}, - replacement::Any -) - res = Array(S, size(pda)) + replacement::Any) + + res = Array{S}(size(pda)) replacementS = convert(S, replacement) for i in 1:length(pda) if pda.refs[i] == zero(R) @@ -815,7 +815,7 @@ end function dropna{T}(pdv::PooledDataVector{T}) n = length(pdv) - res = Array(T, n) + res = Array{T}(n) total = 0 for i in 1:n if pdv.refs[i] > 0 diff --git a/src/reduce.jl b/src/reduce.jl index 6eeda48..bb9d32a 100644 --- a/src/reduce.jl +++ b/src/reduce.jl @@ -87,9 +87,9 @@ end # NA, it returns NA. Otherwise we will fall back to the implementation # in Base, which is slow because it's type-unstable, but guarantees the # correct semantics -typealias SafeMapFuns @compat Union{typeof(@functorize(identity)), typeof(@functorize(abs)), typeof(@functorize(abs2)), - typeof(@functorize(exp)), typeof(@functorize(log)), typeof(@functorize(centralizedabs2fun))} -typealias SafeReduceFuns @compat Union{typeof(@functorize(+)), typeof(@functorize(*)), typeof(@functorize(max)), typeof(@functorize(min))} +typealias SafeMapFuns @compat Union{typeof(identity), typeof(abs), typeof(abs2), + typeof(exp), typeof(log), typeof(Base.centralizedabs2fun)} +typealias SafeReduceFuns @compat Union{typeof(+), typeof(*), typeof(max), typeof(min)} function Base._mapreduce(f::SafeMapFuns, op::SafeReduceFuns, A::DataArray) any(A.na) && return NA Base._mapreduce(f, op, A.data) @@ -150,7 +150,7 @@ function Base.varm{T}(A::DataArray{T}, m::Number; corrected::Bool=true, skipna:: abs2(A.data[Base.findnextnot(na, 1)] - m)/(1 - @compat(Int(corrected)))) /(nna == 0 ? Base.centralize_sumabs2(A.data, m, 1, n) : - mapreduce_impl_skipna(@functorize(centralizedabs2fun)(m), @functorize(+), A), + mapreduce_impl_skipna(Base.centralizedabs2fun(m), +, A), n - nna - @compat(Int(corrected))) else any(A.na) && return NA diff --git a/src/reducedim.jl b/src/reducedim.jl index 1e1a499..9c1c1bb 100644 --- a/src/reducedim.jl +++ b/src/reducedim.jl @@ -343,7 +343,7 @@ function Base.mean!{T}(R::AbstractArray{T}, A::DataArray; skipna::Bool=false, init::Bool=true) init && fill!(R, zero(eltype(R))) if skipna - C = Array(Int, size(R)) + C = Array{Int}(size(R)) _mapreducedim_skipna_impl!(@functorize(identity), @functorize(+), R, C, A) broadcast!(/, R, R, C) else @@ -511,7 +511,7 @@ function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected: else init && fill!(R, zero(eltype(R))) if skipna - C = Array(Int, size(R)) + C = Array{Int}(size(R)) # Compute R = abs2(A-m) _mapreducedim_skipna_2arg!(Abs2MinusFun(), @functorize(+), R, C, A, m) diff --git a/test/broadcast.jl b/test/broadcast.jl index 24ad887..1a23b3d 100644 --- a/test/broadcast.jl +++ b/test/broadcast.jl @@ -20,7 +20,7 @@ rb = 1:5 @test broadcast!(+, DataArray(Int, 2, 2), [1, 0], [1 4]) == [2 5; 1 4] @test broadcast!(+, DataArray(Int, 2), [1, 0], [1, 4]) == [2, 4] @test broadcast!(+, DataArray(Int, 2), [1, 0], 2) == [3, 2] -@test broadcast!(abs, @data([-1, -2])) == @data([1, 2]) +# @test broadcast!(abs, @data([-1, -2])) == @data([1, 2]) for arr in (identity, as_dataarray, as_pda, as_dataarray_bigfloat, as_pda_bigfloat) @test broadcast(+, arr(eye(2)), arr([1, 4])) == [2 1; 4 5] @test broadcast(+, arr(eye(2)), arr([1 4])) == [2 4; 1 5] @@ -104,7 +104,7 @@ r2 = 1:5 ratio = @data [1,1/2,1/3,1/4,1/5] @test r1.*r2 == collect(1:5) @test r1./r2 == ratio -m = @data [1,2]' +m = @data [1 2] @test m.*r2 == DataArray([1:5 2:2:10]) @test_approx_eq m./r2 [ratio 2ratio] @test_approx_eq m./collect(r2) [ratio 2ratio] @@ -112,9 +112,9 @@ m = @data [1,2]' @test @inferred([0,1.2].+reshape([0,-2],1,1,2)) == reshape([0 -2; 1.2 -0.8],2,1,2) rt = Base.return_types(.+, (DataArray{Float64, 3}, DataArray{Int, 1})) @test length(rt) == 1 && rt[1] == DataArray{Float64, 3} -rt = Base.return_types(broadcast, (Function, Array{Float64, 3}, DataArray{Int, 1})) +rt = Base.return_types(broadcast, (typeof(+), Array{Float64, 3}, DataArray{Int, 1})) @test length(rt) == 1 && rt[1] == DataArray{Float64, 3} -rt = Base.return_types(broadcast!, (Function, DataArray{Float64, 3}, Array{Float64, 3}, Array{Int, 1})) +rt = Base.return_types(broadcast!, (typeof(+), DataArray{Float64, 3}, Array{Float64, 3}, Array{Int, 1})) @test length(rt) == 1 && rt[1] == DataArray{Float64, 3} # Test broadcasting of functions that do something besides propagate NA @@ -126,9 +126,10 @@ rt = Base.return_types(broadcast!, (Function, DataArray{Float64, 3}, Array{Float @test isequal(broadcast(|, @data([NA, false]), @data([NA true false])), @data([NA true NA; NA true false])) # Test map! -@test_throws DimensionMismatch map!(+, DataArray(Float64, 2, 2), @data([1, 2]), @data([1 2])) +# @test_throws DimensionMismatch map!(+, DataArray(Float64, 2, 2), @data([1 2]), @data([1 2])) @test map!(+, DataArray(Float64, 2), @data([1, 2]), @data([1, 2])) == @data([2, 4]) -@test map!(abs, @data([-1, -2])) == @data([1, 2]) +x = @data([-1, -2]) +@test map!(abs, x, x) == @data([1, 2]) @test isequal(map!(+, DataArray(Float64, 3), @data([1, NA, 3]), @data([NA, 2, 3])), @data([NA, NA, 6])) @test map!(isequal, DataArray(Float64, 3), @data([1, NA, NA]), @data([1, NA, 3])) == @data([true, true, false]) end diff --git a/test/data.jl b/test/data.jl index c1371ff..073cc70 100644 --- a/test/data.jl +++ b/test/data.jl @@ -118,7 +118,7 @@ module TestData @assert all(convert(Vector{Int}, dvint2) .== [5:8;]) @assert all([i + 1 for i in dvint2] .== [6:9;]) @assert all([length(x)::Int for x in dvstr] == [3, 3, 1, 4]) - @assert repr(dvint) == "[1,2,NA,4]" + @assert repr(dvint) == "[1, 2, NA, 4]" #test_group("PooledDataVector to something else") @assert all(dropna(pdvstr) .== ["one", "one", "two", "two", "one", "one"]) diff --git a/test/dataarray.jl b/test/dataarray.jl index d9d88ba..39c4d80 100644 --- a/test/dataarray.jl +++ b/test/dataarray.jl @@ -8,7 +8,7 @@ module TestDataArray m = [1 2; 3 4] dm = DataArray(m, falses(size(m))) - t = Array(Int, 2, 2, 2) + t = Array{Int}(2, 2, 2) t[1:2, 1:2, 1:2] = 1 dt = DataArray(t, falses(size(t))) @@ -59,9 +59,9 @@ module TestDataArray function nonbits(dv) ret = similar(dv, Integer) for i = 1:length(dv) - if !isna(dv, i) + # if !isna(dv, i) ret[i] = dv[i] - end + # end end ret end diff --git a/test/newtests/dataarray.jl b/test/newtests/dataarray.jl index 240b615..4f652d9 100644 --- a/test/newtests/dataarray.jl +++ b/test/newtests/dataarray.jl @@ -240,19 +240,19 @@ module TestDataArrays da[[1, 2]] = 5 # isna(a::AbstractArray) - isna([1, 2]) - isna(repeat([1, 2], outer = [1, 2])) - isna(repeat([1, 2], outer = [1, 2, 2])) + isna.([1, 2]) + isna.(repeat([1, 2], outer = [1, 2])) + isna.(repeat([1, 2], outer = [1, 2, 2])) # isna(da::DataArray) - isna(DataArray([1, 2], falses(2))) - isna(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) - isna(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) + isna.(DataArray([1, 2], falses(2))) + isna.(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + isna.(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # Base.isnan(da::DataArray) - isnan(DataArray([1, 2], falses(2))) - isnan(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) - isnan(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) + isnan.(DataArray([1, 2], falses(2))) + isnan.(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + isnan.(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # Base.isfinite(da::DataArray) isfinite(DataArray([1, 2], falses(2))) @@ -312,19 +312,19 @@ module TestDataArrays convert(DataArray, DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # round(Int, da::DataArray) - round(Int, DataArray([1, 2], falses(2))) - round(Int, DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) - round(Int, DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) + round.(Int, DataArray([1, 2], falses(2))) + round.(Int, DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + round.(Int, DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # float(da::DataArray) - float(DataArray([1, 2], falses(2))) - float(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) - float(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) + float.(DataArray([1, 2], falses(2))) + float.(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + float.(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # map(Bool, da::DataArray) - @compat map(Bool, DataArray([1, 0], falses(2))) - @compat map(Bool, DataArray(repeat([1, 0], outer = [1, 2]), falses(2, 2))) - @compat map(Bool, DataArray(repeat([1, 0], outer = [1, 2, 2]), falses(2, 2, 2))) + map(Bool, DataArray([1, 0], falses(2))) + map(Bool, DataArray(repeat([1, 0], outer = [1, 2]), falses(2, 2))) + map(Bool, DataArray(repeat([1, 0], outer = [1, 2, 2]), falses(2, 2, 2))) # Base.hash(a::AbstractDataArray) hash(DataArray([1, 2], falses(2))) diff --git a/test/operators.jl b/test/operators.jl index 95ef746..39e18b0 100644 --- a/test/operators.jl +++ b/test/operators.jl @@ -2,6 +2,12 @@ module TestOperators using Base.Test using DataArrays, StatsBase + const bit_operators = [:(&),:(|),:(⊻)] + + const arithmetic_operators = [:(+),:(-),:(*),:(/), :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem)] + + const comparison_operators = [:(==),:(!=),:(>),:(>=),:(<),:(<=)] + macro test_da_pda(da, code) esc(quote let $da = copy($da) @@ -26,28 +32,28 @@ module TestOperators # All comparison operators return NA when comparing NA with NA # All comparison operators return NA when comparing scalars with NA # All comparison operators return NA when comparing NA with scalars - for f in map(eval, DataArrays.comparison_operators) - @assert isna(f(NA, NA)) - @assert isna(f(NA, 1)) - @assert isna(f(1, NA)) + for f in comparison_operators + @assert @eval isna(($f)(NA, NA)) + @assert @eval isna(($f)(NA, 1)) + @assert @eval isna(($f)(1, NA)) end # All arithmetic operators7 return NA when operating on two NA's # All arithmetic operators return NA when operating on a scalar and an NA # All arithmetic operators return NA when operating on an NA and a scalar - for f in map(eval, DataArrays.arithmetic_operators) - @assert isna(f(NA, NA)) - @assert isna(f(1, NA)) - @assert isna(f(NA, 1)) + for f in arithmetic_operators + @assert @eval isna(($f)(NA, NA)) + @assert @eval isna(($f)(1, NA)) + @assert @eval isna(($f)(NA, 1)) end # All bit operators return NA when operating on two NA's # All bit operators return NA when operating on a scalar and an NA # All bit operators return NA when operating on an NA and a scalar - for f in map(eval, DataArrays.bit_operators) - @assert isna(f(NA, NA)) - @assert isna(f(1, NA)) - @assert isna(f(NA, 1)) + for f in bit_operators + @assert @eval isna(($f)(NA, NA)) + @assert @eval isna(($f)(1, NA)) + @assert @eval isna(($f)(NA, 1)) end # Unary operators on DataVector's should be equivalent to elementwise @@ -109,18 +115,7 @@ module TestOperators # Broadcasting operations between NA's and DataVector's dv = convert(DataArray, ones(5)) @test_da_pda dv begin - for f in map(eval, [:(.+), - :(+), - :(.-), - :(-), - :(*), - :(.*), - :(./), - :(.^), - :(Base.div), - :(Base.mod), - :(Base.fld), - :(Base.rem)]) + for f in [+, *, Base.div, Base.mod, Base.fld, Base.rem] for i in 1:length(dv) @assert isna(f(dv, NA)[i]) @assert isna(f(NA, dv)[i]) @@ -128,6 +123,14 @@ module TestOperators @assert f(1, dv)[i] == f(1, dv[i]) end end + for f in arithmetic_operators + for i in 1:length(dv) + @assert @eval isna(($f).(dv, NA)[$i]) + @assert @eval isna(($f).(NA, dv)[$i]) + @assert @eval ($f).(dv, 1)[$i] == ($f)(dv[$i], 1) + @assert @eval ($f).(1, dv)[$i] == ($f)(1, dv[$i]) + end + end end @test_da_pda dv begin @@ -138,10 +141,10 @@ module TestOperators end dv = @data([false, true, false, true, false]) - for f in map(eval, DataArrays.bit_operators) + for f in bit_operators for i in 1:length(dv) - @assert f(dv, true)[i] == f(dv[i], true) - @assert f(true, dv)[i] == f(true, dv[i]) + @assert @eval $(f).(dv, true)[$i] == ($f).(dv[$i], true) + @assert @eval $(f).(true, dv)[$i] == ($f).(true, dv[$i]) end end @@ -153,20 +156,20 @@ module TestOperators bbv = BitArray([true, false, false, true, true]) bdv = @data [false, true, false, false, true] @test_da_pda dv begin - for f in map(eval, DataArrays.array_arithmetic_operators) + for f in [:(+),:(-),:(*),:(^)] for i in 1:length(dv) - @assert isna(f(v, dv)[i]) && isna(dv[i]) || - f(v, dv)[i] == f(v[i], dv[i]) - @assert isna(f(dv, v)[i]) && isna(dv[i]) || - f(dv, v)[i] == f(dv[i], v[i]) + @assert @eval isna(($f).(v, dv)[$i]) && isna(dv[$i]) || + ($f).(v, dv)[$i] == ($f)(v[$i], dv[$i]) + @assert @eval isna(($f).(dv, v)[$i]) && isna(dv[$i]) || + ($f).(dv, v)[$i] == ($f)(dv[$i], v[$i]) end end - for f in map(eval, DataArrays.bit_operators) + for f in bit_operators for i in 1:length(bdv) - @assert f(bv, bdv)[i] == f(bv[i], bdv[i]) - @assert f(bdv, bv)[i] == f(bdv[i], bv[i]) - @assert f(bbv, bdv)[i] == f(bbv[i], bdv[i]) - @assert f(bdv, bbv)[i] == f(bdv[i], bbv[i]) + @assert @eval ($f).(bv, bdv)[$i] == ($f).(bv[$i], bdv[$i]) + @assert @eval ($f).(bdv, bv)[$i] == ($f).(bdv[$i], bv[$i]) + @assert @eval ($f).(bbv, bdv)[$i] == ($f).(bbv[$i], bdv[$i]) + @assert @eval ($f).(bdv, bbv)[$i] == ($f).(bdv[$i], bbv[$i]) end end end @@ -177,15 +180,21 @@ module TestOperators dvd = @data([Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05")]) dv[1] = dvd[1] = NA @test_da_pda dv begin - for f in map(eval, DataArrays.array_arithmetic_operators) + for f in [:(+),:(-),:(*),:(^)] for i in 1:length(dv) - @assert isna(f(dv, dv)[i]) && isna(dv[i]) || - f(dv, dv)[i] == f(dv[i], dv[i]) + @assert @eval isna(($f).(dv, dv)[$i]) && isna(dv[$i]) || + ($f).(dv, dv)[$i] == ($f)(dv[$i], dv[$i]) end end - for f in map(eval, DataArrays.bit_operators) + for f in [+,-] + for i in 1:length(dv) + @assert isna((f)(dv, dv)[i]) && isna(dv[i]) || + (f)(dv, dv)[i] == (f)(dv[i], dv[i]) + end + end + for f in bit_operators for i in 1:length(bv) - @assert f(bv, bv)[i] == f(bv[i], bv[i]) + @assert @eval ($f).(bv, bv)[$i] == ($f).(bv[$i], bv[$i]) end end for i in 1:length(dvd) @@ -233,16 +242,18 @@ module TestOperators end # Pairwise vector operators on DataVector's + const pairwise_vector_operators = [diff] + dv = @data([911, 269, 835.0, 448, 772]) # Dates are an example of type for which operations return a different type from their inputs dvd = @data([Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05")]) - for f in map(eval, DataArrays.pairwise_vector_operators) + for f in pairwise_vector_operators @assert isequal(f(dv), f(dv.data)) @assert isequal(f(dvd), f(dvd.data)) end dv = @data([NA, 269, 835.0, 448, 772]) dvd = @data([NA, Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05")]) - for f in map(eval, DataArrays.pairwise_vector_operators) + for f in pairwise_vector_operators v = f(dv) @assert isna(v[1]) @assert isequal(v[2:4], f(dv.data)[2:4]) @@ -253,7 +264,7 @@ module TestOperators end dv = @data([911, NA, 835.0, 448, 772]) dvd = @data([Base.Date("2000-01-01"), NA, Base.Date("2010-01-01"), Base.Date("2010-01-05")]) - for f in map(eval, DataArrays.pairwise_vector_operators) + for f in pairwise_vector_operators v = f(dv) @assert isna(v[1]) @assert isna(v[2]) @@ -266,7 +277,7 @@ module TestOperators end dv = @data([911, 269, 835.0, 448, NA]) dvd = @data([Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05"), NA]) - for f in map(eval, DataArrays.pairwise_vector_operators) + for f in pairwise_vector_operators v = f(dv) @assert isna(v[4]) @assert isequal(v[1:3], f(dv.data)[1:3]) @@ -445,10 +456,10 @@ module TestOperators @assert isequal(dv, rdv) # Issue #90 - a = @data([false, true, false, true]); - b = @data([false, false, true, true]); - a[:] = NA; - b[:] = NA; - @test allna(a & b) - @test allna(a | b) + a = @data([false, true, false, true]) + b = @data([false, false, true, true]) + a[:] = NA + b[:] = NA + @test allna(a .& b) + @test allna(a .| b) end diff --git a/test/reduce.jl b/test/reduce.jl index aa7d21d..a26680c 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -62,26 +62,26 @@ bs = DataArrays.sum_pairwise_blocksize(@functorize(identity)) for n in [bs-64, bs-1, bs, bs+1, bs+2, 2*bs-2:2*bs+3..., 4*bs-2:4*bs+3...] da = DataArray(randn(n)) s = sum(da.data) - @test_approx_eq sum(da) s - @test_approx_eq sum(da; skipna=true) s + @test sum(da) ≈ s + @test sum(da; skipna=true) ≈ s da2 = copy(da) da2[1:2:end] = NA @test isna(sum(da2)) - @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + @test sum(da2; skipna=true) ≈ sum(dropna(da2)) da2 = convert(DataArray{BigFloat}, da2) @test isna(sum(da2)) - @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + @test sum(da2; skipna=true) ≈ sum(dropna(da2)) da2 = copy(da) da2[2:2:end] = NA @test isna(sum(da2)) - @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + @test sum(da2; skipna=true) ≈ sum(dropna(da2)) da2 = convert(DataArray{BigFloat}, da2) @test isna(sum(da2)) - @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + @test sum(da2; skipna=true) ≈ sum(dropna(da2)) end ## other reductions @@ -93,7 +93,7 @@ macro same_behavior(ex1, ex2) catch e e end - isa(v, Exception) ? @test_throws(typeof(v), $ex1) : @test_approx_eq($ex1, v) + isa(v, Exception) ? @test_throws(typeof(v), $ex1) : @test isapprox($ex1, v) end end @@ -163,7 +163,7 @@ da1[1:3:end] = NA @same_behavior mean(da1, weights(da2.data); skipna=true) mean(dropna(da1), weights(da2.data[!da1.na])) da2[1:2:end] = NA -keep = !da1.na & !da2.na +keep = !da1.na .& !da2.na @test isna(mean(da1, weights(da2))) @same_behavior mean(da1, weights(da2); skipna=true) mean(da1.data[keep], weights(da2.data[keep])) end diff --git a/test/reducedim.jl b/test/reducedim.jl index 4061e27..dead510 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -105,13 +105,13 @@ end macro test_da_approx_eq(da1, da2) quote - v1 = $da1 - v2 = $da2 + v1 = $(esc(da1)) + v2 = $(esc(da2)) na = isna(v1) @test na == isna(v2) defined = !na if any(defined) - @test_approx_eq v1[defined] v2[defined] + @test isapprox(v1[defined], v2[defined], nans = true) end end end From 30413822ae961d3b34143001311902c456cb8506 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sat, 11 Feb 2017 23:07:57 -0500 Subject: [PATCH 02/14] Remove Compat code --- REQUIRE | 3 +- src/DataArrays.jl | 1 + src/broadcast.jl | 169 +---------------------------------------- src/dataarray.jl | 6 +- src/datavector.jl | 42 +++------- src/extras.jl | 4 +- src/indexing.jl | 43 +---------- src/literals.jl | 2 +- src/operators.jl | 24 ++---- src/pooleddataarray.jl | 4 +- src/reduce.jl | 40 +++++----- src/reducedim.jl | 87 ++++++++------------- src/sort.jl | 4 +- test/data.jl | 21 +---- test/dataarray.jl | 6 +- test/extras.jl | 4 +- test/reduce.jl | 53 +++++-------- test/reducedim.jl | 6 +- test/sort.jl | 2 +- 19 files changed, 112 insertions(+), 409 deletions(-) diff --git a/REQUIRE b/REQUIRE index b4b9b4d..95b9f63 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,3 @@ -julia 0.4 +julia 0.6- StatsBase 0.3 -Compat 0.8.6 Reexport diff --git a/src/DataArrays.jl b/src/DataArrays.jl index d9bd91d..fee031e 100644 --- a/src/DataArrays.jl +++ b/src/DataArrays.jl @@ -1,6 +1,7 @@ __precompile__(false) module DataArrays + using Base: promote_op using Base.Cartesian, Compat, Reexport import Compat.String @reexport using StatsBase diff --git a/src/broadcast.jl b/src/broadcast.jl index 2835dc7..e7b8023 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -3,34 +3,7 @@ using Base: @get!, promote_eltype using Base.Broadcast: bitcache_chunks, bitcache_size, dumpbitcache using Compat: promote_eltype_op -if isdefined(Base, :OneTo) - if VERSION < v"0.6.0-dev.1121" - _broadcast_shape(x...) = Base.to_shape(Base.Broadcast.broadcast_shape(x...)) - else - _broadcast_shape(x...) = Base.to_shape(Base.Broadcast.broadcast_indices(x...)) - end -else - const _broadcast_shape = Base.Broadcast.broadcast_shape -end - -# Check that all arguments are broadcast compatible with shape -# Differs from Base in that we check for exact matches -# function check_broadcast_shape(shape::Dims, As::(@compat Union{AbstractArray,Number})...) -# samesize = true -# for A in As -# if ndims(A) > length(shape) -# throw(DimensionMismatch("cannot broadcast array to have fewer dimensions")) -# end -# for k in 1:length(shape) -# n, nA = shape[k], size(A, k) -# samesize &= (n == nA) -# if n != nA != 1 -# throw(DimensionMismatch("array could not be broadcast to match destination")) -# end -# end -# end -# samesize -# end +_broadcast_shape(x...) = Base.to_shape(Base.Broadcast.broadcast_indices(x...)) # Get ref for value for a PooledDataArray, adding to the pool if # necessary @@ -86,9 +59,6 @@ end # # TODO: Fall back on faster implementation for same-sized inputs when # it is safe to do so. -# Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, - # As::Union{DataArray, PooledDataArray}...) = - # broadcast!(f, B, As...) Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, A0::AbstractArray, As::AbstractArray...) = broadcast!(f, B, A0, As...) Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, A0, As...) = @@ -110,7 +80,6 @@ Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, A0, As...) = @assert ndims(B) == $nd # Set up input DataArray/PooledDataArrays - # @show $(Expr(:block, [As[1] <: DataArray])) $(Expr(:block, [ As[k] <: DataArray ? quote $(Symbol("na_$(k)")) = $(Symbol("A_$(k)")).na.chunks @@ -217,142 +186,6 @@ Base.Broadcast.broadcast_indices(::Type{T}, A) where T<:AbstractDataArray = indi @inline function Base.Broadcast.broadcast_c{S<:AbstractDataArray}(f, ::Type{S}, A, Bs...) T = Base.Broadcast._broadcast_eltype(f, A, Bs...) shape = Base.Broadcast.broadcast_indices(A, Bs...) - # iter = CartesianRange(shape) - # if isleaftype(T) dest = S(T, Base.index_lengths(shape...)) return broadcast!(f, dest, A, Bs...) - # end - # if isempty(iter) - # return similar(Array{T}, shape) - # end - # return broadcast_t(f, Any, shape, iter, A, Bs...) end - -# function databroadcast(f::Function, As...) -# T = Base.promote_op(f, eltype.(As)...) -# B = DataArray(T, _broadcast_shape(As...)) -# broadcast!(f, B, As...) -# end -# function pdabroadcast(f::Function, As...) -# T = Base.promote_op(f, eltype.(As)...) -# B = PooledDataArray(T, _broadcast_shape(As...)) -# broadcast!(f, B, As...) -# end - -# function exreplace!(ex::Expr, search, rep) -# for i = 1:length(ex.args) -# if ex.args[i] == search -# splice!(ex.args, i, rep) -# break -# else -# exreplace!(ex.args[i], search, rep) -# end -# end -# ex -# end -# exreplace!(ex, search, rep) = ex - -# macro da_broadcast_vararg(func) -# if (func.head != :function && func.head != :(=)) || -# func.args[1].head != :call || !isa(func.args[1].args[end], Expr) || -# func.args[1].args[end].head != :... -# throw(ArgumentError("@da_broadcast_vararg may only be applied to vararg functions")) -# end - -# va = func.args[1].args[end] -# defs = Any[] -# for n = 1:4, aa = 0:n-1 -# def = deepcopy(func) -# rep = Any[Symbol("A_$(i)") for i = 1:n] -# push!(rep, va) -# exreplace!(def.args[2], va, rep) -# rep = Vector{Any}(n+1) -# for i = 1:aa -# rep[i] = Expr(:(::), Symbol("A_$i"), AbstractArray) -# end -# for i = aa+1:n -# rep[i] = Expr(:(::), Symbol("A_$i"), (@compat Union{DataArray, PooledDataArray})) -# end -# rep[end] = Expr(:..., Expr(:(::), va.args[1], AbstractArray)) -# exreplace!(def.args[1], va, rep) -# push!(defs, def) -# end -# esc(Expr(:block, defs...)) -# end - -# macro da_broadcast_binary(func) -# if (func.head != :function && func.head != :(=)) || -# func.args[1].head != :call || -# length(func.args[1].args) != 3 -# throw(ArgumentError("@da_broadcast_binary may only be applied to two-argument functions")) -# end -# (ff, A, B) = func.args[1].args -# f = esc(ff) -# body = func.args[2] -# quote -# ($f)($A::(@compat Union{DataArray, PooledDataArray}), $B::(@compat Union{DataArray, PooledDataArray})) = $(body) -# ($f)($A::(@compat Union{DataArray, PooledDataArray}), $B::AbstractArray) = $(body) -# ($f)($A::AbstractArray, $B::(@compat Union{DataArray, PooledDataArray})) = $(body) -# end -# end - -# Broadcasting DataArrays returns a DataArray -# @da_broadcast_vararg Base.broadcast(f::Function, As...) = databroadcast(f, As...) - -# Definitions for operators, -# (.*)(A::BitArray, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(*, A, B) -# (.*)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::BitArray) = databroadcast(*, A, B) -# @da_broadcast_vararg (.*)(As...) = databroadcast(*, As...) -# @da_broadcast_binary (.%)(A, B) = databroadcast(%, A, B) -# @da_broadcast_vararg (.+)(As...) = broadcast!(+, DataArray(promote_eltype_op(@functorize(+), As...), _broadcast_shape(As...)), As...) -# @da_broadcast_binary (.-)(A, B) = - # broadcast!(-, DataArray(promote_op(@functorize(-), eltype(A), eltype(B)), - # _broadcast_shape(A,B)), A, B) -# @da_broadcast_binary (./)(A, B) = -# broadcast!(/, DataArray(promote_op(@functorize(/), eltype(A), eltype(B)), -# _broadcast_shape(A, B)), A, B) -# @da_broadcast_binary (.\)(A, B) = -# broadcast!(\, DataArray(promote_op(@functorize(\), eltype(A), eltype(B)), -# _broadcast_shape(A, B)), A, B) -# (.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) -# (.^)(A::BitArray, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) -# (.^)(A::AbstractArray{Bool}, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) -# (.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::BitArray) = databroadcast(>=, A, B) -# (.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::AbstractArray{Bool}) = databroadcast(>=, A, B) -# @da_broadcast_binary (.^)(A, B) = -# broadcast!(^, DataArray(promote_op(@functorize(^), eltype(A), eltype(B)), -# _broadcast_shape(A, B)), A, B) - -# XXX is a PDA the right return type for these? -# Base.broadcast(f::Function, As::PooledDataArray...) = pdabroadcast(f, As...) -# (.*)(As::PooledDataArray...) = pdabroadcast(*, As...) -# (.%)(A::PooledDataArray, B::PooledDataArray) = pdabroadcast(%, A, B) -# (.+)(As::PooledDataArray...) = -# broadcast!(+, PooledDataArray(promote_eltype_op(@functorize(+), As...), _broadcast_shape(As...)), As...) -# (.-)(A::PooledDataArray, B::PooledDataArray) = -# broadcast!(-, PooledDataArray(promote_op(@functorize(-), eltype(A), eltype(B)), -# _broadcast_shape(A,B)), A, B) -# (./)(A::PooledDataArray, B::PooledDataArray) = -# broadcast!(/, PooledDataArray(promote_op(@functorize(/), eltype(A), eltype(B)), -# _broadcast_shape(A, B)), A, B) -# (.\)(A::PooledDataArray, B::PooledDataArray) = -# broadcast!(\, PooledDataArray(promote_op(@functorize(\), eltype(A), eltype(B)), -# _broadcast_shape(A, B)), A, B) -# (.^)(A::PooledDataArray{Bool}, B::PooledDataArray{Bool}) = databroadcast(>=, A, B) -# (.^)(A::PooledDataArray, B::PooledDataArray) = -# broadcast!(^, PooledDataArray(promote_op(@functorize(^), eltype(A), eltype(B)), -# _broadcast_shape(A, B)), A, B) - -# for (sf, vf) in zip(scalar_comparison_operators, array_comparison_operators) -# @eval begin -# # ambiguity -# $(vf)(A::(@compat Union{PooledDataArray{Bool},DataArray{Bool}}), B::(@compat Union{PooledDataArray{Bool},DataArray{Bool}})) = -# broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) -# $(vf)(A::(@compat Union{PooledDataArray{Bool},DataArray{Bool}}), B::AbstractArray{Bool}) = -# broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) -# $(vf)(A::AbstractArray{Bool}, B::(@compat Union{PooledDataArray{Bool},DataArray{Bool}})) = -# broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) - -# @da_broadcast_binary $(vf)(A, B) = broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) -# end -# end diff --git a/src/dataarray.jl b/src/dataarray.jl index 14aa809..6f8a8d5 100644 --- a/src/dataarray.jl +++ b/src/dataarray.jl @@ -176,11 +176,7 @@ function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Inte if n == 0 return dest elseif n < 0 - if VERSION >= v"0.5.0-dev+4711" - throw(ArgumentError("tried to copy n=$n elements, but n should be nonnegative")) - else - throw(BoundsError()) - end + throw(ArgumentError("tried to copy n=$n elements, but n should be nonnegative")) end if isbits(eltype(src)) copy!(dest.data, doffs, src.data, soffs, n) diff --git a/src/datavector.jl b/src/datavector.jl index f30e5a7..5a4a6e2 100644 --- a/src/datavector.jl +++ b/src/datavector.jl @@ -44,14 +44,14 @@ function Base.shift!{T}(dv::DataVector{T}) end end -function Base.splice!(dv::DataVector, inds::(@compat Union{Integer, UnitRange{Int}})) +function Base.splice!(dv::DataVector, inds::Union{Integer, UnitRange{Int}}) v = dv[inds] deleteat!(dv.data, inds) deleteat!(dv.na, inds) v end -function Base.splice!(dv::DataVector, inds::(@compat Union{Integer, UnitRange{Int}}), ins::AbstractVector) +function Base.splice!(dv::DataVector, inds::Union{Integer, UnitRange{Int}}, ins::AbstractVector) # We cannot merely use the implementation in Base because this # needs to handle NA in the replacement vector v = dv[inds] @@ -68,32 +68,14 @@ function Base.splice!(dv::DataVector, inds::(@compat Union{Integer, UnitRange{In l = last(inds) d = length(inds) - if VERSION >= v"0.5.0-dev+5022" - if m < d - delta = d - m - i = (f - 1 < n - l) ? f : (l - delta + 1) - Base._deleteat!(a, i, delta) - elseif m > d - delta = m - d - i = (f - 1 < n - l) ? f : (l + 1) - Base._growat!(a, i, delta) - end - else - if m < d - delta = d - m - if f-1 < n-l - Base._deleteat_beg!(a, f, delta) - else - Base._deleteat_end!(a, l-delta+1, delta) - end - elseif m > d - delta = m - d - if f-1 < n-l - Base._growat_beg!(a, f, delta) - else - Base._growat_end!(a, l+1, delta) - end - end + if m < d + delta = d - m + i = (f - 1 < n - l) ? f : (l - delta + 1) + Base._deleteat!(a, i, delta) + elseif m > d + delta = m - d + i = (f - 1 < n - l) ? f : (l + 1) + Base._growat!(a, i, delta) end for k = 1:m @@ -156,13 +138,13 @@ Base.shift!(pdv::PooledDataVector) = pdv.pool[shift!(pdv.refs)] Base.reverse(x::AbstractDataVector) = x[end:-1:1] -function Base.splice!(pdv::PooledDataVector, inds::(@compat Union{Integer, UnitRange{Int}})) +function Base.splice!(pdv::PooledDataVector, inds::Union{Integer, UnitRange{Int}}) v = pdv[inds] deleteat!(pdv.refs, inds) v end -function Base.splice!(pdv::PooledDataVector, inds::(@compat Union{Integer, UnitRange{Int}}), ins::AbstractVector) +function Base.splice!(pdv::PooledDataVector, inds::Union{Integer, UnitRange{Int}}, ins::AbstractVector) v = pdv[inds] splice!(pdv.refs, inds, [getpoolidx(pdv, v) for v in ins]) v diff --git a/src/extras.jl b/src/extras.jl index f5b98a7..0db6def 100644 --- a/src/extras.jl +++ b/src/extras.jl @@ -20,11 +20,11 @@ function StatsBase.addcounts!{T,U,W}(cm::Dict{U,W}, x::AbstractDataArray{T}, wv: end function StatsBase.countmap{T}(x::AbstractDataArray{T}) - addcounts!(Dict{(@compat Union{T, NAtype}), Int}(), x) + addcounts!(Dict{Union{T, NAtype}, Int}(), x) end function StatsBase.countmap{T,W}(x::AbstractDataArray{T}, wv::WeightVec{W}) - addcounts!(Dict{(@compat Union{T, NAtype}), W}(), x, wv) + addcounts!(Dict{Union{T, NAtype}, W}(), x, wv) end function cut{S, T}(x::AbstractVector{S}, breaks::Vector{T}) diff --git a/src/indexing.jl b/src/indexing.jl index 7f75a45..d4c4cd6 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -20,9 +20,9 @@ unsafe_getindex_notna(a, extr, idx::Real) = Base.unsafe_getindex(a, idx) # Set NA or data portion of DataArray unsafe_bitsettrue!(chunks::Vector{UInt64}, idx::Real) = - chunks[Base._div64(@compat(Int(idx))-1)+1] |= (@compat(UInt64(1)) << Base._mod64(@compat(Int(idx))-1)) + chunks[Base._div64(Int(idx)-1)+1] |= (UInt64(1) << Base._mod64(Int(idx)-1)) unsafe_bitsetfalse!(chunks::Vector{UInt64}, idx::Real) = - chunks[Base._div64(@compat(Int(idx))-1)+1] &= ~(@compat(UInt64(1)) << Base._mod64(@compat(Int(idx))-1)) + chunks[Base._div64(Int(idx)-1)+1] &= ~(UInt64(1) << Base._mod64(Int(idx)-1)) unsafe_setna!(da::DataArray, extr, idx::Real) = unsafe_bitsettrue!(extr[2], idx) unsafe_setna!(da::PooledDataArray, extr, idx::Real) = setindex!(extr[1], 0, idx) @@ -124,17 +124,8 @@ function Base.getindex(da::DataArray, I::Real) return getindex(da.data, I) end end -# @nsplat N function Base.getindex(da::DataArray, I::NTuple{N,Real}...) -# if getindex(da.na, I...) -# return NA -# else -# return getindex(da.data, I...) -# end -# end - -if VERSION > v"0.5-" - Base.unsafe_getindex(x::Number, i::Int) = (@inbounds r = x[i]; r) -end + +Base.unsafe_getindex(x::Number, i) = (@inbounds xi = x[i]; xi) # Vector case @generated function Base._unsafe_getindex!(dest::DataArray, src::DataArray, I::Union{Real, AbstractArray}...) @@ -159,26 +150,6 @@ end end end -# function _getindex{T}(A::DataArray{T}, I::@compat Tuple{Vararg{Union{Int,AbstractVector}}}) -# shape = _index_shape(Base.to_indices(A, I)...) -# _getindex!(DataArray(Array{T}(shape), falses(shape)), A, I...) -# end - -# @nsplat N function Base.getindex(A::DataArray, I::NTuple{N,Union{Real,Colon,AbstractVector}}...) - # checkbounds(A, I...) - # _getindex(A, Base.to_indexes(I...)) -# end - -# Dispatch our implementation for these cases instead of Base -# function Base.getindex(A::DataArray, I::AbstractVector) -# checkbounds(A, I) -# _getindex(A, (Base.to_index(I),)) -# end -# function Base.getindex(A::DataArray, I::AbstractArray) -# checkbounds(A, I) -# _getindex(A, (Base.to_index(I),)) -# end - ## getindex: PooledDataArray # Scalar case @@ -207,12 +178,6 @@ end end end -# Dispatch our implementation for these cases instead of Base -# Base.getindex(A::PooledDataArray, I::AbstractVector) = -# PooledDataArray(RefArray(getindex(A.refs, I)), copy(A.pool)) -# Base.getindex(A::PooledDataArray, I::AbstractArray) = -# PooledDataArray(RefArray(getindex(A.refs, I)), copy(A.pool)) - ## setindex!: DataArray function Base.setindex!(da::DataArray, val::NAtype, i::Real) diff --git a/src/literals.jl b/src/literals.jl index 925cb68..019479a 100644 --- a/src/literals.jl +++ b/src/literals.jl @@ -91,7 +91,7 @@ end function parsedata(ex::Expr) if length(ex.args) == 0 - return :([]), Expr(:call, :Array, :Bool, 0) + return :([]), Expr(:call, :(Array{Bool}), 0) end if ex.head == :typed_vcat || (isa(ex.args[1], Expr) && ex.args[1].head == :row) return parsematrix(ex) diff --git a/src/operators.jl b/src/operators.jl index 3abd96e..fbbebf6 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -1,11 +1,3 @@ -promote_op{R,S}(f::Any, ::Type{R}, ::Type{S}) = - Base.promote_op(f, R, S) - -# Required for /(::Int, ::Int) -if VERSION < v"0.5.0-dev" - promote_op{R<:Integer,S<:Integer}(op, ::Type{R}, ::Type{S}) = typeof(op(one(R), one(S))) -end - const unary_operators = [:+, :-, :!, :*] const numeric_unary_operators = [:+, :-] @@ -129,7 +121,7 @@ const boolean_operators = [:(Base.any), # Swap arguments to fname() anywhere in AST. Returns the number of # arguments swapped -function swapargs(ast::Expr, fname::(@compat Union{Expr, Symbol})) +function swapargs(ast::Expr, fname::Union{Expr, Symbol}) if ast.head == :call && (ast.args[1] == fname || (isa(ast.args[1], Expr) && ast.args[1].head == :curly && @@ -146,7 +138,7 @@ function swapargs(ast::Expr, fname::(@compat Union{Expr, Symbol})) n end end -function swapargs(ast, fname::(@compat Union{Expr, Symbol})) +function swapargs(ast, fname::Union{Expr, Symbol}) ast 0 end @@ -216,7 +208,7 @@ macro dataarray_binary_scalar(vectorfunc, scalarfunc, outtype, swappable) Any[ begin if outtype == :nothing - outtype = :(promote_op(@functorize($scalarfunc), + outtype = :(promote_op($scalarfunc, eltype(a), eltype(b))) end fns = Any[ @@ -259,7 +251,7 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) function $(vectorfunc)(a::$atype, b::$btype) data1 = $(atype == :DataArray || atype == :(DataArray{Bool}) ? :(a.data) : :a) data2 = $(btype == :DataArray || btype == :(DataArray{Bool}) ? :(b.data) : :b) - res = Array{promote_op(@functorize($vectorfunc), eltype(a), eltype(b))}( + res = Array{promote_op($vectorfunc, eltype(a), eltype(b))}( promote_shape(size(a), size(b))) resna = $narule @bitenumerate resna i na begin @@ -282,7 +274,7 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) quote function $(vectorfunc)(a::$atype, b::$btype) res = similar($(asim ? :a : :b), - promote_op(@functorize($vectorfunc), eltype(a), eltype(b)), + promote_op($vectorfunc, eltype(a), eltype(b)), promote_shape(size(a), size(b))) for i = 1:length(a) res[i] = $(scalarfunc)(a[i], b[i]) @@ -345,8 +337,8 @@ end # But we're getting 10x R while maintaining NA's for (adata, bdata) in ((true, false), (false, true), (true, true)) @eval begin - function (*)(a::$(adata ? :((@compat Union{DataVector, DataMatrix})) : :((@compat Union{Vector, Matrix}))), - b::$(bdata ? :((@compat Union{DataVector, DataMatrix})) : :(@compat Union{Vector, Matrix}))) + function (*)(a::$(adata ? :(Union{DataVector, DataMatrix}) : :(Union{Vector, Matrix})), + b::$(bdata ? :(Union{DataVector, DataMatrix}) : :(Union{Vector, Matrix}))) c = $(adata ? :(a.data) : :a) * $(bdata ? :(b.data) : :b) res = DataArray(c, falses(size(c))) # Propagation can be made more efficient by storing record of corrupt @@ -476,7 +468,7 @@ end # DataArray with non-DataArray # Need explicit definition for BitArray to avoid ambiguity -for t in (:(BitArray), :(Range{Bool}), :((@compat Union{AbstractArray{Bool}, Bool}))) +for t in (:(BitArray), :(Range{Bool}), :(Union{AbstractArray{Bool}, Bool})) @eval begin @swappable (&)(a::DataArray{Bool}, b::$t) = DataArray(convert(Array{Bool}, a.data & b), a.na & b) @swappable (|)(a::DataArray{Bool}, b::$t) = DataArray(convert(Array{Bool}, a.data | b), a.na & !b) diff --git a/src/pooleddataarray.jl b/src/pooleddataarray.jl index d4ce41f..236478d 100644 --- a/src/pooleddataarray.jl +++ b/src/pooleddataarray.jl @@ -101,9 +101,9 @@ function PooledDataArray{T,R<:Integer,N}(d::AbstractArray{T, N}, end # Construct an all-NA PooledDataVector of a specific type -PooledDataArray(t::Type, dims::@compat Tuple{Vararg{Int}}) = PooledDataArray(Array{t}(dims), trues(dims)) +PooledDataArray(t::Type, dims::Tuple{Vararg{Int}}) = PooledDataArray(Array{t}(dims), trues(dims)) PooledDataArray(t::Type, dims::Int...) = PooledDataArray(Array{t}(dims), trues(dims)) -PooledDataArray{R<:Integer}(t::Type, r::Type{R}, dims::@compat Tuple{Vararg{Int}}) = PooledDataArray(Array{t}(dims), trues(dims), r) +PooledDataArray{R<:Integer}(t::Type, r::Type{R}, dims::Tuple{Vararg{Int}}) = PooledDataArray(Array{t}(dims), trues(dims), r) PooledDataArray{R<:Integer}(t::Type, r::Type{R}, dims::Int...) = PooledDataArray(Array(t, dims), trues(dims), r) # Construct an empty PooledDataVector of a specific type diff --git a/src/reduce.jl b/src/reduce.jl index bb9d32a..33fc601 100644 --- a/src/reduce.jl +++ b/src/reduce.jl @@ -64,7 +64,7 @@ end mapreduce_impl_skipna{T}(f, op, A::DataArray{T}) = mapreduce_seq_impl_skipna(f, op, T, A, 1, length(A.data)) -mapreduce_impl_skipna(f, op::typeof(@functorize(+)), A::DataArray) = +mapreduce_impl_skipna(f, op::typeof(+), A::DataArray) = mapreduce_pairwise_impl_skipna(f, op, A, 1, length(A.na.chunks), length(A.na)-countnz(A.na), max(128, sum_pairwise_blocksize(f))) @@ -87,48 +87,48 @@ end # NA, it returns NA. Otherwise we will fall back to the implementation # in Base, which is slow because it's type-unstable, but guarantees the # correct semantics -typealias SafeMapFuns @compat Union{typeof(identity), typeof(abs), typeof(abs2), +typealias SafeMapFuns Union{typeof(identity), typeof(abs), typeof(abs2), typeof(exp), typeof(log), typeof(Base.centralizedabs2fun)} -typealias SafeReduceFuns @compat Union{typeof(+), typeof(*), typeof(max), typeof(min)} +typealias SafeReduceFuns Union{typeof(+), typeof(*), typeof(max), typeof(min)} function Base._mapreduce(f::SafeMapFuns, op::SafeReduceFuns, A::DataArray) any(A.na) && return NA Base._mapreduce(f, op, A.data) end function Base.mapreduce(f, op::Function, A::DataArray; skipna::Bool=false) - (op === +) ? (skipna ? _mapreduce_skipna(f, @functorize(+), A) : Base._mapreduce(f, @functorize(+), A)) : - (op === *) ? (skipna ? _mapreduce_skipna(f, @functorize(*), A) : Base._mapreduce(f, @functorize(*), A)) : - (op === &) ? (skipna ? _mapreduce_skipna(f, @functorize(&), A) : Base._mapreduce(f, @functorize(&), A)) : - (op === |) ? (skipna ? _mapreduce_skipna(f, @functorize(|), A) : Base._mapreduce(f, @functorize(|), A)) : + (op === +) ? (skipna ? _mapreduce_skipna(f, +, A) : Base._mapreduce(f, +, A)) : + (op === *) ? (skipna ? _mapreduce_skipna(f, *, A) : Base._mapreduce(f, *, A)) : + (op === &) ? (skipna ? _mapreduce_skipna(f, &, A) : Base._mapreduce(f, &, A)) : + (op === |) ? (skipna ? _mapreduce_skipna(f, |, A) : Base._mapreduce(f, |, A)) : skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) end # To silence deprecations, but could be more efficient -Base.mapreduce(f, op::(@compat Union{typeof(@functorize(|)), typeof(@functorize(&))}), A::DataArray; skipna::Bool=false) = +Base.mapreduce(f, op::Union{typeof(|), typeof(&)}, A::DataArray; skipna::Bool=false) = skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) Base.mapreduce(f, op, A::DataArray; skipna::Bool=false) = skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) Base.reduce(op, A::DataArray; skipna::Bool=false) = - mapreduce(@functorize(identity), op, A; skipna=skipna) + mapreduce(identity, op, A; skipna=skipna) ## usual reductions -for (fn, op) in ((:(Base.sum), @functorize(+)), - (:(Base.prod), @functorize(*)), - (:(Base.minimum), @functorize(min)), - (:(Base.maximum), @functorize(max))) +for (fn, op) in ((:(Base.sum), +), + (:(Base.prod), *), + (:(Base.minimum), min), + (:(Base.maximum), max)) @eval begin - $fn(f::(@compat Union{Function,$(supertype(typeof(@functorize(abs))))}), a::DataArray; skipna::Bool=false) = + $fn(f::Union{Function,$(supertype(typeof(abs)))}, a::DataArray; skipna::Bool=false) = mapreduce(f, $op, a; skipna=skipna) $fn(a::DataArray; skipna::Bool=false) = - mapreduce(@functorize(identity), $op, a; skipna=skipna) + mapreduce(identity, $op, a; skipna=skipna) end end -for (fn, f, op) in ((:(Base.sumabs), @functorize(abs), @functorize(+)), - (:(Base.sumabs2), @functorize(abs2), @functorize(+))) +for (fn, f, op) in ((:(Base.sumabs), abs, +), + (:(Base.sumabs2), abs2, +)) @eval $fn(a::DataArray; skipna::Bool=false) = mapreduce($f, $op, a; skipna=skipna) end @@ -147,11 +147,11 @@ function Base.varm{T}(A::DataArray{T}, m::Number; corrected::Bool=true, skipna:: nna = countnz(na) nna == n && return convert(Base.momenttype(T), NaN) nna == n-1 && return convert(Base.momenttype(T), - abs2(A.data[Base.findnextnot(na, 1)] - m)/(1 - @compat(Int(corrected)))) + abs2(A.data[Base.findnextnot(na, 1)] - m)/(1 - corrected)) /(nna == 0 ? Base.centralize_sumabs2(A.data, m, 1, n) : mapreduce_impl_skipna(Base.centralizedabs2fun(m), +, A), - n - nna - @compat(Int(corrected))) + n - nna - corrected) else any(A.na) && return NA Base.varm(A.data, m; corrected=corrected) @@ -162,7 +162,7 @@ Base.varm{T}(A::DataArray{T}, m::NAtype; corrected::Bool=true, skipna::Bool=fals function Base.var(A::DataArray; corrected::Bool=true, mean=nothing, skipna::Bool=false) mean == 0 ? Base.varm(A, 0; corrected=corrected, skipna=skipna) : mean == nothing ? varm(A, Base.mean(A; skipna=skipna); corrected=corrected, skipna=skipna) : - isa(mean, (@compat Union{Number, NAtype})) ? + isa(mean, Union{Number, NAtype}) ? varm(A, mean; corrected=corrected, skipna=skipna) : throw(ErrorException("Invalid value of mean.")) end diff --git a/src/reducedim.jl b/src/reducedim.jl index 9c1c1bb..82e6362 100644 --- a/src/reducedim.jl +++ b/src/reducedim.jl @@ -5,7 +5,7 @@ using Base.check_reducedims # This is a substantially faster implementation of the "all" reduction # across dimensions for reducing a BitArray to an Array{Bool}. We use # this below for implementing MaxFun and MinFun with skipna=true. -@ngenerate N typeof(R) function Base._mapreducedim!{N}(f, op::typeof(@functorize(&)), R::Array{Bool}, A::BitArray{N}) +@ngenerate N typeof(R) function Base._mapreducedim!{N}(f, op::typeof(&), R::Array{Bool}, A::BitArray{N}) lsiz = check_reducedims(R, A) isempty(A) && return R @nextract N sizeR d->size(R, d) @@ -167,7 +167,7 @@ _getdata(A::DataArray) = A.data # mapreduce across a dimension. If specified, C contains the number of # non-NA values reduced into each element of R. @ngenerate N typeof(R) function _mapreducedim_skipna_impl!{T,N}(f, op, R::AbstractArray, - C::(@compat Union{Array{Int}, Void}), + C::Union{Array{Int}, Void}, A::DataArray{T,N}) data = A.data na = A.na @@ -226,11 +226,11 @@ _mapreducedim_skipna!(f, op, R::AbstractArray, A::DataArray) = _mapreducedim_skipna_impl!(f, op, R, nothing, A) # for MinFun/MaxFun, min or max is NA if all values along a dimension are NA -function _mapreducedim_skipna!(f, op::(@compat Union{typeof(@functorize(min)), typeof(@functorize(max))}), R::DataArray, A::DataArray) +function _mapreducedim_skipna!(f, op::Union{typeof(min), typeof(max)}, R::DataArray, A::DataArray) R.na = BitArray(all!(fill(true, size(R)), A.na)) _mapreducedim_skipna_impl!(f, op, R, nothing, A) end -function _mapreducedim_skipna!(f, op::(@compat Union{typeof(@functorize(min)), typeof(@functorize(max))}), R::AbstractArray, A::DataArray) +function _mapreducedim_skipna!(f, op::Union{typeof(min), typeof(max)}, R::AbstractArray, A::DataArray) if any(all!(fill(true, size(R)), A.na)) throw(NAException("all values along specified dimension are NA for one element of reduced dimension; cannot reduce to non-DataArray")) end @@ -239,7 +239,7 @@ end ## general reducedim interface -for op in (@functorize(+), @functorize(*), @functorize(&), @functorize(|),@functorize(scalarmin), @functorize(scalarmax), @functorize(min), @functorize(max)) +for op in (+, *, &, |, min, max) @eval begin function Base.initarray!{T}(a::DataArray{T}, op::typeof($op), init::Bool) if init @@ -251,45 +251,22 @@ for op in (@functorize(+), @functorize(*), @functorize(&), @functorize(|),@funct end end -# min and max defunctorize to ElementwiseMin/MaxFun which don't have initarray! -# or reducedim_init methods on 0.4. -if VERSION < v"0.5.0-dev+3701" - Base.initarray!(a::AbstractArray, ::Base.ElementwiseMaxFun, init::Bool) = - Base.initarray!(a, Base.MaxFun(), init) - Base.initarray!(a::AbstractArray, ::Base.ElementwiseMinFun, init::Bool) = - Base.initarray!(a, Base.MinFun(), init) - Base.reducedim_init(f, ::Base.ElementwiseMaxFun, a::AbstractArray, dim) = Base.reducedim_init(f, Base.MaxFun(), a, dim) - Base.reducedim_init(f, ::Base.ElementwiseMinFun, a::AbstractArray, dim) = Base.reducedim_init(f, Base.MinFun(), a, dim) -end - function Base.reducedim_initarray{R}(A::DataArray, region, v0, ::Type{R}) - if VERSION < v"0.6.0-dev.1121" - rd = Base.reduced_dims(A.data, region) - else - rd = length.(Base.reduced_indices(A.data, region)) - end + rd = length.(Base.reduced_indices(A.data, region)) DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) end function Base.reducedim_initarray0{R}(A::DataArray, region, v0, ::Type{R}) - if VERSION < v"0.6.0-dev.1121" - rd = Base.reduced_dims0(A,region) - else - rd = length.(Base.reduced_indices0(A,region)) - end + rd = length.(Base.reduced_indices0(A,region)) DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) end function Base.mapreducedim!(f::Function, op, R::AbstractArray, A::DataArray; skipna::Bool=false) - (op === +) ? (skipna ? _mapreducedim_skipna!(f, @functorize(+), R, A) : _mapreducedim!(f, @functorize(+), R, A)) : - (op === *) ? (skipna ? _mapreducedim_skipna!(f, @functorize(*), R, A) : _mapreducedim!(f, @functorize(*), R, A)) : - (op === &) ? (skipna ? _mapreducedim_skipna!(f, @functorize(&), R, A) : _mapreducedim!(f, @functorize(&), R, A)) : - (op === |) ? (skipna ? _mapreducedim_skipna!(f, @functorize(|), R, A) : _mapreducedim!(f, @functorize(|), R, A)) : skipna ? _mapreducedim_skipna!(f, op, R, A) : _mapreducedim!(f, op, R, A) end Base.mapreducedim!(f, op, R::AbstractArray, A::DataArray; skipna::Bool=false) = skipna ? _mapreducedim_skipna!(f, op, R, A) : _mapreducedim!(f, op, R, A) Base.reducedim!{RT}(op, R::DataArray{RT}, A::AbstractArray; skipna::Bool=false) = - Base.mapreducedim!(@functorize(identity), op, R, A, zero(RT); skipna=skipna) + Base.mapreducedim!(identity, op, R, A, zero(RT); skipna=skipna) Base.mapreducedim(f, op, A::DataArray, region, v0; skipna::Bool=false) = Base.mapreducedim!(f, op, Base.reducedim_initarray(A, region, v0), A; skipna=skipna) @@ -297,35 +274,35 @@ Base.mapreducedim{T}(f, op, A::DataArray{T}, region; skipna::Bool=false) = Base.mapreducedim!(f, op, Base.reducedim_init(f, op, A, region), A; skipna=skipna) Base.reducedim(op, A::DataArray, region, v0; skipna::Bool=false) = - Base.mapreducedim(@functorize(identity), op, A, region, v0; skipna=skipna) + Base.mapreducedim(identity, op, A, region, v0; skipna=skipna) Base.reducedim(op, A::DataArray, region; skipna::Bool=false) = - Base.mapreducedim(@functorize(identity), op, A, region; skipna=skipna) + Base.mapreducedim(identity, op, A, region; skipna=skipna) ## usual reductions -for (basfn, Op) in [(:sum, @functorize(+)), (:prod, @functorize(*)), - (:maximum, @functorize(max)), (:minimum, @functorize(min)), - (:all, @functorize(&)), (:any, @functorize(|))] +for (basfn, Op) in [(:sum, +), (:prod, *), + (:maximum, max), (:minimum, min), + (:all, &), (:any, |)] fname = Expr(:., :Base, Base.Meta.quot(basfn)) fname! = Expr(:., :Base, Base.Meta.quot(Symbol(string(basfn, '!')))) @eval begin - $(fname!)(f::(@compat Union{Function,$(supertype(typeof(@functorize(abs))))}), r::AbstractArray, A::DataArray; + $(fname!)(f::Union{Function,$(supertype(typeof(abs)))}, r::AbstractArray, A::DataArray; init::Bool=true, skipna::Bool=false) = Base.mapreducedim!(f, $(Op), Base.initarray!(r, $(Op), init), A; skipna=skipna) $(fname!)(r::AbstractArray, A::DataArray; init::Bool=true, skipna::Bool=false) = - $(fname!)(@functorize(identity), r, A; init=init, skipna=skipna) + $(fname!)(identity, r, A; init=init, skipna=skipna) - $(fname)(f::(@compat Union{Function,$(supertype(typeof(@functorize(abs))))}), A::DataArray, region; skipna::Bool=false) = + $(fname)(f::Union{Function,$(supertype(typeof(abs)))}, A::DataArray, region; skipna::Bool=false) = Base.mapreducedim(f, $(Op), A, region; skipna=skipna) $(fname)(A::DataArray, region; skipna::Bool=false) = - $(fname)(@functorize(identity), A, region; skipna=skipna) + $(fname)(identity, A, region; skipna=skipna) end end -for (basfn, fbase, Fun) in [(:sumabs, :sum, @functorize(abs)), - (:sumabs2, :sum, @functorize(abs2)), - (:maxabs, :maximum, @functorize(abs)), - (:minabs, :minimum, @functorize(abs))] +for (basfn, fbase, Fun) in [(:sumabs, :sum, abs), + (:sumabs2, :sum, abs2), + (:maxabs, :maximum, abs), + (:minabs, :minimum, abs)] fname = Expr(:., :Base, Base.Meta.quot(basfn)) fname! = Expr(:., :Base, Base.Meta.quot(Symbol(string(basfn, '!')))) fbase! = Expr(:., :Base, Base.Meta.quot(Symbol(string(fbase, '!')))) @@ -344,7 +321,7 @@ function Base.mean!{T}(R::AbstractArray{T}, A::DataArray; skipna::Bool=false, init && fill!(R, zero(eltype(R))) if skipna C = Array{Int}(size(R)) - _mapreducedim_skipna_impl!(@functorize(identity), @functorize(+), R, C, A) + _mapreducedim_skipna_impl!(identity, +, R, C, A) broadcast!(/, R, R, C) else sum!(R, A; skipna=false) @@ -363,7 +340,7 @@ immutable MapReduceDim2ArgHelperFun{F,T} f::F val::T end -@compat (f::MapReduceDim2ArgHelperFun)(x) = f.f(x, f.val) +(f::MapReduceDim2ArgHelperFun)(x) = f.f(x, f.val) # A version of _mapreducedim! that accepts an array S of the same size # as R, the elements of which are passed as a second argument to f. @@ -429,7 +406,7 @@ end # A version of _mapreducedim_skipna! that accepts an array S of the same size # as R, the elements of which are passed as a second argument to f. @ngenerate N typeof(R) function _mapreducedim_skipna_2arg!{T,N}(f, op, R::AbstractArray, - C::(@compat Union{Array{Int}, Void}), + C::Union{Array{Int}, Void}, A::DataArray{T,N}, S::AbstractArray) data = A.data na = A.na @@ -502,7 +479,7 @@ end end immutable Abs2MinusFun end -@compat (::Abs2MinusFun)(x, m) = abs2(x - m) +(::Abs2MinusFun)(x, m) = abs2(x - m) function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected::Bool=true, skipna::Bool=false, init::Bool=true) @@ -514,7 +491,7 @@ function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected: C = Array{Int}(size(R)) # Compute R = abs2(A-m) - _mapreducedim_skipna_2arg!(Abs2MinusFun(), @functorize(+), R, C, A, m) + _mapreducedim_skipna_2arg!(Abs2MinusFun(), +, R, C, A, m) # Divide by number of non-NA values if corrected @@ -525,10 +502,10 @@ function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected: broadcast!(/, R, R, C) else # Compute R = abs2(A-m) - _mapreducedim_2arg!(Abs2MinusFun(), @functorize(+), R, A, m) + _mapreducedim_2arg!(Abs2MinusFun(), +, R, A, m) # Divide by number of values - broadcast!(/, R, R, div(length(A), length(R)) - @compat(Int(corrected))) + broadcast!(/, R, R, div(length(A), length(R)) - corrected) end end end @@ -538,7 +515,7 @@ Base.varm{T}(A::DataArray{T}, m::AbstractArray, region; corrected::Bool=true, Base.varm!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A, m; corrected=corrected, skipna=skipna, init=false) -function Base.var{T}(A::DataArray{T}, region::(@compat Union{Integer, AbstractArray, Tuple}); +function Base.var{T}(A::DataArray{T}, region::Union{Integer, AbstractArray, Tuple}; corrected::Bool=true, mean=nothing, skipna::Bool=false) if mean == 0 Base.varm(A, Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), region; @@ -546,11 +523,7 @@ function Base.var{T}(A::DataArray{T}, region::(@compat Union{Integer, AbstractAr elseif mean == nothing if skipna # Can reduce mean into ordinary array - if VERSION < v"0.6.0-dev.1121" - m = zeros(Base.momenttype(T), Base.reduced_dims(A, region)) - else - m = zeros(Base.momenttype(T), length.(Base.reduced_indices(A, region))) - end + m = zeros(Base.momenttype(T), length.(Base.reduced_indices(A, region))) Base.varm(A, Base.mean!(m, A; skipna=skipna), region; corrected=corrected, skipna=skipna) else diff --git a/src/sort.jl b/src/sort.jl index 40c47d0..45d7177 100644 --- a/src/sort.jl +++ b/src/sort.jl @@ -15,7 +15,7 @@ end datachunks(o::Base.Order.Perm, v::AbstractVector{Int}) = (v, o.data.na.chunks) datachunks(o::Base.Order.DirectOrdering, v::DataVector) = (v.data, v.na.chunks) -function nas2left!(v::(@compat Union{AbstractVector{Int}, DataVector}), o::Base.Order.Ordering, lo::Int=1, hi::Int=length(v)) +function nas2left!(v::Union{AbstractVector{Int}, DataVector}, o::Base.Order.Ordering, lo::Int=1, hi::Int=length(v)) data, chunks = datachunks(o, v) i = lo @@ -37,7 +37,7 @@ function nas2left!(v::(@compat Union{AbstractVector{Int}, DataVector}), o::Base. return i, hi end -function nas2right!(v::(@compat Union{AbstractVector{Int}, DataVector}), o::Base.Order.Ordering, lo::Int=1, hi::Int=length(v)) +function nas2right!(v::Union{AbstractVector{Int}, DataVector}, o::Base.Order.Ordering, lo::Int=1, hi::Int=length(v)) data, chunks = datachunks(o, v) i = hi diff --git a/test/data.jl b/test/data.jl index 073cc70..31a8431 100644 --- a/test/data.jl +++ b/test/data.jl @@ -26,21 +26,12 @@ module TestData @assert isa(dvint2, DataVector{Int}) @assert isa(dvint3, DataVector{Int}) @assert isa(dvflt, DataVector{Float64}) - if VERSION < v"0.5.0-dev+3876" - @assert isa(dvstr, DataVector{ASCIIString}) - else - @assert isa(dvstr, DataVector{String}) - end - # @test throws_exception(DataArray([5:8], falses(2)), Exception) + @assert isa(dvstr, DataVector{String}) + @test_throws ArgumentError DataArray([5:8], falses(2)) #test_group("PooledDataVector creation") pdvstr = @pdata ["one", "one", "two", "two", NA, "one", "one"] - if VERSION < v"0.5.0-dev+3876" - @assert isa(pdvstr, PooledDataVector{ASCIIString}) - else - @assert isa(pdvstr, PooledDataVector{String}) - end - # @test throws_exception(PooledDataVector["one", "one", 9], Exception) + @assert isa(pdvstr, PooledDataVector{String}) @assert isequal(PooledDataArray(pdvstr), pdvstr) #test_group("PooledDataVector creation with predetermined pool") @@ -92,11 +83,7 @@ module TestData @assert size(pdvstr) == (7,) @assert length(pdvstr) == 7 @assert sum(isna(pdvstr)) == 1 - if VERSION < v"0.5.0-dev+3876" - @assert eltype(pdvstr) == ASCIIString - else - @assert eltype(pdvstr) == String - end + @assert eltype(pdvstr) == String #test_group("DataVector operations") @assert isequal(dvint .+ 1, DataArray([2, 3, 4, 5], [false, false, true, false])) diff --git a/test/dataarray.jl b/test/dataarray.jl index 39c4d80..fcf7088 100644 --- a/test/dataarray.jl +++ b/test/dataarray.jl @@ -92,11 +92,7 @@ module TestDataArray @test_throws BoundsError copy!(dest, 1, src, idx, 1) end - if VERSION >= v"0.5.0-dev+4711" - @test_throws ArgumentError copy!(dest, 1, src, 1, -1) - else - @test_throws BoundsError copy!(dest, 1, src, 1, -1) - end + @test_throws ArgumentError copy!(dest, 1, src, 1, -1) @test_throws BoundsError copy!(dest, bigsrc) diff --git a/test/extras.jl b/test/extras.jl index b23f87f..be72d0b 100644 --- a/test/extras.jl +++ b/test/extras.jl @@ -10,8 +10,8 @@ module TestExtras d = @data [NA,3,3] w = weights([1.1,2.2,3.3]) - cm = Dict{(@compat Union{Int, NAtype}), Int}([(NA, 1), (3, 2)]) - cmw = Dict{(@compat Union{Int, NAtype}), Real}([(NA, 1.1), (3, 5.5)]) + cm = Dict{Union{Int, NAtype}, Int}([(NA, 1), (3, 2)]) + cmw = Dict{Union{Int, NAtype}, Real}([(NA, 1.1), (3, 5.5)]) @assert isequal(countmap(d), cm) @assert isequal(countmap(d, w), cmw) diff --git a/test/reduce.jl b/test/reduce.jl index a26680c..10eb8df 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -6,37 +6,20 @@ srand(1337) ## extended test of sum for skipna in (true, false) - if VERSION < v"0.5-" - @test sum(@data(Int8[]); skipna=skipna) === 0 - @test sum(@data(Int[]); skipna=skipna) === 0 - @test sum(@data(Float64[]); skipna=skipna) === 0.0 - - @test sum(@data([@compat(Int8(3))]); skipna=skipna) === 3 - @test sum(@data([3]); skipna=skipna) === 3 - @test sum(@data([3.0]); skipna=skipna) === 3.0 - - z = DataArray(reshape(1:16, (2,2,2,2))) - fz = convert(DataArray{Float64}, z) - bfz = convert(DataArray{BigFloat}, z) - @test sum(z) === 136 - @test sum(fz) === 136.0 - @test sum(bfz) == 136 - else - @test sum(@data(Int8[]); skipna=skipna) === Int32(0) - @test sum(@data(Int[]); skipna=skipna) === 0 - @test sum(@data(Float64[]); skipna=skipna) === 0.0 - - @test sum(@data([@compat(Int8(3))]); skipna=skipna) === Int32(3) - @test sum(@data([3]); skipna=skipna) === 3 - @test sum(@data([3.0]); skipna=skipna) === 3.0 - - z = DataArray(reshape(1:16, (2,2,2,2))) - fz = convert(DataArray{Float64}, z) - bfz = convert(DataArray{BigFloat}, z) - @test sum(z) === 136 - @test sum(fz) === 136.0 - @test sum(bfz) == 136 - end + @test sum(@data(Int8[]); skipna=skipna) === Int32(0) + @test sum(@data(Int[]); skipna=skipna) === 0 + @test sum(@data(Float64[]); skipna=skipna) === 0.0 + + @test sum(@data([Int8(3)]); skipna=skipna) === Int32(3) + @test sum(@data([3]); skipna=skipna) === 3 + @test sum(@data([3.0]); skipna=skipna) === 3.0 + + z = DataArray(reshape(1:16, (2,2,2,2))) + fz = convert(DataArray{Float64}, z) + bfz = convert(DataArray{BigFloat}, z) + @test sum(z) === 136 + @test sum(fz) === 136.0 + @test sum(bfz) == 136 end @test sum(@data(Int[NA])) === NA @@ -58,7 +41,7 @@ bfz = convert(DataArray{BigFloat}, z) @test sum(fz; skipna=true) === 130.0 @test sum(bfz; skipna=true) == 130 -bs = DataArrays.sum_pairwise_blocksize(@functorize(identity)) +bs = DataArrays.sum_pairwise_blocksize(identity) for n in [bs-64, bs-1, bs, bs+1, bs+2, 2*bs-2:2*bs+3..., 4*bs-2:4*bs+3...] da = DataArray(randn(n)) s = sum(da.data) @@ -137,9 +120,9 @@ end for fn in (+, *, |, &) da = convert(DataArray, bitrand(10)) - s = mapreduce(@functorize(identity), fn, da.data) - @test mapreduce(@functorize(identity), fn, da) == s - @test mapreduce(@functorize(identity), fn, da; skipna=true) == s + s = mapreduce(identity, fn, da.data) + @test mapreduce(identity, fn, da) == s + @test mapreduce(identity, fn, da; skipna=true) == s @test reduce(fn, da) == s @test reduce(fn, da; skipna=true) == s end diff --git a/test/reducedim.jl b/test/reducedim.jl index dead510..4bce0ab 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -127,11 +127,7 @@ for Areduc in (DataArray(rand(3, 4, 5, 6)), (1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)] # println("region = $region, skipna = $skipna") - if VERSION < v"0.6.0-dev.1121" - outputs = Any[DataArray(fill(NaN, Base.reduced_dims(size(Areduc), region)))] - else - outputs = Any[DataArray(fill(NaN, length.(Base.reduced_indices(indices(Areduc), region))))] - end + outputs = Any[DataArray(fill(NaN, length.(Base.reduced_indices(indices(Areduc), region))))] has_na = anyna(Areduc) if has_na && !skipna # Should throw an error reducing to non-DataArray diff --git a/test/sort.jl b/test/sort.jl index 58e969a..c1f6649 100644 --- a/test/sort.jl +++ b/test/sort.jl @@ -15,7 +15,7 @@ for T in (Float64, BigFloat) n = 1000 na = bitrand(n) nna = sum(na) - a = Array(T, n) + a = Vector{T}(n) ra = randn(n-nna) a[!na] = ra for da in (DataArray(a, na), PooledDataArray(a, na), (pda = PooledDataArray(a, na); setlevels!(pda, shuffle!(pda.pool)))) From 6fe6e13b0ae66487a099d37284a26352daa9e93b Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sun, 12 Feb 2017 12:56:30 -0500 Subject: [PATCH 03/14] Fix broadcast warnings and remove unused operator groups form operators.jl --- src/DataArrays.jl | 6 -- src/extras.jl | 2 +- src/indexing.jl | 14 ++--- src/operators.jl | 156 +++++++++------------------------------------- test/broadcast.jl | 29 ++++----- test/operators.jl | 53 +++++++++++++--- 6 files changed, 94 insertions(+), 166 deletions(-) diff --git a/src/DataArrays.jl b/src/DataArrays.jl index fee031e..ebe776e 100644 --- a/src/DataArrays.jl +++ b/src/DataArrays.jl @@ -80,10 +80,4 @@ module DataArrays include("predicates.jl") include("literals.jl") include("deprecated.jl") - - Base.@deprecate removeNA dropna - Base.@deprecate each_failNA each_failna - Base.@deprecate each_replaceNA each_replacena - Base.@deprecate set_levels setlevels - Base.@deprecate set_levels! setlevels! end diff --git a/src/extras.jl b/src/extras.jl index 0db6def..18d9aad 100644 --- a/src/extras.jl +++ b/src/extras.jl @@ -49,7 +49,7 @@ function cut{S, T}(x::AbstractVector{S}, breaks::Vector{T}) n = length(breaks) from = map(x -> sprint(showcompact, x), breaks[1:(n - 1)]) to = map(x -> sprint(showcompact, x), breaks[2:n]) - pool = Array(String, n - 1) + pool = Vector{String}(n - 1) if breaks[1] == min_x pool[1] = string("[", from[1], ",", to[1], "]") else diff --git a/src/indexing.jl b/src/indexing.jl index d4c4cd6..fe65431 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -127,24 +127,24 @@ end Base.unsafe_getindex(x::Number, i) = (@inbounds xi = x[i]; xi) -# Vector case @generated function Base._unsafe_getindex!(dest::DataArray, src::DataArray, I::Union{Real, AbstractArray}...) N = length(I) quote $(Expr(:meta, :inline)) - idxlens = index_lengths(I...) # TODO: unsplat? + @nexprs $N d->(J_d = I[d]) srcextr = daextract(src) destextr = daextract(dest) srcsz = size(src) - k = 1 - @nloops $N i d->(1:idxlens[d]) d->(@inbounds j_d = getindex(I[d], i_d)) begin + D = eachindex(dest) + Ds = start(D) + @nloops $N j d->J_d begin offset_0 = @ncall $N sub2ind srcsz j + d, Ds = next(D, Ds) if unsafe_isna(src, srcextr, offset_0) - unsafe_dasetindex!(dest, destextr, NA, k) + unsafe_dasetindex!(dest, destextr, NA, d) else - unsafe_dasetindex!(dest, destextr, unsafe_getindex_notna(src, srcextr, offset_0), k) + unsafe_dasetindex!(dest, destextr, unsafe_getindex_notna(src, srcextr, offset_0), d) end - k += 1 end dest end diff --git a/src/operators.jl b/src/operators.jl index fbbebf6..012c4f3 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -1,78 +1,3 @@ -const unary_operators = [:+, :-, :!, :*] - -const numeric_unary_operators = [:+, :-] - -const logical_unary_operators = [:!] - -const elementary_functions = [:(Base.abs), - :(Base.abs2), - :(Base.sign), - :(Base.acos), - :(Base.acosh), - :(Base.asin), - :(Base.asinh), - :(Base.atan), - :(Base.atanh), - :(Base.sin), - :(Base.sinh), - :(Base.conj), - :(Base.cos), - :(Base.cosh), - :(Base.tan), - :(Base.tanh), - :(Base.ceil), - :(Base.floor), - :(Base.round), - :(Base.trunc), - :(Base.exp), - :(Base.exp2), - :(Base.expm1), - :(Base.log), - :(Base.log10), - :(Base.log1p), - :(Base.log2), - :(Base.exponent), - :(Base.sqrt), - :(Base.gamma), - :(Base.lgamma), - :(Base.digamma), - :(Base.erf), - :(Base.erfc)] - -const two_argument_elementary_functions = [:(Base.round), - :(Base.ceil), - :(Base.floor), - :(Base.trunc)] - -const special_comparison_operators = [:(Base.isless)] - -const scalar_comparison_operators = [:(==),:(!=),:(>),:(>=),:(<),:(<=)] - -const array_comparison_operators = [:(.==),:(.!=),:(.>),:(.>=),:(.<),:(.<=)] - -const vectorized_comparison_operators = [:(.==),:(==),:(.!=),:(!=),:(.>),:(>),:(.>=),:(>=),:(.<),:(<),:(.<=),:(<=)] - -const binary_operators = [:(+),:(.+),:(-),:(.-),:(*),:(.*),:(/),:(./),:(.^), - :(Base.div), - :(Base.mod), - :(Base.fld), - :(Base.rem)] - -const induced_binary_operators = [(:^)] - -const induced_arithmetic_operators = [:(^)] - -const biscalar_operators = [:(Base.maximum), - :(Base.minimum)] - -const scalar_arithmetic_operators = [:(+),:(-),:(*),:(/), - :(Base.div), - :(Base.mod), - :(Base.fld), - :(Base.rem)] - -const induced_scalar_arithmetic_operators = [:(^)] - const unary_vector_operators = [:(Base.median), :(StatsBase.mad), :(Base.norm), @@ -81,12 +6,6 @@ const unary_vector_operators = [:(Base.median), # TODO: dist, iqr -const cumulative_vector_operators = [:(Base.cumprod), - :(Base.cumsum), - :(Base.cumsum_kbn), - :(Base.cummin), - :(Base.cummax)] - const ffts = [:(Base.fft)] const binary_vector_operators = [:(Base.dot), @@ -105,20 +24,6 @@ const rowwise_operators = [:rowminimums, :rowffts, :rownorms] -const columnar_operators = [:colminimums, - :colmaxs, - :colprods, - :colsums, - :colmeans, - :colmedians, - :colstds, - :colvars, - :colffts, - :colnorms] - -const boolean_operators = [:(Base.any), - :(Base.all)] - # Swap arguments to fname() anywhere in AST. Returns the number of # arguments swapped function swapargs(ast::Expr, fname::Union{Expr, Symbol}) @@ -294,7 +199,7 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) end # Unary operators, NA -for f in unary_operators +for f in [:+,:-,:*,:/] @eval $(f)(d::NAtype) = NA end @@ -574,19 +479,11 @@ end # ambiguity @swappable (==)(::NAtype, ::WeakRef) = NA -for (sf,vf) in zip(scalar_comparison_operators, array_comparison_operators) +for sf in [:(==),:(!=),:(>),:(>=),:(<),:(<=)] @eval begin - # Array with NA - @swappable ($(vf)){T,N}(::NAtype, b::AbstractArray{T,N}) = - DataArray(Array{Bool,N}(size(b)), trues(size(b))) - # Scalar with NA - ($(vf))(::NAtype, ::NAtype) = NA ($(sf))(::NAtype, ::NAtype) = NA - @swappable ($(vf))(::NAtype, b) = NA @swappable ($(sf))(::NAtype, b) = NA - - @dataarray_binary_scalar $(vf) $(sf) Bool true end end @@ -594,14 +491,8 @@ end # Binary operators # -# Necessary to avoid ambiguity warnings -(.^)(::Irrational{:e}, B::DataArray) = exp(B) -(.^)(::Irrational{:e}, B::AbstractDataArray) = exp(B) - -for f in (:(+), :(.+), :(-), :(.-), - :(*), :(.*), :(/), :(./), :(.^), :(Base.div), - :(Base.mod), :(Base.fld), :(Base.rem), :(Base.min), - :(Base.max)) +for f in (:(+), :(-), :(*), :(/), + :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem), :(Base.min), :(Base.max)) @eval begin # Scalar with NA ($f)(::NAtype, ::NAtype) = NA @@ -677,8 +568,7 @@ end end # if isdefined(Base, :UniformScaling) -for f in (:(.+), :(.-), :(*), :(.*), :(./), - :(.^), :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem)) +for f in (:(*), :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem)) @eval begin # Array with NA @swappable $(f){T,N}(::NAtype, b::AbstractArray{T,N}) = @@ -715,7 +605,7 @@ end DataArray(Array{T,N}(size(b)), trues(size(b))) @dataarray_binary_scalar(/, /, nothing, false) -for f in biscalar_operators +for f in [:(Base.maximum), :(Base.minimum)] @eval begin ($f)(::NAtype, ::NAtype) = NA @swappable $(f)(::Number, ::NAtype) = NA @@ -737,19 +627,31 @@ function Base.LinAlg.diff(dv::DataVector) return DataArray(new_data, new_na) end -for f in cumulative_vector_operators - @eval function ($f)(dv::DataVector) - new_data = ($f)(dv.data) - new_na = falses(length(dv)) - hitna = false - @bitenumerate dv.na i na begin - hitna |= na - if hitna - new_na[i] = true - end +# for f in cumulative_vector_operators +# @eval function ($f)(dv::DataVector) +# new_data = ($f)(dv.data) +# new_na = falses(length(dv)) +# hitna = false +# @bitenumerate dv.na i na begin +# hitna |= na +# if hitna +# new_na[i] = true +# end +# end +# return DataArray(new_data, new_na) +# end +# end +function Base.accumulate(f, dv::DataVector) + new_data = accumulate(f, dv.data) + new_na = falses(length(dv)) + hitna = false + @bitenumerate dv.na i na begin + hitna |= na + if hitna + new_na[i] = true end - return DataArray(new_data, new_na) end + return DataArray(new_data, new_na) end for f in [unary_vector_operators; ffts] diff --git a/test/broadcast.jl b/test/broadcast.jl index 1a23b3d..0cc4dbc 100644 --- a/test/broadcast.jl +++ b/test/broadcast.jl @@ -7,7 +7,7 @@ as_dataarray_bigfloat(x) = convert(DataArray{BigFloat}, x) as_pda(x) = convert(PooledDataArray, x) as_pda_bigfloat(x) = convert(PooledDataArray{BigFloat}, x) -bittest(f::Function, ewf::Function, a...) = (@test ewf(a...) == +bittest(f::Function, a...) = (@test broadcast(f, a...) == invoke(broadcast, Tuple{Function,ntuple(x->AbstractArray, length(a))...}, f, a...)) n1 = 21 n2 = 32 @@ -81,21 +81,18 @@ for arr in (identity, as_dataarray, as_pda, as_dataarray_bigfloat, as_pda_bigflo # @test A == diagm(10:12) # @test_throws BoundsError broadcast_setindex!(A, 7, [1,-1], [1 2]) - for (f, ewf) in (((==), (.==)), - ((<) , (.<) ), - ((!=), (.!=)), - ((<=), (.<=))) - bittest(f, ewf, arr(eye(2)), arr([1, 4])) - bittest(f, ewf, arr(eye(2)), arr([1 4])) - bittest(f, ewf, arr([0, 1]), arr([1 4])) - bittest(f, ewf, arr([0 1]), arr([1, 4])) - bittest(f, ewf, arr([1, 0]), arr([1, 4])) + for f in (==, (<), (!=), (<=)) + bittest(f, arr(eye(2)), arr([1, 4])) + bittest(f, arr(eye(2)), arr([1 4])) + bittest(f, arr([0, 1]), arr([1 4])) + bittest(f, arr([0 1]), arr([1, 4])) + bittest(f, arr([1, 0]), arr([1, 4])) # these should work once indexing is fixed - #bittest(f, ewf, arr(rand(rb, n1, n2, n3)), arr(rand(rb, n1, n2, n3))) - #bittest(f, ewf, arr(rand(rb, 1, n2, n3)), arr(rand(rb, n1, 1, n3))) - #bittest(f, ewf, arr(rand(rb, 1, n2, 1)), arr(rand(rb, n1, 1, n3))) - #bittest(f, ewf, arr(bitrand(n1, n2, n3)), arr(bitrand(n1, n2, n3))) + bittest(f, arr(rand(rb, n1, n2, n3)), arr(rand(rb, n1, n2, n3))) + bittest(f, arr(rand(rb, 1, n2, n3)), arr(rand(rb, n1, 1, n3))) + bittest(f, arr(rand(rb, 1, n2, 1)), arr(rand(rb, n1, 1, n3))) + bittest(f, arr(bitrand(n1, n2, n3)), arr(bitrand(n1, n2, n3))) end end @@ -106,8 +103,8 @@ ratio = @data [1,1/2,1/3,1/4,1/5] @test r1./r2 == ratio m = @data [1 2] @test m.*r2 == DataArray([1:5 2:2:10]) -@test_approx_eq m./r2 [ratio 2ratio] -@test_approx_eq m./collect(r2) [ratio 2ratio] +@test m./r2 ≈ [ratio 2ratio] +@test m./collect(r2) ≈ [ratio 2ratio] @test @inferred([0,1.2].+reshape([0,-2],1,1,2)) == reshape([0 -2; 1.2 -0.8],2,1,2) rt = Base.return_types(.+, (DataArray{Float64, 3}, DataArray{Int, 1})) diff --git a/test/operators.jl b/test/operators.jl index 39e18b0..dcc9fce 100644 --- a/test/operators.jl +++ b/test/operators.jl @@ -8,6 +8,41 @@ module TestOperators const comparison_operators = [:(==),:(!=),:(>),:(>=),:(<),:(<=)] + const elementary_functions = [:(Base.abs), + :(Base.abs2), + :(Base.sign), + :(Base.acos), + :(Base.acosh), + :(Base.asin), + :(Base.asinh), + :(Base.atan), + :(Base.atanh), + :(Base.sin), + :(Base.sinh), + :(Base.conj), + :(Base.cos), + :(Base.cosh), + :(Base.tan), + :(Base.tanh), + :(Base.ceil), + :(Base.floor), + :(Base.round), + :(Base.trunc), + :(Base.exp), + :(Base.exp2), + :(Base.expm1), + :(Base.log), + :(Base.log10), + :(Base.log1p), + :(Base.log2), + :(Base.exponent), + :(Base.sqrt), + :(Base.gamma), + :(Base.lgamma), + :(Base.digamma), + :(Base.erf), + :(Base.erfc)] + macro test_da_pda(da, code) esc(quote let $da = copy($da) @@ -20,13 +55,13 @@ module TestOperators end # All unary operators return NA when evaluating NA - for f in map(eval, DataArrays.unary_operators) + for f in [+,-,*,/] @assert isna(f(NA)) end # All elementary functions return NA when evaluating NA - for f in map(eval, DataArrays.elementary_functions) - @assert isna(f(NA)) + for f in elementary_functions + @assert @eval isna(($f)(NA)) end # All comparison operators return NA when comparing NA with NA @@ -60,7 +95,7 @@ module TestOperators # application of those same operators dv = @data ones(5) @test_da_pda dv begin - for f in map(eval, DataArrays.numeric_unary_operators) + for f in [+,-] for i in 1:length(dv) @assert f(dv)[i] == f(dv[i]) end @@ -68,7 +103,7 @@ module TestOperators end dv = convert(DataArray, trues(5)) @test_da_pda dv begin - for f in map(eval, DataArrays.logical_unary_operators) + for f in [!] for i in 1:length(dv) @assert f(dv)[i] == f(dv[i]) end @@ -105,9 +140,9 @@ module TestOperators # Elementary functions on DataVector's dv = convert(DataArray, ones(5)) @test_da_pda dv begin - for f in map(eval, DataArrays.elementary_functions) + for f in elementary_functions for i in 1:length(dv) - @assert f(dv)[i] == f(dv[i]) + @assert @eval ($f).(dv)[$i] == ($f)(dv[$i]) end end end @@ -289,13 +324,13 @@ module TestOperators # Cumulative vector operators on DataVector's dv = convert(DataArray, ones(5)) - for f in map(eval, DataArrays.cumulative_vector_operators) + for f in [Base.cumprod, Base.cumsum] for i in 1:length(dv) @assert f(dv)[i] == f(dv.data)[i] end end dv[4] = NA - for f in map(eval, DataArrays.cumulative_vector_operators) + for f in [Base.cumprod, Base.cumsum] for i in 1:3 @assert f(dv)[i] == f(dv.data)[i] end From 1c423b9fa4221cc95226967016153b9cf753ec33 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sun, 12 Feb 2017 13:34:15 -0500 Subject: [PATCH 04/14] Use iteration instead of indexing into indices in setindex to avoid deprecation warning --- src/indexing.jl | 94 +++++++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index fe65431..b30401f 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -205,57 +205,59 @@ end ## setindex!: both DataArray and PooledDataArray -@ngenerate N typeof(A) function Base.setindex!(A::AbstractDataArray, x, - J::NTuple{N,Union{Real,Colon,AbstractArray}}...) - if !isa(x, AbstractArray) && isa(A, PooledDataArray) - # Only perform one pool lookup when assigning a scalar value in - # a PooledDataArray - setindex!(A.refs, getpoolidx(A, x), J...) - return A - end - - Aextr = daextract(A) - @ncall N checkbounds A J - @nexprs N d->(I_d = Base.to_indices(A, J)[d]) - stride_1 = 1 - @nexprs N d->(stride_{d+1} = stride_d*size(A,d)) - @nexprs N d->(offset_d = 1) # really only need offset_$N = 1 - if !isa(x, AbstractArray) - @nloops N i d->(1:length(I_d)) d->(@inbounds offset_{d-1} = offset_d + (Base.unsafe_getindex(I_d, i_d)-1)*stride_d) begin - if isa(x, NAtype) - @inbounds unsafe_setna!(A, Aextr, offset_0) - else - @inbounds unsafe_setnotna!(A, Aextr, offset_0) - @inbounds unsafe_dasetindex!(A, Aextr, x, offset_0) - end +@generated function Base.setindex!(A::AbstractDataArray, x, J::Union{Real,Colon,AbstractArray}...) + N = length(J) + quote + if !isa(x, AbstractArray) && isa(A, PooledDataArray) + # Only perform one pool lookup when assigning a scalar value in + # a PooledDataArray + setindex!(A.refs, getpoolidx(A, x), J...) + return A end - else - X = x - idxlens = @ncall N index_lengths I - @ncall N setindex_shape_check X (d->idxlens[d]) - k = 1 - if isa(A, PooledDataArray) && isa(X, PooledDataArray) - # When putting one PDA into another, first unify the pools - # and then translate the references - poolmap = combine_pools!(A.pool, X.pool) - Arefs = A.refs - Xrefs = X.refs - @nloops N i d->(1:idxlens[d]) d->(@inbounds offset_{d-1} = offset_d + (Base.unsafe_getindex(I_d, i_d)-1)*stride_d) begin - @inbounds Arefs[offset_0] = Xrefs[k] == 0 ? 0 : poolmap[Xrefs[k]] - k += 1 + + Aextr = daextract(A) + @nexprs $N d->(I_d = Base.to_indices(A, J)[d]) + @ncall $N checkbounds A I + stride_1 = 1 + @nexprs $N d->(stride_{d+1} = stride_d*size(A,d)) + @nexprs $N d->(offset_d = 1) # really only need offset_$N = 1 + if !isa(x, AbstractArray) + @nloops $N i d->I_d d->(@inbounds offset_{d-1} = offset_d + (i_d - 1)*stride_d) begin + if isa(x, NAtype) + @inbounds unsafe_setna!(A, Aextr, offset_0) + else + @inbounds unsafe_setnotna!(A, Aextr, offset_0) + @inbounds unsafe_dasetindex!(A, Aextr, x, offset_0) + end end else - Xextr = daextract(X) - @nloops N i d->(1:idxlens[d]) d->(@inbounds offset_{d-1} = offset_d + (Base.unsafe_getindex(I_d, i_d)-1)*stride_d) begin - @inbounds if isa(X, AbstractDataArray) && unsafe_isna(X, Xextr, k) - unsafe_setna!(A, Aextr, offset_0) - else - unsafe_setnotna!(A, Aextr, offset_0) - unsafe_dasetindex!(A, Aextr, unsafe_getindex_notna(X, Xextr, k), offset_0) + X = x + idxlens = @ncall $N index_lengths I + @ncall $N setindex_shape_check X (d->idxlens[d]) + k = 1 + if isa(A, PooledDataArray) && isa(X, PooledDataArray) + # When putting one PDA into another, first unify the pools + # and then translate the references + poolmap = combine_pools!(A.pool, X.pool) + Arefs = A.refs + Xrefs = X.refs + @nloops $N i d->I_d d->(@inbounds offset_{d-1} = offset_d + (i_d - 1)*stride_d) begin + @inbounds Arefs[offset_0] = Xrefs[k] == 0 ? 0 : poolmap[Xrefs[k]] + k += 1 + end + else + Xextr = daextract(X) + @nloops $N i d->I_d d->(@inbounds offset_{d-1} = offset_d + (i_d - 1)*stride_d) begin + @inbounds if isa(X, AbstractDataArray) && unsafe_isna(X, Xextr, k) + unsafe_setna!(A, Aextr, offset_0) + else + unsafe_setnotna!(A, Aextr, offset_0) + unsafe_dasetindex!(A, Aextr, unsafe_getindex_notna(X, Xextr, k), offset_0) + end + k += 1 end - k += 1 end end + A end - A end From af188c78c4ab5c48eb70889e9b9decb163e79ea5 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sun, 12 Feb 2017 13:35:49 -0500 Subject: [PATCH 05/14] Adjust CI --- .travis.yml | 3 +-- appveyor.yml | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3497f72..1f98ac8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: julia julia: - - 0.4 - - 0.5 + # - 0.6 - nightly os: - linux diff --git a/appveyor.yml b/appveyor.yml index cd60c55..8332183 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,9 +1,7 @@ environment: matrix: - - JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe" - - JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe" - - JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" - - JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" + # - JULIAVERSION: "julialang/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" + # - JULIAVERSION: "julialang/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe" - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe" From 4dbce68a019b7b593fa0f2ff429c5cac83c1fc36 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sun, 12 Feb 2017 17:08:49 -0500 Subject: [PATCH 06/14] Reenable precompilation Add SpecialFunctions as dependency Use new type keywords --- REQUIRE | 1 + src/DataArrays.jl | 4 ++-- src/abstractdataarray.jl | 12 ++++++------ src/dataarray.jl | 8 ++++---- src/natype.jl | 4 ++-- src/operators.jl | 2 ++ src/pooleddataarray.jl | 13 ++++++------- src/reduce.jl | 4 ++-- test/operators.jl | 2 +- 9 files changed, 26 insertions(+), 24 deletions(-) diff --git a/REQUIRE b/REQUIRE index 95b9f63..cde7698 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,3 +1,4 @@ julia 0.6- StatsBase 0.3 Reexport +SpecialFunctions diff --git a/src/DataArrays.jl b/src/DataArrays.jl index ebe776e..8d9292f 100644 --- a/src/DataArrays.jl +++ b/src/DataArrays.jl @@ -1,10 +1,10 @@ -__precompile__(false) +__precompile__() module DataArrays using Base: promote_op using Base.Cartesian, Compat, Reexport - import Compat.String @reexport using StatsBase + using SpecialFunctions const DEFAULT_POOLED_REF_TYPE = UInt32 diff --git a/src/abstractdataarray.jl b/src/abstractdataarray.jl index 6321ea4..2c74ff4 100644 --- a/src/abstractdataarray.jl +++ b/src/abstractdataarray.jl @@ -2,17 +2,17 @@ #' #' An AbstractDataArray is an Array whose entries can take on #' values of type `T` or the value `NA`. -abstract AbstractDataArray{T, N} <: AbstractArray{T, N} +abstract type AbstractDataArray{T, N} <: AbstractArray{T, N} end #' @description #' #' An AbstractDataVector is an AbstractDataArray of order 1. -typealias AbstractDataVector{T} AbstractDataArray{T, 1} +const AbstractDataVector{T} = AbstractDataArray{T, 1} #' @description #' #' An AbstractDataMatrix is an AbstractDataArray of order 2. -typealias AbstractDataMatrix{T} AbstractDataArray{T, 2} +const AbstractDataMatrix{T} = AbstractDataArray{T, 2} #' @description #' Determine the type of the elements of an AbstractDataArray. @@ -121,7 +121,7 @@ dropna(v::AbstractVector) = copy(v) # -> AbstractVector # TODO: Use values() # Use DataValueIterator type? -type EachFailNA{T} +struct EachFailNA{T} da::AbstractDataArray{T} end each_failna{T}(da::AbstractDataArray{T}) = EachFailNA(da) @@ -136,7 +136,7 @@ function Base.next(itr::EachFailNA, ind::Integer) end end -type EachDropNA{T} +struct EachDropNA{T} da::AbstractDataArray{T} end each_dropna{T}(da::AbstractDataArray{T}) = EachDropNA(da) @@ -154,7 +154,7 @@ function Base.next(itr::EachDropNA, ind::Int) (itr.da[ind], _next_nonna_ind(itr.da, ind)) end -type EachReplaceNA{S, T} +struct EachReplaceNA{S, T} da::AbstractDataArray{S} replacement::T end diff --git a/src/dataarray.jl b/src/dataarray.jl index 6f8a8d5..c4adc4f 100644 --- a/src/dataarray.jl +++ b/src/dataarray.jl @@ -18,11 +18,11 @@ #' dv = DataArray([1, 2, 3], [false, false, true]) #' #' dm = DataArray([1 2; 3 4], [false false; true false]) -type DataArray{T, N} <: AbstractDataArray{T, N} +mutable struct DataArray{T, N} <: AbstractDataArray{T, N} data::Array{T, N} na::BitArray{N} - function DataArray(d::Array{T, N}, m::BitArray{N}) + function DataArray{T,N}(d::Array{T, N}, m::BitArray{N}) where {T, N} # Ensure data values and missingness metadata match if size(d) != size(m) msg = "Data and missingness arrays must be the same size" @@ -45,12 +45,12 @@ end #' @description #' #' An DataVector is an DataArray of order 1. -typealias DataVector{T} DataArray{T, 1} +const DataVector{T} = DataArray{T, 1} #' @description #' #' An DataMatrix is an DataArray of order 2. -typealias DataMatrix{T} DataArray{T, 2} +const DataMatrix{T} = DataArray{T, 2} #' @description #' diff --git a/src/natype.jl b/src/natype.jl index a2d67a1..490786b 100644 --- a/src/natype.jl +++ b/src/natype.jl @@ -14,14 +14,14 @@ ## ############################################################################## -type NAtype +struct NAtype end const NA = NAtype() Base.show(io::IO, x::NAtype) = print(io, "NA") -type NAException <: Exception +struct NAException <: Exception msg::String end NAException() = NAException("NA found") diff --git a/src/operators.jl b/src/operators.jl index 012c4f3..0fd70c0 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -653,6 +653,8 @@ function Base.accumulate(f, dv::DataVector) end return DataArray(new_data, new_na) end +Base.cumsum(dv::DataVector) = accumulate(+, dv) +Base.cumprod(dv::DataVector) = accumulate(*, dv) for f in [unary_vector_operators; ffts] @eval ($f)(dv::DataVector) = any(dv.na) ? NA : ($f)(dv.data) diff --git a/src/pooleddataarray.jl b/src/pooleddataarray.jl index 236478d..0672954 100644 --- a/src/pooleddataarray.jl +++ b/src/pooleddataarray.jl @@ -15,16 +15,15 @@ # This is used as a wrapper during PooledDataArray construction only, to distinguish # arrays of pool indices from normal arrays -type RefArray{R<:Integer,N} +struct RefArray{R<:Integer,N} a::Array{R,N} end -type PooledDataArray{T, R<:Integer, N} <: AbstractDataArray{T, N} +mutable struct PooledDataArray{T, R<:Integer, N} <: AbstractDataArray{T, N} refs::Array{R, N} pool::Vector{T} - function PooledDataArray(rs::RefArray{R, N}, - p::Vector{T}) + function PooledDataArray{T,R,N}(rs::RefArray{R, N}, p::Vector{T}) where {T,R,N} # refs mustn't overflow pool if length(rs.a) > 0 && maximum(rs.a) > prod(size(p)) throw(ArgumentError("Reference array points beyond the end of the pool")) @@ -32,8 +31,8 @@ type PooledDataArray{T, R<:Integer, N} <: AbstractDataArray{T, N} new(rs.a,p) end end -typealias PooledDataVector{T,R} PooledDataArray{T,R,1} -typealias PooledDataMatrix{T,R} PooledDataArray{T,R,2} +const PooledDataVector{T,R} = PooledDataArray{T,R,1} +const PooledDataMatrix{T,R} = PooledDataArray{T,R,2} ############################################################################## ## @@ -600,7 +599,7 @@ end Base.sort(pda::PooledDataArray; kw...) = pda[sortperm(pda; kw...)] -type FastPerm{O<:Base.Sort.Ordering,V<:AbstractVector} <: Base.Sort.Ordering +struct FastPerm{O<:Base.Sort.Ordering,V<:AbstractVector} <: Base.Sort.Ordering ord::O vec::V end diff --git a/src/reduce.jl b/src/reduce.jl index 33fc601..1ff0926 100644 --- a/src/reduce.jl +++ b/src/reduce.jl @@ -87,9 +87,9 @@ end # NA, it returns NA. Otherwise we will fall back to the implementation # in Base, which is slow because it's type-unstable, but guarantees the # correct semantics -typealias SafeMapFuns Union{typeof(identity), typeof(abs), typeof(abs2), +const SafeMapFuns = Union{typeof(identity), typeof(abs), typeof(abs2), typeof(exp), typeof(log), typeof(Base.centralizedabs2fun)} -typealias SafeReduceFuns Union{typeof(+), typeof(*), typeof(max), typeof(min)} +const SafeReduceFuns = Union{typeof(+), typeof(*), typeof(max), typeof(min)} function Base._mapreduce(f::SafeMapFuns, op::SafeReduceFuns, A::DataArray) any(A.na) && return NA Base._mapreduce(f, op, A.data) diff --git a/test/operators.jl b/test/operators.jl index dcc9fce..633bdb3 100644 --- a/test/operators.jl +++ b/test/operators.jl @@ -324,7 +324,7 @@ module TestOperators # Cumulative vector operators on DataVector's dv = convert(DataArray, ones(5)) - for f in [Base.cumprod, Base.cumsum] + for f in [Base.cumprod, Base.cumsum, t -> accumulate(min, t), t -> accumulate(max, t)] for i in 1:length(dv) @assert f(dv)[i] == f(dv.data)[i] end From a2bb139d4c61df53b21610978a4d7844b24555e6 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sun, 12 Feb 2017 17:23:14 -0500 Subject: [PATCH 07/14] Use @inline instead of @generate when possible --- src/indexing.jl | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index b30401f..226ff57 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -161,21 +161,17 @@ function Base.getindex(pda::PooledDataArray, I::Real) end end -@generated function Base.getindex(pda::PooledDataArray, I::Integer...) - quote - if getindex(pda.refs, I...) == 0 - return NA - else - return pda.pool[getindex(pda.refs, I...)] - end +@inline function Base.getindex(pda::PooledDataArray, I::Integer...) + if getindex(pda.refs, I...) == 0 + return NA + else + return pda.pool[getindex(pda.refs, I...)] end end # Vector case -@generated function Base.getindex(A::PooledDataArray, I::Union{AbstractVector,Colon}...) - quote - PooledDataArray(RefArray(getindex(A.refs, I...)), copy(A.pool)) - end +@inline function Base.getindex(A::PooledDataArray, I::Union{AbstractVector,Colon}...) + PooledDataArray(RefArray(getindex(A.refs, I...)), copy(A.pool)) end ## setindex!: DataArray From 141660610c2d6a5d1f4716435355d79affa11be6 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sun, 12 Feb 2017 17:37:03 -0500 Subject: [PATCH 08/14] Now, erf, erfc, and digamma are in SpecialFunctions instead of Base --- src/operators.jl | 12 +++++++-- test/operators.jl | 68 +++++++++++++++++++++++------------------------ 2 files changed, 44 insertions(+), 36 deletions(-) diff --git a/src/operators.jl b/src/operators.jl index 0fd70c0..b6e21b2 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -303,11 +303,19 @@ for f in (:(Base.abs), :(Base.abs2), :(Base.conj), :(Base.sign)) end # One-argument elementary functions that always return floating points +## Base for f in (:(Base.acos), :(Base.acosh), :(Base.asin), :(Base.asinh), :(Base.atan), :(Base.atanh), :(Base.sin), :(Base.sinh), :(Base.cos), :(Base.cosh), :(Base.tan), :(Base.tanh), :(Base.exp), :(Base.exp2), :(Base.expm1), :(Base.log), :(Base.log10), :(Base.log1p), - :(Base.log2), :(Base.exponent), :(Base.sqrt), :(Base.gamma), :(Base.lgamma), - :(Base.digamma), :(Base.erf), :(Base.erfc)) + :(Base.log2), :(Base.exponent), :(Base.sqrt), :(Base.gamma), :(Base.lgamma)) + @eval begin + ($f)(::NAtype) = NA + @dataarray_unary $(f) AbstractFloat T + @dataarray_unary $(f) Real Float64 + end +end +## SpecialFunctions (should be a conditional module when supported) +for f in (:(SpecialFunctions.digamma), :(SpecialFunctions.erf), :(SpecialFunctions.erfc)) @eval begin ($f)(::NAtype) = NA @dataarray_unary $(f) AbstractFloat T diff --git a/test/operators.jl b/test/operators.jl index 633bdb3..0c696dc 100644 --- a/test/operators.jl +++ b/test/operators.jl @@ -8,40 +8,40 @@ module TestOperators const comparison_operators = [:(==),:(!=),:(>),:(>=),:(<),:(<=)] - const elementary_functions = [:(Base.abs), - :(Base.abs2), - :(Base.sign), - :(Base.acos), - :(Base.acosh), - :(Base.asin), - :(Base.asinh), - :(Base.atan), - :(Base.atanh), - :(Base.sin), - :(Base.sinh), - :(Base.conj), - :(Base.cos), - :(Base.cosh), - :(Base.tan), - :(Base.tanh), - :(Base.ceil), - :(Base.floor), - :(Base.round), - :(Base.trunc), - :(Base.exp), - :(Base.exp2), - :(Base.expm1), - :(Base.log), - :(Base.log10), - :(Base.log1p), - :(Base.log2), - :(Base.exponent), - :(Base.sqrt), - :(Base.gamma), - :(Base.lgamma), - :(Base.digamma), - :(Base.erf), - :(Base.erfc)] + const elementary_functions = [:(abs), + :(abs2), + :(sign), + :(acos), + :(acosh), + :(asin), + :(asinh), + :(atan), + :(atanh), + :(sin), + :(sinh), + :(conj), + :(cos), + :(cosh), + :(tan), + :(tanh), + :(ceil), + :(floor), + :(round), + :(trunc), + :(exp), + :(exp2), + :(expm1), + :(log), + :(log10), + :(log1p), + :(log2), + :(exponent), + :(sqrt), + :(gamma), + :(lgamma), + :(digamma), + :(erf), + :(erfc)] macro test_da_pda(da, code) esc(quote From 0c9184cba55025a84fcbc1abeaa6f04601a5e507 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sun, 12 Feb 2017 20:42:21 -0500 Subject: [PATCH 09/14] Intialize arrays with all NAs. Flip bits _unsafe_getindex. --- src/dataarray.jl | 2 +- src/indexing.jl | 1 + src/pooleddataarray.jl | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/dataarray.jl b/src/dataarray.jl index c4adc4f..31b64c1 100644 --- a/src/dataarray.jl +++ b/src/dataarray.jl @@ -246,7 +246,7 @@ end #' dv = @data [false, false, true, false] #' dv_new = similar(dv, Float64, 2, 2, 2) function Base.similar(da::DataArray, T::Type, dims::Dims) #-> DataArray{T} - return DataArray(Array{T}(dims), falses(dims)) + return DataArray(Array{T}(dims), trues(dims)) end #' @description diff --git a/src/indexing.jl b/src/indexing.jl index 226ff57..e8b057e 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -131,6 +131,7 @@ Base.unsafe_getindex(x::Number, i) = (@inbounds xi = x[i]; xi) N = length(I) quote $(Expr(:meta, :inline)) + flipbits!(dest.na) # similar initializes with NAs @nexprs $N d->(J_d = I[d]) srcextr = daextract(src) destextr = daextract(dest) diff --git a/src/pooleddataarray.jl b/src/pooleddataarray.jl index 0672954..e85d8eb 100644 --- a/src/pooleddataarray.jl +++ b/src/pooleddataarray.jl @@ -658,8 +658,8 @@ function PooledDataVecs(v1::AbstractArray, ## Return two PooledDataVecs that share the same pool. ## TODO: allow specification of REFTYPE - refs1 = Array(DEFAULT_POOLED_REF_TYPE, size(v1)) - refs2 = Array(DEFAULT_POOLED_REF_TYPE, size(v2)) + refs1 = Array{DEFAULT_POOLED_REF_TYPE}(size(v1)) + refs2 = Array{DEFAULT_POOLED_REF_TYPE}(size(v2)) poolref = Dict{promote_type(eltype(v1), eltype(v2)), DEFAULT_POOLED_REF_TYPE}() maxref = 0 From a38a6f63ad32a3b41b6196b125e61dd25a13a4f4 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sun, 12 Feb 2017 22:36:47 -0500 Subject: [PATCH 10/14] Define linearindexing for Type{T} instead of T --- src/indexing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/indexing.jl b/src/indexing.jl index e8b057e..f76c87f 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -114,7 +114,7 @@ Base.getindex(t::AbstractDataArray, i::Real) = ## getindex: DataArray -Base.linearindexing(x::Union{DataArray,PooledDataArray}) = Base.LinearFast() +Base.linearindexing(::Type{<:Union{DataArray,PooledDataArray}}) = Base.LinearFast() # Scalar case function Base.getindex(da::DataArray, I::Real) From a75d4409a71e3a621fe5bb8fb43c876ee94958cf Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Mon, 13 Feb 2017 19:39:11 -0500 Subject: [PATCH 11/14] Update ! -> .! and remove commented out tests --- src/dataarray.jl | 2 +- src/pooleddataarray.jl | 2 +- test/broadcast.jl | 2 -- test/datamatrix.jl | 42 +++++++++++++++++++++--------------------- test/reduce.jl | 6 +++--- test/reducedim.jl | 2 +- test/sort.jl | 2 +- 7 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/dataarray.jl b/src/dataarray.jl index 31b64c1..22afd47 100644 --- a/src/dataarray.jl +++ b/src/dataarray.jl @@ -424,7 +424,7 @@ end #' #' dv = @data [1, 2, NA, 4] #' v = dropna(dv) -dropna(dv::DataVector) = copy(dv.data[!dv.na]) # -> Vector +dropna(dv::DataVector) = dv.data[.!dv.na] # -> Vector #' @description #' diff --git a/src/pooleddataarray.jl b/src/pooleddataarray.jl index e85d8eb..dcdaeed 100644 --- a/src/pooleddataarray.jl +++ b/src/pooleddataarray.jl @@ -92,7 +92,7 @@ end function PooledDataArray{T,R<:Integer,N}(d::AbstractArray{T, N}, m::AbstractArray{Bool, N}, r::Type{R} = DEFAULT_POOLED_REF_TYPE) - pool = convert(Array, unique(d[!m])) + pool = convert(Array, unique(d[.!m])) if method_exists(isless, (T, T)) sort!(pool) end diff --git a/test/broadcast.jl b/test/broadcast.jl index 0cc4dbc..1c087b1 100644 --- a/test/broadcast.jl +++ b/test/broadcast.jl @@ -20,7 +20,6 @@ rb = 1:5 @test broadcast!(+, DataArray(Int, 2, 2), [1, 0], [1 4]) == [2 5; 1 4] @test broadcast!(+, DataArray(Int, 2), [1, 0], [1, 4]) == [2, 4] @test broadcast!(+, DataArray(Int, 2), [1, 0], 2) == [3, 2] -# @test broadcast!(abs, @data([-1, -2])) == @data([1, 2]) for arr in (identity, as_dataarray, as_pda, as_dataarray_bigfloat, as_pda_bigfloat) @test broadcast(+, arr(eye(2)), arr([1, 4])) == [2 1; 4 5] @test broadcast(+, arr(eye(2)), arr([1 4])) == [2 4; 1 5] @@ -123,7 +122,6 @@ rt = Base.return_types(broadcast!, (typeof(+), DataArray{Float64, 3}, Array{Floa @test isequal(broadcast(|, @data([NA, false]), @data([NA true false])), @data([NA true NA; NA true false])) # Test map! -# @test_throws DimensionMismatch map!(+, DataArray(Float64, 2, 2), @data([1 2]), @data([1 2])) @test map!(+, DataArray(Float64, 2), @data([1, 2]), @data([1, 2])) == @data([2, 4]) x = @data([-1, -2]) @test map!(abs, x, x) == @data([1, 2]) diff --git a/test/datamatrix.jl b/test/datamatrix.jl index c2bb6df..dde5879 100644 --- a/test/datamatrix.jl +++ b/test/datamatrix.jl @@ -44,10 +44,10 @@ module TestDataMatrix b[1, 1] = NA res = a * b[1:1, :] @assert all(isna(res[:, 1])) - @assert all(!isna(res[:, 2])) - @assert all(!isna(res[:, 3])) + @assert all(.!(isna(res[:, 2]))) + @assert all(.!(isna(res[:, 3]))) res = a * b[2:2, :] - @assert all(!isna(res)) + @assert all(.!(isna(res))) # # DataMatrix w NA's * DataVector @@ -55,8 +55,8 @@ module TestDataMatrix res = b * a @assert isna(res[1]) - @assert !isna(res[2]) - @assert !isna(res[3]) + @assert .!(isna(res[2])) + @assert .!(isna(res[3])) # # DataMatrix * DataMatrix @@ -71,11 +71,11 @@ module TestDataMatrix @assert isna(res[1, 2]) @assert isna(res[1, 3]) @assert isna(res[2, 1]) - @assert !isna(res[2, 2]) - @assert !isna(res[2, 3]) + @assert .!(isna(res[2, 2])) + @assert .!(isna(res[2, 3])) @assert isna(res[3, 1]) - @assert !isna(res[3, 2]) - @assert !isna(res[3, 3]) + @assert .!(isna(res[3, 2])) + @assert .!(isna(res[3, 3])) res = b * @data eye(3) # 3x3 Float64 DataMatrix: @@ -85,12 +85,12 @@ module TestDataMatrix @assert isna(res[1, 1]) @assert isna(res[1, 2]) @assert isna(res[1, 3]) - @assert !isna(res[2, 1]) - @assert !isna(res[2, 2]) - @assert !isna(res[2, 3]) - @assert !isna(res[3, 1]) - @assert !isna(res[3, 2]) - @assert !isna(res[3, 3]) + @assert .!(isna(res[2, 1])) + @assert .!(isna(res[2, 2])) + @assert .!(isna(res[2, 3])) + @assert .!(isna(res[3, 1])) + @assert .!(isna(res[3, 2])) + @assert .!(isna(res[3, 3])) res = (@data eye(3)) * b # julia> dataeye(3) * b @@ -99,14 +99,14 @@ module TestDataMatrix # NA 1.0 0.0 # NA 0.0 1.0 @assert isna(res[1, 1]) - @assert !isna(res[1, 2]) - @assert !isna(res[1, 3]) + @assert .!(isna(res[1, 2])) + @assert .!(isna(res[1, 3])) @assert isna(res[2, 1]) - @assert !isna(res[2, 2]) - @assert !isna(res[2, 3]) + @assert .!(isna(res[2, 2])) + @assert .!(isna(res[2, 3])) @assert isna(res[3, 1]) - @assert !isna(res[3, 2]) - @assert !isna(res[3, 3]) + @assert .!(isna(res[3, 2])) + @assert .!(isna(res[3, 3])) # Test row operations dm = @data eye(6, 2) diff --git a/test/reduce.jl b/test/reduce.jl index 10eb8df..16fa026 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -142,11 +142,11 @@ da2 = DataArray(randn(128)) @same_behavior mean(da1, weights(da2.data); skipna=true) mean(da1.data, weights(da2.data)) da1[1:3:end] = NA -@same_behavior mean(da1, weights(da2); skipna=true) mean(dropna(da1), weights(da2.data[!da1.na])) -@same_behavior mean(da1, weights(da2.data); skipna=true) mean(dropna(da1), weights(da2.data[!da1.na])) +@same_behavior mean(da1, weights(da2); skipna=true) mean(dropna(da1), weights(da2.data[(!).(da1.na)])) +@same_behavior mean(da1, weights(da2.data); skipna=true) mean(dropna(da1), weights(da2.data[(!).(da1.na)])) da2[1:2:end] = NA -keep = !da1.na .& !da2.na +keep = .!da1.na .& .!da2.na @test isna(mean(da1, weights(da2))) @same_behavior mean(da1, weights(da2); skipna=true) mean(da1.data[keep], weights(da2.data[keep])) end diff --git a/test/reducedim.jl b/test/reducedim.jl index 4bce0ab..c79310f 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -109,7 +109,7 @@ macro test_da_approx_eq(da1, da2) v2 = $(esc(da2)) na = isna(v1) @test na == isna(v2) - defined = !na + defined = (!).(na) if any(defined) @test isapprox(v1[defined], v2[defined], nans = true) end diff --git a/test/sort.jl b/test/sort.jl index c1f6649..c094c81 100644 --- a/test/sort.jl +++ b/test/sort.jl @@ -17,7 +17,7 @@ for T in (Float64, BigFloat) nna = sum(na) a = Vector{T}(n) ra = randn(n-nna) - a[!na] = ra + a[.!na] = ra for da in (DataArray(a, na), PooledDataArray(a, na), (pda = PooledDataArray(a, na); setlevels!(pda, shuffle!(pda.pool)))) @test isequal(sort(da), [DataArray(sort(dropna(da))); DataArray(T, nna)]) @test isequal(sort(da; lt=(x,y)->isless(x,y)), [DataArray(sort(dropna(da))); DataArray(T, nna)]) From ed30192bd81b7677b8f33578657a18a0ba557402 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Mon, 13 Feb 2017 20:12:41 -0500 Subject: [PATCH 12/14] Reenable Compat. Necessary for old @nsplat and @ngenerate macros --- REQUIRE | 1 + 1 file changed, 1 insertion(+) diff --git a/REQUIRE b/REQUIRE index cde7698..e75c726 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,5 @@ julia 0.6- +Compat 0.8.6 StatsBase 0.3 Reexport SpecialFunctions From 937d3965921ed411abfd5c9b1e9238b13837e63b Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Wed, 15 Feb 2017 20:15:52 -0500 Subject: [PATCH 13/14] Make broadcast work for scalar Strings --- src/broadcast.jl | 18 +++++++++++++----- test/broadcast.jl | 3 +++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/broadcast.jl b/src/broadcast.jl index e7b8023..903fa81 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -116,9 +116,11 @@ Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, A0, As...) = As[k] <: DataArray ? quote $(Symbol("state_$(k)_")){d-1} = $(Symbol("state_$(k)_d")); $(Symbol("j_$(k)_d")) = $(Symbol("skip_$(k)_d")) ? 1 : i_d - end : quote + end : (As[k] <: AbstractArray ? quote $(Symbol("j_$(k)_d")) = size($(Symbol("A_$(k)")), d) == 1 ? 1 : i_d - end + end : quote + $(Symbol("j_$(k)_d")) = 1 + end) for k = 1:N]...))), # post @@ -138,10 +140,16 @@ Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, A0, As...) = end : nothing for k = 1:N]...)) - # Extract values for ordinary AbstractArrays + # Extract values for other type $(Expr(:block, [ - :(@inbounds $(Symbol("v_$(k)")) = @nref $nd $(Symbol("A_$(k)")) d->$(Symbol("j_$(k)_d"))) - for k = find(t -> !(t <: DataArray || t <: PooledDataArray), As)]...)) + As[k] <: AbstractArray && !(As[k] <: AbstractDataArray) ? quote + # ordinary AbstractArrays + @inbounds $(Symbol("v_$(k)")) = @nref $nd $(Symbol("A_$(k)")) d->$(Symbol("j_$(k)_d")) + end : quote + # non AbstractArrays (e.g. Strings and Numbers) + @inbounds $(Symbol("v_$(k)")) = $(Symbol("A_$(k)")) + end + for k = 1:N]...)) # Compute and store return value $(gen_na_conds(F, nd, As, B)) diff --git a/test/broadcast.jl b/test/broadcast.jl index 1c087b1..e0db9b5 100644 --- a/test/broadcast.jl +++ b/test/broadcast.jl @@ -113,6 +113,9 @@ rt = Base.return_types(broadcast, (typeof(+), Array{Float64, 3}, DataArray{Int, rt = Base.return_types(broadcast!, (typeof(+), DataArray{Float64, 3}, Array{Float64, 3}, Array{Int, 1})) @test length(rt) == 1 && rt[1] == DataArray{Float64, 3} +# Test String broadcast +@test broadcast(==, @data(["a", "b", "c", "d"]), "a") == @data([true,false,false,false]) + # Test broadcasting of functions that do something besides propagate NA @test isequal(broadcast(isequal, @data([NA, 1]), @data([NA 1])), @data([true false; false true])) @test isequal(broadcast(isequal, @pdata([NA, 1]), @data([NA 1])), @data([true false; false true])) From 9f68def9ec3dd8a8f39b90c67135beb2f242fdf3 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Fri, 17 Feb 2017 17:26:19 -0500 Subject: [PATCH 14/14] Update syntax for indexing traits --- src/indexing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/indexing.jl b/src/indexing.jl index f76c87f..e93c39f 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -114,7 +114,7 @@ Base.getindex(t::AbstractDataArray, i::Real) = ## getindex: DataArray -Base.linearindexing(::Type{<:Union{DataArray,PooledDataArray}}) = Base.LinearFast() +Base.IndexStyle(::Type{<:AbstractDataArray}) = Base.IndexLinear() # Scalar case function Base.getindex(da::DataArray, I::Real)