Skip to content

Commit

Permalink
Use AbstractVector in types. Add a bit of documentation.
Browse files Browse the repository at this point in the history
The reason for the switch to `AbstractVector` is to allow for generalization to DArray and SharedArray types.
  • Loading branch information
dmbates committed Feb 21, 2014
1 parent 3346bff commit 83e144c
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 17 deletions.
36 changes: 21 additions & 15 deletions src/glmfit.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
type GlmResp{T<:FP} <: ModResp # response in a glm model
y::Vector{T} # response
y::AbstractVector{T} # response
d::UnivariateDistribution
l::Link
devresid::Vector{T} # (squared) deviance residuals
eta::Vector{T} # linear predictor
mu::Vector{T} # mean response
mueta::Vector{T} # derivative of mu w.r.t. eta
offset::Vector{T} # offset added to linear predictor (usually 0)
var::Vector{T} # (unweighted) variance at current mu
wts::Vector{T} # prior weights
wrkresid::Vector{T} # working residuals
function GlmResp(y::Vector{T}, d::Distribution, l::Link, eta::Vector{T},
mu::Vector{T}, off::Vector{T}, wts::Vector{T})
devresid::AbstractVector{T} # (squared) deviance residuals
eta::AbstractVector{T} # linear predictor
mu::AbstractVector{T} # mean response
mueta::AbstractVector{T} # derivative of mu w.r.t. eta
offset::AbstractVector{T} # offset added to linear predictor (usually 0)
var::AbstractVector{T} # (unweighted) variance at current mu
wts::AbstractVector{T} # prior weights
wrkresid::AbstractVector{T} # working residuals
function GlmResp(y::AbstractVector{T}, d::UnivariateDistribution, l::Link,
eta::AbstractVector{T}, mu::AbstractVector{T},
off::AbstractVector{T}, wts::AbstractVector{T})
if isa(d, Binomial)
for yy in y; 0. <= yy <= 1. || error("$yy in y is not in [0,1]"); end
else
Expand All @@ -20,20 +21,25 @@ type GlmResp{T<:FP} <: ModResp # response in a glm model
n = length(y)
length(eta) == length(mu) == length(wts) == n || error("mismatched sizes")
lo = length(off); lo == 0 || lo == n || error("offset must have length $n or length 0")
res = new(y,d,l,Array(T,n),eta,mu,Array(T,n),off,Array(T,n),wts,Array(T,n))
res = new(y,d,l,similar(y),eta,mu,similar(y),off,similar(y),wts,similar(y))
updatemu!(res, eta)
res
end
end

# returns the sum of the squared deviance residuals
deviance(r::GlmResp) = sum(r.devresid)

# update the `devresid` field
devresid!(r::GlmResp) = devresid!(r.d, r.devresid, r.y, r.mu, r.wts)

# apply the link function generating the linear predictor (eta) vector from the mean vector (mu)
linkfun!(r::GlmResp) = linkfun!(r.l, r.eta, r.mu)

# apply the inverse link function generating the mean vector (mu) from the linear predictor (eta)
linkinv!(r::GlmResp) = linkinv!(r.l, r.mu, r.eta)

# evaluate the mueta vector (derivative of mu w.r.t. eta) from the linear predictor (eta)
mueta!(r::GlmResp) = mueta!(r.l, r.mueta, r.eta)

function updatemu!{T<:FP}(r::GlmResp{T}, linPr::Vector{T})
Expand All @@ -43,7 +49,7 @@ function updatemu!{T<:FP}(r::GlmResp{T}, linPr::Vector{T})
sum(r.devresid)
end

updatemu!{T<:FP}(r::GlmResp{T}, linPr) = updatemu!(r, convert(Vector{T},vec(linPr)))
updatemu!{T<:FP}(r::GlmResp{T}, linPr) = updatemu!(r, convert(typeof(r.y),vec(linPr)))

var!(r::GlmResp) = var!(r.d, r.var, r.mu)

Expand Down Expand Up @@ -71,8 +77,8 @@ function coeftable(mm::GlmMod)
cc = coef(mm)
se = stderr(mm)
zz = cc ./ se
CoefTable(DataFrame({cc, se, zz, 2.0 * ccdf(Normal(), abs(zz))},
["Estimate","Std.Error","z value", "Pr(>|z|)"]),
CoefTable(hcat(cc,se,zz,2.0 * ccdf(Normal(), abs(zz))),
["Estimate","Std.Error","z value", "Pr(>|z|)"],
coefnames(mm.fr), 4)
end

Expand Down
4 changes: 2 additions & 2 deletions src/lm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ function coeftable(mm::LmMod)
cc = coef(mm)
se = stderr(mm)
tt = cc ./ se
CoefTable(DataFrame({cc, se, tt, ccdf(FDist(1, df_residual(mm)), abs2(tt))},
["Estimate","Std.Error","t value", "Pr(>|t|)"]),
CoefTable(hcat(cc,se,tt,ccdf(FDist(1, df_residual(mm)), abs2(tt))),
["Estimate","Std.Error","t value", "Pr(>|t|)"],
coefnames(mm.fr), 4)
end

Expand Down

6 comments on commit 83e144c

@simonster
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this change may have a negative performance impact because, while Vector{T} is a concrete type, AbstractVector{T} is not. It may be better to parametrize GlmResp by the vector type instead of the element type.

@simonster
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you parametrize by the vector type, you may also be able to avoid the changes in 15df539.

@dmbates
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to specify that the vector type should have an eltype that extends FloatingPoint?

Right now things will fall apart as soon as an LAPACK/BLAS operation is attempted on anything other than Float64 or Float32 vectors. However, with the work by @andreasnoackjensen on generalizing linear algebra methods it is conceivable that a user could work with arrays of Float128 or BigFloat. (I don't really want to think about the Complex cases.)

Should I just throw in the towel and define the vector type as `V<:AbstractArray{Float64,1}`?

@simonster
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can add typealias FPAbstractVector{T<:FloatingPoint} AbstractVector{T} and then parametrize by V<:FPAbstractVector.

@dmbates
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would there be a value in using StoredVector as the base class instead of AbstractVector?

@simonster
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the little I've read of the discussion in JuliaLang/julia#987, I think you're right that StoredArray is more appropriate, but others might have a better understanding of the differences.

Please sign in to comment.