Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add RoundingMode argument to convert #8845

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions base/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ convert(::Type{Float16}, x::MathConst) = float16(float32(x))
convert{T<:Real}(::Type{Complex{T}}, x::MathConst) = convert(Complex{T}, convert(T,x))
convert{T<:Integer}(::Type{Rational{T}}, x::MathConst) = convert(Rational{T}, float64(x))

# Staged constructor `T(c, r)`: converts the MathConst `c` to `T` (Float32 or
# Float64) using the explicit rounding mode `r`.
# NOTE(review): presumably, inside a stagedfunction body `c` and `r` are bound
# to the argument *types*, which is why they are instantiated with `c()` and
# `r()` below — confirm against the stagedfunction semantics in use.
stagedfunction call{T<:Union(Float32,Float64),s}(t::Type{T},c::MathConst{s},r::RoundingMode)
# Widen the constant with big(...), then convert to T under rounding mode r.
f = T(big(c()),r())
# The computed value itself is spliced in as the generated method body.
:($f)
end

=={s}(::MathConst{s}, ::MathConst{s}) = true
==(::MathConst, ::MathConst) = false

Expand Down
13 changes: 11 additions & 2 deletions base/mpfr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import
realmin, realmax, get_rounding, set_rounding, maxintfloat, widen,
significand, frexp

import Base.Rounding: get_rounding_raw, set_rounding_raw

import Base.GMP: ClongMax, CulongMax, CdoubleMax

import Base.Math.lgamma_r
Expand Down Expand Up @@ -118,6 +120,11 @@ convert(::Type{Float64}, x::BigFloat) =
convert(::Type{Float32}, x::BigFloat) =
ccall((:mpfr_get_flt,:libmpfr), Float32, (Ptr{BigFloat},Int32), &x, ROUNDING_MODE[end])

# Convert a BigFloat to Float64 with an explicit rounding mode.
# The mode is translated with to_mpfr and handed to mpfr_get_d, instead of
# using the ROUNDING_MODE[end] value that the plain convert methods pass.
function call(::Type{Float64}, x::BigFloat, r::RoundingMode)
    mpfr_mode = to_mpfr(r)
    return ccall((:mpfr_get_d,:libmpfr), Float64, (Ptr{BigFloat},Int32), &x, mpfr_mode)
end

# Convert a BigFloat to Float32 with an explicit rounding mode (mpfr_get_flt).
function call(::Type{Float32}, x::BigFloat, r::RoundingMode)
    mpfr_mode = to_mpfr(r)
    return ccall((:mpfr_get_flt,:libmpfr), Float32, (Ptr{BigFloat},Int32), &x, mpfr_mode)
end

convert(::Type{Integer}, x::BigFloat) = convert(BigInt, x)

promote_rule{T<:Real}(::Type{BigFloat}, ::Type{T}) = BigFloat
Expand Down Expand Up @@ -597,8 +604,10 @@ function from_mpfr(c::Integer)
RoundingMode(c)
end

# Rounding-mode accessors for BigFloat. The active mode is the last entry of
# ROUNDING_MODE, held as an integer code.
# NOTE(review): get_rounding/set_rounding are defined twice in this span (it
# looks like the pre- and post-change lines of a diff shown together). Both
# pairs have the same signatures, so the later pair replaces the earlier one —
# confirm the first pair is meant to be deleted.
get_rounding(::Type{BigFloat}) = from_mpfr(ROUNDING_MODE[end])
set_rounding(::Type{BigFloat},r::RoundingMode) = ROUNDING_MODE[end] = to_mpfr(r)
# Raw accessors: read/write the stored integer code directly, with no
# conversion to or from a RoundingMode.
get_rounding_raw(::Type{BigFloat}) = ROUNDING_MODE[end]
set_rounding_raw(::Type{BigFloat},i::Integer) = ROUNDING_MODE[end] = i
# RoundingMode-level accessors built on the raw ones plus from_mpfr/to_mpfr.
get_rounding(::Type{BigFloat}) = from_mpfr(get_rounding_raw(BigFloat))
set_rounding(::Type{BigFloat},r::RoundingMode) = set_rounding_raw(BigFloat,to_mpfr(r))

function copysign(x::BigFloat, y::BigFloat)
z = BigFloat()
Expand Down
38 changes: 34 additions & 4 deletions base/rounding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,46 @@ function from_fenv(r::Integer)
end
end

# Rounding-mode accessors for Float32/Float64, implemented with the C
# functions fesetround/fegetround.
# NOTE(review): set_rounding/get_rounding are defined twice in this span (it
# looks like the pre- and post-change lines of a diff shown together). Both
# pairs have the same signatures, so the later pair replaces the earlier one —
# confirm the first pair is meant to be deleted.
set_rounding{T<:Union(Float32,Float64)}(::Type{T},r::RoundingMode) = ccall(:fesetround, Cint, (Cint,), to_fenv(r))
get_rounding{T<:Union(Float32,Float64)}(::Type{T}) = from_fenv(ccall(:fegetround, Cint, ()))
# Raw accessors: pass the integer code straight to fesetround / return the
# integer from fegetround, with no RoundingMode conversion.
set_rounding_raw{T<:Union(Float32,Float64)}(::Type{T},i::Integer) = ccall(:fesetround, Cint, (Cint,), i)
get_rounding_raw{T<:Union(Float32,Float64)}(::Type{T}) = ccall(:fegetround, Cint, ())

# RoundingMode-level accessors built on the raw ones plus to_fenv/from_fenv.
set_rounding{T<:Union(Float32,Float64)}(::Type{T},r::RoundingMode) = set_rounding_raw(T,to_fenv(r))
get_rounding{T<:Union(Float32,Float64)}(::Type{T}) = from_fenv(get_rounding_raw(T))

# Run `f()` with the rounding mode for type `T` temporarily set to `rounding`,
# and return the result of `f()`.
#
# The previous mode is captured with get_rounding_raw and put back with
# set_rounding_raw inside `finally`, so it is restored even if `f` throws.
# Only the raw value is saved: restoring it already reinstates the previous
# state, so an additional get_rounding/set_rounding round trip (which converts
# the saved mode to a RoundingMode and back) is redundant and has been dropped.
function with_rounding{T}(f::Function, ::Type{T}, rounding::RoundingMode)
    old_rounding_raw = get_rounding_raw(T)
    set_rounding(T,rounding)
    try
        return f()
    finally
        set_rounding_raw(T,old_rounding_raw)
    end
end


# Should be equivalent to:
# call(::Type{Float32},x::Float64,r::RoundingMode) = with_rounding(Float64,r) do
# convert(Float32,x)
# end
# but explicit checks are currently quicker (~20x).
# Assumes current rounding mode is RoundToNearest

# Float64 -> Float32 conversion with an explicit rounding mode.
# Every method starts from the result of convert and, for the directed modes,
# steps one Float32 in the requested direction when that result lies on the
# wrong side of x.

# Ties-to-even: exactly what convert returns.
function call(::Type{Float32}, x::Float64, r::RoundingMode{:TiesToEven})
    return convert(Float32, x)
end

# Toward negative infinity: the result must not be greater than x.
function call(::Type{Float32}, x::Float64, r::RoundingMode{:TowardNegative})
    nearest = convert(Float32, x)
    if nearest > x
        return prevfloat(nearest)
    end
    return nearest
end

# Toward positive infinity: the result must not be less than x.
function call(::Type{Float32}, x::Float64, r::RoundingMode{:TowardPositive})
    nearest = convert(Float32, x)
    if nearest < x
        return nextfloat(nearest)
    end
    return nearest
end

# Toward zero: step downward for positive x, upward otherwise.
function call(::Type{Float32}, x::Float64, r::RoundingMode{:TowardZero})
    nearest = convert(Float32, x)
    if x > 0.0
        return nearest > x ? prevfloat(nearest) : nearest
    end
    return nearest < x ? nextfloat(nearest) : nearest
end

Expand Down
110 changes: 90 additions & 20 deletions doc/helpdb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -369,11 +369,35 @@ Any[

"),

("Base","convert","convert(type, x)
("Base","convert","convert(T, x)

Try to convert \"x\" to the given type. Conversion to a different
numeric type will raise an \"InexactError\" if \"x\" cannot be
represented exactly in the new type.
Convert \"x\" to a value of type \"T\".

If \"T\" is an \"Integer\" type, an \"InexactError\" will be raised
if \"x\" is not representable by \"T\", for example if \"x\" is not
integer-valued, or is outside the range supported by \"T\".

julia> convert(Int, 3.0)
3

julia> convert(Int, 3.5)
ERROR: InexactError()
in convert at int.jl:185

If \"T\" is a \"FloatingPoint\" or \"Rational\" type, then it will
return the closest value to \"x\" representable by \"T\".

julia> x = 1/3
0.3333333333333333

julia> convert(Float32, x)
0.33333334f0

julia> convert(Rational{Int32}, x)
1//3

julia> convert(Rational{Int64}, x)
6004799503160661//18014398509481984

"),

Expand Down Expand Up @@ -564,10 +588,10 @@ Any[

"),

("Base","fieldtype","fieldtype(value, name::Symbol)
("Base","fieldtype","fieldtype(type, name::Symbol | index::Int)

Determine the declared type of a named field in a value of
composite type.
Determine the declared type of a field (specified by name or index)
in a composite type.

"),

Expand Down Expand Up @@ -1801,15 +1825,15 @@ Any[

("Base","is_valid_ascii","is_valid_ascii(s) -> Bool

Returns true if the string or byte vector is valid ASCII, false
otherwise.
Returns true if the argument (\"ASCIIString\", \"UTF8String\", or
byte vector) is valid ASCII, false otherwise.

"),

("Base","is_valid_utf8","is_valid_utf8(s) -> Bool

Returns true if the string or byte vector is valid UTF-8, false
otherwise.
Returns true if the argument (\"ASCIIString\", \"UTF8String\", or
byte vector) is valid UTF-8, false otherwise.

"),

Expand Down Expand Up @@ -2246,7 +2270,8 @@ Any[

("Base","is_valid_utf16","is_valid_utf16(s) -> Bool

Returns true if the string or \"Uint16\" array is valid UTF-16.
Returns true if the argument (\"UTF16String\" or \"Uint16\" array)
is valid UTF-16.

"),

Expand Down Expand Up @@ -3208,11 +3233,11 @@ Any[

("Base","writedlm","writedlm(f, A, delim='t')

Write \"A\" (a vector, matrix or an iterable collection of
iterable rows) as text to \"f\" (either a filename string or an
\"IO\" stream) using the given delimiter \"delim\" (which defaults
to tab, but can be any printable Julia object, typically a \"Char\"
or \"String\").
Write \"A\" (a vector, matrix or an iterable collection of iterable
rows) as text to \"f\" (either a filename string or an \"IO\"
stream) using the given delimiter \"delim\" (which defaults to tab,
but can be any printable Julia object, typically a \"Char\" or
\"String\").

For example, two vectors \"x\" and \"y\" of the same length can be
written as two columns of tab-delimited text to \"f\" by either
Expand Down Expand Up @@ -6070,9 +6095,22 @@ popdisplay(d::Display)

"),

("Base","cat","cat(dim, A...)
("Base","cat","cat(dims, A...)

Concatenate the input arrays along the specified dimension
Concatenate the input arrays along the specified dimensions in the
iterable \"dims\". For dimensions not in \"dims\", all input arrays
should have the same size, which will also be the size of the
output array along that dimension. For dimensions in \"dims\", the
size of the output array is the sum of the sizes of the input
arrays along that dimension. If \"dims\" is a single number, the
different arrays are tightly stacked along that dimension. If
\"dims\" is an iterable containing several dimensions, this allows
one to construct block diagonal matrices and their higher-dimensional
analogues by simultaneously increasing several dimensions for every
new input array and putting zero blocks elsewhere. For example,
*cat([1,2], matrices...)* builds a block diagonal matrix, i.e. a
block matrix with *matrices[1]*, *matrices[2]*, ... as diagonal
blocks and matching zero blocks away from the diagonal.

"),

Expand Down Expand Up @@ -11759,7 +11797,39 @@ Millisecond(v)
\"A\". This includes zeros that are explicitly stored in the sparse
matrix. The returned vector points directly to the internal nonzero
storage of \"A\", and any modifications to the returned vector will
mutate \"A\" as well.
mutate \"A\" as well. See \"rowvals(A)\" and \"nzrange(A, col)\".

"),

("Base","rowvals","rowvals(A)

Return a vector of the row indices of \"A\". Any modifications
to the returned vector will mutate \"A\" as well. Given the
internal storage format of sparse matrices, providing access to how
the row indices are stored internally can be useful in conjunction
with iterating over structural nonzero values. See \"nonzeros(A)\"
and \"nzrange(A, col)\".

"),

("Base","nzrange","nzrange(A, col)

Return the range of indices to the structural nonzero values of a
sparse matrix column. In conjunction with \"nonzeros(A)\" and
\"rowvals(A)\", this allows for convenient iterating over a sparse
matrix:

A = sparse(I,J,V)
rows = rowvals(A)
vals = nonzeros(A)
m, n = size(A)
for i = 1:n
for j in nzrange(A, i)
row = rows[j]
val = vals[j]
# perform sparse wizardry...
end
end

"),

Expand Down
34 changes: 32 additions & 2 deletions doc/stdlib/base.rst
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,39 @@ All Objects
With a single symbol argument, tests whether a global variable with that
name is defined in ``current_module()``.

.. function:: convert(type, x)
.. function:: convert(T, x)

Try to convert ``x`` to the given type. Conversion to a different numeric type will raise an ``InexactError`` if ``x`` cannot be represented exactly in the new type.
Convert ``x`` to a value of type ``T``.

If ``T`` is an ``Integer`` type, an ``InexactError`` will be raised if
``x`` is not representable by ``T``, for example if ``x`` is not
integer-valued, or is outside the range supported by ``T``.

.. doctest::

julia> convert(Int, 3.0)
3

julia> convert(Int, 3.5)
ERROR: InexactError()
in convert at int.jl:185

If ``T`` is a ``FloatingPoint`` or ``Rational`` type, then it will return
the closest value to ``x`` representable by ``T``.

.. doctest::

julia> x = 1/3
0.3333333333333333

julia> convert(Float32, x)
0.33333334f0

julia> convert(Rational{Int32}, x)
1//3

julia> convert(Rational{Int64}, x)
6004799503160661//18014398509481984

.. function:: promote(xs...)

Expand Down
36 changes: 36 additions & 0 deletions test/rounding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,39 @@ with_rounding(Float32,RoundDown) do
@test a32 - b32 === -c32
@test b32 - a32 === c32
end

# convert with rounding
# For each Float64 sample, the explicit-rounding constructor Float32(v, mode)
# is compared with a plain convert(Float32, v) executed inside with_rounding
# for the same mode on Float64.
for v = [sqrt(2),-1/3,nextfloat(1.0),prevfloat(1.0),nextfloat(-1.0),
prevfloat(-1.0),nextfloat(0.0),prevfloat(0.0)]
pn = Float32(v,RoundNearest)
@test pn == convert(Float32,v)
pz = Float32(v,RoundToZero)
@test pz == with_rounding(()->convert(Float32,v), Float64, RoundToZero)
pd = Float32(v,RoundDown)
@test pd == with_rounding(()->convert(Float32,v), Float64, RoundDown)
pu = Float32(v,RoundUp)
@test pu == with_rounding(()->convert(Float32,v), Float64, RoundUp)

# Consistency between modes: nearest equals one of the two directed results,
# toward-zero equals down for positive v and up otherwise, and the two
# directed results differ by eps of the toward-zero result.
@test pn == pd || pn == pu
@test v > 0 ? pz == pd : pz == pu
@test pu - pd == eps(pz)
end

# Same checks as for Float64 inputs, but converting to both Float32 and
# Float64, with the reference value computed by convert(T, v) inside
# with_rounding on BigFloat.
# NOTE(review): the array literal mixes BigFloat values with pi, e, eulergamma,
# catalan and golden; presumably these are promoted to BigFloat when the array
# is built, so the loop body may only ever see BigFloat values — confirm
# whether the MathConst-specific method is meant to be exercised here.
for T in [Float32,Float64]
for v in [sqrt(big(2.0)),-big(1.0)/big(3.0),nextfloat(big(1.0)),
prevfloat(big(1.0)),nextfloat(big(0.0)),prevfloat(big(0.0)),
pi,e,eulergamma,catalan,golden,]
pn = T(v,RoundNearest)
@test pn == convert(T,v)
pz = T(v,RoundToZero)
@test pz == with_rounding(()->convert(T,v), BigFloat, RoundToZero)
pd = T(v,RoundDown)
@test pd == with_rounding(()->convert(T,v), BigFloat, RoundDown)
pu = T(v,RoundUp)
@test pu == with_rounding(()->convert(T,v), BigFloat, RoundUp)

# Consistency between modes: nearest equals one of the two directed results,
# toward-zero equals down for positive v and up otherwise, and the two
# directed results differ by eps of the toward-zero result.
@test pn == pd || pn == pu
@test v > 0 ? pz == pd : pz == pu
@test pu - pd == eps(pz)
end
end