Skip to content

Commit

Permalink
Merge pull request #12172 from ArchRobison/adr/subnormal
Browse files Browse the repository at this point in the history
Add Julia routines for setting/getting "subnormals are zero" mode.
  • Loading branch information
jakebolewski committed Jul 19, 2015
2 parents 2539fcf + 591e0a9 commit 4b757af
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 27 deletions.
2 changes: 2 additions & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,8 @@ export
get_rounding,
set_rounding,
with_rounding,
get_zero_subnormals,
set_zero_subnormals,

# statistics
cor,
Expand Down
6 changes: 5 additions & 1 deletion base/rounding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ include("fenv_constants.jl")
export
RoundingMode, RoundNearest, RoundToZero, RoundUp, RoundDown, RoundFromZero,
RoundNearestTiesAway, RoundNearestTiesUp,
get_rounding, set_rounding, with_rounding
get_rounding, set_rounding, with_rounding,
get_zero_subnormals, set_zero_subnormals

## rounding modes ##
immutable RoundingMode{T} end
Expand Down Expand Up @@ -85,4 +86,7 @@ function _convert_rounding{T<:FloatingPoint}(::Type{T},x::Real,r::RoundingMode{:
end
end

set_zero_subnormals(yes::Bool) = ccall(:jl_set_zero_subnormals,Int32,(Int8,),yes)==0
get_zero_subnormals() = ccall(:jl_get_zero_subnormals,Int32,())!=0

end #module
57 changes: 57 additions & 0 deletions doc/manual/performance-tips.rst
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,63 @@ resulting speedup depend very much on the hardware. You can examine
the change in generated code by using Julia's :func:`code_native`
function.

Treat Subnormal Numbers as Zeros
--------------------------------

Subnormal numbers, formerly called `denormal numbers <https://en.wikipedia.org/wiki/Denormal_number>`_,
are useful in many contexts, but incur a performance penalty on some hardware.
A call :func:`set_zero_subnormals(true) <set_zero_subnormals>`
grants permission for floating-point operations to treat subnormal
inputs or outputs as zeros, which may improve performance on some hardware.
A call :func:`set_zero_subnormals(false) <set_zero_subnormals>`
enforces strict IEEE behavior for subnormal numbers.

Below is an example where subnormals noticeably impact performance on some hardware::

function timestep{T}( b::Vector{T}, a::Vector{T}, Δt::T )
@assert length(a)==length(b)
n = length(b)
b[1] = 1 # Boundary condition
for i=2:n-1
b[i] = a[i] + (a[i-1] - T(2)*a[i] + a[i+1]) * Δt
end
b[n] = 0 # Boundary condition
end

function heatflow{T}( a::Vector{T}, nstep::Integer )
b = similar(a)
for t=1:div(nstep,2) # Assume nstep is even
timestep(b,a,T(0.1))
timestep(a,b,T(0.1))
end
end

heatflow(zeros(Float32,10),2) # Force compilation
for trial=1:6
a = zeros(Float32,1000)
set_zero_subnormals(iseven(trial)) # Odd trials use strict IEEE arithmetic
@time heatflow(a,1000)
end

This example generates many subnormal numbers because the values in ``a`` become
an exponentially decreasing curve, which slowly flattens out over time.

Treating subnormals as zeros should be used with caution, because doing so
breaks some identities, such as ``x-y==0`` implies ``x==y``::

julia> x=3f-38; y=2f-38;

julia> set_zero_subnormals(false); (x-y,x==y)
(1.0000001f-38,false)

julia> set_zero_subnormals(true); (x-y,x==y)
(0.0f0,false)

In some applications, an alternative to zeroing subnormal numbers is
to inject a tiny bit of noise. For example, instead of
initializing ``a`` with zeros, initialize it with::

a = rand(Float32,1000) * 1.f-9

.. _man-code-warntype:

Expand Down
19 changes: 19 additions & 0 deletions doc/stdlib/numbers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,25 @@ General Number Functions and Constants

See ``get_rounding`` for available rounding modes.

.. function:: get_zero_subnormals() -> Bool

Returns ``false`` if operations on subnormal floating-point values
("denormals") obey rules for IEEE arithmetic, and ``true`` if they
might be converted to zeros.

.. function:: set_zero_subnormals(yes::Bool) -> Bool

If ``yes`` is ``false``, subsequent floating-point operations follow
rules for IEEE arithmetic on subnormal values ("denormals").
Otherwise, floating-point operations are permitted (but not required)
to convert subnormal inputs or outputs to zero. Returns ``true``
unless ``yes==true`` but the hardware does not support zeroing of
subnormal numbers.

``set_zero_subnormals(true)`` can speed up some computations on
some hardware. However, it can break identities such as
``(x-y==0) == (x==y)``.

Integers
~~~~~~~~

Expand Down
76 changes: 52 additions & 24 deletions src/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,43 +430,71 @@ DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
// -- set/clear the FZ/DAZ flags on x86 & x86-64 --
#ifdef __SSE__

DLLEXPORT uint8_t jl_zero_subnormals(uint8_t isZero)
{
uint32_t flags = 0x00000000;
int32_t info[4];

jl_cpuid(info, 0);
if (info[0] >= 1) {
jl_cpuid(info, 0x00000001);
if ((info[3] & ((int)1 << 26)) != 0) {
// SSE2 supports both FZ and DAZ
flags = 0x00008040;
}
else if ((info[3] & ((int)1 << 25)) != 0) {
// SSE supports only the FZ flag
flags = 0x00008000;
// Cache of information recovered from jl_cpuid.
// In a multithreaded environment, there will be races on subnormal_flags,
// but they are harmless idempotent races. If we ever embrace C11, then
// subnormal_flags should be declared atomic.
static volatile int32_t subnormal_flags = 1;

static int32_t get_subnormal_flags() {
uint32_t f = subnormal_flags;
if (f & 1) {
// CPU capabilities not yet inspected.
f = 0;
int32_t info[4];
jl_cpuid(info, 0);
if (info[0] >= 1) {
jl_cpuid(info, 0x00000001);
if (info[3] & (1 << 26)) {
// SSE2 supports both FZ and DAZ
f = 0x00008040;
} else if (info[3] & (1 << 25)) {
// SSE supports only the FZ flag
f = 0x00008000;
}
}
subnormal_flags = f;
}
return f;
}

// Returns non-zero if subnormals go to 0; zero otherwise.
DLLEXPORT int32_t jl_get_zero_subnormals(int8_t isZero)
{
uint32_t flags = get_subnormal_flags();
return _mm_getcsr() & flags;
}

// Return zero on success, non-zero on failure.
DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
{
uint32_t flags = get_subnormal_flags();
if (flags) {
if (isZero) {
_mm_setcsr(_mm_getcsr() | flags);
}
else {
_mm_setcsr(_mm_getcsr() & ~flags);
}
return 1;
uint32_t state = _mm_getcsr();
if (isZero)
state |= flags;
else
state &= ~flags;
_mm_setcsr(state);
return 0;
} else {
// Report a failure only if user is trying to enable FTZ/DAZ.
return isZero;
}
return 0;
}

#else

DLLEXPORT uint8_t jl_zero_subnormals(uint8_t isZero)
DLLEXPORT int32_t jl_get_zero_subnormals(int8_t isZero)
{
return 0;
}

DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
{
return isZero;
}

#endif

// -- processor native alignment information --
Expand Down
6 changes: 4 additions & 2 deletions test/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,10 @@ end
@test_approx_eq quadgk(cos, 0,0.7,1, norm=abs)[1] sin(1)

# Ensure subnormal flags functions don't segfault
@test any(ccall("jl_zero_subnormals", UInt8, (UInt8,), 1) .== [0x00 0x01])
@test any(ccall("jl_zero_subnormals", UInt8, (UInt8,), 0) .== [0x00 0x01])
@test any(set_zero_subnormals(true) .== [false,true])
@test any(get_zero_subnormals() .== [false,true])
@test set_zero_subnormals(false)
@test !get_zero_subnormals()

# useful test functions for relative error
err(z, x) = abs(z - x) / abs(x)
Expand Down

0 comments on commit 4b757af

Please sign in to comment.