Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion libclc/clc/lib/generic/math/clc_remquo.cl
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,23 @@
//===----------------------------------------------------------------------===//

#include <clc/clc_convert.h>
#include <clc/float/definitions.h>
#include <clc/integer/clc_clz.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_copysign.h>
#include <clc/math/clc_fabs.h>
#include <clc/math/clc_floor.h>
#include <clc/math/clc_fma.h>
#include <clc/math/clc_frexp.h>
#include <clc/math/clc_ldexp.h>
#include <clc/math/clc_subnormal_config.h>
#include <clc/math/clc_nan.h>
#include <clc/math/clc_native_recip.h>
#include <clc/math/clc_rint.h>
#include <clc/math/clc_sincos_helpers.h>
#include <clc/math/clc_trunc.h>
#include <clc/math/math.h>
#include <clc/relational/clc_isfinite.h>
#include <clc/relational/clc_isnan.h>
#include <clc/shared/clc_max.h>

#define __CLC_ADDRESS_SPACE private
Expand Down
133 changes: 73 additions & 60 deletions libclc/clc/lib/generic/math/clc_remquo.inc
Original file line number Diff line number Diff line change
Expand Up @@ -8,69 +8,82 @@

_CLC_DEF _CLC_OVERLOAD float __clc_remquo(float x, float y,
__CLC_ADDRESS_SPACE int *quo) {
x = __clc_flush_denormal_if_not_supported(x);
y = __clc_flush_denormal_if_not_supported(y);
int ux = __clc_as_int(x);
int ax = ux & EXSIGNBIT_SP32;
float xa = __clc_as_float(ax);
int sx = ux ^ ax;
int ex = ax >> EXPSHIFTBITS_SP32;

int uy = __clc_as_int(y);
int ay = uy & EXSIGNBIT_SP32;
float ya = __clc_as_float(ay);
int sy = uy ^ ay;
int ey = ay >> EXPSHIFTBITS_SP32;

float xr = __clc_as_float(0x3f800000 | (ax & 0x007fffff));
float yr = __clc_as_float(0x3f800000 | (ay & 0x007fffff));
int c;
int k = ex - ey;

uint q = 0;

while (k > 0) {
c = xr >= yr;
q = (q << 1) | c;
xr -= c ? yr : 0.0f;
xr += xr;
--k;
const int bits = 12;
float ax = __clc_fabs(x);
float ay = __clc_fabs(y);
float ret;
int q7;
if (ax > ay) {
int ex, ey;
{
int _exp;
float _mant = __clc_frexp(ax, &_exp);
ex = _exp - 1;
ax = __clc_ldexp(_mant, bits);
}
{
int _exp;
float _mant = __clc_frexp(ay, &_exp);
ey = _exp - 1;
ay = __clc_ldexp(_mant, 1);
}
int nb = ex - ey;
float ayinv = __clc_native_recip(ay);
int qacc = 0;
while (nb > bits) {
float q = __clc_rint(ax * ayinv);
ax = __clc_fma(-q, ay, ax);
int clt = ax < 0.0f;
float axp = ax + ay;
ax = clt ? axp : ax;

int iq = (int)q;
iq -= clt;
qacc = (qacc << bits) | iq;

ax = __clc_ldexp(ax, bits);
nb -= bits;
}
ax = __clc_ldexp(ax, nb - bits + 1);
{
float q = __clc_rint(ax * ayinv);
ax = __clc_fma(-q, ay, ax);
int clt = ax < 0.0f;
float axp = ax + ay;
ax = clt ? axp : ax;
int iq = (int)q;
iq -= clt;
qacc = (qacc << (nb + 1)) | iq;
}
int aq = (2.0f * ax > ay) | ((qacc & 0x1) & (2.0f * ax == ay));
ax = ax - (aq ? ay : 0.0f);
qacc += aq;
int qneg = (__clc_as_int(x) ^ __clc_as_int(y)) >> 31;
q7 = ((qacc & 0x7f) ^ qneg) - qneg;
ax = __clc_ldexp(ax, ey);
ret =
__clc_as_float((__clc_as_int(x) & (int)0x80000000) ^ __clc_as_int(ax));
} else {
ret = x;
q7 = 0;
bool c = (ay < 0x1.0p+127f & 2.0f * ax > ay) | (ax > 0.5f * ay);

int qsgn = 1 + (((__clc_as_int(x) ^ __clc_as_int(y)) >> 31) << 1);
float t = __clc_fma(y, -(float)qsgn, x);
ret = c ? t : __builtin_elementwise_canonicalize(x);
q7 = c ? qsgn : q7;
ret = ax == ay ? __clc_copysign(0.0f, x) : ret;
q7 = ax == ay ? qsgn : q7;
}

c = xr > yr;
q = (q << 1) | c;
xr -= c ? yr : 0.0f;
ret = y == 0.0f ? __clc_nan(0) : ret;
q7 = y == 0.0f ? 0 : q7;
bool c = !__clc_isnan(y) && __clc_isfinite(x);
ret = c ? ret : __clc_nan(0);
q7 = c ? q7 : 0;

int lt = ex < ey;

q = lt ? 0 : q;
xr = lt ? xa : xr;
yr = lt ? ya : yr;

c = (yr < 2.0f * xr) | ((yr == 2.0f * xr) & ((q & 0x1) == 0x1));
xr -= c ? yr : 0.0f;
q += c;

float s = __clc_as_float(ey << EXPSHIFTBITS_SP32);
xr *= lt ? 1.0f : s;

int qsgn = sx == sy ? 1 : -1;
int quot = (q & 0x7f) * qsgn;

c = ax == ay;
quot = c ? qsgn : quot;
xr = c ? 0.0f : xr;

xr = __clc_as_float(sx ^ __clc_as_int(xr));

c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 |
ay == 0;
quot = c ? 0 : quot;
xr = c ? __clc_as_float(QNANBITPATT_SP32) : xr;

*quo = quot;

return xr;
*quo = q7;
return ret;
}

// remquo signature is special, we don't have macro for this
Expand Down
Loading