@@ -17,12 +17,20 @@ float logf(float x) { return __devicelib_logf(x); }
1717
1818DEVICE_EXTERN_C
1919float expf (float x) { return __devicelib_expf (x); }
20-
20+ // On Windows, the math.h includes wrapper definition for functions:
21+ // frexpf, ldexpf, hypotf, so we can't provide these 3 functions in
22+ // device libraries which may lead to multiple definition error.
23+ // The "hypotf" on Windows will call an internal function "_hypotf"
24+ // and frexpf, ldexpf will call corresponding double version:
25+ // frexp and ldexp. frexpf and ldexpf can only be used on platforms
26+ // with fp64 support currently.
27+ #ifndef _WIN32
2128DEVICE_EXTERN_C
2229float frexpf (float x, int *exp) { return __devicelib_frexpf (x, exp); }
2330
2431DEVICE_EXTERN_C
2532float ldexpf (float x, int exp) { return __devicelib_ldexpf (x, exp); }
33+ #endif
2634
2735DEVICE_EXTERN_C
2836float log10f (float x) { return __devicelib_log10f (x); }
@@ -54,8 +62,13 @@ float sqrtf(float x) { return __devicelib_sqrtf(x); }
5462DEVICE_EXTERN_C
5563float cbrtf (float x) { return __devicelib_cbrtf (x); }
5664
65+ #ifndef _WIN32
5766DEVICE_EXTERN_C
5867float hypotf (float x, float y) { return __devicelib_hypotf (x, y); }
68+ #else
69+ DEVICE_EXTERN_C
70+ float _hypotf (float x, float y) { return __devicelib_hypotf (x, y); }
71+ #endif
5972
6073DEVICE_EXTERN_C
6174float erff (float x) { return __devicelib_erff (x); }
@@ -128,3 +141,230 @@ float asinhf(float x) { return __devicelib_asinhf(x); }
128141
129142DEVICE_EXTERN_C
130143float atanhf (float x) { return __devicelib_atanhf (x); }
144+
145+ #if defined(_WIN32)
146+ #include < math.h>
147+ union _Fval { // pun floating type as integer array
148+ unsigned short _Sh[8 ];
149+ float _Val;
150+ };
151+
152+ union _Dconst { // pun float types as integer array
153+ unsigned short _Word[2 ]; // TRANSITION, ABI: Twice as large as necessary.
154+ float _Float;
155+ };
156+
157+ #define _F0 1 // little-endian
158+ #define _F1 0
159+
160+ // IEEE 754 float properties
161+ #define FHUGE_EXP (int )(_FMAX * 900L / 1000 )
162+
163+ #define NBITS (16 + _FOFF)
164+ #define FSIGN (x ) (((_Fval *)(char *)&(x))->_Sh[_F0] & _FSIGN)
165+
166+ #define INIT (w0 ) \
167+ { 0 , w0 }
168+
169+ #define _FXbig (float )((NBITS + 1 ) * 347L / 1000 )
170+ DEVICE_EXTERN_C
171+ short _FDtest (float *px) { // categorize *px
172+ _Fval *ps = (_Fval *)(char *)px;
173+ short ret = 0 ;
174+ if ((ps->_Sh [_F0] & _FMASK) == _FMAX << _FOFF)
175+ ret = (short )((ps->_Sh [_F0] & _FFRAC) != 0 || ps->_Sh [_F1] != 0 ? _NANCODE
176+ : _INFCODE);
177+ else if ((ps->_Sh [_F0] & ~_FSIGN) != 0 || ps->_Sh [_F1] != 0 )
178+ ret = (ps->_Sh [_F0] & _FMASK) == 0 ? _DENORM : _FINITE;
179+
180+ return ret;
181+ }
182+
183+ DEVICE_EXTERN_C
184+ short _FDnorm (_Fval *ps) { // normalize float fraction
185+ short xchar;
186+ unsigned short sign = (unsigned short )(ps->_Sh [_F0] & _FSIGN);
187+
188+ xchar = 1 ;
189+ if ((ps->_Sh [_F0] &= _FFRAC) != 0 || ps->_Sh [_F1]) { // nonzero, scale
190+ if (ps->_Sh [_F0] == 0 ) {
191+ ps->_Sh [_F0] = ps->_Sh [_F1];
192+ ps->_Sh [_F1] = 0 ;
193+ xchar -= 16 ;
194+ }
195+
196+ for (; ps->_Sh [_F0] < 1 << _FOFF; --xchar) { // shift left by 1
197+ ps->_Sh [_F0] = (unsigned short )(ps->_Sh [_F0] << 1 | ps->_Sh [_F1] >> 15 );
198+ ps->_Sh [_F1] <<= 1 ;
199+ }
200+ for (; 1 << (_FOFF + 1 ) <= ps->_Sh [_F0]; ++xchar) { // shift right by 1
201+ ps->_Sh [_F1] = (unsigned short )(ps->_Sh [_F1] >> 1 | ps->_Sh [_F0] << 15 );
202+ ps->_Sh [_F0] >>= 1 ;
203+ }
204+ ps->_Sh [_F0] &= _FFRAC;
205+ }
206+ ps->_Sh [_F0] |= sign;
207+ return xchar;
208+ }
209+
210+ DEVICE_EXTERN_C
211+ short _FDscale (float *px, long lexp) { // scale *px by 2^xexp with checking
212+ _Dconst _FInf = {INIT (_FMAX << _FOFF)};
213+ _Fval *ps = (_Fval *)(char *)px;
214+ short xchar = (short )((ps->_Sh [_F0] & _FMASK) >> _FOFF);
215+
216+ if (xchar == _FMAX)
217+ return (short )((ps->_Sh [_F0] & _FFRAC) != 0 || ps->_Sh [_F1] != 0
218+ ? _NANCODE
219+ : _INFCODE);
220+ if (xchar == 0 && 0 < (xchar = _FDnorm (ps)))
221+ return 0 ;
222+
223+ short ret = 0 ;
224+ if (0 < lexp && _FMAX - xchar <= lexp) { // overflow, return +/-INF
225+ *px = ps->_Sh [_F0] & _FSIGN ? -_FInf._Float : _FInf._Float ;
226+ ret = _INFCODE;
227+ } else if (-xchar < lexp) { // finite result, repack
228+ ps->_Sh [_F0] =
229+ (unsigned short )(ps->_Sh [_F0] & ~_FMASK | (lexp + xchar) << _FOFF);
230+ ret = _FINITE;
231+ } else { // denormalized, scale
232+ unsigned short sign = (unsigned short )(ps->_Sh [_F0] & _FSIGN);
233+
234+ ps->_Sh [_F0] = (unsigned short )(1 << _FOFF | ps->_Sh [_F0] & _FFRAC);
235+ lexp += xchar - 1 ;
236+ if (lexp < -(16 + 1 + _FOFF) || 0 <= lexp) { // underflow, return +/-0
237+ ps->_Sh [_F0] = sign;
238+ ps->_Sh [_F1] = 0 ;
239+ ret = 0 ;
240+ } else { // nonzero, align fraction
241+ ret = _FINITE;
242+ short xexp = (short )lexp;
243+ unsigned short psx = 0 ;
244+
245+ if (xexp <= -16 ) { // scale by words
246+ psx = ps->_Sh [_F1] | (psx != 0 ? 1 : 0 );
247+ ps->_Sh [_F1] = ps->_Sh [_F0];
248+ ps->_Sh [_F0] = 0 ;
249+ xexp += 16 ;
250+ }
251+ if ((xexp = (short )-xexp) != 0 ) { // scale by bits
252+ psx = (ps->_Sh [_F1] << (16 - xexp)) | (psx != 0 ? 1 : 0 );
253+ ps->_Sh [_F1] = (unsigned short )(ps->_Sh [_F1] >> xexp |
254+ ps->_Sh [_F0] << (16 - xexp));
255+ ps->_Sh [_F0] >>= xexp;
256+ }
257+
258+ ps->_Sh [_F0] |= sign;
259+ if ((0x8000 < psx || 0x8000 == psx && (ps->_Sh [_F1] & 0x0001 ) != 0 ) &&
260+ (++ps->_Sh [_F1] & 0xffff ) == 0 )
261+ ++ps->_Sh [_F0]; // round up
262+ else if (ps->_Sh [_F0] == sign && ps->_Sh [_F1] == 0 )
263+ ret = 0 ;
264+ }
265+ }
266+
267+ return ret;
268+ }
269+
270+ DEVICE_EXTERN_C
271+ short _FExp (float *px, float y,
272+ short eoff) { // compute y * e^(*px), (*px) finite, |y| not huge
273+ static const float hugexp = FHUGE_EXP;
274+ _Dconst _FInf = {INIT (_FMAX << _FOFF)};
275+ static const float p[] = {1 .0F , 60 .09114349F };
276+ static const float q[] = {12 .01517514F , 120 .18228722F };
277+ static const float c1 = (22713 .0F / 32768 .0F );
278+ static const float c2 = 1 .4286068203094172321214581765680755e-6F ;
279+ static const float invln2 = 1 .4426950408889634073599246810018921F ;
280+ short ret = 0 ;
281+ if (*px < -hugexp || y == 0 .0F ) { // certain underflow
282+ *px = 0 .0F ;
283+ } else if (hugexp < *px) { // certain overflow
284+ *px = _FInf._Float ;
285+ ret = _INFCODE;
286+ } else { // xexp won't overflow
287+ float g = *px * invln2;
288+ short xexp = (short )(g + (g < 0 .0F ? -0 .5F : +0 .5F ));
289+
290+ g = xexp;
291+ g = (float )((*px - g * c1) - g * c2);
292+ if (-__FLT_EPSILON__ < g && g < __FLT_EPSILON__) {
293+ *px = y;
294+ } else { // g * g worth computing
295+ const float z = g * g;
296+ const float w = q[0 ] * z + q[1 ];
297+
298+ g *= z + p[1 ];
299+ *px = (w + g) / (w - g) * 2 .0F * y;
300+ --xexp;
301+ }
302+ ret = _FDscale (px, (long )xexp + eoff);
303+ }
304+ return ret;
305+ }
306+
307+ DEVICE_EXTERN_C
308+ float _FCosh (float x, float y) { // compute y * cosh(x), |y| <= 1
309+ switch (_FDtest (&x)) { // test for special codes
310+ case _NANCODE:
311+ case _INFCODE:
312+ return x;
313+ case 0 :
314+ return y;
315+ default : // finite
316+ if (y == 0 .0F )
317+ return y;
318+
319+ if (x < 0 .0F )
320+ x = -x;
321+
322+ if (x < _FXbig) { // worth adding in exp(-x)
323+ _FExp (&x, 1 .0F , -1 );
324+ return y * (x + 0 .25F / x);
325+ }
326+ _FExp (&x, y, -1 );
327+ return x;
328+ }
329+ }
330+
331+ DEVICE_EXTERN_C
332+ float _FSinh (float x, float y) { // compute y * sinh(x), |y| <= 1
333+ _Dconst _FRteps = {INIT ((_FBIAS - NBITS / 2 ) << _FOFF)};
334+ static const float p[] = {0 .00020400F , 0 .00832983F , 0 .16666737F , 0 .99999998F };
335+ short neg;
336+
337+ switch (_FDtest (&x)) { // test for special codes
338+ case _NANCODE:
339+ return x;
340+ case _INFCODE:
341+ return y != 0 .0F ? x : FSIGN (x) ? -y : y;
342+ case 0 :
343+ return x * y;
344+ default : // finite
345+ if (y == 0 .0F )
346+ return x < 0 .0F ? -y : y;
347+
348+ if (x < 0 .0F ) {
349+ x = -x;
350+ neg = 1 ;
351+ } else
352+ neg = 0 ;
353+
354+ if (x < _FRteps._Float ) {
355+ x *= y; // x tiny
356+ } else if (x < 1 .0F ) {
357+ float w = x * x;
358+
359+ x += ((p[0 ] * w + p[1 ]) * w + p[2 ]) * w * x;
360+ x *= y;
361+ } else if (x < _FXbig) { // worth adding in exp(-x)
362+ _FExp (&x, 1 .0F , -1 );
363+ x = y * (x - 0 .25F / x);
364+ } else
365+ _FExp (&x, y, -1 );
366+
367+ return neg ? -x : x;
368+ }
369+ }
370+ #endif
0 commit comments