fix linux_aarch64 stdint redefine

This commit is contained in:
PrimedErwin 2025-05-15 21:04:28 +08:00
parent 7da0afb59f
commit 945f614b98
44 changed files with 185 additions and 198 deletions

View File

@ -5,8 +5,6 @@
extern "C" { extern "C" {
#endif #endif
#define __NEED_float_t
#define __NEED_double_t
#ifndef _HUGE_ENUF #ifndef _HUGE_ENUF
#define _HUGE_ENUF 1e+300 // _HUGE_ENUF*_HUGE_ENUF must overflow #define _HUGE_ENUF 1e+300 // _HUGE_ENUF*_HUGE_ENUF must overflow
@ -62,17 +60,6 @@ extern "C" {
#define predict_true(x) (x) #define predict_true(x) (x)
#define predict_false(x) (x) #define predict_false(x) (x)
typedef signed char int8_t;
typedef short int16_t;
typedef int int32_t;
typedef long long int64_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
typedef float float_t;
typedef double double_t;
int __fpclassify(double); int __fpclassify(double);
int __fpclassifyf(float); int __fpclassifyf(float);
int __fpclassifyl(long double); int __fpclassifyl(long double);
@ -107,7 +94,7 @@ static __inline unsigned long long __DOUBLE_BITS(double __f)
#define isnormal(x) ( \ #define isnormal(x) ( \
sizeof(x) == sizeof(float) ? ((__FLOAT_BITS(x)+0x00800000) & 0x7fffffff) >= 0x01000000 : \ sizeof(x) == sizeof(float) ? ((__FLOAT_BITS(x)+0x00800000) & 0x7fffffff) >= 0x01000000 : \
sizeof(x) == sizeof(double) ? ((__DOUBLE_BITS(x)+(1ULL<<52)) & -1ULL>>1) >= 1ULL<<53 : \ sizeof(x) == sizeof(double) ? ((__DOUBLE_BITS(x)+(1ULL<<52)) & ULLONG_SHIFT1) >= 1ULL<<53 : \
__fpclassifyl(x) == FP_NORMAL) __fpclassifyl(x) == FP_NORMAL)
#define isfinite(x) ( \ #define isfinite(x) ( \
@ -130,20 +117,20 @@ int __signbitl(long double);
static __inline int __is##rel(type __x, type __y) \ static __inline int __is##rel(type __x, type __y) \
{ return !isunordered(__x,__y) && __x op __y; } { return !isunordered(__x,__y) && __x op __y; }
__ISREL_DEF(lessf, <, float_t) __ISREL_DEF(lessf, <, float)
__ISREL_DEF(less, <, double_t) __ISREL_DEF(less, <, double)
__ISREL_DEF(lessl, <, long double) __ISREL_DEF(lessl, <, long double)
__ISREL_DEF(lessequalf, <=, float_t) __ISREL_DEF(lessequalf, <=, float)
__ISREL_DEF(lessequal, <=, double_t) __ISREL_DEF(lessequal, <=, double)
__ISREL_DEF(lessequall, <=, long double) __ISREL_DEF(lessequall, <=, long double)
__ISREL_DEF(lessgreaterf, !=, float_t) __ISREL_DEF(lessgreaterf, !=, float)
__ISREL_DEF(lessgreater, !=, double_t) __ISREL_DEF(lessgreater, !=, double)
__ISREL_DEF(lessgreaterl, !=, long double) __ISREL_DEF(lessgreaterl, !=, long double)
__ISREL_DEF(greaterf, >, float_t) __ISREL_DEF(greaterf, >, float)
__ISREL_DEF(greater, >, double_t) __ISREL_DEF(greater, >, double)
__ISREL_DEF(greaterl, >, long double) __ISREL_DEF(greaterl, >, long double)
__ISREL_DEF(greaterequalf, >=, float_t) __ISREL_DEF(greaterequalf, >=, float)
__ISREL_DEF(greaterequal, >=, double_t) __ISREL_DEF(greaterequal, >=, double)
__ISREL_DEF(greaterequall, >=, long double) __ISREL_DEF(greaterequall, >=, long double)
#define __tg_pred_2(x, y, p) ( \ #define __tg_pred_2(x, y, p) ( \
@ -248,10 +235,10 @@ static inline void fp_force_evall(long double x)
} \ } \
} while(0) } while(0)
typedef union {float _f; uint32_t _i;}asuint_union; typedef union {float _f; unsigned int _i;}asuint_union;
typedef union {uint32_t _i; float _f;}asfloat_union; typedef union {unsigned int _i; float _f;}asfloat_union;
typedef union {double _f; uint64_t _i;}asuint64_union; typedef union {double _f; unsigned long long _i;}asuint64_union;
typedef union {uint64_t _i; double _f;}asdouble_union; typedef union {unsigned long long _i; double _f;}asdouble_union;
#define asuint(f) ((asuint_union){f})._i #define asuint(f) ((asuint_union){f})._i
#define asfloat(i) ((asfloat_union){i})._f #define asfloat(i) ((asfloat_union){i})._f
#define asuint64(f) ((asuint64_union){f})._i #define asuint64(f) ((asuint64_union){f})._i
@ -259,9 +246,9 @@ typedef union {uint64_t _i; double _f;}asdouble_union;
#define EXTRACT_WORDS(hi,lo,d) \ #define EXTRACT_WORDS(hi,lo,d) \
do { \ do { \
uint64_t __u = asuint64(d); \ unsigned long long __u = asuint64(d); \
(hi) = __u >> 32; \ (hi) = __u >> 32; \
(lo) = (uint32_t)__u; \ (lo) = (unsigned int)__u; \
} while (0) } while (0)
#define GET_HIGH_WORD(hi,d) \ #define GET_HIGH_WORD(hi,d) \
@ -271,16 +258,16 @@ do { \
#define GET_LOW_WORD(lo,d) \ #define GET_LOW_WORD(lo,d) \
do { \ do { \
(lo) = (uint32_t)asuint64(d); \ (lo) = (unsigned int)asuint64(d); \
} while (0) } while (0)
#define INSERT_WORDS(d,hi,lo) \ #define INSERT_WORDS(d,hi,lo) \
do { \ do { \
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \ (d) = asdouble(((unsigned long long)(hi)<<32) | (unsigned int)(lo)); \
} while (0) } while (0)
#define SET_HIGH_WORD(d,hi) \ #define SET_HIGH_WORD(d,hi) \
INSERT_WORDS(d, hi, (uint32_t)asuint64(d)) INSERT_WORDS(d, hi, (unsigned int)asuint64(d))
#define SET_LOW_WORD(d,lo) \ #define SET_LOW_WORD(d,lo) \
INSERT_WORDS(d, asuint64(d)>>32, lo) INSERT_WORDS(d, asuint64(d)>>32, lo)
@ -302,15 +289,15 @@ double __cos(double, double);
double __tan(double, double, int); double __tan(double, double, int);
/* error handling functions */ /* error handling functions */
float __math_xflowf(uint32_t, float); float __math_xflowf(unsigned int, float);
float __math_uflowf(uint32_t); float __math_uflowf(unsigned int);
float __math_oflowf(uint32_t); float __math_oflowf(unsigned int);
float __math_divzerof(uint32_t); float __math_divzerof(unsigned int);
float __math_invalidf(float); float __math_invalidf(float);
double __math_xflow(uint32_t, double); double __math_xflow(unsigned int, double);
double __math_uflow(uint32_t); double __math_uflow(unsigned int);
double __math_oflow(uint32_t); double __math_oflow(unsigned int);
double __math_divzero(uint32_t); double __math_divzero(unsigned int);
double __math_invalid(double); double __math_invalid(double);
#if LDBL_MANT_DIG != DBL_MANT_DIG #if LDBL_MANT_DIG != DBL_MANT_DIG
long double __math_invalidl(long double); long double __math_invalidl(long double);

View File

@ -60,7 +60,7 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
double __cos(double x, double y) double __cos(double x, double y)
{ {
double_t hz,z,r,w; double hz,z,r,w;
z = x*x; z = x*x;
w = z*z; w = z*z;

View File

@ -2,7 +2,7 @@
int __fpclassify(double x) int __fpclassify(double x)
{ {
union {double f; uint64_t i;} u = {x}; union {double f; unsigned long long i;} u = {x};
int e = u.i>>52 & 0x7ff; int e = u.i>>52 & 0x7ff;
if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO; if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
if (e==0x7ff) return u.i<<12 ? FP_NAN : FP_INFINITE; if (e==0x7ff) return u.i<<12 ? FP_NAN : FP_INFINITE;

View File

@ -2,7 +2,7 @@
int __fpclassifyf(float x) int __fpclassifyf(float x)
{ {
union {float f; uint32_t i;} u = {x}; union {float f; unsigned int i;} u = {x};
int e = u.i>>23 & 0xff; int e = u.i>>23 & 0xff;
if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO; if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
if (e==0xff) return u.i<<9 ? FP_NAN : FP_INFINITE; if (e==0xff) return u.i<<9 ? FP_NAN : FP_INFINITE;

View File

@ -1,6 +1,6 @@
#include "math.h" #include "math.h"
double __math_divzero(uint32_t sign) double __math_divzero(unsigned int sign)
{ {
return fp_barrier(sign ? -1.0 : 1.0) / 0.0; return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
} }

View File

@ -1,6 +1,6 @@
#include "math.h" #include "math.h"
float __math_divzerof(uint32_t sign) float __math_divzerof(unsigned int sign)
{ {
return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f; return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
} }

View File

@ -1,6 +1,6 @@
#include "math.h" #include "math.h"
double __math_oflow(uint32_t sign) double __math_oflow(unsigned int sign)
{ {
return __math_xflow(sign, 0x1p769); return __math_xflow(sign, 0x1p769);
} }

View File

@ -1,6 +1,6 @@
#include "math.h" #include "math.h"
float __math_oflowf(uint32_t sign) float __math_oflowf(unsigned int sign)
{ {
return __math_xflowf(sign, 0x1p97f); return __math_xflowf(sign, 0x1p97f);
} }

View File

@ -1,6 +1,6 @@
#include "math.h" #include "math.h"
double __math_uflow(uint32_t sign) double __math_uflow(unsigned int sign)
{ {
return __math_xflow(sign, 0x1p-767); return __math_xflow(sign, 0x1p-767);
} }

View File

@ -1,6 +1,6 @@
#include "math.h" #include "math.h"
float __math_uflowf(uint32_t sign) float __math_uflowf(unsigned int sign)
{ {
return __math_xflowf(sign, 0x1p-95f); return __math_xflowf(sign, 0x1p-95f);
} }

View File

@ -1,6 +1,6 @@
#include "math.h" #include "math.h"
double __math_xflow(uint32_t sign, double y) double __math_xflow(unsigned int sign, double y)
{ {
return eval_as_double(fp_barrier(sign ? -y : y) * y); return eval_as_double(fp_barrier(sign ? -y : y) * y);
} }

View File

@ -1,6 +1,6 @@
#include "math.h" #include "math.h"
float __math_xflowf(uint32_t sign, float y) float __math_xflowf(unsigned int sign, float y)
{ {
return eval_as_float(fp_barrierf(sign ? -y : y) * y); return eval_as_float(fp_barrierf(sign ? -y : y) * y);
} }

View File

@ -48,10 +48,10 @@ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
/* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ /* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
int __rem_pio2(double x, double *y) int __rem_pio2(double x, double *y)
{ {
union {double f; uint64_t i;} u = {x}; union {double f; unsigned long long i;} u = {x};
double_t z,w,t,r,fn; double z,w,t,r,fn;
double tx[3],ty[2]; double tx[3],ty[2];
uint32_t ix; unsigned int ix;
int sign, n, ex, ey, i; int sign, n, ex, ey, i;
sign = u.i>>63; sign = u.i>>63;
@ -119,8 +119,8 @@ int __rem_pio2(double x, double *y)
if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium: medium:
/* rint(x/(pi/2)) */ /* rint(x/(pi/2)) */
fn = (double_t)x*invpio2 + toint - toint; fn = (double)x*invpio2 + toint - toint;
n = (int32_t)fn; n = (int)fn;
r = x - fn*pio2_1; r = x - fn*pio2_1;
w = fn*pio2_1t; /* 1st round, good to 85 bits */ w = fn*pio2_1t; /* 1st round, good to 85 bits */
/* Matters with directed rounding. */ /* Matters with directed rounding. */
@ -167,11 +167,11 @@ medium:
} }
/* set z = scalbn(|x|,-ilogb(x)+23) */ /* set z = scalbn(|x|,-ilogb(x)+23) */
u.f = x; u.f = x;
u.i &= (uint64_t)-1>>12; u.i &= (unsigned long long)-1>>12;
u.i |= (uint64_t)(0x3ff + 23)<<52; u.i |= (unsigned long long)(0x3ff + 23)<<52;
z = u.f; z = u.f;
for (i=0; i < 2; i++) { for (i=0; i < 2; i++) {
tx[i] = (double)(int32_t)z; tx[i] = (double)(int)z;
z = (z-tx[i])*0x1p24; z = (z-tx[i])*0x1p24;
} }
tx[i] = z; tx[i] = z;

View File

@ -138,7 +138,7 @@ static const int init_jk[] = {3,4,4,6}; /* initial value for jk */
* NB: This table must have at least (e0-3)/24 + jk terms. * NB: This table must have at least (e0-3)/24 + jk terms.
* For quad precision (e0 <= 16360, jk = 6), this is 686. * For quad precision (e0 <= 16360, jk = 6), this is 686.
*/ */
static const int32_t ipio2[] = { static const int ipio2[] = {
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
@ -272,7 +272,7 @@ static const double PIo2[] = {
int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec) int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
{ {
int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
double z,fw,f[20],fq[20],q[20]; double z,fw,f[20],fq[20],q[20];
/* initialize jk*/ /* initialize jk*/
@ -300,15 +300,15 @@ int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
recompute: recompute:
/* distill q[] into iq[] reversingly */ /* distill q[] into iq[] reversingly */
for (i=0,j=jz,z=q[jz]; j>0; i++,j--) { for (i=0,j=jz,z=q[jz]; j>0; i++,j--) {
fw = (double)(int32_t)(0x1p-24*z); fw = (double)(int)(0x1p-24*z);
iq[i] = (int32_t)(z - 0x1p24*fw); iq[i] = (int)(z - 0x1p24*fw);
z = q[j-1]+fw; z = q[j-1]+fw;
} }
/* compute n */ /* compute n */
z = scalbn(z,q0); /* actual value of z */ z = scalbn(z,q0); /* actual value of z */
z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
n = (int32_t)z; n = (int)z;
z -= (double)n; z -= (double)n;
ih = 0; ih = 0;
if (q0 > 0) { /* need iq[jz-1] to determine n */ if (q0 > 0) { /* need iq[jz-1] to determine n */
@ -375,13 +375,13 @@ recompute:
} else { /* break z into 24-bit if necessary */ } else { /* break z into 24-bit if necessary */
z = scalbn(z,-q0); z = scalbn(z,-q0);
if (z >= 0x1p24) { if (z >= 0x1p24) {
fw = (double)(int32_t)(0x1p-24*z); fw = (double)(int)(0x1p-24*z);
iq[jz] = (int32_t)(z - 0x1p24*fw); iq[jz] = (int)(z - 0x1p24*fw);
jz += 1; jz += 1;
q0 += 24; q0 += 24;
iq[jz] = (int32_t)fw; iq[jz] = (int)fw;
} else } else
iq[jz] = (int32_t)z; iq[jz] = (int)z;
} }
/* convert integer "bit" chunk to floating-point value */ /* convert integer "bit" chunk to floating-point value */
@ -411,7 +411,7 @@ recompute:
fw = 0.0; fw = 0.0;
for (i=jz; i>=0; i--) for (i=jz; i>=0; i--)
fw += fq[i]; fw += fq[i];
// TODO: drop excess precision here once double_t is used // TODO: drop excess precision here once double is used
fw = (double)fw; fw = (double)fw;
y[0] = ih==0 ? fw : -fw; y[0] = ih==0 ? fw : -fw;
fw = fq[0]-fw; fw = fq[0]-fw;

View File

@ -5,7 +5,7 @@ int __signbit(double x)
{ {
union { union {
double d; double d;
uint64_t i; unsigned long long i;
} y = { x }; } y = { x };
return y.i>>63; return y.i>>63;
} }

View File

@ -5,7 +5,7 @@ int __signbitf(float x)
{ {
union { union {
float f; float f;
uint32_t i; unsigned int i;
} y = { x }; } y = { x };
return y.i>>31; return y.i>>31;
} }

View File

@ -51,7 +51,7 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
double __sin(double x, double y, int iy) double __sin(double x, double y, int iy)
{ {
double_t z,r,v,w; double z,r,v,w;
z = x*x; z = x*x;
w = z*z; w = z*z;

View File

@ -65,9 +65,9 @@ pio4lo = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */
double __tan(double x, double y, int odd) double __tan(double x, double y, int odd)
{ {
double_t z, r, v, w, s, a; double z, r, v, w, s, a;
double w0, a0; double w0, a0;
uint32_t hx; unsigned int hx;
int big, sign; int big, sign;
GET_HIGH_WORD(hx,x); GET_HIGH_WORD(hx,x);

View File

@ -51,7 +51,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
static double R(double z) static double R(double z)
{ {
double_t p, q; double p, q;
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
return p/q; return p/q;
@ -60,13 +60,13 @@ static double R(double z)
double acos(double x) double acos(double x)
{ {
double z,w,s,c,df; double z,w,s,c,df;
uint32_t hx,ix; unsigned int hx,ix;
GET_HIGH_WORD(hx, x); GET_HIGH_WORD(hx, x);
ix = hx & 0x7fffffff; ix = hx & 0x7fffffff;
/* |x| >= 1 or nan */ /* |x| >= 1 or nan */
if (ix >= 0x3ff00000) { if (ix >= 0x3ff00000) {
uint32_t lx; unsigned int lx;
GET_LOW_WORD(lx,x); GET_LOW_WORD(lx,x);
if ((ix-0x3ff00000 | lx) == 0) { if ((ix-0x3ff00000 | lx) == 0) {

View File

@ -58,7 +58,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
static double R(double z) static double R(double z)
{ {
double_t p, q; double p, q;
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
return p/q; return p/q;
@ -67,13 +67,13 @@ static double R(double z)
double asin(double x) double asin(double x)
{ {
double z,r,s; double z,r,s;
uint32_t hx,ix; unsigned int hx,ix;
GET_HIGH_WORD(hx, x); GET_HIGH_WORD(hx, x);
ix = hx & 0x7fffffff; ix = hx & 0x7fffffff;
/* |x| >= 1 or nan */ /* |x| >= 1 or nan */
if (ix >= 0x3ff00000) { if (ix >= 0x3ff00000) {
uint32_t lx; unsigned int lx;
GET_LOW_WORD(lx, x); GET_LOW_WORD(lx, x);
if ((ix-0x3ff00000 | lx) == 0) if ((ix-0x3ff00000 | lx) == 0)
/* asin(1) = +-pi/2 with inexact */ /* asin(1) = +-pi/2 with inexact */

View File

@ -62,8 +62,8 @@ static const double aT[] = {
double atan(double x) double atan(double x)
{ {
double_t w,s1,s2,z; double w,s1,s2,z;
uint32_t ix,sign; unsigned int ix,sign;
int id; int id;
GET_HIGH_WORD(ix, x); GET_HIGH_WORD(ix, x);

View File

@ -46,7 +46,7 @@ pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
double atan2(double y, double x) double atan2(double y, double x)
{ {
double z; double z;
uint32_t m,lx,ly,ix,iy; unsigned int m,lx,ly,ix,iy;
if (isnan(x) || isnan(y)) if (isnan(x) || isnan(y))
return x+y; return x+y;

View File

@ -17,7 +17,7 @@
#include <math.h> #include <math.h>
static const uint32_t static const unsigned int
B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */ B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */ B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
@ -31,9 +31,9 @@ P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
double cbrt(double x) double cbrt(double x)
{ {
union {double f; uint64_t i;} u = {x}; union {double f; unsigned long long i;} u = {x};
double_t r,s,t,w; double r,s,t,w;
uint32_t hx = u.i>>32 & 0x7fffffff; unsigned int hx = u.i>>32 & 0x7fffffff;
if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */ if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */
return x+x; return x+x;
@ -62,7 +62,7 @@ double cbrt(double x)
} else } else
hx = hx/3 + B1; hx = hx/3 + B1;
u.i &= 1ULL<<63; u.i &= 1ULL<<63;
u.i |= (uint64_t)hx << 32; u.i |= (unsigned long long)hx << 32;
t = u.f; t = u.f;
/* /*

View File

@ -5,13 +5,13 @@
#elif FLT_EVAL_METHOD==2 #elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON #define EPS LDBL_EPSILON
#endif #endif
static const double_t toint = 1/EPS; static const double toint = 1/EPS;
double ceil(double x) double ceil(double x)
{ {
union {double f; uint64_t i;} u = {x}; union {double f; unsigned long long i;} u = {x};
int e = u.i >> 52 & 0x7ff; int e = u.i >> 52 & 0x7ff;
double_t y; double y;
if (e >= 0x3ff+52 || x == 0) if (e >= 0x3ff+52 || x == 0)
return x; return x;

View File

@ -45,7 +45,7 @@
double cos(double x) double cos(double x)
{ {
double y[2]; double y[2];
uint32_t ix; unsigned int ix;
unsigned n; unsigned n;
GET_HIGH_WORD(ix, x); GET_HIGH_WORD(ix, x);

View File

@ -23,12 +23,12 @@
is scale*(1+TMP) without intermediate rounding. The bit representation of is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent a double. (int)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */ negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) static inline double specialcase(double tmp, unsigned long long sbits, unsigned long long ki)
{ {
double_t scale, y; double scale, y;
if ((ki & 0x80000000) == 0) { if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by <= 460. */ /* k > 0, the exponent of scale might have overflowed by <= 460. */
@ -46,7 +46,7 @@ static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
range to avoid double rounding that can cause 0.5+E/2 ulp error where range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */ is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo; double hi, lo;
lo = scale - y + scale * tmp; lo = scale - y + scale * tmp;
hi = 1.0 + y; hi = 1.0 + y;
lo = 1.0 - hi + y + lo; lo = 1.0 - hi + y + lo;
@ -62,16 +62,16 @@ static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
} }
/* Top 12 bits of a double (sign and exponent bits). */ /* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x) static inline unsigned int top12(double x)
{ {
return asuint64(x) >> 52; return asuint64(x) >> 52;
} }
double exp(double x) double exp(double x)
{ {
uint32_t abstop; unsigned int abstop;
uint64_t ki, idx, top, sbits; unsigned long long ki, idx, top, sbits;
double_t kd, z, r, r2, scale, tail, tmp; double kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff; abstop = top12(x) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) { if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
@ -103,7 +103,7 @@ double exp(double x)
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift); kd = eval_as_double(z + Shift);
ki = asuint64(kd) >> 16; ki = asuint64(kd) >> 16;
kd = (double_t)(int32_t)ki; kd = (double)(int)ki;
#else #else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift); kd = eval_as_double(z + Shift);

View File

@ -19,7 +19,7 @@ extern const struct exp_data {
double poly[4]; /* Last four coefficients. */ double poly[4]; /* Last four coefficients. */
double exp2_shift; double exp2_shift;
double exp2_poly[EXP2_POLY_ORDER]; double exp2_poly[EXP2_POLY_ORDER];
uint64_t tab[2*(1 << EXP_TABLE_BITS)]; unsigned long long tab[2*(1 << EXP_TABLE_BITS)];
} __exp_data; } __exp_data;
#endif #endif

View File

@ -2,7 +2,7 @@
double fabs(double x) double fabs(double x)
{ {
union {double f; uint64_t i;} u = {x}; union {double f; unsigned long long i;} u = {x};
u.i &= ULLONG_NSHIFT/2; u.i &= ULLONG_NSHIFT/2;
return u.f; return u.f;
} }

View File

@ -3,8 +3,8 @@
int factorial(int n) int factorial(int n)
{ {
if (n < 0) return (int)__math_invalid(-1.0f); if (n < 0) return (int)__math_invalid(-1.0f);
int32_t r = 1; int r = 1;
for (int32_t i = 2; i <= n; i++) for (int i = 2; i <= n; i++)
{ {
r = r * i; r = r * i;
} }

View File

@ -5,13 +5,13 @@
#elif FLT_EVAL_METHOD==2 #elif FLT_EVAL_METHOD==2
#define EPS LDBL_EPSILON #define EPS LDBL_EPSILON
#endif #endif
static const double_t toint = 1/EPS; static const double toint = 1/EPS;
double floor(double x) double floor(double x)
{ {
union {double f; uint64_t i;} u = {x}; union {double f; unsigned long long i;} u = {x};
int e = u.i >> 52 & 0x7ff; int e = u.i >> 52 & 0x7ff;
double_t y; double y;
if (e >= 0x3ff+52 || x == 0) if (e >= 0x3ff+52 || x == 0)
return x; return x;

View File

@ -2,15 +2,15 @@
double fmod(double x, double y) double fmod(double x, double y)
{ {
union {double f; uint64_t i;} ux = {x}, uy = {y}; union {double f; unsigned long long i;} ux = {x}, uy = {y};
int ex = ux.i>>52 & 0x7ff; int ex = ux.i>>52 & 0x7ff;
int ey = uy.i>>52 & 0x7ff; int ey = uy.i>>52 & 0x7ff;
int sx = ux.i>>63; int sx = ux.i>>63;
uint64_t i; unsigned long long i;
/* in the followings uxi should be ux.i, but then gcc wrongly adds */ /* in the followings uxi should be ux.i, but then gcc wrongly adds */
/* float load/store to inner loops ruining performance and code size */ /* float load/store to inner loops ruining performance and code size */
uint64_t uxi = ux.i; unsigned long long uxi = ux.i;
if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff) if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
return (x*y)/(x*y); return (x*y)/(x*y);
@ -57,11 +57,11 @@ double fmod(double x, double y)
/* scale result */ /* scale result */
if (ex > 0) { if (ex > 0) {
uxi -= 1ULL << 52; uxi -= 1ULL << 52;
uxi |= (uint64_t)ex << 52; uxi |= (unsigned long long)ex << 52;
} else { } else {
uxi >>= -ex + 1; uxi >>= -ex + 1;
} }
uxi |= (uint64_t)sx << 63; uxi |= (unsigned long long)sx << 63;
ux.i = uxi; ux.i = uxi;
return ux.f; return ux.f;
} }

View File

@ -6,7 +6,7 @@ int gcd(int a, int b)
if (b < 0) b = -b; if (b < 0) b = -b;
while (b != 0) while (b != 0)
{ {
int32_t t = b; int t = b;
b = a % b; b = a % b;
a = t; a = t;
} }

View File

@ -18,16 +18,16 @@
#define OFF 0x3fe6000000000000 #define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */ /* Top 16 bits of a double. */
static inline uint32_t top16(double x) static inline unsigned int top16(double x)
{ {
return asuint64(x) >> 48; return asuint64(x) >> 48;
} }
double log(double x) double log(double x)
{ {
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo; double w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
uint64_t ix, iz, tmp; unsigned long long ix, iz, tmp;
uint32_t top; unsigned int top;
int k, i; int k, i;
ix = asuint64(x); ix = asuint64(x);
@ -48,8 +48,8 @@ double log(double x)
r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
/* Worst-case error is around 0.507 ULP. */ /* Worst-case error is around 0.507 ULP. */
w = r * 0x1p27; w = r * 0x1p27;
double_t rhi = r + w - w; double rhi = r + w - w;
double_t rlo = r - rhi; double rlo = r - rhi;
w = rhi * rhi * B[0]; /* B[0] == -0.5. */ w = rhi * rhi * B[0]; /* B[0] == -0.5. */
hi = r + w; hi = r + w;
lo = r - hi + w; lo = r - hi + w;
@ -76,7 +76,7 @@ double log(double x)
The ith subinterval contains z and c is near its center. */ The ith subinterval contains z and c is near its center. */
tmp = ix - OFF; tmp = ix - OFF;
i = (tmp >> (52 - LOG_TABLE_BITS)) % N; i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */ k = (long long)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52); iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc; invc = T[i].invc;
logc = T[i].logc; logc = T[i].logc;
@ -91,7 +91,7 @@ double log(double x)
/* rounding error: 0x1p-55/N + 0x1p-66. */ /* rounding error: 0x1p-55/N + 0x1p-66. */
r = (z - T2[i].chi - T2[i].clo) * invc; r = (z - T2[i].chi - T2[i].clo) * invc;
#endif #endif
kd = (double_t)k; kd = (double)k;
/* hi + lo = r + log(c) + k*Ln2. */ /* hi + lo = r + log(c) + k*Ln2. */
w = kd * Ln2hi + logc; w = kd * Ln2hi + logc;

View File

@ -34,9 +34,9 @@ Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
double log10(double x) double log10(double x)
{ {
union {double f; uint64_t i;} u = {x}; union {double f; unsigned long long i;} u = {x};
double_t hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo; double hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo;
uint32_t hx; unsigned int hx;
int k; int k;
hx = u.i>>32; hx = u.i>>32;
@ -60,7 +60,7 @@ double log10(double x)
hx += 0x3ff00000 - 0x3fe6a09e; hx += 0x3ff00000 - 0x3fe6a09e;
k += (int)(hx>>20) - 0x3ff; k += (int)(hx>>20) - 0x3ff;
hx = (hx&0x000fffff) + 0x3fe6a09e; hx = (hx&0x000fffff) + 0x3fe6a09e;
u.i = (uint64_t)hx<<32 | (u.i&0xffffffff); u.i = (unsigned long long)hx<<32 | (u.i&0xffffffff);
x = u.f; x = u.f;
f = x - 1.0; f = x - 1.0;
@ -76,7 +76,7 @@ double log10(double x)
/* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
hi = f - hfsq; hi = f - hfsq;
u.f = hi; u.f = hi;
u.i &= (uint64_t)-1<<32; u.i &= (unsigned long long)-1<<32;
hi = u.f; hi = u.f;
lo = f - hi - hfsq + s*(hfsq+R); lo = f - hi - hfsq + s*(hfsq+R);

View File

@ -18,16 +18,16 @@
#define OFF 0x3fe6000000000000 #define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */ /* Top 16 bits of a double. */
static inline uint32_t top16(double x) static inline unsigned int top16(double x)
{ {
return asuint64(x) >> 48; return asuint64(x) >> 48;
} }
double log2(double x) double log2(double x)
{ {
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p; double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
uint64_t ix, iz, tmp; unsigned long long ix, iz, tmp;
uint32_t top; unsigned int top;
int k, i; int k, i;
ix = asuint64(x); ix = asuint64(x);
@ -44,7 +44,7 @@ double log2(double x)
hi = r * InvLn2hi; hi = r * InvLn2hi;
lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi); lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
#else #else
double_t rhi, rlo; double rhi, rlo;
rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32); rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32);
rlo = r - rhi; rlo = r - rhi;
hi = rhi * InvLn2hi; hi = rhi * InvLn2hi;
@ -79,12 +79,12 @@ double log2(double x)
The ith subinterval contains z and c is near its center. */ The ith subinterval contains z and c is near its center. */
tmp = ix - OFF; tmp = ix - OFF;
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N; i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */ k = (long long)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52); iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc; invc = T[i].invc;
logc = T[i].logc; logc = T[i].logc;
z = asdouble(iz); z = asdouble(iz);
kd = (double_t)k; kd = (double)k;
/* log2(x) = log2(z/c) + log2(c) + k. */ /* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */ /* r ~= z/c - 1, |r| < 1/(2*N). */
@ -94,7 +94,7 @@ double log2(double x)
t1 = r * InvLn2hi; t1 = r * InvLn2hi;
t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1); t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
#else #else
double_t rhi, rlo; double rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */ /* rounding error: 0x1p-55/N + 0x1p-65. */
r = (z - T2[i].chi - T2[i].clo) * invc; r = (z - T2[i].chi - T2[i].clo) * invc;
rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32); rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32);

View File

@ -2,8 +2,8 @@
double modf(double x, double *iptr) double modf(double x, double *iptr)
{ {
union {double f; uint64_t i;} u = {x}; union {double f; unsigned long long i;} u = {x};
uint64_t mask; unsigned long long mask;
int e = (int)(u.i>>52 & 0x7ff) - 0x3ff; int e = (int)(u.i>>52 & 0x7ff) - 0x3ff;
/* no fractional part */ /* no fractional part */

View File

@ -23,7 +23,7 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
#define OFF 0x3fe6955500000000 #define OFF 0x3fe6955500000000
/* Top 12 bits of a double (sign and exponent bits). */ /* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x) static inline unsigned int top12(double x)
{ {
return asuint64(x) >> 52; return asuint64(x) >> 52;
} }
@ -31,11 +31,11 @@ static inline uint32_t top12(double x)
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
additional 15 bits precision. IX is the bit representation of x, but additional 15 bits precision. IX is the bit representation of x, but
normalized in the subnormal range using the sign bit for the exponent. */ normalized in the subnormal range using the sign bit for the exponent. */
static inline double_t log_inline(uint64_t ix, double_t *tail) static inline double log_inline(unsigned long long ix, double *tail)
{ {
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ /* double for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p; double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
uint64_t iz, tmp; unsigned long long iz, tmp;
int k, i; int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact. /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
@ -43,10 +43,10 @@ static inline double_t log_inline(uint64_t ix, double_t *tail)
The ith subinterval contains z and c is near its center. */ The ith subinterval contains z and c is near its center. */
tmp = ix - OFF; tmp = ix - OFF;
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N; i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */ k = (long long)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52); iz = ix - (tmp & 0xfffULL << 52);
z = asdouble(iz); z = asdouble(iz);
kd = (double_t)k; kd = (double)k;
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
invc = T[i].invc; invc = T[i].invc;
@ -59,10 +59,10 @@ static inline double_t log_inline(uint64_t ix, double_t *tail)
r = __builtin_fma(z, invc, -1.0); r = __builtin_fma(z, invc, -1.0);
#else #else
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */ /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
double_t zhi = asdouble((iz + (1ULL << 31)) & (ULLONG_NSHIFT << 32)); double zhi = asdouble((iz + (1ULL << 31)) & (ULLONG_NSHIFT << 32));
double_t zlo = z - zhi; double zlo = z - zhi;
double_t rhi = zhi * invc - 1.0; double rhi = zhi * invc - 1.0;
double_t rlo = zlo * invc; double rlo = zlo * invc;
r = rhi + rlo; r = rhi + rlo;
#endif #endif
@ -73,7 +73,7 @@ static inline double_t log_inline(uint64_t ix, double_t *tail)
lo2 = t1 - t2 + r; lo2 = t1 - t2 + r;
/* Evaluation is optimized assuming superscalar pipelined execution. */ /* Evaluation is optimized assuming superscalar pipelined execution. */
double_t ar, ar2, ar3, lo3, lo4; double ar, ar2, ar3, lo3, lo4;
ar = A[0] * r; /* A[0] = -0.5. */ ar = A[0] * r; /* A[0] = -0.5. */
ar2 = r * ar; ar2 = r * ar;
ar3 = r * ar2; ar3 = r * ar2;
@ -83,8 +83,8 @@ static inline double_t log_inline(uint64_t ix, double_t *tail)
lo3 = __builtin_fma(ar, r, -ar2); lo3 = __builtin_fma(ar, r, -ar2);
lo4 = t2 - hi + ar2; lo4 = t2 - hi + ar2;
#else #else
double_t arhi = A[0] * rhi; double arhi = A[0] * rhi;
double_t arhi2 = rhi * arhi; double arhi2 = rhi * arhi;
hi = t2 + arhi2; hi = t2 + arhi2;
lo3 = rlo * (ar + arhi); lo3 = rlo * (ar + arhi);
lo4 = t2 - hi + arhi2; lo4 = t2 - hi + arhi2;
@ -116,12 +116,12 @@ static inline double_t log_inline(uint64_t ix, double_t *tail)
is scale*(1+TMP) without intermediate rounding. The bit representation of is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent a double. (int)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */ negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) static inline double specialcase(double tmp, unsigned long long sbits, unsigned long long ki)
{ {
double_t scale, y; double scale, y;
if ((ki & 0x80000000) == 0) { if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by <= 460. */ /* k > 0, the exponent of scale might have overflowed by <= 460. */
@ -140,7 +140,7 @@ static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
range to avoid double rounding that can cause 0.5+E/2 ulp error where range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */ is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo, one = 1.0; double hi, lo, one = 1.0;
if (y < 0.0) if (y < 0.0)
one = -1.0; one = -1.0;
lo = scale - y + scale * tmp; lo = scale - y + scale * tmp;
@ -161,12 +161,12 @@ static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */ The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias) static inline double exp_inline(double x, double xtail, unsigned int sign_bias)
{ {
uint32_t abstop; unsigned int abstop;
uint64_t ki, idx, top, sbits; unsigned long long ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ /* double for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp; double kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff; abstop = top12(x) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >= if (predict_false(abstop - top12(0x1p-54) >=
@ -174,7 +174,7 @@ static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
if (abstop - top12(0x1p-54) >= 0x80000000) { if (abstop - top12(0x1p-54) >= 0x80000000) {
/* Avoid spurious underflow for tiny x. */ /* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */ /* Note: 0 is common input. */
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0; double one = WANT_ROUNDING ? 1.0 + x : 1.0;
return sign_bias ? -one : one; return sign_bias ? -one : one;
} }
if (abstop >= top12(1024.0)) { if (abstop >= top12(1024.0)) {
@ -198,7 +198,7 @@ static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift); kd = eval_as_double(z + Shift);
ki = asuint64(kd) >> 16; ki = asuint64(kd) >> 16;
kd = (double_t)(int32_t)ki; kd = (double)(int)ki;
#else #else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift); kd = eval_as_double(z + Shift);
@ -230,7 +230,7 @@ static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
the bit representation of a non-zero finite floating-point value. */ the bit representation of a non-zero finite floating-point value. */
static inline int checkint(uint64_t iy) static inline int checkint(unsigned long long iy)
{ {
int e = iy >> 52 & 0x7ff; int e = iy >> 52 & 0x7ff;
if (e < 0x3ff) if (e < 0x3ff)
@ -245,16 +245,16 @@ static inline int checkint(uint64_t iy)
} }
/* Returns 1 if input is the bit representation of 0, infinity or nan. */ /* Returns 1 if input is the bit representation of 0, infinity or nan. */
static inline int zeroinfnan(uint64_t i) static inline int zeroinfnan(unsigned long long i)
{ {
return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1; return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
} }
double pow(double x, double y) double pow(double x, double y)
{ {
uint32_t sign_bias = 0; unsigned int sign_bias = 0;
uint64_t ix, iy; unsigned long long ix, iy;
uint32_t topx, topy; unsigned int topx, topy;
ix = asuint64(x); ix = asuint64(x);
iy = asuint64(y); iy = asuint64(y);
@ -281,7 +281,7 @@ double pow(double x, double y)
return y * y; return y * y;
} }
if (predict_false(zeroinfnan(ix))) { if (predict_false(zeroinfnan(ix))) {
double_t x2 = x * x; double x2 = x * x;
if (ix >> 63 && checkint(iy) == 1) if (ix >> 63 && checkint(iy) == 1)
x2 = -x2; x2 = -x2;
/* Without the barrier some versions of clang hoist the 1/x2 and /* Without the barrier some versions of clang hoist the 1/x2 and
@ -323,17 +323,17 @@ double pow(double x, double y)
} }
} }
double_t lo; double lo;
double_t hi = log_inline(ix, &lo); double hi = log_inline(ix, &lo);
double_t ehi, elo; double ehi, elo;
#if __FP_FAST_FMA #if __FP_FAST_FMA
ehi = y * hi; ehi = y * hi;
elo = y * lo + __builtin_fma(y, hi, -ehi); elo = y * lo + __builtin_fma(y, hi, -ehi);
#else #else
double_t yhi = asdouble(iy & ULLONG_NSHIFT << 27); double yhi = asdouble(iy & ULLONG_NSHIFT << 27);
double_t ylo = y - yhi; double ylo = y - yhi;
double_t lhi = asdouble(asuint64(hi) & ULLONG_NSHIFT << 27); double lhi = asdouble(asuint64(hi) & ULLONG_NSHIFT << 27);
double_t llo = hi - lhi + lo; double llo = hi - lhi + lo;
ehi = yhi * lhi; ehi = yhi * lhi;
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */ elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
#endif #endif

View File

@ -2,8 +2,8 @@
double scalbn(double x, int n) double scalbn(double x, int n)
{ {
union {double f; uint64_t i;} u; union {double f; unsigned long long i;} u;
double_t y = x; double y = x;
if (n > 1023) { if (n > 1023) {
y *= 0x1p1023; y *= 0x1p1023;
@ -26,7 +26,7 @@ double scalbn(double x, int n)
n = -1022; n = -1022;
} }
} }
u.i = (uint64_t)(0x3ff+n)<<52; u.i = (unsigned long long)(0x3ff+n)<<52;
x = y * u.f; x = y * u.f;
return x; return x;
} }

View File

@ -45,7 +45,7 @@
double sin(double x) double sin(double x)
{ {
double y[2]; double y[2];
uint32_t ix; unsigned int ix;
unsigned n; unsigned n;
/* High word of x. */ /* High word of x. */

View File

@ -4,24 +4,24 @@
#define FENV_SUPPORT 1 #define FENV_SUPPORT 1
/* returns a*b*2^-32 - e, with error 0 <= e < 1. */ /* returns a*b*2^-32 - e, with error 0 <= e < 1. */
static inline uint32_t mul32(uint32_t a, uint32_t b) static inline unsigned int mul32(unsigned int a, unsigned int b)
{ {
return (uint64_t)a*b >> 32; return (unsigned long long)a*b >> 32;
} }
/* returns a*b*2^-64 - e, with error 0 <= e < 3. */ /* returns a*b*2^-64 - e, with error 0 <= e < 3. */
static inline uint64_t mul64(uint64_t a, uint64_t b) static inline unsigned long long mul64(unsigned long long a, unsigned long long b)
{ {
uint64_t ahi = a>>32; unsigned long long ahi = a>>32;
uint64_t alo = a&0xffffffff; unsigned long long alo = a&0xffffffff;
uint64_t bhi = b>>32; unsigned long long bhi = b>>32;
uint64_t blo = b&0xffffffff; unsigned long long blo = b&0xffffffff;
return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32); return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
} }
double sqrt(double x) double sqrt(double x)
{ {
uint64_t ix, top, m; unsigned long long ix, top, m;
/* special case handling. */ /* special case handling. */
ix = asuint64(x); ix = asuint64(x);
@ -103,11 +103,11 @@ double sqrt(double x)
and after switching to 64 bit and after switching to 64 bit
m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62 */ m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62 */
static const uint64_t three = 0xc0000000; static const unsigned long long three = 0xc0000000;
uint64_t r, s, d, u, i; unsigned long long r, s, d, u, i;
i = (ix >> 46) % 128; i = (ix >> 46) % 128;
r = (uint32_t)__rsqrt_tab[i] << 16; r = (unsigned int)__rsqrt_tab[i] << 16;
/* |r sqrt(m) - 1| < 0x1.fdp-9 */ /* |r sqrt(m) - 1| < 0x1.fdp-9 */
s = mul32(m>>32, r); s = mul32(m>>32, r);
/* |s/sqrt(m) - 1| < 0x1.fdp-9 */ /* |s/sqrt(m) - 1| < 0x1.fdp-9 */
@ -134,7 +134,7 @@ double sqrt(double x)
compute nearest rounded result: compute nearest rounded result:
the nearest result to 52 bits is either s or s+0x1p-52, the nearest result to 52 bits is either s or s+0x1p-52,
we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m. */ we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m. */
uint64_t d0, d1, d2; unsigned long long d0, d1, d2;
double y, t; double y, t;
d0 = (m << 42) - s*s; d0 = (m << 42) - s*s;
d1 = s - d0; d1 = s - d0;
@ -147,7 +147,7 @@ double sqrt(double x)
/* handle rounding modes and inexact exception: /* handle rounding modes and inexact exception:
only (s+1)^2 == 2^42 m case is exact otherwise only (s+1)^2 == 2^42 m case is exact otherwise
add a tiny value to cause the fenv effects. */ add a tiny value to cause the fenv effects. */
uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000; unsigned long long tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
tiny |= (d1^d2) & 0x8000000000000000; tiny |= (d1^d2) & 0x8000000000000000;
t = asdouble(tiny); t = asdouble(tiny);
y = eval_as_double(y + t); y = eval_as_double(y + t);

View File

@ -1,5 +1,5 @@
#include "sqrt_data.h" #include "sqrt_data.h"
const uint16_t __rsqrt_tab[128] = { const unsigned short __rsqrt_tab[128] = {
0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43, 0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43,
0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b, 0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b,
0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1, 0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1,

View File

@ -7,6 +7,6 @@
if x in [2,4): i = (int)(32*x-64); if x in [2,4): i = (int)(32*x-64);
__rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error: __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error:
|__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */ |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */
extern const uint16_t __rsqrt_tab[128]; extern const unsigned short __rsqrt_tab[128];
#endif #endif

View File

@ -44,7 +44,7 @@
double tan(double x) double tan(double x)
{ {
double y[2]; double y[2];
uint32_t ix; unsigned int ix;
unsigned n; unsigned n;
GET_HIGH_WORD(ix, x); GET_HIGH_WORD(ix, x);

View File

@ -2,9 +2,9 @@
double trunc(double x) double trunc(double x)
{ {
union {double f; uint64_t i;} u = {x}; union {double f; unsigned long long i;} u = {x};
int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff + 12; int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff + 12;
uint64_t m; unsigned long long m;
if (e >= 52 + 12) if (e >= 52 + 12)
return x; return x;