#include "pocketpy/common/dmath.h" #include "pocketpy/common/algorithm.h" #include "pocketpy/common/_log_spline_tbl.h" #include union Float64Bits { double f; uint64_t i; }; /* IEEE 754 double precision floating point data manipulation */ typedef union { double f; uint64_t u; struct {int32_t i0,i1;} s; } udi_t; // https://github.com/akohlmey/fastermath/blob/master/src/exp.c#L63 double dmath_exp2(double x) { if (x > 1000) return DMATH_INFINITY; if (x < -1000) return 0; if (dmath_isnan(x)) return DMATH_NAN; const int FM_DOUBLE_BIAS = 1023; static const double fm_exp2_q[] = { /* 1.00000000000000000000e0, */ 2.33184211722314911771e2, 4.36821166879210612817e3 }; static const double fm_exp2_p[] = { 2.30933477057345225087e-2, 2.02020656693165307700e1, 1.51390680115615096133e3 }; double ipart, fpart, px, qx; udi_t epart; ipart = dmath_floor(x+0.5); fpart = x - ipart; // FM_DOUBLE_INIT_EXP(epart,ipart); epart.s.i0 = 0; epart.s.i1 = (((int) ipart) + FM_DOUBLE_BIAS) << 20; x = fpart*fpart; px = fm_exp2_p[0]; px = px*x + fm_exp2_p[1]; qx = x + fm_exp2_q[0]; px = px*x + fm_exp2_p[2]; qx = qx*x + fm_exp2_q[1]; px = px * fpart; x = 1.0 + 2.0*(px/(qx-px)); return epart.f*x; } double dmath_log2(double x) { if(x < 0) return DMATH_NAN; if(x == 0) return -DMATH_INFINITY; if(x == DMATH_INFINITY) return DMATH_INFINITY; if(dmath_isnan(x)) return DMATH_NAN; const double fm_log_dinv = 4.09600000000000000000e+03; const double fm_log_dsq6 = 9.93410746256510361521e-09; const int FM_DOUBLE_BIAS = 1023; const int FM_DOUBLE_EMASK = 2146435072; const int FM_DOUBLE_MBITS = 20; const int FM_DOUBLE_MMASK = 1048575; const int FM_DOUBLE_EZERO = 1072693248; const int FM_SPLINE_SHIFT = 8; udi_t val; double a,b,y; int32_t hx, ipart; val.f = x; hx = val.s.i1; /* extract exponent and subtract bias */ ipart = (((hx & FM_DOUBLE_EMASK) >> FM_DOUBLE_MBITS) - FM_DOUBLE_BIAS); /* mask out exponent to get the prefactor to 2**ipart */ hx &= FM_DOUBLE_MMASK; val.s.i1 = hx | FM_DOUBLE_EZERO; x = val.f; /* table index */ hx >>= FM_SPLINE_SHIFT; /* compute x value matching table index */ val.s.i0 = 0; val.s.i1 = FM_DOUBLE_EZERO | (hx << FM_SPLINE_SHIFT); b = (x - val.f) * fm_log_dinv; a = 1.0 - b; /* evaluate spline */ y = a * fm_log_q1[hx] + b * fm_log_q1[hx+1]; a = (a*a*a-a) * fm_log_q2[hx]; b = (b*b*b-b) * fm_log_q2[hx+1]; y += (a + b) * fm_log_dsq6; return ((double) ipart) + (y * DMATH_LOG2_E); } double dmath_exp(double x) { return dmath_exp2(x * DMATH_LOG2_E); // log2(e) } double dmath_exp10(double x) { return dmath_exp2(x * 3.321928094887362); // log2(10) } double dmath_log(double x) { return dmath_log2(x) / DMATH_LOG2_E; // log2(e) } double dmath_log10(double x) { return dmath_log2(x) / 3.321928094887362; // log2(10) } double dmath_pow(double base, double exp) { int exp_int = (int)exp; if(exp_int == exp) { if(exp_int == 0) return 1; if(exp_int < 0) { base = 1 / base; exp_int = -exp_int; } double res = 1; while(exp_int > 0) { if(exp_int & 1) res *= base; base *= base; exp_int >>= 1; } return res; } if (base > 0) { return dmath_exp(exp * dmath_log(base)); } if (base == 0) { if (exp > 0) return 0; if (exp == 0) return 1; } return DMATH_NAN; } double dmath_sqrt(double x) { return dmath_pow(x, 0.5); } double dmath_cbrt(double x) { return dmath_pow(x, 1.0 / 3.0); } // https://github.com/kraj/musl/blob/kraj/master/src/math/sincos.c static double __sin(double x, double y, int iy) { static const double S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ double z,r,v,w; z = x*x; w = z*z; r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6); v = z*x; if (iy == 0) return x + v*(S1 + z*r); else return x - ((z*(0.5*y - v*r) - y) - v*S1); } static double __cos(double x, double y) { static const double C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ double hz,z,r,w; z = x*x; w = z*z; r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6)); hz = 0.5*z; w = 1.0-hz; return w + (((1.0-w)-hz) + (z*r-x*y)); } int __rem_pio2(double x, double *y) { static const double toint = 1.5/2.22044604925031308085e-16, pio4 = 0x1.921fb54442d18p-1, invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */ pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */ pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ union Float64Bits u = { .f = x }; double z,w,t,r,fn; double tx[3],ty[2]; uint32_t ix; int sign, n, ex, ey, i; sign = u.i>>63; ix = u.i>>32 & 0x7fffffff; if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */ if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */ goto medium; /* cancellation -- use medium case */ if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */ if (!sign) { z = x - pio2_1; /* one round good to 85 bits */ y[0] = z - pio2_1t; y[1] = (z-y[0]) - pio2_1t; return 1; } else { z = x + pio2_1; y[0] = z + pio2_1t; y[1] = (z-y[0]) + pio2_1t; return -1; } } else { if (!sign) { z = x - 2*pio2_1; y[0] = z - 2*pio2_1t; y[1] = (z-y[0]) - 2*pio2_1t; return 2; } else { z = x + 2*pio2_1; y[0] = z + 2*pio2_1t; y[1] = (z-y[0]) + 2*pio2_1t; return -2; } } } if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */ if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */ if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */ goto medium; if (!sign) { z = x - 3*pio2_1; y[0] = z - 3*pio2_1t; y[1] = (z-y[0]) - 3*pio2_1t; return 3; } else { z = x + 3*pio2_1; y[0] = z + 3*pio2_1t; y[1] = (z-y[0]) + 3*pio2_1t; return -3; } } else { if (ix == 0x401921fb) /* |x| ~= 4pi/2 */ goto medium; if (!sign) { z = x - 4*pio2_1; y[0] = z - 4*pio2_1t; y[1] = (z-y[0]) - 4*pio2_1t; return 4; } else { z = x + 4*pio2_1; y[0] = z + 4*pio2_1t; y[1] = (z-y[0]) + 4*pio2_1t; return -4; } } } if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ medium: /* rint(x/(pi/2)) */ fn = (double)x*invpio2 + toint - toint; n = (int32_t)fn; r = x - fn*pio2_1; w = fn*pio2_1t; /* 1st round, good to 85 bits */ /* Matters with directed rounding. */ if ((r - w < -pio4)) { n--; fn--; r = x - fn*pio2_1; w = fn*pio2_1t; } else if ((r - w > pio4)) { n++; fn++; r = x - fn*pio2_1; w = fn*pio2_1t; } y[0] = r - w; u.f = y[0]; ey = u.i>>52 & 0x7ff; ex = ix>>20; if (ex - ey > 16) { /* 2nd round, good to 118 bits */ t = r; w = fn*pio2_2; r = t - w; w = fn*pio2_2t - ((t-r)-w); y[0] = r - w; u.f = y[0]; ey = u.i>>52 & 0x7ff; if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */ t = r; w = fn*pio2_3; r = t - w; w = fn*pio2_3t - ((t-r)-w); y[0] = r - w; } } y[1] = (r - y[0]) - w; return n; } (void)tx; (void)ty; (void)i; return 0; #if 0 /* * all other (large) arguments */ if (ix >= 0x7ff00000) { /* x is inf or NaN */ y[0] = y[1] = x - x; return 0; } /* set z = scalbn(|x|,-ilogb(x)+23) */ u.f = x; u.i &= (uint64_t)-1>>12; u.i |= (uint64_t)(0x3ff + 23)<<52; z = u.f; for (i=0; i < 2; i++) { tx[i] = (double)(int32_t)z; z = (z-tx[i])*0x1p24; } tx[i] = z; /* skip zero terms, first term is non-zero */ while (tx[i] == 0.0) i--; n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1); if (sign) { y[0] = -ty[0]; y[1] = -ty[1]; return -n; } y[0] = ty[0]; y[1] = ty[1]; return n; #endif } void dmath_sincos(double x, double *sin, double *cos) { double y[2], s, c; uint32_t ix; unsigned n; //GET_HIGH_WORD(ix, x); union Float64Bits u = { .f = x }; ix = (uint32_t)(u.i >> 32); ix &= 0x7fffffff; /* |x| ~< pi/4 */ if (ix <= 0x3fe921fb) { /* if |x| < 2**-27 * sqrt(2) */ if (ix < 0x3e46a09e) { /* raise inexact if x!=0 and underflow if subnormal */ // FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f); volatile double y_force_eval; y_force_eval = ix < 0x00100000 ? x/0x1p120f : x+0x1p120f; (void)y_force_eval; *sin = x; *cos = 1.0; return; } *sin = __sin(x, 0.0, 0); *cos = __cos(x, 0.0); return; } /* sincos(Inf or NaN) is NaN */ if (ix >= 0x7ff00000) { *sin = *cos = x - x; return; } /* argument reduction needed */ n = __rem_pio2(x, y); s = __sin(y[0], y[1], 1); c = __cos(y[0], y[1]); switch (n&3) { case 0: *sin = s; *cos = c; break; case 1: *sin = c; *cos = -s; break; case 2: *sin = -s; *cos = -c; break; case 3: default: *sin = -c; *cos = s; break; } } double dmath_sin(double x) { double s, c; dmath_sincos(x, &s, &c); return s; } double dmath_cos(double x) { double s, c; dmath_sincos(x, &s, &c); return c; } double dmath_tan(double x) { double s, c; dmath_sincos(x, &s, &c); return s / c; } static double zig_r64(double z) { const double pS0 = 1.66666666666666657415e-01; const double pS1 = -3.25565818622400915405e-01; const double pS2 = 2.01212532134862925881e-01; const double pS3 = -4.00555345006794114027e-02; const double pS4 = 7.91534994289814532176e-04; const double pS5 = 3.47933107596021167570e-05; const double qS1 = -2.40339491173441421878e+00; const double qS2 = 2.02094576023350569471e+00; const double qS3 = -6.88283971605453293030e-01; const double qS4 = 7.70381505559019352791e-02; double p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5))))); double q = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); return p / q; } // https://github.com/ziglang/zig/blob/master/lib/std/math/asin.zig double dmath_asin(double x) { const double pio2_hi = 1.57079632679489655800e+00; const double pio2_lo = 6.12323399573676603587e-17; union Float64Bits ux_union; ux_union.f = x; uint64_t ux = ux_union.i; uint32_t hx = (uint32_t)(ux >> 32); uint32_t ix = hx & 0x7FFFFFFF; /* |x| >= 1 or nan */ if (ix >= 0x3FF00000) { uint32_t lx = (uint32_t)(ux & 0xFFFFFFFF); /* asin(1) = +-pi/2 with inexact */ if (((ix - 0x3FF00000) | lx) == 0) { return x * pio2_hi + 0x1.0p-120; } else { return DMATH_NAN; } } /* |x| < 0.5 */ if (ix < 0x3FE00000) { /* if 0x1p-1022 <= |x| < 0x1p-26 avoid raising overflow */ if (ix < 0x3E500000 && ix >= 0x00100000) { return x; } else { return x + x * zig_r64(x * x); } } /* 1 > |x| >= 0.5 */ double z = (1 - dmath_fabs(x)) * 0.5; double s = dmath_sqrt(z); double r = zig_r64(z); double fx; /* |x| > 0.975 */ if (ix >= 0x3FEF3333) { fx = pio2_hi - 2 * (s + s * r); } else { union Float64Bits jx_union = { .f = s }; uint64_t jx = jx_union.i; union Float64Bits df_union = { .i = jx & 0xFFFFFFFF00000000ULL }; double df = df_union.f; double c = (z - df * df) / (s + df); fx = 0.5 * pio2_hi - (2 * s * r - (pio2_lo - 2 * c) - (0.5 * pio2_hi - 2 * df)); } if (hx >> 31 != 0) { return -fx; } else { return fx; } } double dmath_acos(double x) { return DMATH_PI / 2 - dmath_asin(x); } double dmath_atan(double x) { return dmath_asin(x / dmath_sqrt(1 + x * x)); } double dmath_atan2(double y, double x) { if (x > 0) { return dmath_atan(y / x); } else if (x < 0 && y >= 0) { return dmath_atan(y / x) + DMATH_PI; } else if (x < 0 && y < 0) { return dmath_atan(y / x) - DMATH_PI; } else if (x == 0 && y > 0) { return DMATH_PI / 2; } else if (x == 0 && y < 0) { return -DMATH_PI / 2; } else { return DMATH_NAN; } } //////////////////////////////////////////////////////////////////// int dmath_isinf(double x) { union Float64Bits u = { .f = x }; return (u.i & -1ULL>>1) == 0x7ffULL<<52; } int dmath_isnan(double x) { union Float64Bits u = { .f = x }; return (u.i & -1ULL>>1) > 0x7ffULL<<52; } int dmath_isnormal(double x) { union Float64Bits u = { .f = x }; return ((u.i+(1ULL<<52)) & -1ULL>>1) >= 1ULL<<53; } int dmath_isfinite(double x) { union Float64Bits u = { .f = x }; return (u.i & -1ULL>>1) < 0x7ffULL<<52; } // https://github.com/kraj/musl/blob/kraj/master/src/math/fmod.c double dmath_fmod(double x, double y) { union Float64Bits ux = { .f = x }, uy = { .f = y }; int ex = ux.i>>52 & 0x7ff; int ey = uy.i>>52 & 0x7ff; int sx = ux.i>>63; uint64_t i; /* in the followings uxi should be ux.i, but then gcc wrongly adds */ /* float load/store to inner loops ruining performance and code size */ uint64_t uxi = ux.i; if (uy.i<<1 == 0 || dmath_isnan(y) || ex == 0x7ff) return (x*y)/(x*y); if (uxi<<1 <= uy.i<<1) { if (uxi<<1 == uy.i<<1) return 0*x; return x; } /* normalize x and y */ if (!ex) { for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1); uxi <<= -ex + 1; } else { uxi &= -1ULL >> 12; uxi |= 1ULL << 52; } if (!ey) { for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1); uy.i <<= -ey + 1; } else { uy.i &= -1ULL >> 12; uy.i |= 1ULL << 52; } /* x mod y */ for (; ex > ey; ex--) { i = uxi - uy.i; if (i >> 63 == 0) { if (i == 0) return 0*x; uxi = i; } uxi <<= 1; } i = uxi - uy.i; if (i >> 63 == 0) { if (i == 0) return 0*x; uxi = i; } for (; uxi>>52 == 0; uxi <<= 1, ex--); /* scale result */ if (ex > 0) { uxi -= 1ULL << 52; uxi |= (uint64_t)ex << 52; } else { uxi >>= -ex + 1; } uxi |= (uint64_t)sx << 63; ux.i = uxi; return ux.f; } // https://github.com/kraj/musl/blob/kraj/master/src/math/copysign.c double dmath_copysign(double x, double y) { union Float64Bits ux = { .f = x }, uy = { .f = y }; ux.i &= -1ULL/2; ux.i |= uy.i & 1ULL<<63; return ux.f; } double dmath_fabs(double x) { return (x < 0) ? -x : x; } double dmath_ceil(double x) { if(!dmath_isfinite(x)) return x; int int_part = (int)x; if (x > 0 && x != (double)int_part) { return (double)(int_part + 1); } return (double)int_part; } double dmath_floor(double x) { if(!dmath_isfinite(x)) return x; int int_part = (int)x; if (x < 0 && x != (double)int_part) { return (double)(int_part - 1); } return (double)int_part; } double dmath_trunc(double x) { return (double)((int)x); } // https://github.com/kraj/musl/blob/kraj/master/src/math/modf.c double dmath_modf(double x, double* iptr) { union Float64Bits u = { .f = x }; uint64_t mask; int e = (int)(u.i>>52 & 0x7ff) - 0x3ff; /* no fractional part */ if (e >= 52) { *iptr = x; if (e == 0x400 && u.i<<12 != 0) /* nan */ return x; u.i &= 1ULL<<63; return u.f; } /* no integral part*/ if (e < 0) { u.i &= 1ULL<<63; *iptr = u.f; return x; } mask = -1ULL>>12>>e; if ((u.i & mask) == 0) { *iptr = x; u.i &= 1ULL<<63; return u.f; } u.i &= ~mask; *iptr = u.f; return x - u.f; } double dmath_fmin(double x, double y) { return (x < y) ? x : y; } double dmath_fmax(double x, double y) { return (x > y) ? x : y; }