diff --git a/3rd/math/CMakeLists.txt b/3rd/math/CMakeLists.txt index 089ee1fa..b3d695ed 100644 --- a/3rd/math/CMakeLists.txt +++ b/3rd/math/CMakeLists.txt @@ -8,24 +8,10 @@ set(CMAKE_C_STANDARD_REQUIRED ON) include_directories(${CMAKE_CURRENT_LIST_DIR}/include) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_LIST_DIR}/src MUSL_LIBC_MATH_SRC) if(MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:precise /Ox /Oi-") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox /Oi- /wd4244 /wd4723") # mute warning C4723: div by 0 because some function returns nan when a input is nan. - add_compile_options( /wd4723) -elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffp-model=strict -O2") else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -fexcess-precision=standard -ffp-contract=off") endif() add_library(${PROJECT_NAME} STATIC ${MUSL_LIBC_MATH_SRC}) - - -# if(MSVC) -# target_compile_options(${PROJECT_NAME} PRIVATE /fp:precise /Od) -# message("MUSL_LIBC_MATH: MSVC set") -# elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang") -# target_compile_options(${PROJECT_NAME} PRIVATE -ffp-model=strict) -# message("MUSL_LIBC_MATH: CLANG set") -# elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU") -# message("MUSL_LIBC_MATH: gcc set") -# endif() diff --git a/3rd/math/include/features/features.h b/3rd/math/include/features/features.h new file mode 100644 index 00000000..85570ff5 --- /dev/null +++ b/3rd/math/include/features/features.h @@ -0,0 +1,8 @@ +#if defined(_MSC_VER) +#define hidden +#else +#define weak __attribute__((__weak__)) +#define hidden __attribute__((__visibility__("hidden"))) +#define weak_alias(old, new) \ + extern __typeof(old) new __attribute__((__weak__, __alias__(#old))) +#endif \ No newline at end of file diff --git a/3rd/math/include/float.h b/3rd/math/include/float.h new file mode 100644 index 00000000..791dde91 --- /dev/null +++ b/3rd/math/include/float.h @@ -0,0 +1,51 @@ +#ifndef _FLOAT_H +#define _FLOAT_H + +#ifdef __cplusplus +extern "C" { +#endif + + +#define FLT_RADIX 2 + +#define FLT_TRUE_MIN 1.40129846432481707092e-45F +#define FLT_MIN 1.17549435082228750797e-38F +#define FLT_MAX 3.40282346638528859812e+38F +#define FLT_EPSILON 1.1920928955078125e-07F + +#define FLT_MANT_DIG 24 +#define FLT_MIN_EXP (-125) +#define FLT_MAX_EXP 128 +#define FLT_HAS_SUBNORM 1 + +#define FLT_DIG 6 +#define FLT_DECIMAL_DIG 9 +#define FLT_MIN_10_EXP (-37) +#define FLT_MAX_10_EXP 38 + +#define DBL_TRUE_MIN 4.94065645841246544177e-324 +#define DBL_MIN 2.22507385850720138309e-308 +#define DBL_MAX 1.79769313486231570815e+308 +#define DBL_EPSILON 2.22044604925031308085e-16 + +#define DBL_MANT_DIG 53 +#define DBL_MIN_EXP (-1021) +#define DBL_MAX_EXP 1024 +#define DBL_HAS_SUBNORM 1 + +#define DBL_DIG 15 +#define DBL_DECIMAL_DIG 17 +#define DBL_MIN_10_EXP (-307) +#define DBL_MAX_10_EXP 308 + +#define LDBL_HAS_SUBNORM 1 +#define LDBL_DECIMAL_DIG DECIMAL_DIG +#define LDBL_MANT_DIG DBL_MANT_DIG +#define LDBL_MAX_EXP DBL_MAX_EXP + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/3rd/math/include/libm.h b/3rd/math/include/libm.h new file mode 100644 index 00000000..f5f43f81 --- /dev/null +++ b/3rd/math/include/libm.h @@ -0,0 +1,277 @@ +#ifndef _LIBM_H +#define _LIBM_H + +#include +#include "float.h" +#include "math.h" + + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN +union ldshape { + long double f; + struct { + uint64_t m; + uint16_t se; + } i; +}; +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN +/* This is the m68k variant of 80-bit long double, and this definition only works + * on archs where the alignment requirement of uint64_t is <= 4. */ +union ldshape { + long double f; + struct { + uint16_t se; + uint16_t pad; + uint64_t m; + } i; +}; +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN +union ldshape { + long double f; + struct { + uint64_t lo; + uint32_t mid; + uint16_t top; + uint16_t se; + } i; + struct { + uint64_t lo; + uint64_t hi; + } i2; +}; +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN +union ldshape { + long double f; + struct { + uint16_t se; + uint16_t top; + uint32_t mid; + uint64_t lo; + } i; + struct { + uint64_t hi; + uint64_t lo; + } i2; +}; +#else +#error Unsupported long double representation +#endif + +/* Support non-nearest rounding mode. */ +#define WANT_ROUNDING 1 +/* Support signaling NaNs. */ +#define WANT_SNAN 0 + +#if WANT_SNAN +#error SNaN is unsupported +#else +#define issignalingf_inline(x) 0 +#define issignaling_inline(x) 0 +#endif + +#ifndef TOINT_INTRINSICS +#define TOINT_INTRINSICS 0 +#endif + +#if TOINT_INTRINSICS +/* Round x to nearest int in all rounding modes, ties have to be rounded + consistently with converttoint so the results match. If the result + would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */ +static double_t roundtoint(double_t); + +/* Convert x to nearest int in all rounding modes, ties have to be rounded + consistently with roundtoint. If the result is not representible in an + int32_t then the semantics is unspecified. */ +static int32_t converttoint(double_t); +#endif + +/* Helps static branch prediction so hot path can be better optimized. */ +#ifdef __GNUC__ +#define predict_true(x) __builtin_expect(!!(x), 1) +#define predict_false(x) __builtin_expect(x, 0) +#else +#define predict_true(x) (x) +#define predict_false(x) (x) +#endif + +/* Evaluate an expression as the specified type. With standard excess + precision handling a type cast or assignment is enough (with + -ffloat-store an assignment is required, in old compilers argument + passing and return statement may not drop excess precision). */ + +static inline float eval_as_float(float x) +{ + float y = x; + return y; +} + +static inline double eval_as_double(double x) +{ + double y = x; + return y; +} + +/* fp_barrier returns its input, but limits code transformations + as if it had a side-effect (e.g. observable io) and returned + an arbitrary value. */ + +#ifndef fp_barrierf +#define fp_barrierf fp_barrierf +static inline float fp_barrierf(float x) +{ + volatile float y = x; + return y; +} +#endif + +#ifndef fp_barrier +#define fp_barrier fp_barrier +static inline double fp_barrier(double x) +{ + volatile double y = x; + return y; +} +#endif + +#ifndef fp_barrierl +#define fp_barrierl fp_barrierl +static inline long double fp_barrierl(long double x) +{ + volatile long double y = x; + return y; +} +#endif + +/* fp_force_eval ensures that the input value is computed when that's + otherwise unused. To prevent the constant folding of the input + expression, an additional fp_barrier may be needed or a compilation + mode that does so (e.g. -frounding-math in gcc). Then it can be + used to evaluate an expression for its fenv side-effects only. */ + +#ifndef fp_force_evalf +#define fp_force_evalf fp_force_evalf +static inline void fp_force_evalf(float x) +{ + volatile float y; + y = x; +} +#endif + +#ifndef fp_force_eval +#define fp_force_eval fp_force_eval +static inline void fp_force_eval(double x) +{ + volatile double y; + y = x; +} +#endif + +#ifndef fp_force_evall +#define fp_force_evall fp_force_evall +static inline void fp_force_evall(long double x) +{ + volatile long double y; + y = x; +} +#endif + +#define FORCE_EVAL(x) do { \ + if (sizeof(x) == sizeof(float)) { \ + fp_force_evalf(x); \ + } else if (sizeof(x) == sizeof(double)) { \ + fp_force_eval(x); \ + } else { \ + fp_force_evall(x); \ + } \ +} while(0) + +typedef union {float _f; unsigned int _i;}asuint_union; +typedef union {unsigned int _i; float _f;}asfloat_union; +typedef union {double _f; unsigned long long _i;}asuint64_union; +typedef union {unsigned long long _i; double _f;}asdouble_union; +#define asuint(f) ((asuint_union){f})._i +#define asfloat(i) ((asfloat_union){i})._f +#define asuint64(f) ((asuint64_union){f})._i +#define asdouble(i) ((asdouble_union){i})._f + +#define EXTRACT_WORDS(hi,lo,d) \ +do { \ + uint64_t __u = asuint64(d); \ + (hi) = __u >> 32; \ + (lo) = (uint32_t)__u; \ +} while (0) + +#define GET_HIGH_WORD(hi,d) \ +do { \ + (hi) = asuint64(d) >> 32; \ +} while (0) + +#define GET_LOW_WORD(lo,d) \ +do { \ + (lo) = (uint32_t)asuint64(d); \ +} while (0) + +#define INSERT_WORDS(d,hi,lo) \ +do { \ + (d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \ +} while (0) + +#define SET_HIGH_WORD(d,hi) \ + INSERT_WORDS(d, hi, (uint32_t)asuint64(d)) + +#define SET_LOW_WORD(d,lo) \ + INSERT_WORDS(d, asuint64(d)>>32, lo) + +#define GET_FLOAT_WORD(w,d) \ +do { \ + (w) = asuint(d); \ +} while (0) + +#define SET_FLOAT_WORD(d,w) \ +do { \ + (d) = asfloat(w); \ +} while (0) + +hidden int __rem_pio2_large(double*,double*,int,int,int); + +hidden int __rem_pio2(double,double*); +hidden double __sin(double,double,int); +hidden double __cos(double,double); +hidden double __tan(double,double,int); +// hidden double __expo2(double,double); + +// hidden int __rem_pio2f(float,double*); +// hidden float __sindf(double); +// hidden float __cosdf(double); +// hidden float __tandf(double,int); +// hidden float __expo2f(float,float); + +// hidden int __rem_pio2l(long double, long double *); +// hidden long double __sinl(long double, long double, int); +// hidden long double __cosl(long double, long double); +// hidden long double __tanl(long double, long double, int); + +// hidden long double __polevll(long double, const long double *, int); +// hidden long double __p1evll(long double, const long double *, int); + +// extern int __signgam; +// hidden double __lgamma_r(double, int *); +// hidden float __lgammaf_r(float, int *); + +/* error handling functions */ +hidden float __math_xflowf(uint32_t, float); +hidden float __math_uflowf(uint32_t); +hidden float __math_oflowf(uint32_t); +hidden float __math_divzerof(uint32_t); +hidden float __math_invalidf(float); +hidden double __math_xflow(uint32_t, double); +hidden double __math_uflow(uint32_t); +hidden double __math_oflow(uint32_t); +hidden double __math_divzero(uint32_t); +hidden double __math_invalid(double); +#if LDBL_MANT_DIG != DBL_MANT_DIG +hidden long double __math_invalidl(long double); +#endif + +#endif diff --git a/3rd/math/include/math.h b/3rd/math/include/math.h index 82c49c25..660542de 100644 --- a/3rd/math/include/math.h +++ b/3rd/math/include/math.h @@ -5,6 +5,23 @@ extern "C" { #endif +#include "features/features.h" + +#if defined(_WIN32) || defined(_WIN64) +#if defined _M_IX86 && _M_IX86_FP < 2 && !defined _M_FP_FAST + typedef long double float_t; + typedef long double double_t; +#else + typedef float float_t; + typedef double double_t; +#endif +#else +#define __NEED_float_t +#define __NEED_double_t +// #include +typedef float float_t; +typedef double double_t; +#endif #ifndef _HUGE_ENUF #define _HUGE_ENUF 1e+300 // _HUGE_ENUF*_HUGE_ENUF must overflow @@ -45,21 +62,6 @@ extern "C" { #define FLT_EVAL_METHOD 0 -/* Support non-nearest rounding mode. */ -#define WANT_ROUNDING 1 -/* Support signaling NaNs. */ -#define WANT_SNAN 0 - -#if WANT_SNAN -#error SNaN is unsupported -#else -#define issignalingf_inline(x) 0 -#define issignaling_inline(x) 0 -#endif - -#define predict_true(x) (x) -#define predict_false(x) (x) - int __fpclassify(double); int __fpclassifyf(float); int __fpclassifyl(long double); @@ -117,20 +119,20 @@ int __signbitl(long double); static __inline int __is##rel(type __x, type __y) \ { return !isunordered(__x,__y) && __x op __y; } -__ISREL_DEF(lessf, <, float) -__ISREL_DEF(less, <, double) +__ISREL_DEF(lessf, <, float_t) +__ISREL_DEF(less, <, double_t) __ISREL_DEF(lessl, <, long double) -__ISREL_DEF(lessequalf, <=, float) -__ISREL_DEF(lessequal, <=, double) +__ISREL_DEF(lessequalf, <=, float_t) +__ISREL_DEF(lessequal, <=, double_t) __ISREL_DEF(lessequall, <=, long double) -__ISREL_DEF(lessgreaterf, !=, float) -__ISREL_DEF(lessgreater, !=, double) +__ISREL_DEF(lessgreaterf, !=, float_t) +__ISREL_DEF(lessgreater, !=, double_t) __ISREL_DEF(lessgreaterl, !=, long double) -__ISREL_DEF(greaterf, >, float) -__ISREL_DEF(greater, >, double) +__ISREL_DEF(greaterf, >, float_t) +__ISREL_DEF(greater, >, double_t) __ISREL_DEF(greaterl, >, long double) -__ISREL_DEF(greaterequalf, >=, float) -__ISREL_DEF(greaterequal, >=, double) +__ISREL_DEF(greaterequalf, >=, float_t) +__ISREL_DEF(greaterequal, >=, double_t) __ISREL_DEF(greaterequall, >=, long double) #define __tg_pred_2(x, y, p) ( \ @@ -144,165 +146,6 @@ __ISREL_DEF(greaterequall, >=, long double) #define isgreater(x, y) __tg_pred_2(x, y, __isgreater) #define isgreaterequal(x, y) __tg_pred_2(x, y, __isgreaterequal) -/* Evaluate an expression as the specified type. With standard excess - precision handling a type cast or assignment is enough (with - -ffloat-store an assignment is required, in old compilers argument - passing and return statement may not drop excess precision). */ - -static inline float eval_as_float(float x) -{ - float y = x; - return y; -} - -static inline double eval_as_double(double x) -{ - double y = x; - return y; -} - -/* fp_barrier returns its input, but limits code transformations - as if it had a side-effect (e.g. observable io) and returned - an arbitrary value. */ - -#ifndef fp_barrierf -#define fp_barrierf fp_barrierf -static inline float fp_barrierf(float x) -{ - volatile float y = x; - return y; -} -#endif - -#ifndef fp_barrier -#define fp_barrier fp_barrier -static inline double fp_barrier(double x) -{ - volatile double y = x; - return y; -} -#endif - -#ifndef fp_barrierl -#define fp_barrierl fp_barrierl -static inline long double fp_barrierl(long double x) -{ - volatile long double y = x; - return y; -} -#endif - -/* fp_force_eval ensures that the input value is computed when that's - otherwise unused. To prevent the constant folding of the input - expression, an additional fp_barrier may be needed or a compilation - mode that does so (e.g. -frounding-math in gcc). Then it can be - used to evaluate an expression for its fenv side-effects only. */ - -#ifndef fp_force_evalf -#define fp_force_evalf fp_force_evalf -static inline void fp_force_evalf(float x) -{ - volatile float y; - y = x; -} -#endif - -#ifndef fp_force_eval -#define fp_force_eval fp_force_eval -static inline void fp_force_eval(double x) -{ - volatile double y; - y = x; -} -#endif - -#ifndef fp_force_evall -#define fp_force_evall fp_force_evall -static inline void fp_force_evall(long double x) -{ - volatile long double y; - y = x; -} -#endif - -#define FORCE_EVAL(x) do { \ - if (sizeof(x) == sizeof(float)) { \ - fp_force_evalf(x); \ - } else if (sizeof(x) == sizeof(double)) { \ - fp_force_eval(x); \ - } else { \ - fp_force_evall(x); \ - } \ -} while(0) - -typedef union {float _f; unsigned int _i;}asuint_union; -typedef union {unsigned int _i; float _f;}asfloat_union; -typedef union {double _f; unsigned long long _i;}asuint64_union; -typedef union {unsigned long long _i; double _f;}asdouble_union; -#define asuint(f) ((asuint_union){f})._i -#define asfloat(i) ((asfloat_union){i})._f -#define asuint64(f) ((asuint64_union){f})._i -#define asdouble(i) ((asdouble_union){i})._f - -#define EXTRACT_WORDS(hi,lo,d) \ -do { \ - unsigned long long __u = asuint64(d); \ - (hi) = __u >> 32; \ - (lo) = (unsigned int)__u; \ -} while (0) - -#define GET_HIGH_WORD(hi,d) \ -do { \ - (hi) = asuint64(d) >> 32; \ -} while (0) - -#define GET_LOW_WORD(lo,d) \ -do { \ - (lo) = (unsigned int)asuint64(d); \ -} while (0) - -#define INSERT_WORDS(d,hi,lo) \ -do { \ - (d) = asdouble(((unsigned long long)(hi)<<32) | (unsigned int)(lo)); \ -} while (0) - -#define SET_HIGH_WORD(d,hi) \ - INSERT_WORDS(d, hi, (unsigned int)asuint64(d)) - -#define SET_LOW_WORD(d,lo) \ - INSERT_WORDS(d, asuint64(d)>>32, lo) - -#define GET_FLOAT_WORD(w,d) \ -do { \ - (w) = asuint(d); \ -} while (0) - -#define SET_FLOAT_WORD(d,w) \ -do { \ - (d) = asfloat(w); \ -} while (0) - -int __rem_pio2_large(double*, double*, int, int, int); -int __rem_pio2(double, double*); -double __sin(double, double, int); -double __cos(double, double); -double __tan(double, double, int); - -/* error handling functions */ -float __math_xflowf(unsigned int, float); -float __math_uflowf(unsigned int); -float __math_oflowf(unsigned int); -float __math_divzerof(unsigned int); -float __math_invalidf(float); -double __math_xflow(unsigned int, double); -double __math_uflow(unsigned int); -double __math_oflow(unsigned int); -double __math_divzero(unsigned int); -double __math_invalid(double); -#if LDBL_MANT_DIG != DBL_MANT_DIG -long double __math_invalidl(long double); -#endif - double acos(double); double asin(double); double atan(double); @@ -332,44 +175,9 @@ double sqrt(double); double tan(double); double trunc(double); -#define FLT_TRUE_MIN 1.40129846432481707092e-45F -#define FLT_MIN 1.17549435082228750797e-38F -#define FLT_MAX 3.40282346638528859812e+38F -#define FLT_EPSILON 1.1920928955078125e-07F - -#define FLT_MANT_DIG 24 -#define FLT_MIN_EXP (-125) -#define FLT_MAX_EXP 128 -#define FLT_HAS_SUBNORM 1 - -#define FLT_DIG 6 -#define FLT_DECIMAL_DIG 9 -#define FLT_MIN_10_EXP (-37) -#define FLT_MAX_10_EXP 38 - -#define DBL_TRUE_MIN 4.94065645841246544177e-324 -#define DBL_MIN 2.22507385850720138309e-308 -#define DBL_MAX 1.79769313486231570815e+308 -#define DBL_EPSILON 2.22044604925031308085e-16 - -#define DBL_MANT_DIG 53 -#define DBL_MIN_EXP (-1021) -#define DBL_MAX_EXP 1024 -#define DBL_HAS_SUBNORM 1 - -#define DBL_DIG 15 -#define DBL_DECIMAL_DIG 17 -#define DBL_MIN_10_EXP (-307) -#define DBL_MAX_10_EXP 308 - -#define LDBL_HAS_SUBNORM 1 -#define LDBL_DECIMAL_DIG DBL_DECIMAL_DIG -#define LDBL_MANT_DIG DBL_MANT_DIG -#define LDBL_MAX_EXP DBL_MAX_EXP #undef MAXFLOAT #define MAXFLOAT 3.40282346638528859812e+38F -#define HUGE 3.40282346638528859812e+38F #define M_DEG2RAD 0.017453292519943295 /* pi/180 */ #define M_E 2.7182818284590452354 /* e */ @@ -397,6 +205,32 @@ double trunc(double); // double y1(double); // double yn(int, double); +#define HUGE 3.40282346638528859812e+38F + +// double drem(double, double); +// float dremf(float, float); + +// int finite(double); +// int finitef(float); + +// double scalb(double, double); +// float scalbf(float, float); + +// double significand(double); +// float significandf(float); + +// double lgamma_r(double, int*); +// float lgammaf_r(float, int*); + +// float j0f(float); +// float j1f(float); +// float jnf(int, float); + +// float y0f(float); +// float y1f(float); +// float ynf(int, float); + + #ifdef __cplusplus } #endif diff --git a/3rd/math/src/__cos.c b/3rd/math/src/__cos.c index 42cbd314..46cefb38 100644 --- a/3rd/math/src/__cos.c +++ b/3rd/math/src/__cos.c @@ -48,7 +48,7 @@ * any extra precision in w. */ -#include "math.h" +#include "libm.h" static const double C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ @@ -60,7 +60,7 @@ C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ double __cos(double x, double y) { - double hz,z,r,w; + double_t hz,z,r,w; z = x*x; w = z*z; diff --git a/3rd/math/src/__fpclassify.c b/3rd/math/src/__fpclassify.c index 4a4abaee..4b36fbad 100644 --- a/3rd/math/src/__fpclassify.c +++ b/3rd/math/src/__fpclassify.c @@ -1,8 +1,9 @@ -#include +#include "math.h" +#include int __fpclassify(double x) { - union {double f; unsigned long long i;} u = {x}; + union {double f; uint64_t i;} u = {x}; int e = u.i>>52 & 0x7ff; if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO; if (e==0x7ff) return u.i<<12 ? FP_NAN : FP_INFINITE; diff --git a/3rd/math/src/__fpclassifyf.c b/3rd/math/src/__fpclassifyf.c index 4d5f8e06..fa9cb69d 100644 --- a/3rd/math/src/__fpclassifyf.c +++ b/3rd/math/src/__fpclassifyf.c @@ -1,8 +1,9 @@ -#include +#include "math.h" +#include int __fpclassifyf(float x) { - union {float f; unsigned int i;} u = {x}; + union {float f; uint32_t i;} u = {x}; int e = u.i>>23 & 0xff; if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO; if (e==0xff) return u.i<<9 ? FP_NAN : FP_INFINITE; diff --git a/3rd/math/src/__fpclassifyl.c b/3rd/math/src/__fpclassifyl.c index 2bbf3a72..e41781b6 100644 --- a/3rd/math/src/__fpclassifyl.c +++ b/3rd/math/src/__fpclassifyl.c @@ -1,4 +1,4 @@ -#include "math.h" +#include "libm.h" #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 int __fpclassifyl(long double x) diff --git a/3rd/math/src/__math_divzero.c b/3rd/math/src/__math_divzero.c index b66388bd..59d21350 100644 --- a/3rd/math/src/__math_divzero.c +++ b/3rd/math/src/__math_divzero.c @@ -1,6 +1,6 @@ -#include "math.h" +#include "libm.h" -double __math_divzero(unsigned int sign) +double __math_divzero(uint32_t sign) { return fp_barrier(sign ? -1.0 : 1.0) / 0.0; } diff --git a/3rd/math/src/__math_divzerof.c b/3rd/math/src/__math_divzerof.c index 8d17fe40..ce046f3e 100644 --- a/3rd/math/src/__math_divzerof.c +++ b/3rd/math/src/__math_divzerof.c @@ -1,6 +1,6 @@ -#include "math.h" +#include "libm.h" -float __math_divzerof(unsigned int sign) +float __math_divzerof(uint32_t sign) { return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f; } diff --git a/3rd/math/src/__math_invalid.c b/3rd/math/src/__math_invalid.c index e0d23fa7..17740490 100644 --- a/3rd/math/src/__math_invalid.c +++ b/3rd/math/src/__math_invalid.c @@ -1,4 +1,4 @@ -#include "math.h" +#include "libm.h" double __math_invalid(double x) { diff --git a/3rd/math/src/__math_invalidf.c b/3rd/math/src/__math_invalidf.c index a62de893..357d4b12 100644 --- a/3rd/math/src/__math_invalidf.c +++ b/3rd/math/src/__math_invalidf.c @@ -1,4 +1,4 @@ -#include "math.h" +#include "libm.h" float __math_invalidf(float x) { diff --git a/3rd/math/src/__math_invalidl.c b/3rd/math/src/__math_invalidl.c index 4100067d..7c2edc8c 100644 --- a/3rd/math/src/__math_invalidl.c +++ b/3rd/math/src/__math_invalidl.c @@ -1,4 +1,5 @@ -#include "math.h" +#include "float.h" +#include "libm.h" #if LDBL_MANT_DIG != DBL_MANT_DIG long double __math_invalidl(long double x) diff --git a/3rd/math/src/__math_oflow.c b/3rd/math/src/__math_oflow.c index 84567aaf..c85dbf98 100644 --- a/3rd/math/src/__math_oflow.c +++ b/3rd/math/src/__math_oflow.c @@ -1,6 +1,6 @@ -#include "math.h" +#include "libm.h" -double __math_oflow(unsigned int sign) +double __math_oflow(uint32_t sign) { return __math_xflow(sign, 0x1p769); } diff --git a/3rd/math/src/__math_oflowf.c b/3rd/math/src/__math_oflowf.c index 894aa246..fa7d0620 100644 --- a/3rd/math/src/__math_oflowf.c +++ b/3rd/math/src/__math_oflowf.c @@ -1,6 +1,6 @@ -#include "math.h" +#include "libm.h" -float __math_oflowf(unsigned int sign) +float __math_oflowf(uint32_t sign) { return __math_xflowf(sign, 0x1p97f); } diff --git a/3rd/math/src/__math_uflow.c b/3rd/math/src/__math_uflow.c index a57293fd..b90594ae 100644 --- a/3rd/math/src/__math_uflow.c +++ b/3rd/math/src/__math_uflow.c @@ -1,6 +1,6 @@ -#include "math.h" +#include "libm.h" -double __math_uflow(unsigned int sign) +double __math_uflow(uint32_t sign) { return __math_xflow(sign, 0x1p-767); } diff --git a/3rd/math/src/__math_uflowf.c b/3rd/math/src/__math_uflowf.c index e28fe668..94d50f2b 100644 --- a/3rd/math/src/__math_uflowf.c +++ b/3rd/math/src/__math_uflowf.c @@ -1,6 +1,6 @@ -#include "math.h" +#include "libm.h" -float __math_uflowf(unsigned int sign) +float __math_uflowf(uint32_t sign) { return __math_xflowf(sign, 0x1p-95f); } diff --git a/3rd/math/src/__math_xflow.c b/3rd/math/src/__math_xflow.c index 716b7b8e..744203c4 100644 --- a/3rd/math/src/__math_xflow.c +++ b/3rd/math/src/__math_xflow.c @@ -1,6 +1,6 @@ -#include "math.h" +#include "libm.h" -double __math_xflow(unsigned int sign, double y) +double __math_xflow(uint32_t sign, double y) { return eval_as_double(fp_barrier(sign ? -y : y) * y); } diff --git a/3rd/math/src/__math_xflowf.c b/3rd/math/src/__math_xflowf.c index 5a002606..f2c84784 100644 --- a/3rd/math/src/__math_xflowf.c +++ b/3rd/math/src/__math_xflowf.c @@ -1,6 +1,6 @@ -#include "math.h" +#include "libm.h" -float __math_xflowf(unsigned int sign, float y) +float __math_xflowf(uint32_t sign, float y) { return eval_as_float(fp_barrierf(sign ? -y : y) * y); } diff --git a/3rd/math/src/__rem_pio2.c b/3rd/math/src/__rem_pio2.c index e7c920b4..dcf672fb 100644 --- a/3rd/math/src/__rem_pio2.c +++ b/3rd/math/src/__rem_pio2.c @@ -17,7 +17,7 @@ * use __rem_pio2_large() for large x */ -#include "math.h" +#include "libm.h" #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 #define EPS DBL_EPSILON @@ -48,10 +48,10 @@ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ /* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ int __rem_pio2(double x, double *y) { - union {double f; unsigned long long i;} u = {x}; - double z,w,t,r,fn; + union {double f; uint64_t i;} u = {x}; + double_t z,w,t,r,fn; double tx[3],ty[2]; - unsigned int ix; + uint32_t ix; int sign, n, ex, ey, i; sign = u.i>>63; @@ -119,8 +119,8 @@ int __rem_pio2(double x, double *y) if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ medium: /* rint(x/(pi/2)) */ - fn = (double)x*invpio2 + toint - toint; - n = (int)fn; + fn = (double_t)x*invpio2 + toint - toint; + n = (int32_t)fn; r = x - fn*pio2_1; w = fn*pio2_1t; /* 1st round, good to 85 bits */ /* Matters with directed rounding. */ @@ -167,11 +167,11 @@ medium: } /* set z = scalbn(|x|,-ilogb(x)+23) */ u.f = x; - u.i &= (unsigned long long)-1>>12; - u.i |= (unsigned long long)(0x3ff + 23)<<52; + u.i &= (uint64_t)-1>>12; + u.i |= (uint64_t)(0x3ff + 23)<<52; z = u.f; for (i=0; i < 2; i++) { - tx[i] = (double)(int)z; + tx[i] = (double)(int32_t)z; z = (z-tx[i])*0x1p24; } tx[i] = z; diff --git a/3rd/math/src/__rem_pio2_large.c b/3rd/math/src/__rem_pio2_large.c index d971e905..958f28c2 100644 --- a/3rd/math/src/__rem_pio2_large.c +++ b/3rd/math/src/__rem_pio2_large.c @@ -122,7 +122,7 @@ * to produce the hexadecimal values shown. */ -#include "math.h" +#include "libm.h" static const int init_jk[] = {3,4,4,6}; /* initial value for jk */ @@ -138,7 +138,7 @@ static const int init_jk[] = {3,4,4,6}; /* initial value for jk */ * NB: This table must have at least (e0-3)/24 + jk terms. * For quad precision (e0 <= 16360, jk = 6), this is 686. */ -static const int ipio2[] = { +static const int32_t ipio2[] = { 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, @@ -272,7 +272,7 @@ static const double PIo2[] = { int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec) { - int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; + int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; double z,fw,f[20],fq[20],q[20]; /* initialize jk*/ @@ -300,15 +300,15 @@ int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec) recompute: /* distill q[] into iq[] reversingly */ for (i=0,j=jz,z=q[jz]; j>0; i++,j--) { - fw = (double)(int)(0x1p-24*z); - iq[i] = (int)(z - 0x1p24*fw); + fw = (double)(int32_t)(0x1p-24*z); + iq[i] = (int32_t)(z - 0x1p24*fw); z = q[j-1]+fw; } /* compute n */ z = scalbn(z,q0); /* actual value of z */ z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ - n = (int)z; + n = (int32_t)z; z -= (double)n; ih = 0; if (q0 > 0) { /* need iq[jz-1] to determine n */ @@ -375,13 +375,13 @@ recompute: } else { /* break z into 24-bit if necessary */ z = scalbn(z,-q0); if (z >= 0x1p24) { - fw = (double)(int)(0x1p-24*z); - iq[jz] = (int)(z - 0x1p24*fw); + fw = (double)(int32_t)(0x1p-24*z); + iq[jz] = (int32_t)(z - 0x1p24*fw); jz += 1; q0 += 24; - iq[jz] = (int)fw; + iq[jz] = (int32_t)fw; } else - iq[jz] = (int)z; + iq[jz] = (int32_t)z; } /* convert integer "bit" chunk to floating-point value */ @@ -411,7 +411,7 @@ recompute: fw = 0.0; for (i=jz; i>=0; i--) fw += fq[i]; - // TODO: drop excess precision here once double is used + // TODO: drop excess precision here once double_t is used fw = (double)fw; y[0] = ih==0 ? fw : -fw; fw = fq[0]-fw; diff --git a/3rd/math/src/__signbit.c b/3rd/math/src/__signbit.c index f8249cfc..e700b6b7 100644 --- a/3rd/math/src/__signbit.c +++ b/3rd/math/src/__signbit.c @@ -1,11 +1,11 @@ -#include "math.h" +#include "libm.h" // FIXME: macro in math.h int __signbit(double x) { union { double d; - unsigned long long i; + uint64_t i; } y = { x }; return y.i>>63; } diff --git a/3rd/math/src/__signbitf.c b/3rd/math/src/__signbitf.c index 86da0563..40ad3cfd 100644 --- a/3rd/math/src/__signbitf.c +++ b/3rd/math/src/__signbitf.c @@ -1,11 +1,11 @@ -#include "math.h" +#include "libm.h" // FIXME: macro in math.h int __signbitf(float x) { union { float f; - unsigned int i; + uint32_t i; } y = { x }; return y.i>>31; } diff --git a/3rd/math/src/__signbitl.c b/3rd/math/src/__signbitl.c index d1239df7..63b3dc5a 100644 --- a/3rd/math/src/__signbitl.c +++ b/3rd/math/src/__signbitl.c @@ -1,4 +1,4 @@ -#include "math.h" +#include "libm.h" #if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 int __signbitl(long double x) diff --git a/3rd/math/src/__sin.c b/3rd/math/src/__sin.c index 437783f2..40309496 100644 --- a/3rd/math/src/__sin.c +++ b/3rd/math/src/__sin.c @@ -39,7 +39,7 @@ * sin(x) = x + (S1*x + (x *(r-y/2)+y)) */ -#include "math.h" +#include "libm.h" static const double S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ @@ -51,7 +51,7 @@ S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ double __sin(double x, double y, int iy) { - double z,r,v,w; + double_t z,r,v,w; z = x*x; w = z*z; diff --git a/3rd/math/src/__tan.c b/3rd/math/src/__tan.c index 6e5a938b..8019844d 100644 --- a/3rd/math/src/__tan.c +++ b/3rd/math/src/__tan.c @@ -43,7 +43,7 @@ * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) */ -#include "math.h" +#include "libm.h" static const double T[] = { 3.33333333333334091986e-01, /* 3FD55555, 55555563 */ @@ -65,9 +65,9 @@ pio4lo = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ double __tan(double x, double y, int odd) { - double z, r, v, w, s, a; + double_t z, r, v, w, s, a; double w0, a0; - unsigned int hx; + uint32_t hx; int big, sign; GET_HIGH_WORD(hx,x); diff --git a/3rd/math/src/acos.c b/3rd/math/src/acos.c index 8b85c60e..ea9c87bf 100644 --- a/3rd/math/src/acos.c +++ b/3rd/math/src/acos.c @@ -33,7 +33,7 @@ * Function needed: sqrt */ -#include "math.h" +#include "libm.h" static const double pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ @@ -51,7 +51,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ static double R(double z) { - double p, q; + double_t p, q; p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); return p/q; @@ -60,13 +60,13 @@ static double R(double z) double acos(double x) { double z,w,s,c,df; - unsigned int hx,ix; + uint32_t hx,ix; GET_HIGH_WORD(hx, x); ix = hx & 0x7fffffff; /* |x| >= 1 or nan */ if (ix >= 0x3ff00000) { - unsigned int lx; + uint32_t lx; GET_LOW_WORD(lx,x); if ((ix-0x3ff00000 | lx) == 0) { diff --git a/3rd/math/src/asin.c b/3rd/math/src/asin.c index 61b6c0fc..c926b188 100644 --- a/3rd/math/src/asin.c +++ b/3rd/math/src/asin.c @@ -39,7 +39,7 @@ * */ -#include "math.h" +#include "libm.h" static const double pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */ @@ -58,7 +58,7 @@ qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ static double R(double z) { - double p, q; + double_t p, q; p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5))))); q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4))); return p/q; @@ -67,13 +67,13 @@ static double R(double z) double asin(double x) { double z,r,s; - unsigned int hx,ix; + uint32_t hx,ix; GET_HIGH_WORD(hx, x); ix = hx & 0x7fffffff; /* |x| >= 1 or nan */ if (ix >= 0x3ff00000) { - unsigned int lx; + uint32_t lx; GET_LOW_WORD(lx, x); if ((ix-0x3ff00000 | lx) == 0) /* asin(1) = +-pi/2 with inexact */ diff --git a/3rd/math/src/atan.c b/3rd/math/src/atan.c index fa9ef41e..63b0ab25 100644 --- a/3rd/math/src/atan.c +++ b/3rd/math/src/atan.c @@ -30,7 +30,7 @@ */ -#include "math.h" +#include "libm.h" static const double atanhi[] = { 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */ @@ -62,8 +62,8 @@ static const double aT[] = { double atan(double x) { - double w,s1,s2,z; - unsigned int ix,sign; + double_t w,s1,s2,z; + uint32_t ix,sign; int id; GET_HIGH_WORD(ix, x); diff --git a/3rd/math/src/atan2.c b/3rd/math/src/atan2.c index 9d7a7a89..5a1903c6 100644 --- a/3rd/math/src/atan2.c +++ b/3rd/math/src/atan2.c @@ -37,7 +37,7 @@ * to produce the hexadecimal values shown. */ -#include "math.h" +#include "libm.h" static const double pi = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */ @@ -46,7 +46,7 @@ pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ double atan2(double y, double x) { double z; - unsigned int m,lx,ly,ix,iy; + uint32_t m,lx,ly,ix,iy; if (isnan(x) || isnan(y)) return x+y; diff --git a/3rd/math/src/cbrt.c b/3rd/math/src/cbrt.c index a6a00a40..5a15e9c3 100644 --- a/3rd/math/src/cbrt.c +++ b/3rd/math/src/cbrt.c @@ -15,9 +15,10 @@ * Return cube root of x */ -#include +#include "math.h" +#include -static const unsigned int +static const uint32_t B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */ B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */ @@ -31,9 +32,9 @@ P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ double cbrt(double x) { - union {double f; unsigned long long i;} u = {x}; - double r,s,t,w; - unsigned int hx = u.i>>32 & 0x7fffffff; + union {double f; uint64_t i;} u = {x}; + double_t r,s,t,w; + uint32_t hx = u.i>>32 & 0x7fffffff; if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */ return x+x; @@ -62,7 +63,7 @@ double cbrt(double x) } else hx = hx/3 + B1; u.i &= 1ULL<<63; - u.i |= (unsigned long long)hx << 32; + u.i |= (uint64_t)hx << 32; t = u.f; /* diff --git a/3rd/math/src/ceil.c b/3rd/math/src/ceil.c index 077589ea..b13e6f2d 100644 --- a/3rd/math/src/ceil.c +++ b/3rd/math/src/ceil.c @@ -1,17 +1,17 @@ -#include "math.h" +#include "libm.h" #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 #define EPS DBL_EPSILON #elif FLT_EVAL_METHOD==2 #define EPS LDBL_EPSILON #endif -static const double toint = 1/EPS; +static const double_t toint = 1/EPS; double ceil(double x) { - union {double f; unsigned long long i;} u = {x}; + union {double f; uint64_t i;} u = {x}; int e = u.i >> 52 & 0x7ff; - double y; + double_t y; if (e >= 0x3ff+52 || x == 0) return x; diff --git a/3rd/math/src/cos.c b/3rd/math/src/cos.c index 883cb52c..ee97f68b 100644 --- a/3rd/math/src/cos.c +++ b/3rd/math/src/cos.c @@ -40,12 +40,12 @@ * TRIG(x) returns trig(x) nearly rounded */ -#include "math.h" +#include "libm.h" double cos(double x) { double y[2]; - unsigned int ix; + uint32_t ix; unsigned n; GET_HIGH_WORD(ix, x); diff --git a/3rd/math/src/degrees.c b/3rd/math/src/degrees.c index 970586d5..32f49d9a 100644 --- a/3rd/math/src/degrees.c +++ b/3rd/math/src/degrees.c @@ -1,4 +1,4 @@ -#include +#include "libm.h" double degrees(double x) { diff --git a/3rd/math/src/exp.c b/3rd/math/src/exp.c index 52a1276c..c0e0c688 100644 --- a/3rd/math/src/exp.c +++ b/3rd/math/src/exp.c @@ -5,7 +5,9 @@ * SPDX-License-Identifier: MIT */ -#include +#include "math.h" +#include +#include "libm.h" #include "exp_data.h" #define N (1 << EXP_TABLE_BITS) @@ -23,12 +25,12 @@ is scale*(1+TMP) without intermediate rounding. The bit representation of scale is in SBITS, however it has a computed exponent that may have overflown into the sign bit so that needs to be adjusted before using it as - a double. (int)KI is the k used in the argument reduction and exponent + a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ -static inline double specialcase(double tmp, unsigned long long sbits, unsigned long long ki) +static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) { - double scale, y; + double_t scale, y; if ((ki & 0x80000000) == 0) { /* k > 0, the exponent of scale might have overflowed by <= 460. */ @@ -46,7 +48,7 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned range to avoid double rounding that can cause 0.5+E/2 ulp error where E is the worst-case ulp error outside the subnormal range. So this is only useful if the goal is better than 1 ulp worst-case error. */ - double hi, lo; + double_t hi, lo; lo = scale - y + scale * tmp; hi = 1.0 + y; lo = 1.0 - hi + y + lo; @@ -62,16 +64,16 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned } /* Top 12 bits of a double (sign and exponent bits). */ -static inline unsigned int top12(double x) +static inline uint32_t top12(double x) { return asuint64(x) >> 52; } double exp(double x) { - unsigned int abstop; - unsigned long long ki, idx, top, sbits; - double kd, z, r, r2, scale, tail, tmp; + uint32_t abstop; + uint64_t ki, idx, top, sbits; + double_t kd, z, r, r2, scale, tail, tmp; abstop = top12(x) & 0x7ff; if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) { @@ -103,7 +105,7 @@ double exp(double x) /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ kd = eval_as_double(z + Shift); ki = asuint64(kd) >> 16; - kd = (double)(int)ki; + kd = (double_t)(int32_t)ki; #else /* z - kd is in [-1, 1] in non-nearest rounding modes. */ kd = eval_as_double(z + Shift); diff --git a/3rd/math/src/exp_data.h b/3rd/math/src/exp_data.h index 67fae3de..01289c1f 100644 --- a/3rd/math/src/exp_data.h +++ b/3rd/math/src/exp_data.h @@ -5,13 +5,14 @@ #ifndef _EXP_DATA_H #define _EXP_DATA_H -#include +#include "features/features.h" +#include #define EXP_TABLE_BITS 7 #define EXP_POLY_ORDER 5 #define EXP_USE_TOINT_NARROW 0 #define EXP2_POLY_ORDER 5 -extern const struct exp_data { +extern hidden const struct exp_data { double invln2N; double shift; double negln2hiN; @@ -19,7 +20,7 @@ extern const struct exp_data { double poly[4]; /* Last four coefficients. */ double exp2_shift; double exp2_poly[EXP2_POLY_ORDER]; - unsigned long long tab[2*(1 << EXP_TABLE_BITS)]; + uint64_t tab[2*(1 << EXP_TABLE_BITS)]; } __exp_data; #endif diff --git a/3rd/math/src/fabs.c b/3rd/math/src/fabs.c index fc108938..622e86ef 100644 --- a/3rd/math/src/fabs.c +++ b/3rd/math/src/fabs.c @@ -1,8 +1,9 @@ -#include +#include "math.h" +#include double fabs(double x) { - union {double f; unsigned long long i;} u = {x}; + union {double f; uint64_t i;} u = {x}; u.i &= ULLONG_NSHIFT/2; return u.f; } diff --git a/3rd/math/src/factorial.c b/3rd/math/src/factorial.c index 212d1034..096cc44e 100644 --- a/3rd/math/src/factorial.c +++ b/3rd/math/src/factorial.c @@ -1,4 +1,4 @@ -#include +#include "libm.h" int factorial(int n) { diff --git a/3rd/math/src/floor.c b/3rd/math/src/floor.c index 73f59faa..14a31cd8 100644 --- a/3rd/math/src/floor.c +++ b/3rd/math/src/floor.c @@ -1,17 +1,17 @@ -#include "math.h" +#include "libm.h" #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 #define EPS DBL_EPSILON #elif FLT_EVAL_METHOD==2 #define EPS LDBL_EPSILON #endif -static const double toint = 1/EPS; +static const double_t toint = 1/EPS; double floor(double x) { - union {double f; unsigned long long i;} u = {x}; + union {double f; uint64_t i;} u = {x}; int e = u.i >> 52 & 0x7ff; - double y; + double_t y; if (e >= 0x3ff+52 || x == 0) return x; diff --git a/3rd/math/src/fmax.c b/3rd/math/src/fmax.c index 94f0caa1..b6a7d064 100644 --- a/3rd/math/src/fmax.c +++ b/3rd/math/src/fmax.c @@ -1,4 +1,4 @@ -#include +#include "math.h" double fmax(double x, double y) { diff --git a/3rd/math/src/fmin.c b/3rd/math/src/fmin.c index 08a8fd17..c835abb4 100644 --- a/3rd/math/src/fmin.c +++ b/3rd/math/src/fmin.c @@ -1,4 +1,4 @@ -#include +#include "math.h" double fmin(double x, double y) { diff --git a/3rd/math/src/fmod.c b/3rd/math/src/fmod.c index 6cf0f9a5..9eff0442 100644 --- a/3rd/math/src/fmod.c +++ b/3rd/math/src/fmod.c @@ -1,16 +1,17 @@ -#include +#include "math.h" +#include double fmod(double x, double y) { - union {double f; unsigned long long i;} ux = {x}, uy = {y}; + union {double f; uint64_t i;} ux = {x}, uy = {y}; int ex = ux.i>>52 & 0x7ff; int ey = uy.i>>52 & 0x7ff; int sx = ux.i>>63; - unsigned long long i; + uint64_t i; /* in the followings uxi should be ux.i, but then gcc wrongly adds */ /* float load/store to inner loops ruining performance and code size */ - unsigned long long uxi = ux.i; + uint64_t uxi = ux.i; if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff) return (x*y)/(x*y); @@ -57,11 +58,11 @@ double fmod(double x, double y) /* scale result */ if (ex > 0) { uxi -= 1ULL << 52; - uxi |= (unsigned long long)ex << 52; + uxi |= (uint64_t)ex << 52; } else { uxi >>= -ex + 1; } - uxi |= (unsigned long long)sx << 63; + uxi |= (uint64_t)sx << 63; ux.i = uxi; return ux.f; } diff --git a/3rd/math/src/fsum.c b/3rd/math/src/fsum.c index 3bb83a91..b2cd1e43 100644 --- a/3rd/math/src/fsum.c +++ b/3rd/math/src/fsum.c @@ -1,4 +1,4 @@ -#include +#include "libm.h" double fsum(double* aptr, int n) { diff --git a/3rd/math/src/gcd.c b/3rd/math/src/gcd.c index 600977c7..47043ab9 100644 --- a/3rd/math/src/gcd.c +++ b/3rd/math/src/gcd.c @@ -1,4 +1,4 @@ -#include +#include "libm.h" int gcd(int a, int b) { diff --git a/3rd/math/src/log.c b/3rd/math/src/log.c index 80492244..1d370275 100644 --- a/3rd/math/src/log.c +++ b/3rd/math/src/log.c @@ -5,7 +5,9 @@ * SPDX-License-Identifier: MIT */ -#include +#include "math.h" +#include +#include "libm.h" #include "log_data.h" #define T __log_data.tab @@ -18,16 +20,16 @@ #define OFF 0x3fe6000000000000 /* Top 16 bits of a double. */ -static inline unsigned int top16(double x) +static inline uint32_t top16(double x) { return asuint64(x) >> 48; } double log(double x) { - double w, z, r, r2, r3, y, invc, logc, kd, hi, lo; - unsigned long long ix, iz, tmp; - unsigned int top; + double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo; + uint64_t ix, iz, tmp; + uint32_t top; int k, i; ix = asuint64(x); @@ -48,8 +50,8 @@ double log(double x) r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); /* Worst-case error is around 0.507 ULP. */ w = r * 0x1p27; - double rhi = r + w - w; - double rlo = r - rhi; + double_t rhi = r + w - w; + double_t rlo = r - rhi; w = rhi * rhi * B[0]; /* B[0] == -0.5. */ hi = r + w; lo = r - hi + w; @@ -76,7 +78,7 @@ double log(double x) The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (52 - LOG_TABLE_BITS)) % N; - k = (long long)tmp >> 52; /* arithmetic shift */ + k = (int64_t)tmp >> 52; /* arithmetic shift */ iz = ix - (tmp & 0xfffULL << 52); invc = T[i].invc; logc = T[i].logc; @@ -91,7 +93,7 @@ double log(double x) /* rounding error: 0x1p-55/N + 0x1p-66. */ r = (z - T2[i].chi - T2[i].clo) * invc; #endif - kd = (double)k; + kd = (double_t)k; /* hi + lo = r + log(c) + k*Ln2. */ w = kd * Ln2hi + logc; diff --git a/3rd/math/src/log10.c b/3rd/math/src/log10.c index cf77f3c5..a4b8f45d 100644 --- a/3rd/math/src/log10.c +++ b/3rd/math/src/log10.c @@ -17,7 +17,8 @@ * log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2) */ -#include +#include "math.h" +#include static const double ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */ @@ -34,9 +35,9 @@ Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ double log10(double x) { - union {double f; unsigned long long i;} u = {x}; - double hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo; - unsigned int hx; + union {double f; uint64_t i;} u = {x}; + double_t hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo; + uint32_t hx; int k; hx = u.i>>32; @@ -60,7 +61,7 @@ double log10(double x) hx += 0x3ff00000 - 0x3fe6a09e; k += (int)(hx>>20) - 0x3ff; hx = (hx&0x000fffff) + 0x3fe6a09e; - u.i = (unsigned long long)hx<<32 | (u.i&0xffffffff); + u.i = (uint64_t)hx<<32 | (u.i&0xffffffff); x = u.f; f = x - 1.0; @@ -76,7 +77,7 @@ double log10(double x) /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ hi = f - hfsq; u.f = hi; - u.i &= (unsigned long long)-1<<32; + u.i &= (uint64_t)-1<<32; hi = u.f; lo = f - hi - hfsq + s*(hfsq+R); diff --git a/3rd/math/src/log2.c b/3rd/math/src/log2.c index fbe56e30..36fb4643 100644 --- a/3rd/math/src/log2.c +++ b/3rd/math/src/log2.c @@ -5,7 +5,9 @@ * SPDX-License-Identifier: MIT */ -#include +#include "math.h" +#include +#include "libm.h" #include "log2_data.h" #define T __log2_data.tab @@ -18,16 +20,16 @@ #define OFF 0x3fe6000000000000 /* Top 16 bits of a double. */ -static inline unsigned int top16(double x) +static inline uint32_t top16(double x) { return asuint64(x) >> 48; } double log2(double x) { - double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p; - unsigned long long ix, iz, tmp; - unsigned int top; + double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p; + uint64_t ix, iz, tmp; + uint32_t top; int k, i; ix = asuint64(x); @@ -44,7 +46,7 @@ double log2(double x) hi = r * InvLn2hi; lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi); #else - double rhi, rlo; + double_t rhi, rlo; rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32); rlo = r - rhi; hi = rhi * InvLn2hi; @@ -79,12 +81,12 @@ double log2(double x) The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (52 - LOG2_TABLE_BITS)) % N; - k = (long long)tmp >> 52; /* arithmetic shift */ + k = (int64_t)tmp >> 52; /* arithmetic shift */ iz = ix - (tmp & 0xfffULL << 52); invc = T[i].invc; logc = T[i].logc; z = asdouble(iz); - kd = (double)k; + kd = (double_t)k; /* log2(x) = log2(z/c) + log2(c) + k. */ /* r ~= z/c - 1, |r| < 1/(2*N). */ @@ -94,7 +96,7 @@ double log2(double x) t1 = r * InvLn2hi; t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1); #else - double rhi, rlo; + double_t rhi, rlo; /* rounding error: 0x1p-55/N + 0x1p-65. */ r = (z - T2[i].chi - T2[i].clo) * invc; rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32); diff --git a/3rd/math/src/log2_data.h b/3rd/math/src/log2_data.h index 2c047b8f..fb95d2ff 100644 --- a/3rd/math/src/log2_data.h +++ b/3rd/math/src/log2_data.h @@ -5,11 +5,12 @@ #ifndef _LOG2_DATA_H #define _LOG2_DATA_H +#include "features/features.h" #define LOG2_TABLE_BITS 6 #define LOG2_POLY_ORDER 7 #define LOG2_POLY1_ORDER 11 -extern const struct log2_data { +extern hidden const struct log2_data { double invln2hi; double invln2lo; double poly[LOG2_POLY_ORDER - 1]; diff --git a/3rd/math/src/log_data.h b/3rd/math/src/log_data.h index fd0f8d4c..4ff36084 100644 --- a/3rd/math/src/log_data.h +++ b/3rd/math/src/log_data.h @@ -5,11 +5,12 @@ #ifndef _LOG_DATA_H #define _LOG_DATA_H +#include "features/features.h" #define LOG_TABLE_BITS 7 #define LOG_POLY_ORDER 6 #define LOG_POLY1_ORDER 12 -extern const struct log_data { +extern hidden const struct log_data { double ln2hi; double ln2lo; double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ diff --git a/3rd/math/src/modf.c b/3rd/math/src/modf.c index 1eca3bea..055fd124 100644 --- a/3rd/math/src/modf.c +++ b/3rd/math/src/modf.c @@ -1,9 +1,9 @@ -#include "math.h" +#include "libm.h" double modf(double x, double *iptr) { - union {double f; unsigned long long i;} u = {x}; - unsigned long long mask; + union {double f; uint64_t i;} u = {x}; + uint64_t mask; int e = (int)(u.i>>52 & 0x7ff) - 0x3ff; /* no fractional part */ diff --git a/3rd/math/src/pow.c b/3rd/math/src/pow.c index 63dbd64a..ed114059 100644 --- a/3rd/math/src/pow.c +++ b/3rd/math/src/pow.c @@ -5,7 +5,9 @@ * SPDX-License-Identifier: MIT */ -#include +#include "math.h" +#include +#include "libm.h" #include "exp_data.h" #include "pow_data.h" @@ -23,7 +25,7 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma) #define OFF 0x3fe6955500000000 /* Top 12 bits of a double (sign and exponent bits). */ -static inline unsigned int top12(double x) +static inline uint32_t top12(double x) { return asuint64(x) >> 52; } @@ -31,11 +33,11 @@ static inline unsigned int top12(double x) /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about additional 15 bits precision. IX is the bit representation of x, but normalized in the subnormal range using the sign bit for the exponent. */ -static inline double log_inline(unsigned long long ix, double *tail) +static inline double_t log_inline(uint64_t ix, double_t *tail) { - /* double for better performance on targets with FLT_EVAL_METHOD==2. */ - double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p; - unsigned long long iz, tmp; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p; + uint64_t iz, tmp; int k, i; /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. @@ -43,10 +45,10 @@ static inline double log_inline(unsigned long long ix, double *tail) The ith subinterval contains z and c is near its center. */ tmp = ix - OFF; i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N; - k = (long long)tmp >> 52; /* arithmetic shift */ + k = (int64_t)tmp >> 52; /* arithmetic shift */ iz = ix - (tmp & 0xfffULL << 52); z = asdouble(iz); - kd = (double)k; + kd = (double_t)k; /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ invc = T[i].invc; @@ -59,10 +61,10 @@ static inline double log_inline(unsigned long long ix, double *tail) r = __builtin_fma(z, invc, -1.0); #else /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */ - double zhi = asdouble((iz + (1ULL << 31)) & (ULLONG_NSHIFT << 32)); - double zlo = z - zhi; - double rhi = zhi * invc - 1.0; - double rlo = zlo * invc; + double_t zhi = asdouble((iz + (1ULL << 31)) & (ULLONG_NSHIFT << 32)); + double_t zlo = z - zhi; + double_t rhi = zhi * invc - 1.0; + double_t rlo = zlo * invc; r = rhi + rlo; #endif @@ -73,7 +75,7 @@ static inline double log_inline(unsigned long long ix, double *tail) lo2 = t1 - t2 + r; /* Evaluation is optimized assuming superscalar pipelined execution. */ - double ar, ar2, ar3, lo3, lo4; + double_t ar, ar2, ar3, lo3, lo4; ar = A[0] * r; /* A[0] = -0.5. */ ar2 = r * ar; ar3 = r * ar2; @@ -83,8 +85,8 @@ static inline double log_inline(unsigned long long ix, double *tail) lo3 = __builtin_fma(ar, r, -ar2); lo4 = t2 - hi + ar2; #else - double arhi = A[0] * rhi; - double arhi2 = rhi * arhi; + double_t arhi = A[0] * rhi; + double_t arhi2 = rhi * arhi; hi = t2 + arhi2; lo3 = rlo * (ar + arhi); lo4 = t2 - hi + arhi2; @@ -116,12 +118,12 @@ static inline double log_inline(unsigned long long ix, double *tail) is scale*(1+TMP) without intermediate rounding. The bit representation of scale is in SBITS, however it has a computed exponent that may have overflown into the sign bit so that needs to be adjusted before using it as - a double. (int)KI is the k used in the argument reduction and exponent + a double. (int32_t)KI is the k used in the argument reduction and exponent adjustment of scale, positive k here means the result may overflow and negative k means the result may underflow. */ -static inline double specialcase(double tmp, unsigned long long sbits, unsigned long long ki) +static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) { - double scale, y; + double_t scale, y; if ((ki & 0x80000000) == 0) { /* k > 0, the exponent of scale might have overflowed by <= 460. */ @@ -140,7 +142,7 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned range to avoid double rounding that can cause 0.5+E/2 ulp error where E is the worst-case ulp error outside the subnormal range. So this is only useful if the goal is better than 1 ulp worst-case error. */ - double hi, lo, one = 1.0; + double_t hi, lo, one = 1.0; if (y < 0.0) one = -1.0; lo = scale - y + scale * tmp; @@ -161,12 +163,12 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */ -static inline double exp_inline(double x, double xtail, unsigned int sign_bias) +static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias) { - unsigned int abstop; - unsigned long long ki, idx, top, sbits; - /* double for better performance on targets with FLT_EVAL_METHOD==2. */ - double kd, z, r, r2, scale, tail, tmp; + uint32_t abstop; + uint64_t ki, idx, top, sbits; + /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ + double_t kd, z, r, r2, scale, tail, tmp; abstop = top12(x) & 0x7ff; if (predict_false(abstop - top12(0x1p-54) >= @@ -174,7 +176,7 @@ static inline double exp_inline(double x, double xtail, unsigned int sign_bias) if (abstop - top12(0x1p-54) >= 0x80000000) { /* Avoid spurious underflow for tiny x. */ /* Note: 0 is common input. */ - double one = WANT_ROUNDING ? 1.0 + x : 1.0; + double_t one = WANT_ROUNDING ? 1.0 + x : 1.0; return sign_bias ? -one : one; } if (abstop >= top12(1024.0)) { @@ -198,7 +200,7 @@ static inline double exp_inline(double x, double xtail, unsigned int sign_bias) /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ kd = eval_as_double(z + Shift); ki = asuint64(kd) >> 16; - kd = (double)(int)ki; + kd = (double_t)(int32_t)ki; #else /* z - kd is in [-1, 1] in non-nearest rounding modes. */ kd = eval_as_double(z + Shift); @@ -230,7 +232,7 @@ static inline double exp_inline(double x, double xtail, unsigned int sign_bias) /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is the bit representation of a non-zero finite floating-point value. */ -static inline int checkint(unsigned long long iy) +static inline int checkint(uint64_t iy) { int e = iy >> 52 & 0x7ff; if (e < 0x3ff) @@ -245,16 +247,16 @@ static inline int checkint(unsigned long long iy) } /* Returns 1 if input is the bit representation of 0, infinity or nan. */ -static inline int zeroinfnan(unsigned long long i) +static inline int zeroinfnan(uint64_t i) { return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1; } double pow(double x, double y) { - unsigned int sign_bias = 0; - unsigned long long ix, iy; - unsigned int topx, topy; + uint32_t sign_bias = 0; + uint64_t ix, iy; + uint32_t topx, topy; ix = asuint64(x); iy = asuint64(y); @@ -281,7 +283,7 @@ double pow(double x, double y) return y * y; } if (predict_false(zeroinfnan(ix))) { - double x2 = x * x; + double_t x2 = x * x; if (ix >> 63 && checkint(iy) == 1) x2 = -x2; /* Without the barrier some versions of clang hoist the 1/x2 and @@ -323,17 +325,17 @@ double pow(double x, double y) } } - double lo; - double hi = log_inline(ix, &lo); - double ehi, elo; + double_t lo; + double_t hi = log_inline(ix, &lo); + double_t ehi, elo; #if __FP_FAST_FMA ehi = y * hi; elo = y * lo + __builtin_fma(y, hi, -ehi); #else - double yhi = asdouble(iy & ULLONG_NSHIFT << 27); - double ylo = y - yhi; - double lhi = asdouble(asuint64(hi) & ULLONG_NSHIFT << 27); - double llo = hi - lhi + lo; + double_t yhi = asdouble(iy & ULLONG_NSHIFT << 27); + double_t ylo = y - yhi; + double_t lhi = asdouble(asuint64(hi) & ULLONG_NSHIFT << 27); + double_t llo = hi - lhi + lo; ehi = yhi * lhi; elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */ #endif diff --git a/3rd/math/src/pow_data.h b/3rd/math/src/pow_data.h index ab23e059..18bd1099 100644 --- a/3rd/math/src/pow_data.h +++ b/3rd/math/src/pow_data.h @@ -5,10 +5,11 @@ #ifndef _POW_DATA_H #define _POW_DATA_H +#include "features/features.h" #define POW_LOG_TABLE_BITS 7 #define POW_LOG_POLY_ORDER 8 -extern const struct pow_log_data { +extern hidden const struct pow_log_data { double ln2hi; double ln2lo; double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */ diff --git a/3rd/math/src/radians.c b/3rd/math/src/radians.c index 06268cde..f8ca0c19 100644 --- a/3rd/math/src/radians.c +++ b/3rd/math/src/radians.c @@ -1,4 +1,4 @@ -#include +#include "libm.h" double radians(double x) { diff --git a/3rd/math/src/scalbn.c b/3rd/math/src/scalbn.c index fc2407c5..976bfe05 100644 --- a/3rd/math/src/scalbn.c +++ b/3rd/math/src/scalbn.c @@ -1,9 +1,10 @@ -#include +#include "math.h" +#include double scalbn(double x, int n) { - union {double f; unsigned long long i;} u; - double y = x; + union {double f; uint64_t i;} u; + double_t y = x; if (n > 1023) { y *= 0x1p1023; @@ -26,7 +27,7 @@ double scalbn(double x, int n) n = -1022; } } - u.i = (unsigned long long)(0x3ff+n)<<52; + u.i = (uint64_t)(0x3ff+n)<<52; x = y * u.f; return x; } diff --git a/3rd/math/src/sin.c b/3rd/math/src/sin.c index 7d905b07..055e215b 100644 --- a/3rd/math/src/sin.c +++ b/3rd/math/src/sin.c @@ -40,12 +40,12 @@ * TRIG(x) returns trig(x) nearly rounded */ -#include "math.h" +#include "libm.h" double sin(double x) { double y[2]; - unsigned int ix; + uint32_t ix; unsigned n; /* High word of x. */ diff --git a/3rd/math/src/sqrt.c b/3rd/math/src/sqrt.c index e1a3c9d8..5f13efe0 100644 --- a/3rd/math/src/sqrt.c +++ b/3rd/math/src/sqrt.c @@ -1,27 +1,29 @@ -#include +#include +#include "math.h" +#include "libm.h" #include "sqrt_data.h" #define FENV_SUPPORT 1 /* returns a*b*2^-32 - e, with error 0 <= e < 1. */ -static inline unsigned int mul32(unsigned int a, unsigned int b) +static inline uint32_t mul32(uint32_t a, uint32_t b) { - return (unsigned long long)a*b >> 32; + return (uint64_t)a*b >> 32; } /* returns a*b*2^-64 - e, with error 0 <= e < 3. */ -static inline unsigned long long mul64(unsigned long long a, unsigned long long b) +static inline uint64_t mul64(uint64_t a, uint64_t b) { - unsigned long long ahi = a>>32; - unsigned long long alo = a&0xffffffff; - unsigned long long bhi = b>>32; - unsigned long long blo = b&0xffffffff; + uint64_t ahi = a>>32; + uint64_t alo = a&0xffffffff; + uint64_t bhi = b>>32; + uint64_t blo = b&0xffffffff; return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32); } double sqrt(double x) { - unsigned long long ix, top, m; + uint64_t ix, top, m; /* special case handling. */ ix = asuint64(x); @@ -103,11 +105,11 @@ double sqrt(double x) and after switching to 64 bit m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62 */ - static const unsigned long long three = 0xc0000000; - unsigned long long r, s, d, u, i; + static const uint64_t three = 0xc0000000; + uint64_t r, s, d, u, i; i = (ix >> 46) % 128; - r = (unsigned int)__rsqrt_tab[i] << 16; + r = (uint32_t)__rsqrt_tab[i] << 16; /* |r sqrt(m) - 1| < 0x1.fdp-9 */ s = mul32(m>>32, r); /* |s/sqrt(m) - 1| < 0x1.fdp-9 */ @@ -134,7 +136,7 @@ double sqrt(double x) compute nearest rounded result: the nearest result to 52 bits is either s or s+0x1p-52, we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m. */ - unsigned long long d0, d1, d2; + uint64_t d0, d1, d2; double y, t; d0 = (m << 42) - s*s; d1 = s - d0; @@ -147,7 +149,7 @@ double sqrt(double x) /* handle rounding modes and inexact exception: only (s+1)^2 == 2^42 m case is exact otherwise add a tiny value to cause the fenv effects. */ - unsigned long long tiny = predict_false(d2==0) ? 0 : 0x0010000000000000; + uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000; tiny |= (d1^d2) & 0x8000000000000000; t = asdouble(tiny); y = eval_as_double(y + t); diff --git a/3rd/math/src/sqrt_data.c b/3rd/math/src/sqrt_data.c index 013938fb..61bc22f4 100644 --- a/3rd/math/src/sqrt_data.c +++ b/3rd/math/src/sqrt_data.c @@ -1,5 +1,5 @@ #include "sqrt_data.h" -const unsigned short __rsqrt_tab[128] = { +const uint16_t __rsqrt_tab[128] = { 0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43, 0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b, 0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1, diff --git a/3rd/math/src/sqrt_data.h b/3rd/math/src/sqrt_data.h index 459004b9..c1f4e820 100644 --- a/3rd/math/src/sqrt_data.h +++ b/3rd/math/src/sqrt_data.h @@ -1,12 +1,13 @@ #ifndef _SQRT_DATA_H #define _SQRT_DATA_H -#include +#include "features/features.h" +#include /* if x in [1,2): i = (int)(64*x); if x in [2,4): i = (int)(32*x-64); __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error: |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */ -extern const unsigned short __rsqrt_tab[128]; +extern hidden const uint16_t __rsqrt_tab[128]; #endif diff --git a/3rd/math/src/tan.c b/3rd/math/src/tan.c index 89689762..9c724a45 100644 --- a/3rd/math/src/tan.c +++ b/3rd/math/src/tan.c @@ -39,12 +39,12 @@ * TRIG(x) returns trig(x) nearly rounded */ -#include "math.h" +#include "libm.h" double tan(double x) { double y[2]; - unsigned int ix; + uint32_t ix; unsigned n; GET_HIGH_WORD(ix, x); diff --git a/3rd/math/src/trunc.c b/3rd/math/src/trunc.c index a7d397fb..5186ce3c 100644 --- a/3rd/math/src/trunc.c +++ b/3rd/math/src/trunc.c @@ -1,10 +1,10 @@ -#include "math.h" +#include "libm.h" double trunc(double x) { - union {double f; unsigned long long i;} u = {x}; + union {double f; uint64_t i;} u = {x}; int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff + 12; - unsigned long long m; + uint64_t m; if (e >= 52 + 12) return x; diff --git a/CMakeLists.txt b/CMakeLists.txt index 105dde48..ac1e231f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,12 +50,8 @@ if(PK_ENABLE_DETERMINISTIC_FLOAT) add_subdirectory(3rd/math) include_directories(3rd/math) add_definitions(-DPK_ENABLE_DETERMINISTIC_FLOAT) - if(MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:precise") - elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffp-model=strict -O2") - else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") + if(NOT MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexcess-precision=standard -ffp-contract=off") endif() endif() @@ -124,6 +120,7 @@ endif() if(NOT MSVC) target_link_libraries(${PROJECT_NAME} pthread) endif() + if(PK_BUILD_MODULE_LZ4) target_link_libraries(${PROJECT_NAME} lz4) endif()