determinism_asm_sin ver.

2025-10-20 11:30:18 +00:00 · 2025-05-21 21:27:26 +08:00 · 2025-05-21 21:27:26 +08:00 · a210b0c21e
commit a210b0c21e
parent 4d8e316c0b
62 changed files with 627 additions and 451 deletions
--- a/3rd/math/CMakeLists.txt
+++ b/3rd/math/CMakeLists.txt
@ -8,24 +8,10 @@ set(CMAKE_C_STANDARD_REQUIRED ON)
 include_directories(${CMAKE_CURRENT_LIST_DIR}/include)
 AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_LIST_DIR}/src MUSL_LIBC_MATH_SRC)
 if(MSVC)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:precise /Ox /Oi-")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox /Oi- /wd4244 /wd4723")
    # mute warning C4723: div by 0 because some function returns nan when a input is nan.
    add_compile_options( /wd4723)
 elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffp-model=strict -O2")
 else()
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -fexcess-precision=standard -ffp-contract=off")
 endif()
 add_library(${PROJECT_NAME} STATIC ${MUSL_LIBC_MATH_SRC})
 # if(MSVC)
 #     target_compile_options(${PROJECT_NAME} PRIVATE /fp:precise /Od)
 #     message("MUSL_LIBC_MATH: MSVC set")
 # elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang")
 #     target_compile_options(${PROJECT_NAME} PRIVATE -ffp-model=strict)
 #     message("MUSL_LIBC_MATH: CLANG set")
 # elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU")
 #     message("MUSL_LIBC_MATH: gcc set")
 # endif()   
--- a/3rd/math/include/features/features.h
+++ b/3rd/math/include/features/features.h
@ -0,0 +1,8 @@
 #if defined(_MSC_VER)
 #define hidden 
 #else
 #define weak __attribute__((__weak__))
 #define hidden __attribute__((__visibility__("hidden")))
 #define weak_alias(old, new) \
 	extern __typeof(old) new __attribute__((__weak__, __alias__(#old)))
 #endif
--- a/3rd/math/include/float.h
+++ b/3rd/math/include/float.h
@ -0,0 +1,51 @@
 #ifndef _FLOAT_H
 #define _FLOAT_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 #define FLT_RADIX 2
 #define FLT_TRUE_MIN 1.40129846432481707092e-45F
 #define FLT_MIN 1.17549435082228750797e-38F
 #define FLT_MAX 3.40282346638528859812e+38F
 #define FLT_EPSILON 1.1920928955078125e-07F
 #define FLT_MANT_DIG 24
 #define FLT_MIN_EXP (-125)
 #define FLT_MAX_EXP 128
 #define FLT_HAS_SUBNORM 1
 #define FLT_DIG 6
 #define FLT_DECIMAL_DIG 9
 #define FLT_MIN_10_EXP (-37)
 #define FLT_MAX_10_EXP 38
 #define DBL_TRUE_MIN 4.94065645841246544177e-324
 #define DBL_MIN 2.22507385850720138309e-308
 #define DBL_MAX 1.79769313486231570815e+308
 #define DBL_EPSILON 2.22044604925031308085e-16
 #define DBL_MANT_DIG 53
 #define DBL_MIN_EXP (-1021)
 #define DBL_MAX_EXP 1024
 #define DBL_HAS_SUBNORM 1
 #define DBL_DIG 15
 #define DBL_DECIMAL_DIG 17
 #define DBL_MIN_10_EXP (-307)
 #define DBL_MAX_10_EXP 308
 #define LDBL_HAS_SUBNORM 1
 #define LDBL_DECIMAL_DIG DECIMAL_DIG
 #define LDBL_MANT_DIG    DBL_MANT_DIG            
 #define LDBL_MAX_EXP     DBL_MAX_EXP             
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/3rd/math/include/libm.h
+++ b/3rd/math/include/libm.h
@ -0,0 +1,277 @@
 #ifndef _LIBM_H
 #define _LIBM_H
 #include <stdint.h>
 #include "float.h"
 #include "math.h"
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
 union ldshape {
 	long double f;
 	struct {
 		uint64_t m;
 		uint16_t se;
 	} i;
 };
 #elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
 /* This is the m68k variant of 80-bit long double, and this definition only works
 * on archs where the alignment requirement of uint64_t is <= 4. */
 union ldshape {
 	long double f;
 	struct {
 		uint16_t se;
 		uint16_t pad;
 		uint64_t m;
 	} i;
 };
 #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
 union ldshape {
 	long double f;
 	struct {
 		uint64_t lo;
 		uint32_t mid;
 		uint16_t top;
 		uint16_t se;
 	} i;
 	struct {
 		uint64_t lo;
 		uint64_t hi;
 	} i2;
 };
 #elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
 union ldshape {
 	long double f;
 	struct {
 		uint16_t se;
 		uint16_t top;
 		uint32_t mid;
 		uint64_t lo;
 	} i;
 	struct {
 		uint64_t hi;
 		uint64_t lo;
 	} i2;
 };
 #else
 #error Unsupported long double representation
 #endif
 /* Support non-nearest rounding mode.  */
 #define WANT_ROUNDING 1
 /* Support signaling NaNs.  */
 #define WANT_SNAN 0
 #if WANT_SNAN
 #error SNaN is unsupported
 #else
 #define issignalingf_inline(x) 0
 #define issignaling_inline(x) 0
 #endif
 #ifndef TOINT_INTRINSICS
 #define TOINT_INTRINSICS 0
 #endif
 #if TOINT_INTRINSICS
 /* Round x to nearest int in all rounding modes, ties have to be rounded
   consistently with converttoint so the results match.  If the result
   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
 static double_t roundtoint(double_t);
 /* Convert x to nearest int in all rounding modes, ties have to be rounded
   consistently with roundtoint.  If the result is not representible in an
   int32_t then the semantics is unspecified.  */
 static int32_t converttoint(double_t);
 #endif
 /* Helps static branch prediction so hot path can be better optimized.  */
 #ifdef __GNUC__
 #define predict_true(x) __builtin_expect(!!(x), 1)
 #define predict_false(x) __builtin_expect(x, 0)
 #else
 #define predict_true(x) (x)
 #define predict_false(x) (x)
 #endif
 /* Evaluate an expression as the specified type. With standard excess
   precision handling a type cast or assignment is enough (with
   -ffloat-store an assignment is required, in old compilers argument
   passing and return statement may not drop excess precision).  */
 static inline float eval_as_float(float x)
 {
 	float y = x;
 	return y;
 }
 static inline double eval_as_double(double x)
 {
 	double y = x;
 	return y;
 }
 /* fp_barrier returns its input, but limits code transformations
   as if it had a side-effect (e.g. observable io) and returned
   an arbitrary value.  */
 #ifndef fp_barrierf
 #define fp_barrierf fp_barrierf
 static inline float fp_barrierf(float x)
 {
 	volatile float y = x;
 	return y;
 }
 #endif
 #ifndef fp_barrier
 #define fp_barrier fp_barrier
 static inline double fp_barrier(double x)
 {
 	volatile double y = x;
 	return y;
 }
 #endif
 #ifndef fp_barrierl
 #define fp_barrierl fp_barrierl
 static inline long double fp_barrierl(long double x)
 {
 	volatile long double y = x;
 	return y;
 }
 #endif
 /* fp_force_eval ensures that the input value is computed when that's
   otherwise unused.  To prevent the constant folding of the input
   expression, an additional fp_barrier may be needed or a compilation
   mode that does so (e.g. -frounding-math in gcc). Then it can be
   used to evaluate an expression for its fenv side-effects only.   */
 #ifndef fp_force_evalf
 #define fp_force_evalf fp_force_evalf
 static inline void fp_force_evalf(float x)
 {
 	volatile float y;
 	y = x;
 }
 #endif
 #ifndef fp_force_eval
 #define fp_force_eval fp_force_eval
 static inline void fp_force_eval(double x)
 {
 	volatile double y;
 	y = x;
 }
 #endif
 #ifndef fp_force_evall
 #define fp_force_evall fp_force_evall
 static inline void fp_force_evall(long double x)
 {
 	volatile long double y;
 	y = x;
 }
 #endif
 #define FORCE_EVAL(x) do {                        \
 	if (sizeof(x) == sizeof(float)) {         \
 		fp_force_evalf(x);                \
 	} else if (sizeof(x) == sizeof(double)) { \
 		fp_force_eval(x);                 \
 	} else {                                  \
 		fp_force_evall(x);                \
 	}                                         \
 } while(0)
 typedef union {float _f; unsigned int _i;}asuint_union;
 typedef union {unsigned int _i; float _f;}asfloat_union;
 typedef union {double _f; unsigned long long _i;}asuint64_union;
 typedef union {unsigned long long _i; double _f;}asdouble_union;
 #define asuint(f) ((asuint_union){f})._i
 #define asfloat(i) ((asfloat_union){i})._f
 #define asuint64(f) ((asuint64_union){f})._i
 #define asdouble(i) ((asdouble_union){i})._f
 #define EXTRACT_WORDS(hi,lo,d)                    \
 do {                                              \
  uint64_t __u = asuint64(d);                     \
  (hi) = __u >> 32;                               \
  (lo) = (uint32_t)__u;                           \
 } while (0)
 #define GET_HIGH_WORD(hi,d)                       \
 do {                                              \
  (hi) = asuint64(d) >> 32;                       \
 } while (0)
 #define GET_LOW_WORD(lo,d)                        \
 do {                                              \
  (lo) = (uint32_t)asuint64(d);                   \
 } while (0)
 #define INSERT_WORDS(d,hi,lo)                     \
 do {                                              \
  (d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
 } while (0)
 #define SET_HIGH_WORD(d,hi)                       \
  INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
 #define SET_LOW_WORD(d,lo)                        \
  INSERT_WORDS(d, asuint64(d)>>32, lo)
 #define GET_FLOAT_WORD(w,d)                       \
 do {                                              \
  (w) = asuint(d);                                \
 } while (0)
 #define SET_FLOAT_WORD(d,w)                       \
 do {                                              \
  (d) = asfloat(w);                               \
 } while (0)
 hidden int    __rem_pio2_large(double*,double*,int,int,int);
 hidden int    __rem_pio2(double,double*);
 hidden double __sin(double,double,int);
 hidden double __cos(double,double);
 hidden double __tan(double,double,int);
 // hidden double __expo2(double,double);
 // hidden int    __rem_pio2f(float,double*);
 // hidden float  __sindf(double);
 // hidden float  __cosdf(double);
 // hidden float  __tandf(double,int);
 // hidden float  __expo2f(float,float);
 // hidden int __rem_pio2l(long double, long double *);
 // hidden long double __sinl(long double, long double, int);
 // hidden long double __cosl(long double, long double);
 // hidden long double __tanl(long double, long double, int);
 // hidden long double __polevll(long double, const long double *, int);
 // hidden long double __p1evll(long double, const long double *, int);
 // extern int __signgam;
 // hidden double __lgamma_r(double, int *);
 // hidden float __lgammaf_r(float, int *);
 /* error handling functions */
 hidden float __math_xflowf(uint32_t, float);
 hidden float __math_uflowf(uint32_t);
 hidden float __math_oflowf(uint32_t);
 hidden float __math_divzerof(uint32_t);
 hidden float __math_invalidf(float);
 hidden double __math_xflow(uint32_t, double);
 hidden double __math_uflow(uint32_t);
 hidden double __math_oflow(uint32_t);
 hidden double __math_divzero(uint32_t);
 hidden double __math_invalid(double);
 #if LDBL_MANT_DIG != DBL_MANT_DIG
 hidden long double __math_invalidl(long double);
 #endif
 #endif
--- a/3rd/math/include/math.h
+++ b/3rd/math/include/math.h
@ -5,6 +5,23 @@
 extern "C" {
 #endif
 #include "features/features.h"
 #if defined(_WIN32) || defined(_WIN64)
 #if defined _M_IX86 && _M_IX86_FP < 2 && !defined _M_FP_FAST
 	typedef long double float_t;
 	typedef long double double_t;
 #else
 	typedef float  float_t;
 	typedef double double_t;
 #endif
 #else
 #define __NEED_float_t
 #define __NEED_double_t
 // #include <bits/alltypes.h>
 typedef float  float_t;
 typedef double double_t;
 #endif
 #ifndef _HUGE_ENUF
    #define _HUGE_ENUF  1e+300  // _HUGE_ENUF*_HUGE_ENUF must overflow
@ -45,21 +62,6 @@ extern "C" {
 #define FLT_EVAL_METHOD 0
 /* Support non-nearest rounding mode.  */
 #define WANT_ROUNDING 1
 /* Support signaling NaNs.  */
 #define WANT_SNAN 0
 #if WANT_SNAN
 #error SNaN is unsupported
 #else
 #define issignalingf_inline(x) 0
 #define issignaling_inline(x) 0
 #endif
 #define predict_true(x) (x)
 #define predict_false(x) (x)
 int __fpclassify(double);
 int __fpclassifyf(float);
 int __fpclassifyl(long double);
@ -117,20 +119,20 @@ int __signbitl(long double);
 static __inline int __is##rel(type __x, type __y) \
 { return !isunordered(__x,__y) && __x op __y; }
-__ISREL_DEF(lessf, <, float)
+__ISREL_DEF(lessf, <, float_t)
-__ISREL_DEF(less, <, double)
+__ISREL_DEF(less, <, double_t)
 __ISREL_DEF(lessl, <, long double)
-__ISREL_DEF(lessequalf, <=, float)
+__ISREL_DEF(lessequalf, <=, float_t)
-__ISREL_DEF(lessequal, <=, double)
+__ISREL_DEF(lessequal, <=, double_t)
 __ISREL_DEF(lessequall, <=, long double)
-__ISREL_DEF(lessgreaterf, !=, float)
+__ISREL_DEF(lessgreaterf, !=, float_t)
-__ISREL_DEF(lessgreater, !=, double)
+__ISREL_DEF(lessgreater, !=, double_t)
 __ISREL_DEF(lessgreaterl, !=, long double)
-__ISREL_DEF(greaterf, >, float)
+__ISREL_DEF(greaterf, >, float_t)
-__ISREL_DEF(greater, >, double)
+__ISREL_DEF(greater, >, double_t)
 __ISREL_DEF(greaterl, >, long double)
-__ISREL_DEF(greaterequalf, >=, float)
+__ISREL_DEF(greaterequalf, >=, float_t)
-__ISREL_DEF(greaterequal, >=, double)
+__ISREL_DEF(greaterequal, >=, double_t)
 __ISREL_DEF(greaterequall, >=, long double)
 #define __tg_pred_2(x, y, p) ( \
@ -144,165 +146,6 @@ __ISREL_DEF(greaterequall, >=, long double)
 #define isgreater(x, y)         __tg_pred_2(x, y, __isgreater)
 #define isgreaterequal(x, y)    __tg_pred_2(x, y, __isgreaterequal)
 /* Evaluate an expression as the specified type. With standard excess
   precision handling a type cast or assignment is enough (with
   -ffloat-store an assignment is required, in old compilers argument
   passing and return statement may not drop excess precision).  */
 static inline float eval_as_float(float x)
 {
 	float y = x;
 	return y;
 }
 static inline double eval_as_double(double x)
 {
 	double y = x;
 	return y;
 }
 /* fp_barrier returns its input, but limits code transformations
 	as if it had a side-effect (e.g. observable io) and returned
 	an arbitrary value.  */
 #ifndef fp_barrierf
 #define fp_barrierf fp_barrierf
 static inline float fp_barrierf(float x)
 {
 	volatile float y = x;
 	return y;
 }
 #endif
 #ifndef fp_barrier
 #define fp_barrier fp_barrier
 static inline double fp_barrier(double x)
 {
 	volatile double y = x;
 	return y;
 }
 #endif
 #ifndef fp_barrierl
 #define fp_barrierl fp_barrierl
 static inline long double fp_barrierl(long double x)
 {
 	volatile long double y = x;
 	return y;
 }
 #endif
 /* fp_force_eval ensures that the input value is computed when that's
 	otherwise unused.  To prevent the constant folding of the input
 	expression, an additional fp_barrier may be needed or a compilation
 	mode that does so (e.g. -frounding-math in gcc). Then it can be
 	used to evaluate an expression for its fenv side-effects only.   */
 #ifndef fp_force_evalf
 #define fp_force_evalf fp_force_evalf
 static inline void fp_force_evalf(float x)
 {
 	volatile float y;
 	y = x;
 }
 #endif
 #ifndef fp_force_eval
 #define fp_force_eval fp_force_eval
 static inline void fp_force_eval(double x)
 {
 	volatile double y;
 	y = x;
 }
 #endif
 #ifndef fp_force_evall
 #define fp_force_evall fp_force_evall
 static inline void fp_force_evall(long double x)
 {
 	volatile long double y;
 	y = x;
 }
 #endif
 #define FORCE_EVAL(x) do {                        \
 	if (sizeof(x) == sizeof(float)) {         \
 		fp_force_evalf(x);                \
 	} else if (sizeof(x) == sizeof(double)) { \
 		fp_force_eval(x);                 \
 	} else {                                  \
 		fp_force_evall(x);                \
 	}                                         \
 } while(0)
 typedef union {float _f; unsigned int _i;}asuint_union;
 typedef union {unsigned int _i; float _f;}asfloat_union;
 typedef union {double _f; unsigned long long _i;}asuint64_union;
 typedef union {unsigned long long _i; double _f;}asdouble_union;
 #define asuint(f) ((asuint_union){f})._i
 #define asfloat(i) ((asfloat_union){i})._f
 #define asuint64(f) ((asuint64_union){f})._i
 #define asdouble(i) ((asdouble_union){i})._f
 #define EXTRACT_WORDS(hi,lo,d)                    \
 do {                                              \
 	unsigned long long __u = asuint64(d);                     \
 	(hi) = __u >> 32;                               \
 	(lo) = (unsigned int)__u;                           \
 } while (0)
 #define GET_HIGH_WORD(hi,d)                       \
 do {                                              \
 	(hi) = asuint64(d) >> 32;                       \
 } while (0)
 #define GET_LOW_WORD(lo,d)                        \
 do {                                              \
 	(lo) = (unsigned int)asuint64(d);                   \
 } while (0)
 #define INSERT_WORDS(d,hi,lo)                     \
 do {                                              \
 	(d) = asdouble(((unsigned long long)(hi)<<32) | (unsigned int)(lo)); \
 } while (0)
 #define SET_HIGH_WORD(d,hi)                       \
 	INSERT_WORDS(d, hi, (unsigned int)asuint64(d))
 #define SET_LOW_WORD(d,lo)                        \
 	INSERT_WORDS(d, asuint64(d)>>32, lo)
 #define GET_FLOAT_WORD(w,d)                       \
 do {                                              \
 	(w) = asuint(d);                                \
 } while (0)
 #define SET_FLOAT_WORD(d,w)                       \
 do {                                              \
 	(d) = asfloat(w);                               \
 } while (0)
 int    __rem_pio2_large(double*, double*, int, int, int);
 int    __rem_pio2(double, double*);
 double __sin(double, double, int);
 double __cos(double, double);
 double __tan(double, double, int);
 /* error handling functions */
 float __math_xflowf(unsigned int, float);
 float __math_uflowf(unsigned int);
 float __math_oflowf(unsigned int);
 float __math_divzerof(unsigned int);
 float __math_invalidf(float);
 double __math_xflow(unsigned int, double);
 double __math_uflow(unsigned int);
 double __math_oflow(unsigned int);
 double __math_divzero(unsigned int);
 double __math_invalid(double);
 #if LDBL_MANT_DIG != DBL_MANT_DIG
 long double __math_invalidl(long double);
 #endif
 double      acos(double);
 double      asin(double);
 double      atan(double);
@ -332,44 +175,9 @@ double      sqrt(double);
 double      tan(double);
 double      trunc(double);
 #define FLT_TRUE_MIN 1.40129846432481707092e-45F
 #define FLT_MIN 1.17549435082228750797e-38F
 #define FLT_MAX 3.40282346638528859812e+38F
 #define FLT_EPSILON 1.1920928955078125e-07F
 #define FLT_MANT_DIG 24
 #define FLT_MIN_EXP (-125)
 #define FLT_MAX_EXP 128
 #define FLT_HAS_SUBNORM 1
 #define FLT_DIG 6
 #define FLT_DECIMAL_DIG 9
 #define FLT_MIN_10_EXP (-37)
 #define FLT_MAX_10_EXP 38
 #define DBL_TRUE_MIN 4.94065645841246544177e-324
 #define DBL_MIN 2.22507385850720138309e-308
 #define DBL_MAX 1.79769313486231570815e+308
 #define DBL_EPSILON 2.22044604925031308085e-16
 #define DBL_MANT_DIG 53
 #define DBL_MIN_EXP (-1021)
 #define DBL_MAX_EXP 1024
 #define DBL_HAS_SUBNORM 1
 #define DBL_DIG 15
 #define DBL_DECIMAL_DIG 17
 #define DBL_MIN_10_EXP (-307)
 #define DBL_MAX_10_EXP 308
 #define LDBL_HAS_SUBNORM 1
 #define LDBL_DECIMAL_DIG DBL_DECIMAL_DIG
 #define LDBL_MANT_DIG    DBL_MANT_DIG            
 #define LDBL_MAX_EXP     DBL_MAX_EXP             
 #undef  MAXFLOAT
 #define MAXFLOAT        3.40282346638528859812e+38F
 #define HUGE            3.40282346638528859812e+38F
 #define M_DEG2RAD		0.017453292519943295	/* pi/180 */
 #define M_E             2.7182818284590452354   /* e */
@ -397,6 +205,32 @@ double      trunc(double);
 // double      y1(double);
 // double      yn(int, double);
 #define HUGE            3.40282346638528859812e+38F
 // double      drem(double, double);
 // float       dremf(float, float);
 // int         finite(double);
 // int         finitef(float);
 // double      scalb(double, double);
 // float       scalbf(float, float);
 // double      significand(double);
 // float       significandf(float);
 // double      lgamma_r(double, int*);
 // float       lgammaf_r(float, int*);
 // float       j0f(float);
 // float       j1f(float);
 // float       jnf(int, float);
 // float       y0f(float);
 // float       y1f(float);
 // float       ynf(int, float);
 #ifdef __cplusplus
 }
 #endif
--- a/3rd/math/src/__cos.c
+++ b/3rd/math/src/__cos.c
@ -48,7 +48,7 @@
 *         any extra precision in w.
 */
-#include "math.h"
+#include "libm.h"
 static const double
 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
@ -60,7 +60,7 @@ C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
 double __cos(double x, double y)
 {
-	double hz,z,r,w;
+	double_t hz,z,r,w;
 	z  = x*x;
 	w  = z*z;
--- a/3rd/math/src/__fpclassify.c
+++ b/3rd/math/src/__fpclassify.c
@ -1,8 +1,9 @@
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
 int __fpclassify(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	int e = u.i>>52 & 0x7ff;
 	if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
 	if (e==0x7ff) return u.i<<12 ? FP_NAN : FP_INFINITE;
--- a/3rd/math/src/__fpclassifyf.c
+++ b/3rd/math/src/__fpclassifyf.c
@ -1,8 +1,9 @@
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
 int __fpclassifyf(float x)
 {
-	union {float f; unsigned int i;} u = {x};
+	union {float f; uint32_t i;} u = {x};
 	int e = u.i>>23 & 0xff;
 	if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
 	if (e==0xff) return u.i<<9 ? FP_NAN : FP_INFINITE;
--- a/3rd/math/src/__fpclassifyl.c
+++ b/3rd/math/src/__fpclassifyl.c
@ -1,4 +1,4 @@
-#include "math.h"
+#include "libm.h"
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 int __fpclassifyl(long double x)
--- a/3rd/math/src/__math_divzero.c
+++ b/3rd/math/src/__math_divzero.c
@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
-double __math_divzero(unsigned int sign)
+double __math_divzero(uint32_t sign)
 {
 	return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
 }
--- a/3rd/math/src/__math_divzerof.c
+++ b/3rd/math/src/__math_divzerof.c
@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
-float __math_divzerof(unsigned int sign)
+float __math_divzerof(uint32_t sign)
 {
 	return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
 }
--- a/3rd/math/src/__math_invalid.c
+++ b/3rd/math/src/__math_invalid.c
@ -1,4 +1,4 @@
-#include "math.h"
+#include "libm.h"
 double __math_invalid(double x)
 {
--- a/3rd/math/src/__math_invalidf.c
+++ b/3rd/math/src/__math_invalidf.c
@ -1,4 +1,4 @@
-#include "math.h"
+#include "libm.h"
 float __math_invalidf(float x)
 {
--- a/3rd/math/src/__math_invalidl.c
+++ b/3rd/math/src/__math_invalidl.c
@ -1,4 +1,5 @@
-#include "math.h"
+#include "float.h"
 #include "libm.h"
 #if LDBL_MANT_DIG != DBL_MANT_DIG
 long double __math_invalidl(long double x)
--- a/3rd/math/src/__math_oflow.c
+++ b/3rd/math/src/__math_oflow.c
@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
-double __math_oflow(unsigned int sign)
+double __math_oflow(uint32_t sign)
 {
 	return __math_xflow(sign, 0x1p769);
 }
--- a/3rd/math/src/__math_oflowf.c
+++ b/3rd/math/src/__math_oflowf.c
@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
-float __math_oflowf(unsigned int sign)
+float __math_oflowf(uint32_t sign)
 {
 	return __math_xflowf(sign, 0x1p97f);
 }
--- a/3rd/math/src/__math_uflow.c
+++ b/3rd/math/src/__math_uflow.c
@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
-double __math_uflow(unsigned int sign)
+double __math_uflow(uint32_t sign)
 {
 	return __math_xflow(sign, 0x1p-767);
 }
--- a/3rd/math/src/__math_uflowf.c
+++ b/3rd/math/src/__math_uflowf.c
@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
-float __math_uflowf(unsigned int sign)
+float __math_uflowf(uint32_t sign)
 {
 	return __math_xflowf(sign, 0x1p-95f);
 }
--- a/3rd/math/src/__math_xflow.c
+++ b/3rd/math/src/__math_xflow.c
@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
-double __math_xflow(unsigned int sign, double y)
+double __math_xflow(uint32_t sign, double y)
 {
 	return eval_as_double(fp_barrier(sign ? -y : y) * y);
 }
--- a/3rd/math/src/__math_xflowf.c
+++ b/3rd/math/src/__math_xflowf.c
@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
-float __math_xflowf(unsigned int sign, float y)
+float __math_xflowf(uint32_t sign, float y)
 {
 	return eval_as_float(fp_barrierf(sign ? -y : y) * y);
 }
--- a/3rd/math/src/__rem_pio2.c
+++ b/3rd/math/src/__rem_pio2.c
@ -17,7 +17,7 @@
 * use __rem_pio2_large() for large x
 */
-#include "math.h"
+#include "libm.h"
 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
 #define EPS DBL_EPSILON
@ -48,10 +48,10 @@ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
 /* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
 int __rem_pio2(double x, double *y)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
-	double z,w,t,r,fn;
+	double_t z,w,t,r,fn;
 	double tx[3],ty[2];
-	unsigned int ix;
+	uint32_t ix;
 	int sign, n, ex, ey, i;
 	sign = u.i>>63;
@ -119,8 +119,8 @@ int __rem_pio2(double x, double *y)
 	if (ix < 0x413921fb) {  /* |x| ~< 2^20*(pi/2), medium size */
 medium:
 		/* rint(x/(pi/2)) */
-		fn = (double)x*invpio2 + toint - toint;
+		fn = (double_t)x*invpio2 + toint - toint;
-		n = (int)fn;
+		n = (int32_t)fn;
 		r = x - fn*pio2_1;
 		w = fn*pio2_1t;  /* 1st round, good to 85 bits */
 		/* Matters with directed rounding. */
@ -167,11 +167,11 @@ medium:
 	}
 	/* set z = scalbn(|x|,-ilogb(x)+23) */
 	u.f = x;
-	u.i &= (unsigned long long)-1>>12;
+	u.i &= (uint64_t)-1>>12;
-	u.i |= (unsigned long long)(0x3ff + 23)<<52;
+	u.i |= (uint64_t)(0x3ff + 23)<<52;
 	z = u.f;
 	for (i=0; i < 2; i++) {
-		tx[i] = (double)(int)z;
+		tx[i] = (double)(int32_t)z;
 		z     = (z-tx[i])*0x1p24;
 	}
 	tx[i] = z;
--- a/3rd/math/src/__rem_pio2_large.c
+++ b/3rd/math/src/__rem_pio2_large.c
@ -122,7 +122,7 @@
 * to produce the hexadecimal values shown.
 */
-#include "math.h"
+#include "libm.h"
 static const int init_jk[] = {3,4,4,6}; /* initial value for jk */
@ -138,7 +138,7 @@ static const int init_jk[] = {3,4,4,6}; /* initial value for jk */
 * NB: This table must have at least (e0-3)/24 + jk terms.
 *     For quad precision (e0 <= 16360, jk = 6), this is 686.
 */
-static const int ipio2[] = {
+static const int32_t ipio2[] = {
 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
@ -272,7 +272,7 @@ static const double PIo2[] = {
 int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 {
-	int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
+	int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
 	double z,fw,f[20],fq[20],q[20];
 	/* initialize jk*/
@ -300,15 +300,15 @@ int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 recompute:
 	/* distill q[] into iq[] reversingly */
 	for (i=0,j=jz,z=q[jz]; j>0; i++,j--) {
-		fw    = (double)(int)(0x1p-24*z);
+		fw    = (double)(int32_t)(0x1p-24*z);
-		iq[i] = (int)(z - 0x1p24*fw);
+		iq[i] = (int32_t)(z - 0x1p24*fw);
 		z     = q[j-1]+fw;
 	}
 	/* compute n */
 	z  = scalbn(z,q0);       /* actual value of z */
 	z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
-	n  = (int)z;
+	n  = (int32_t)z;
 	z -= (double)n;
 	ih = 0;
 	if (q0 > 0) {  /* need iq[jz-1] to determine n */
@ -375,13 +375,13 @@ recompute:
 	} else { /* break z into 24-bit if necessary */
 		z = scalbn(z,-q0);
 		if (z >= 0x1p24) {
-			fw = (double)(int)(0x1p-24*z);
+			fw = (double)(int32_t)(0x1p-24*z);
-			iq[jz] = (int)(z - 0x1p24*fw);
+			iq[jz] = (int32_t)(z - 0x1p24*fw);
 			jz += 1;
 			q0 += 24;
-			iq[jz] = (int)fw;
+			iq[jz] = (int32_t)fw;
 		} else
-			iq[jz] = (int)z;
+			iq[jz] = (int32_t)z;
 	}
 	/* convert integer "bit" chunk to floating-point value */
@ -411,7 +411,7 @@ recompute:
 		fw = 0.0;
 		for (i=jz; i>=0; i--)
 			fw += fq[i];
-		// TODO: drop excess precision here once double is used
+		// TODO: drop excess precision here once double_t is used
 		fw = (double)fw;
 		y[0] = ih==0 ? fw : -fw;
 		fw = fq[0]-fw;
--- a/3rd/math/src/__signbit.c
+++ b/3rd/math/src/__signbit.c
@ -1,11 +1,11 @@
-#include "math.h"
+#include "libm.h"
 // FIXME: macro in math.h
 int __signbit(double x)
 {
 	union {
 		double d;
-		unsigned long long i;
+		uint64_t i;
 	} y = { x };
 	return y.i>>63;
 }
--- a/3rd/math/src/__signbitf.c
+++ b/3rd/math/src/__signbitf.c
@ -1,11 +1,11 @@
-#include "math.h"
+#include "libm.h"
 // FIXME: macro in math.h
 int __signbitf(float x)
 {
 	union {
 		float f;
-		unsigned int i;
+		uint32_t i;
 	} y = { x };
 	return y.i>>31;
 }
--- a/3rd/math/src/__signbitl.c
+++ b/3rd/math/src/__signbitl.c
@ -1,4 +1,4 @@
-#include "math.h"
+#include "libm.h"
 #if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 int __signbitl(long double x)
--- a/3rd/math/src/__sin.c
+++ b/3rd/math/src/__sin.c
@ -39,7 +39,7 @@
 *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
 */
-#include "math.h"
+#include "libm.h"
 static const double
 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
@ -51,7 +51,7 @@ S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
 double __sin(double x, double y, int iy)
 {
-	double z,r,v,w;
+	double_t z,r,v,w;
 	z = x*x;
 	w = z*z;
--- a/3rd/math/src/__tan.c
+++ b/3rd/math/src/__tan.c
@ -43,7 +43,7 @@
 *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
 */
-#include "math.h"
+#include "libm.h"
 static const double T[] = {
             3.33333333333334091986e-01, /* 3FD55555, 55555563 */
@ -65,9 +65,9 @@ pio4lo =     3.06161699786838301793e-17; /* 3C81A626, 33145C07 */
 double __tan(double x, double y, int odd)
 {
-	double z, r, v, w, s, a;
+	double_t z, r, v, w, s, a;
 	double w0, a0;
-	unsigned int hx;
+	uint32_t hx;
 	int big, sign;
 	GET_HIGH_WORD(hx,x);
--- a/3rd/math/src/acos.c
+++ b/3rd/math/src/acos.c
@ -33,7 +33,7 @@
 * Function needed: sqrt
 */
-#include "math.h"
+#include "libm.h"
 static const double
 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
@ -51,7 +51,7 @@ qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
 static double R(double z)
 {
-	double p, q;
+	double_t p, q;
 	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
 	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
 	return p/q;
@ -60,13 +60,13 @@ static double R(double z)
 double acos(double x)
 {
 	double z,w,s,c,df;
-	unsigned int hx,ix;
+	uint32_t hx,ix;
 	GET_HIGH_WORD(hx, x);
 	ix = hx & 0x7fffffff;
 	/* |x| >= 1 or nan */
 	if (ix >= 0x3ff00000) {
-		unsigned int lx;
+		uint32_t lx;
 		GET_LOW_WORD(lx,x);
 		if ((ix-0x3ff00000 | lx) == 0) {
--- a/3rd/math/src/asin.c
+++ b/3rd/math/src/asin.c
@ -39,7 +39,7 @@
 *
 */
-#include "math.h"
+#include "libm.h"
 static const double
 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
@ -58,7 +58,7 @@ qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
 static double R(double z)
 {
-	double p, q;
+	double_t p, q;
 	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
 	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
 	return p/q;
@ -67,13 +67,13 @@ static double R(double z)
 double asin(double x)
 {
 	double z,r,s;
-	unsigned int hx,ix;
+	uint32_t hx,ix;
 	GET_HIGH_WORD(hx, x);
 	ix = hx & 0x7fffffff;
 	/* |x| >= 1 or nan */
 	if (ix >= 0x3ff00000) {
-		unsigned int lx;
+		uint32_t lx;
 		GET_LOW_WORD(lx, x);
 		if ((ix-0x3ff00000 | lx) == 0)
 			/* asin(1) = +-pi/2 with inexact */
--- a/3rd/math/src/atan.c
+++ b/3rd/math/src/atan.c
@ -30,7 +30,7 @@
 */
-#include "math.h"
+#include "libm.h"
 static const double atanhi[] = {
  4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
@ -62,8 +62,8 @@ static const double aT[] = {
 double atan(double x)
 {
-	double w,s1,s2,z;
+	double_t w,s1,s2,z;
-	unsigned int ix,sign;
+	uint32_t ix,sign;
 	int id;
 	GET_HIGH_WORD(ix, x);
--- a/3rd/math/src/atan2.c
+++ b/3rd/math/src/atan2.c
@ -37,7 +37,7 @@
 * to produce the hexadecimal values shown.
 */
-#include "math.h"
+#include "libm.h"
 static const double
 pi     = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */
@ -46,7 +46,7 @@ pi_lo  = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
 double atan2(double y, double x)
 {
 	double z;
-	unsigned int m,lx,ly,ix,iy;
+	uint32_t m,lx,ly,ix,iy;
 	if (isnan(x) || isnan(y))
 		return x+y;
--- a/3rd/math/src/cbrt.c
+++ b/3rd/math/src/cbrt.c
@ -15,9 +15,10 @@
 * Return cube root of x
 */
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
-static const unsigned int
+static const uint32_t
 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
 B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
@ -31,9 +32,9 @@ P4 =  0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
 double cbrt(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
-	double r,s,t,w;
+	double_t r,s,t,w;
-	unsigned int hx = u.i>>32 & 0x7fffffff;
+	uint32_t hx = u.i>>32 & 0x7fffffff;
 	if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */
 		return x+x;
@ -62,7 +63,7 @@ double cbrt(double x)
 	} else
 		hx = hx/3 + B1;
 	u.i &= 1ULL<<63;
-	u.i |= (unsigned long long)hx << 32;
+	u.i |= (uint64_t)hx << 32;
 	t = u.f;
 	/*
--- a/3rd/math/src/ceil.c
+++ b/3rd/math/src/ceil.c
@ -1,17 +1,17 @@
-#include "math.h"
+#include "libm.h"
 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
 #define EPS DBL_EPSILON
 #elif FLT_EVAL_METHOD==2
 #define EPS LDBL_EPSILON
 #endif
-static const double toint = 1/EPS;
+static const double_t toint = 1/EPS;
 double ceil(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	int e = u.i >> 52 & 0x7ff;
-	double y;
+	double_t y;
 	if (e >= 0x3ff+52 || x == 0)
 		return x;
--- a/3rd/math/src/cos.c
+++ b/3rd/math/src/cos.c
@ -40,12 +40,12 @@
 *      TRIG(x) returns trig(x) nearly rounded
 */
-#include "math.h"
+#include "libm.h"
 double cos(double x)
 {
 	double y[2];
-	unsigned int ix;
+	uint32_t ix;
 	unsigned n;
 	GET_HIGH_WORD(ix, x);
--- a/3rd/math/src/degrees.c
+++ b/3rd/math/src/degrees.c
@ -1,4 +1,4 @@
-#include <math.h>
+#include "libm.h"
 double degrees(double x)
 {
--- a/3rd/math/src/exp.c
+++ b/3rd/math/src/exp.c
@ -5,7 +5,9 @@
 * SPDX-License-Identifier: MIT
 */
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
 #include "libm.h"
 #include "exp_data.h"
 #define N (1 << EXP_TABLE_BITS)
@ -23,12 +25,12 @@
   is scale*(1+TMP) without intermediate rounding.  The bit representation of
   scale is in SBITS, however it has a computed exponent that may have
   overflown into the sign bit so that needs to be adjusted before using it as
-   a double.  (int)KI is the k used in the argument reduction and exponent
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
   adjustment of scale, positive k here means the result may overflow and
   negative k means the result may underflow.  */
-static inline double specialcase(double tmp, unsigned long long sbits, unsigned long long ki)
+static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
 {
-	double scale, y;
+	double_t scale, y;
 	if ((ki & 0x80000000) == 0) {
 		/* k > 0, the exponent of scale might have overflowed by <= 460.  */
@ -46,7 +48,7 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned
 		 range to avoid double rounding that can cause 0.5+E/2 ulp error where
 		 E is the worst-case ulp error outside the subnormal range.  So this
 		 is only useful if the goal is better than 1 ulp worst-case error.  */
-		double hi, lo;
+		double_t hi, lo;
 		lo = scale - y + scale * tmp;
 		hi = 1.0 + y;
 		lo = 1.0 - hi + y + lo;
@ -62,16 +64,16 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned
 }
 /* Top 12 bits of a double (sign and exponent bits).  */
-static inline unsigned int top12(double x)
+static inline uint32_t top12(double x)
 {
 	return asuint64(x) >> 52;
 }
 double exp(double x)
 {
-	unsigned int abstop;
+	uint32_t abstop;
-	unsigned long long ki, idx, top, sbits;
+	uint64_t ki, idx, top, sbits;
-	double kd, z, r, r2, scale, tail, tmp;
+	double_t kd, z, r, r2, scale, tail, tmp;
 	abstop = top12(x) & 0x7ff;
 	if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
@ -103,7 +105,7 @@ double exp(double x)
 	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
 	kd = eval_as_double(z + Shift);
 	ki = asuint64(kd) >> 16;
-	kd = (double)(int)ki;
+	kd = (double_t)(int32_t)ki;
 #else
 	/* z - kd is in [-1, 1] in non-nearest rounding modes.  */
 	kd = eval_as_double(z + Shift);
--- a/3rd/math/src/exp_data.h
+++ b/3rd/math/src/exp_data.h
@ -5,13 +5,14 @@
 #ifndef _EXP_DATA_H
 #define _EXP_DATA_H
-#include <math.h>
+#include "features/features.h"
 #include <stdint.h>
 #define EXP_TABLE_BITS 7
 #define EXP_POLY_ORDER 5
 #define EXP_USE_TOINT_NARROW 0
 #define EXP2_POLY_ORDER 5
-extern const struct exp_data {
+extern hidden const struct exp_data {
 	double invln2N;
 	double shift;
 	double negln2hiN;
@ -19,7 +20,7 @@ extern const struct exp_data {
 	double poly[4]; /* Last four coefficients.  */
 	double exp2_shift;
 	double exp2_poly[EXP2_POLY_ORDER];
-	unsigned long long tab[2*(1 << EXP_TABLE_BITS)];
+	uint64_t tab[2*(1 << EXP_TABLE_BITS)];
 } __exp_data;
 #endif
--- a/3rd/math/src/fabs.c
+++ b/3rd/math/src/fabs.c
@ -1,8 +1,9 @@
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
 double fabs(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	u.i &= ULLONG_NSHIFT/2;
 	return u.f;
 }
--- a/3rd/math/src/factorial.c
+++ b/3rd/math/src/factorial.c
@ -1,4 +1,4 @@
-#include <math.h>
+#include "libm.h"
 int factorial(int n)
 {
--- a/3rd/math/src/floor.c
+++ b/3rd/math/src/floor.c
@ -1,17 +1,17 @@
-#include "math.h"
+#include "libm.h"
 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
 #define EPS DBL_EPSILON
 #elif FLT_EVAL_METHOD==2
 #define EPS LDBL_EPSILON
 #endif
-static const double toint = 1/EPS;
+static const double_t toint = 1/EPS;
 double floor(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	int e = u.i >> 52 & 0x7ff;
-	double y;
+	double_t y;
 	if (e >= 0x3ff+52 || x == 0)
 		return x;
--- a/3rd/math/src/fmax.c
+++ b/3rd/math/src/fmax.c
@ -1,4 +1,4 @@
-#include <math.h>
+#include "math.h"
 double fmax(double x, double y)
 {
--- a/3rd/math/src/fmin.c
+++ b/3rd/math/src/fmin.c
@ -1,4 +1,4 @@
-#include <math.h>
+#include "math.h"
 double fmin(double x, double y)
 {
--- a/3rd/math/src/fmod.c
+++ b/3rd/math/src/fmod.c
@ -1,16 +1,17 @@
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
 double fmod(double x, double y)
 {
-	union {double f; unsigned long long i;} ux = {x}, uy = {y};
+	union {double f; uint64_t i;} ux = {x}, uy = {y};
 	int ex = ux.i>>52 & 0x7ff;
 	int ey = uy.i>>52 & 0x7ff;
 	int sx = ux.i>>63;
-	unsigned long long i;
+	uint64_t i;
 	/* in the followings uxi should be ux.i, but then gcc wrongly adds */
 	/* float load/store to inner loops ruining performance and code size */
-	unsigned long long uxi = ux.i;
+	uint64_t uxi = ux.i;
 	if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
 		return (x*y)/(x*y);
@ -57,11 +58,11 @@ double fmod(double x, double y)
 	/* scale result */
 	if (ex > 0) {
 		uxi -= 1ULL << 52;
-		uxi |= (unsigned long long)ex << 52;
+		uxi |= (uint64_t)ex << 52;
 	} else {
 		uxi >>= -ex + 1;
 	}
-	uxi |= (unsigned long long)sx << 63;
+	uxi |= (uint64_t)sx << 63;
 	ux.i = uxi;
 	return ux.f;
 }
--- a/3rd/math/src/fsum.c
+++ b/3rd/math/src/fsum.c
@ -1,4 +1,4 @@
-#include <math.h>
+#include "libm.h"
 double fsum(double* aptr, int n)
 {
--- a/3rd/math/src/gcd.c
+++ b/3rd/math/src/gcd.c
@ -1,4 +1,4 @@
-#include <math.h>
+#include "libm.h"
 int gcd(int a, int b)
 {
--- a/3rd/math/src/log.c
+++ b/3rd/math/src/log.c
@ -5,7 +5,9 @@
 * SPDX-License-Identifier: MIT
 */
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
 #include "libm.h"
 #include "log_data.h"
 #define T __log_data.tab
@ -18,16 +20,16 @@
 #define OFF 0x3fe6000000000000
 /* Top 16 bits of a double.  */
-static inline unsigned int top16(double x)
+static inline uint32_t top16(double x)
 {
 	return asuint64(x) >> 48;
 }
 double log(double x)
 {
-	double w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
+	double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
-	unsigned long long ix, iz, tmp;
+	uint64_t ix, iz, tmp;
-	unsigned int top;
+	uint32_t top;
 	int k, i;
 	ix = asuint64(x);
@ -48,8 +50,8 @@ double log(double x)
 			   r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
 		/* Worst-case error is around 0.507 ULP.  */
 		w = r * 0x1p27;
-		double rhi = r + w - w;
+		double_t rhi = r + w - w;
-		double rlo = r - rhi;
+		double_t rlo = r - rhi;
 		w = rhi * rhi * B[0]; /* B[0] == -0.5.  */
 		hi = r + w;
 		lo = r - hi + w;
@ -76,7 +78,7 @@ double log(double x)
 	   The ith subinterval contains z and c is near its center.  */
 	tmp = ix - OFF;
 	i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
-	k = (long long)tmp >> 52; /* arithmetic shift */
+	k = (int64_t)tmp >> 52; /* arithmetic shift */
 	iz = ix - (tmp & 0xfffULL << 52);
 	invc = T[i].invc;
 	logc = T[i].logc;
@ -91,7 +93,7 @@ double log(double x)
 	/* rounding error: 0x1p-55/N + 0x1p-66.  */
 	r = (z - T2[i].chi - T2[i].clo) * invc;
 #endif
-	kd = (double)k;
+	kd = (double_t)k;
 	/* hi + lo = r + log(c) + k*Ln2.  */
 	w = kd * Ln2hi + logc;
--- a/3rd/math/src/log10.c
+++ b/3rd/math/src/log10.c
@ -17,7 +17,8 @@
 *    log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2)
 */
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
 static const double
 ivln10hi  = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */
@ -34,9 +35,9 @@ Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
 double log10(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
-	double hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo;
+	double_t hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo;
-	unsigned int hx;
+	uint32_t hx;
 	int k;
 	hx = u.i>>32;
@ -60,7 +61,7 @@ double log10(double x)
 	hx += 0x3ff00000 - 0x3fe6a09e;
 	k += (int)(hx>>20) - 0x3ff;
 	hx = (hx&0x000fffff) + 0x3fe6a09e;
-	u.i = (unsigned long long)hx<<32 | (u.i&0xffffffff);
+	u.i = (uint64_t)hx<<32 | (u.i&0xffffffff);
 	x = u.f;
 	f = x - 1.0;
@ -76,7 +77,7 @@ double log10(double x)
 	/* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
 	hi = f - hfsq;
 	u.f = hi;
-	u.i &= (unsigned long long)-1<<32;
+	u.i &= (uint64_t)-1<<32;
 	hi = u.f;
 	lo = f - hi - hfsq + s*(hfsq+R);
--- a/3rd/math/src/log2.c
+++ b/3rd/math/src/log2.c
@ -5,7 +5,9 @@
 * SPDX-License-Identifier: MIT
 */
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
 #include "libm.h"
 #include "log2_data.h"
 #define T __log2_data.tab
@ -18,16 +20,16 @@
 #define OFF 0x3fe6000000000000
 /* Top 16 bits of a double.  */
-static inline unsigned int top16(double x)
+static inline uint32_t top16(double x)
 {
 	return asuint64(x) >> 48;
 }
 double log2(double x)
 {
-	double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
+	double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
-	unsigned long long ix, iz, tmp;
+	uint64_t ix, iz, tmp;
-	unsigned int top;
+	uint32_t top;
 	int k, i;
 	ix = asuint64(x);
@ -44,7 +46,7 @@ double log2(double x)
 		hi = r * InvLn2hi;
 		lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
 #else
-		double rhi, rlo;
+		double_t rhi, rlo;
 		rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32);
 		rlo = r - rhi;
 		hi = rhi * InvLn2hi;
@ -79,12 +81,12 @@ double log2(double x)
 	   The ith subinterval contains z and c is near its center.  */
 	tmp = ix - OFF;
 	i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
-	k = (long long)tmp >> 52; /* arithmetic shift */
+	k = (int64_t)tmp >> 52; /* arithmetic shift */
 	iz = ix - (tmp & 0xfffULL << 52);
 	invc = T[i].invc;
 	logc = T[i].logc;
 	z = asdouble(iz);
-	kd = (double)k;
+	kd = (double_t)k;
 	/* log2(x) = log2(z/c) + log2(c) + k.  */
 	/* r ~= z/c - 1, |r| < 1/(2*N).  */
@ -94,7 +96,7 @@ double log2(double x)
 	t1 = r * InvLn2hi;
 	t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
 #else
-	double rhi, rlo;
+	double_t rhi, rlo;
 	/* rounding error: 0x1p-55/N + 0x1p-65.  */
 	r = (z - T2[i].chi - T2[i].clo) * invc;
 	rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32);
--- a/3rd/math/src/log2_data.h
+++ b/3rd/math/src/log2_data.h
@ -5,11 +5,12 @@
 #ifndef _LOG2_DATA_H
 #define _LOG2_DATA_H
 #include "features/features.h"
 #define LOG2_TABLE_BITS 6
 #define LOG2_POLY_ORDER 7
 #define LOG2_POLY1_ORDER 11
-extern const struct log2_data {
+extern hidden const struct log2_data {
 	double invln2hi;
 	double invln2lo;
 	double poly[LOG2_POLY_ORDER - 1];
--- a/3rd/math/src/log_data.h
+++ b/3rd/math/src/log_data.h
@ -5,11 +5,12 @@
 #ifndef _LOG_DATA_H
 #define _LOG_DATA_H
 #include "features/features.h"
 #define LOG_TABLE_BITS 7
 #define LOG_POLY_ORDER 6
 #define LOG_POLY1_ORDER 12
-extern const struct log_data {
+extern hidden const struct log_data {
 	double ln2hi;
 	double ln2lo;
 	double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
--- a/3rd/math/src/modf.c
+++ b/3rd/math/src/modf.c
@ -1,9 +1,9 @@
-#include "math.h"
+#include "libm.h"
 double modf(double x, double *iptr)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
-	unsigned long long mask;
+	uint64_t mask;
 	int e = (int)(u.i>>52 & 0x7ff) - 0x3ff;
 	/* no fractional part */
--- a/3rd/math/src/pow.c
+++ b/3rd/math/src/pow.c
@ -5,7 +5,9 @@
 * SPDX-License-Identifier: MIT
 */
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
 #include "libm.h"
 #include "exp_data.h"
 #include "pow_data.h"
@ -23,7 +25,7 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
 #define OFF 0x3fe6955500000000
 /* Top 12 bits of a double (sign and exponent bits).  */
-static inline unsigned int top12(double x)
+static inline uint32_t top12(double x)
 {
 	return asuint64(x) >> 52;
 }
@ -31,11 +33,11 @@ static inline unsigned int top12(double x)
 /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
   additional 15 bits precision.  IX is the bit representation of x, but
   normalized in the subnormal range using the sign bit for the exponent.  */
-static inline double log_inline(unsigned long long ix, double *tail)
+static inline double_t log_inline(uint64_t ix, double_t *tail)
 {
-	/* double for better performance on targets with FLT_EVAL_METHOD==2.  */
+	/* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
-	double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
+	double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
-	unsigned long long iz, tmp;
+	uint64_t iz, tmp;
 	int k, i;
 	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
@ -43,10 +45,10 @@ static inline double log_inline(unsigned long long ix, double *tail)
 	   The ith subinterval contains z and c is near its center.  */
 	tmp = ix - OFF;
 	i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
-	k = (long long)tmp >> 52; /* arithmetic shift */
+	k = (int64_t)tmp >> 52; /* arithmetic shift */
 	iz = ix - (tmp & 0xfffULL << 52);
 	z = asdouble(iz);
-	kd = (double)k;
+	kd = (double_t)k;
 	/* log(x) = k*Ln2 + log(c) + log1p(z/c-1).  */
 	invc = T[i].invc;
@ -59,10 +61,10 @@ static inline double log_inline(unsigned long long ix, double *tail)
 	r = __builtin_fma(z, invc, -1.0);
 #else
 	/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|.  */
-	double zhi = asdouble((iz + (1ULL << 31)) & (ULLONG_NSHIFT << 32));
+	double_t zhi = asdouble((iz + (1ULL << 31)) & (ULLONG_NSHIFT << 32));
-	double zlo = z - zhi;
+	double_t zlo = z - zhi;
-	double rhi = zhi * invc - 1.0;
+	double_t rhi = zhi * invc - 1.0;
-	double rlo = zlo * invc;
+	double_t rlo = zlo * invc;
 	r = rhi + rlo;
 #endif
@ -73,7 +75,7 @@ static inline double log_inline(unsigned long long ix, double *tail)
 	lo2 = t1 - t2 + r;
 	/* Evaluation is optimized assuming superscalar pipelined execution.  */
-	double ar, ar2, ar3, lo3, lo4;
+	double_t ar, ar2, ar3, lo3, lo4;
 	ar = A[0] * r; /* A[0] = -0.5.  */
 	ar2 = r * ar;
 	ar3 = r * ar2;
@ -83,8 +85,8 @@ static inline double log_inline(unsigned long long ix, double *tail)
 	lo3 = __builtin_fma(ar, r, -ar2);
 	lo4 = t2 - hi + ar2;
 #else
-	double arhi = A[0] * rhi;
+	double_t arhi = A[0] * rhi;
-	double arhi2 = rhi * arhi;
+	double_t arhi2 = rhi * arhi;
 	hi = t2 + arhi2;
 	lo3 = rlo * (ar + arhi);
 	lo4 = t2 - hi + arhi2;
@ -116,12 +118,12 @@ static inline double log_inline(unsigned long long ix, double *tail)
   is scale*(1+TMP) without intermediate rounding.  The bit representation of
   scale is in SBITS, however it has a computed exponent that may have
   overflown into the sign bit so that needs to be adjusted before using it as
-   a double.  (int)KI is the k used in the argument reduction and exponent
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
   adjustment of scale, positive k here means the result may overflow and
   negative k means the result may underflow.  */
-static inline double specialcase(double tmp, unsigned long long sbits, unsigned long long ki)
+static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
 {
-	double scale, y;
+	double_t scale, y;
 	if ((ki & 0x80000000) == 0) {
 		/* k > 0, the exponent of scale might have overflowed by <= 460.  */
@ -140,7 +142,7 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned
 		   range to avoid double rounding that can cause 0.5+E/2 ulp error where
 		   E is the worst-case ulp error outside the subnormal range.  So this
 		   is only useful if the goal is better than 1 ulp worst-case error.  */
-		double hi, lo, one = 1.0;
+		double_t hi, lo, one = 1.0;
 		if (y < 0.0)
 			one = -1.0;
 		lo = scale - y + scale * tmp;
@ -161,12 +163,12 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned
 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
   The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.  */
-static inline double exp_inline(double x, double xtail, unsigned int sign_bias)
+static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
 {
-	unsigned int abstop;
+	uint32_t abstop;
-	unsigned long long ki, idx, top, sbits;
+	uint64_t ki, idx, top, sbits;
-	/* double for better performance on targets with FLT_EVAL_METHOD==2.  */
+	/* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
-	double kd, z, r, r2, scale, tail, tmp;
+	double_t kd, z, r, r2, scale, tail, tmp;
 	abstop = top12(x) & 0x7ff;
 	if (predict_false(abstop - top12(0x1p-54) >=
@ -174,7 +176,7 @@ static inline double exp_inline(double x, double xtail, unsigned int sign_bias)
 		if (abstop - top12(0x1p-54) >= 0x80000000) {
 			/* Avoid spurious underflow for tiny x.  */
 			/* Note: 0 is common input.  */
-			double one = WANT_ROUNDING ? 1.0 + x : 1.0;
+			double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
 			return sign_bias ? -one : one;
 		}
 		if (abstop >= top12(1024.0)) {
@ -198,7 +200,7 @@ static inline double exp_inline(double x, double xtail, unsigned int sign_bias)
 	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
 	kd = eval_as_double(z + Shift);
 	ki = asuint64(kd) >> 16;
-	kd = (double)(int)ki;
+	kd = (double_t)(int32_t)ki;
 #else
 	/* z - kd is in [-1, 1] in non-nearest rounding modes.  */
 	kd = eval_as_double(z + Shift);
@ -230,7 +232,7 @@ static inline double exp_inline(double x, double xtail, unsigned int sign_bias)
 /* Returns 0 if not int, 1 if odd int, 2 if even int.  The argument is
   the bit representation of a non-zero finite floating-point value.  */
-static inline int checkint(unsigned long long iy)
+static inline int checkint(uint64_t iy)
 {
 	int e = iy >> 52 & 0x7ff;
 	if (e < 0x3ff)
@ -245,16 +247,16 @@ static inline int checkint(unsigned long long iy)
 }
 /* Returns 1 if input is the bit representation of 0, infinity or nan.  */
-static inline int zeroinfnan(unsigned long long i)
+static inline int zeroinfnan(uint64_t i)
 {
 	return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
 }
 double pow(double x, double y)
 {
-	unsigned int sign_bias = 0;
+	uint32_t sign_bias = 0;
-	unsigned long long ix, iy;
+	uint64_t ix, iy;
-	unsigned int topx, topy;
+	uint32_t topx, topy;
 	ix = asuint64(x);
 	iy = asuint64(y);
@ -281,7 +283,7 @@ double pow(double x, double y)
 			return y * y;
 		}
 		if (predict_false(zeroinfnan(ix))) {
-			double x2 = x * x;
+			double_t x2 = x * x;
 			if (ix >> 63 && checkint(iy) == 1)
 				x2 = -x2;
 			/* Without the barrier some versions of clang hoist the 1/x2 and
@ -323,17 +325,17 @@ double pow(double x, double y)
 		}
 	}
-	double lo;
+	double_t lo;
-	double hi = log_inline(ix, &lo);
+	double_t hi = log_inline(ix, &lo);
-	double ehi, elo;
+	double_t ehi, elo;
 #if __FP_FAST_FMA
 	ehi = y * hi;
 	elo = y * lo + __builtin_fma(y, hi, -ehi);
 #else
-	double yhi = asdouble(iy & ULLONG_NSHIFT << 27);
+	double_t yhi = asdouble(iy & ULLONG_NSHIFT << 27);
-	double ylo = y - yhi;
+	double_t ylo = y - yhi;
-	double lhi = asdouble(asuint64(hi) & ULLONG_NSHIFT << 27);
+	double_t lhi = asdouble(asuint64(hi) & ULLONG_NSHIFT << 27);
-	double llo = hi - lhi + lo;
+	double_t llo = hi - lhi + lo;
 	ehi = yhi * lhi;
 	elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25.  */
 #endif
--- a/3rd/math/src/pow_data.h
+++ b/3rd/math/src/pow_data.h
@ -5,10 +5,11 @@
 #ifndef _POW_DATA_H
 #define _POW_DATA_H
 #include "features/features.h"
 #define POW_LOG_TABLE_BITS 7
 #define POW_LOG_POLY_ORDER 8
-extern const struct pow_log_data {
+extern hidden const struct pow_log_data {
 	double ln2hi;
 	double ln2lo;
 	double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
--- a/3rd/math/src/radians.c
+++ b/3rd/math/src/radians.c
@ -1,4 +1,4 @@
-#include <math.h>
+#include "libm.h"
 double radians(double x)
 {
--- a/3rd/math/src/scalbn.c
+++ b/3rd/math/src/scalbn.c
@ -1,9 +1,10 @@
-#include <math.h>
+#include "math.h"
 #include <stdint.h>
 double scalbn(double x, int n)
 {
-	union {double f; unsigned long long i;} u;
+	union {double f; uint64_t i;} u;
-	double y = x;
+	double_t y = x;
 	if (n > 1023) {
 		y *= 0x1p1023;
@ -26,7 +27,7 @@ double scalbn(double x, int n)
 				n = -1022;
 		}
 	}
-	u.i = (unsigned long long)(0x3ff+n)<<52;
+	u.i = (uint64_t)(0x3ff+n)<<52;
 	x = y * u.f;
 	return x;
 }
--- a/3rd/math/src/sin.c
+++ b/3rd/math/src/sin.c
@ -40,12 +40,12 @@
 *      TRIG(x) returns trig(x) nearly rounded
 */
-#include "math.h"
+#include "libm.h"
 double sin(double x)
 {
 	double y[2];
-	unsigned int ix;
+	uint32_t ix;
 	unsigned n;
 	/* High word of x. */
--- a/3rd/math/src/sqrt.c
+++ b/3rd/math/src/sqrt.c
@ -1,27 +1,29 @@
-#include <math.h>
+#include <stdint.h>
 #include "math.h"
 #include "libm.h"
 #include "sqrt_data.h"
 #define FENV_SUPPORT 1
 /* returns a*b*2^-32 - e, with error 0 <= e < 1.  */
-static inline unsigned int mul32(unsigned int a, unsigned int b)
+static inline uint32_t mul32(uint32_t a, uint32_t b)
 {
-	return (unsigned long long)a*b >> 32;
+	return (uint64_t)a*b >> 32;
 }
 /* returns a*b*2^-64 - e, with error 0 <= e < 3.  */
-static inline unsigned long long mul64(unsigned long long a, unsigned long long b)
+static inline uint64_t mul64(uint64_t a, uint64_t b)
 {
-	unsigned long long ahi = a>>32;
+	uint64_t ahi = a>>32;
-	unsigned long long alo = a&0xffffffff;
+	uint64_t alo = a&0xffffffff;
-	unsigned long long bhi = b>>32;
+	uint64_t bhi = b>>32;
-	unsigned long long blo = b&0xffffffff;
+	uint64_t blo = b&0xffffffff;
 	return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
 }
 double sqrt(double x)
 {
-	unsigned long long ix, top, m;
+	uint64_t ix, top, m;
 	/* special case handling.  */
 	ix = asuint64(x);
@ -103,11 +105,11 @@ double sqrt(double x)
 	   and after switching to 64 bit
 	     m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62  */
-	static const unsigned long long three = 0xc0000000;
+	static const uint64_t three = 0xc0000000;
-	unsigned long long r, s, d, u, i;
+	uint64_t r, s, d, u, i;
 	i = (ix >> 46) % 128;
-	r = (unsigned int)__rsqrt_tab[i] << 16;
+	r = (uint32_t)__rsqrt_tab[i] << 16;
 	/* |r sqrt(m) - 1| < 0x1.fdp-9 */
 	s = mul32(m>>32, r);
 	/* |s/sqrt(m) - 1| < 0x1.fdp-9 */
@ -134,7 +136,7 @@ double sqrt(double x)
 	   compute nearest rounded result:
 	   the nearest result to 52 bits is either s or s+0x1p-52,
 	   we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m.  */
-	unsigned long long d0, d1, d2;
+	uint64_t d0, d1, d2;
 	double y, t;
 	d0 = (m << 42) - s*s;
 	d1 = s - d0;
@ -147,7 +149,7 @@ double sqrt(double x)
 		/* handle rounding modes and inexact exception:
 		   only (s+1)^2 == 2^42 m case is exact otherwise
 		   add a tiny value to cause the fenv effects.  */
-		unsigned long long tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
+		uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
 		tiny |= (d1^d2) & 0x8000000000000000;
 		t = asdouble(tiny);
 		y = eval_as_double(y + t);
--- a/3rd/math/src/sqrt_data.c
+++ b/3rd/math/src/sqrt_data.c
@ -1,5 +1,5 @@
 #include "sqrt_data.h"
-const unsigned short __rsqrt_tab[128] = {
+const uint16_t __rsqrt_tab[128] = {
 0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43,
 0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b,
 0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1,
--- a/3rd/math/src/sqrt_data.h
+++ b/3rd/math/src/sqrt_data.h
@ -1,12 +1,13 @@
 #ifndef _SQRT_DATA_H
 #define _SQRT_DATA_H
-#include <math.h>
+#include "features/features.h"
 #include <stdint.h>
 /* if x in [1,2): i = (int)(64*x);
   if x in [2,4): i = (int)(32*x-64);
   __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error:
   |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */
-extern const unsigned short __rsqrt_tab[128];
+extern hidden const uint16_t __rsqrt_tab[128];
 #endif
--- a/3rd/math/src/tan.c
+++ b/3rd/math/src/tan.c
@ -39,12 +39,12 @@
 *      TRIG(x) returns trig(x) nearly rounded
 */
-#include "math.h"
+#include "libm.h"
 double tan(double x)
 {
 	double y[2];
-	unsigned int ix;
+	uint32_t ix;
 	unsigned n;
 	GET_HIGH_WORD(ix, x);
--- a/3rd/math/src/trunc.c
+++ b/3rd/math/src/trunc.c
@ -1,10 +1,10 @@
-#include "math.h"
+#include "libm.h"
 double trunc(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff + 12;
-	unsigned long long m;
+	uint64_t m;
 	if (e >= 52 + 12)
 		return x;
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -50,12 +50,8 @@ if(PK_ENABLE_DETERMINISTIC_FLOAT)
    add_subdirectory(3rd/math)
    include_directories(3rd/math)
    add_definitions(-DPK_ENABLE_DETERMINISTIC_FLOAT)
-    if(MSVC)
+    if(NOT MSVC)
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:precise")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexcess-precision=standard -ffp-contract=off")
    elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffp-model=strict -O2")
    else()
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2")
    endif()
 endif()
@ -124,6 +120,7 @@ endif()
 if(NOT MSVC)
    target_link_libraries(${PROJECT_NAME} pthread)
 endif()
 if(PK_BUILD_MODULE_LZ4)
    target_link_libraries(${PROJECT_NAME} lz4)
 endif()