diff --git a/3rd/math/CMakeLists.txt b/3rd/math/CMakeLists.txt
index 089ee1fa..b3d695ed 100644
--- a/3rd/math/CMakeLists.txt
+++ b/3rd/math/CMakeLists.txt
@@ -8,24 +8,10 @@ set(CMAKE_C_STANDARD_REQUIRED ON)
 include_directories(${CMAKE_CURRENT_LIST_DIR}/include)
 AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_LIST_DIR}/src MUSL_LIBC_MATH_SRC)
 if(MSVC)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:precise /Ox /Oi-")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox /Oi- /wd4244 /wd4723")
     # mute warning C4723: div by 0 because some function returns nan when a input is nan.
-    add_compile_options( /wd4723)
-elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffp-model=strict -O2")
 else()
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -fexcess-precision=standard -ffp-contract=off")
 endif()
 
 add_library(${PROJECT_NAME} STATIC ${MUSL_LIBC_MATH_SRC})
-
-
-# if(MSVC)
-#     target_compile_options(${PROJECT_NAME} PRIVATE /fp:precise /Od)
-#     message("MUSL_LIBC_MATH: MSVC set")
-# elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang")
-#     target_compile_options(${PROJECT_NAME} PRIVATE -ffp-model=strict)
-#     message("MUSL_LIBC_MATH: CLANG set")
-# elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU")
-#     message("MUSL_LIBC_MATH: gcc set")
-# endif()   
diff --git a/3rd/math/include/features/features.h b/3rd/math/include/features/features.h
new file mode 100644
index 00000000..85570ff5
--- /dev/null
+++ b/3rd/math/include/features/features.h
@@ -0,0 +1,8 @@
+#if defined(_MSC_VER)
+#define hidden 
+#else
+#define weak __attribute__((__weak__))
+#define hidden __attribute__((__visibility__("hidden")))
+#define weak_alias(old, new) \
+	extern __typeof(old) new __attribute__((__weak__, __alias__(#old)))
+#endif
\ No newline at end of file
diff --git a/3rd/math/include/float.h b/3rd/math/include/float.h
new file mode 100644
index 00000000..791dde91
--- /dev/null
+++ b/3rd/math/include/float.h
@@ -0,0 +1,51 @@
+#ifndef _FLOAT_H
+#define _FLOAT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define FLT_RADIX 2
+
+#define FLT_TRUE_MIN 1.40129846432481707092e-45F
+#define FLT_MIN 1.17549435082228750797e-38F
+#define FLT_MAX 3.40282346638528859812e+38F
+#define FLT_EPSILON 1.1920928955078125e-07F
+
+#define FLT_MANT_DIG 24
+#define FLT_MIN_EXP (-125)
+#define FLT_MAX_EXP 128
+#define FLT_HAS_SUBNORM 1
+
+#define FLT_DIG 6
+#define FLT_DECIMAL_DIG 9
+#define FLT_MIN_10_EXP (-37)
+#define FLT_MAX_10_EXP 38
+
+#define DBL_TRUE_MIN 4.94065645841246544177e-324
+#define DBL_MIN 2.22507385850720138309e-308
+#define DBL_MAX 1.79769313486231570815e+308
+#define DBL_EPSILON 2.22044604925031308085e-16
+
+#define DBL_MANT_DIG 53
+#define DBL_MIN_EXP (-1021)
+#define DBL_MAX_EXP 1024
+#define DBL_HAS_SUBNORM 1
+
+#define DBL_DIG 15
+#define DBL_DECIMAL_DIG 17
+#define DBL_MIN_10_EXP (-307)
+#define DBL_MAX_10_EXP 308
+
+#define LDBL_HAS_SUBNORM 1
+#define LDBL_DECIMAL_DIG DECIMAL_DIG
+#define LDBL_MANT_DIG    DBL_MANT_DIG            
+#define LDBL_MAX_EXP     DBL_MAX_EXP             
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/3rd/math/include/libm.h b/3rd/math/include/libm.h
new file mode 100644
index 00000000..f5f43f81
--- /dev/null
+++ b/3rd/math/include/libm.h
@@ -0,0 +1,277 @@
+#ifndef _LIBM_H
+#define _LIBM_H
+
+#include <stdint.h>
+#include "float.h"
+#include "math.h"
+
+
+#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
+#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
+union ldshape {
+	long double f;
+	struct {
+		uint64_t m;
+		uint16_t se;
+	} i;
+};
+#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
+/* This is the m68k variant of 80-bit long double, and this definition only works
+ * on archs where the alignment requirement of uint64_t is <= 4. */
+union ldshape {
+	long double f;
+	struct {
+		uint16_t se;
+		uint16_t pad;
+		uint64_t m;
+	} i;
+};
+#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
+union ldshape {
+	long double f;
+	struct {
+		uint64_t lo;
+		uint32_t mid;
+		uint16_t top;
+		uint16_t se;
+	} i;
+	struct {
+		uint64_t lo;
+		uint64_t hi;
+	} i2;
+};
+#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
+union ldshape {
+	long double f;
+	struct {
+		uint16_t se;
+		uint16_t top;
+		uint32_t mid;
+		uint64_t lo;
+	} i;
+	struct {
+		uint64_t hi;
+		uint64_t lo;
+	} i2;
+};
+#else
+#error Unsupported long double representation
+#endif
+
+/* Support non-nearest rounding mode.  */
+#define WANT_ROUNDING 1
+/* Support signaling NaNs.  */
+#define WANT_SNAN 0
+
+#if WANT_SNAN
+#error SNaN is unsupported
+#else
+#define issignalingf_inline(x) 0
+#define issignaling_inline(x) 0
+#endif
+
+#ifndef TOINT_INTRINSICS
+#define TOINT_INTRINSICS 0
+#endif
+
+#if TOINT_INTRINSICS
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+   consistently with converttoint so the results match.  If the result
+   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
+static double_t roundtoint(double_t);
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+   consistently with roundtoint.  If the result is not representible in an
+   int32_t then the semantics is unspecified.  */
+static int32_t converttoint(double_t);
+#endif
+
+/* Helps static branch prediction so hot path can be better optimized.  */
+#ifdef __GNUC__
+#define predict_true(x) __builtin_expect(!!(x), 1)
+#define predict_false(x) __builtin_expect(x, 0)
+#else
+#define predict_true(x) (x)
+#define predict_false(x) (x)
+#endif
+
+/* Evaluate an expression as the specified type. With standard excess
+   precision handling a type cast or assignment is enough (with
+   -ffloat-store an assignment is required, in old compilers argument
+   passing and return statement may not drop excess precision).  */
+
+static inline float eval_as_float(float x)
+{
+	float y = x;
+	return y;
+}
+
+static inline double eval_as_double(double x)
+{
+	double y = x;
+	return y;
+}
+
+/* fp_barrier returns its input, but limits code transformations
+   as if it had a side-effect (e.g. observable io) and returned
+   an arbitrary value.  */
+
+#ifndef fp_barrierf
+#define fp_barrierf fp_barrierf
+static inline float fp_barrierf(float x)
+{
+	volatile float y = x;
+	return y;
+}
+#endif
+
+#ifndef fp_barrier
+#define fp_barrier fp_barrier
+static inline double fp_barrier(double x)
+{
+	volatile double y = x;
+	return y;
+}
+#endif
+
+#ifndef fp_barrierl
+#define fp_barrierl fp_barrierl
+static inline long double fp_barrierl(long double x)
+{
+	volatile long double y = x;
+	return y;
+}
+#endif
+
+/* fp_force_eval ensures that the input value is computed when that's
+   otherwise unused.  To prevent the constant folding of the input
+   expression, an additional fp_barrier may be needed or a compilation
+   mode that does so (e.g. -frounding-math in gcc). Then it can be
+   used to evaluate an expression for its fenv side-effects only.   */
+
+#ifndef fp_force_evalf
+#define fp_force_evalf fp_force_evalf
+static inline void fp_force_evalf(float x)
+{
+	volatile float y;
+	y = x;
+}
+#endif
+
+#ifndef fp_force_eval
+#define fp_force_eval fp_force_eval
+static inline void fp_force_eval(double x)
+{
+	volatile double y;
+	y = x;
+}
+#endif
+
+#ifndef fp_force_evall
+#define fp_force_evall fp_force_evall
+static inline void fp_force_evall(long double x)
+{
+	volatile long double y;
+	y = x;
+}
+#endif
+
+#define FORCE_EVAL(x) do {                        \
+	if (sizeof(x) == sizeof(float)) {         \
+		fp_force_evalf(x);                \
+	} else if (sizeof(x) == sizeof(double)) { \
+		fp_force_eval(x);                 \
+	} else {                                  \
+		fp_force_evall(x);                \
+	}                                         \
+} while(0)
+
+typedef union {float _f; unsigned int _i;}asuint_union;
+typedef union {unsigned int _i; float _f;}asfloat_union;
+typedef union {double _f; unsigned long long _i;}asuint64_union;
+typedef union {unsigned long long _i; double _f;}asdouble_union;
+#define asuint(f) ((asuint_union){f})._i
+#define asfloat(i) ((asfloat_union){i})._f
+#define asuint64(f) ((asuint64_union){f})._i
+#define asdouble(i) ((asdouble_union){i})._f
+
+#define EXTRACT_WORDS(hi,lo,d)                    \
+do {                                              \
+  uint64_t __u = asuint64(d);                     \
+  (hi) = __u >> 32;                               \
+  (lo) = (uint32_t)__u;                           \
+} while (0)
+
+#define GET_HIGH_WORD(hi,d)                       \
+do {                                              \
+  (hi) = asuint64(d) >> 32;                       \
+} while (0)
+
+#define GET_LOW_WORD(lo,d)                        \
+do {                                              \
+  (lo) = (uint32_t)asuint64(d);                   \
+} while (0)
+
+#define INSERT_WORDS(d,hi,lo)                     \
+do {                                              \
+  (d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
+} while (0)
+
+#define SET_HIGH_WORD(d,hi)                       \
+  INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
+
+#define SET_LOW_WORD(d,lo)                        \
+  INSERT_WORDS(d, asuint64(d)>>32, lo)
+
+#define GET_FLOAT_WORD(w,d)                       \
+do {                                              \
+  (w) = asuint(d);                                \
+} while (0)
+
+#define SET_FLOAT_WORD(d,w)                       \
+do {                                              \
+  (d) = asfloat(w);                               \
+} while (0)
+
+hidden int    __rem_pio2_large(double*,double*,int,int,int);
+
+hidden int    __rem_pio2(double,double*);
+hidden double __sin(double,double,int);
+hidden double __cos(double,double);
+hidden double __tan(double,double,int);
+// hidden double __expo2(double,double);
+
+// hidden int    __rem_pio2f(float,double*);
+// hidden float  __sindf(double);
+// hidden float  __cosdf(double);
+// hidden float  __tandf(double,int);
+// hidden float  __expo2f(float,float);
+
+// hidden int __rem_pio2l(long double, long double *);
+// hidden long double __sinl(long double, long double, int);
+// hidden long double __cosl(long double, long double);
+// hidden long double __tanl(long double, long double, int);
+
+// hidden long double __polevll(long double, const long double *, int);
+// hidden long double __p1evll(long double, const long double *, int);
+
+// extern int __signgam;
+// hidden double __lgamma_r(double, int *);
+// hidden float __lgammaf_r(float, int *);
+
+/* error handling functions */
+hidden float __math_xflowf(uint32_t, float);
+hidden float __math_uflowf(uint32_t);
+hidden float __math_oflowf(uint32_t);
+hidden float __math_divzerof(uint32_t);
+hidden float __math_invalidf(float);
+hidden double __math_xflow(uint32_t, double);
+hidden double __math_uflow(uint32_t);
+hidden double __math_oflow(uint32_t);
+hidden double __math_divzero(uint32_t);
+hidden double __math_invalid(double);
+#if LDBL_MANT_DIG != DBL_MANT_DIG
+hidden long double __math_invalidl(long double);
+#endif
+
+#endif
diff --git a/3rd/math/include/math.h b/3rd/math/include/math.h
index 82c49c25..660542de 100644
--- a/3rd/math/include/math.h
+++ b/3rd/math/include/math.h
@@ -5,6 +5,23 @@
 extern "C" {
 #endif
 
+#include "features/features.h"
+
+#if defined(_WIN32) || defined(_WIN64)
+#if defined _M_IX86 && _M_IX86_FP < 2 && !defined _M_FP_FAST
+	typedef long double float_t;
+	typedef long double double_t;
+#else
+	typedef float  float_t;
+	typedef double double_t;
+#endif
+#else
+#define __NEED_float_t
+#define __NEED_double_t
+// #include <bits/alltypes.h>
+typedef float  float_t;
+typedef double double_t;
+#endif
 
 #ifndef _HUGE_ENUF
     #define _HUGE_ENUF  1e+300  // _HUGE_ENUF*_HUGE_ENUF must overflow
@@ -45,21 +62,6 @@ extern "C" {
 
 #define FLT_EVAL_METHOD 0
 
-/* Support non-nearest rounding mode.  */
-#define WANT_ROUNDING 1
-/* Support signaling NaNs.  */
-#define WANT_SNAN 0
-
-#if WANT_SNAN
-#error SNaN is unsupported
-#else
-#define issignalingf_inline(x) 0
-#define issignaling_inline(x) 0
-#endif
-
-#define predict_true(x) (x)
-#define predict_false(x) (x)
-
 int __fpclassify(double);
 int __fpclassifyf(float);
 int __fpclassifyl(long double);
@@ -117,20 +119,20 @@ int __signbitl(long double);
 static __inline int __is##rel(type __x, type __y) \
 { return !isunordered(__x,__y) && __x op __y; }
 
-__ISREL_DEF(lessf, <, float)
-__ISREL_DEF(less, <, double)
+__ISREL_DEF(lessf, <, float_t)
+__ISREL_DEF(less, <, double_t)
 __ISREL_DEF(lessl, <, long double)
-__ISREL_DEF(lessequalf, <=, float)
-__ISREL_DEF(lessequal, <=, double)
+__ISREL_DEF(lessequalf, <=, float_t)
+__ISREL_DEF(lessequal, <=, double_t)
 __ISREL_DEF(lessequall, <=, long double)
-__ISREL_DEF(lessgreaterf, !=, float)
-__ISREL_DEF(lessgreater, !=, double)
+__ISREL_DEF(lessgreaterf, !=, float_t)
+__ISREL_DEF(lessgreater, !=, double_t)
 __ISREL_DEF(lessgreaterl, !=, long double)
-__ISREL_DEF(greaterf, >, float)
-__ISREL_DEF(greater, >, double)
+__ISREL_DEF(greaterf, >, float_t)
+__ISREL_DEF(greater, >, double_t)
 __ISREL_DEF(greaterl, >, long double)
-__ISREL_DEF(greaterequalf, >=, float)
-__ISREL_DEF(greaterequal, >=, double)
+__ISREL_DEF(greaterequalf, >=, float_t)
+__ISREL_DEF(greaterequal, >=, double_t)
 __ISREL_DEF(greaterequall, >=, long double)
 
 #define __tg_pred_2(x, y, p) ( \
@@ -144,165 +146,6 @@ __ISREL_DEF(greaterequall, >=, long double)
 #define isgreater(x, y)         __tg_pred_2(x, y, __isgreater)
 #define isgreaterequal(x, y)    __tg_pred_2(x, y, __isgreaterequal)
 
-/* Evaluate an expression as the specified type. With standard excess
-   precision handling a type cast or assignment is enough (with
-   -ffloat-store an assignment is required, in old compilers argument
-   passing and return statement may not drop excess precision).  */
-
-static inline float eval_as_float(float x)
-{
-	float y = x;
-	return y;
-}
-
-static inline double eval_as_double(double x)
-{
-	double y = x;
-	return y;
-}
-
-/* fp_barrier returns its input, but limits code transformations
-	as if it had a side-effect (e.g. observable io) and returned
-	an arbitrary value.  */
-
-#ifndef fp_barrierf
-#define fp_barrierf fp_barrierf
-static inline float fp_barrierf(float x)
-{
-	volatile float y = x;
-	return y;
-}
-#endif
-
-#ifndef fp_barrier
-#define fp_barrier fp_barrier
-static inline double fp_barrier(double x)
-{
-	volatile double y = x;
-	return y;
-}
-#endif
-
-#ifndef fp_barrierl
-#define fp_barrierl fp_barrierl
-static inline long double fp_barrierl(long double x)
-{
-	volatile long double y = x;
-	return y;
-}
-#endif
-
-/* fp_force_eval ensures that the input value is computed when that's
-	otherwise unused.  To prevent the constant folding of the input
-	expression, an additional fp_barrier may be needed or a compilation
-	mode that does so (e.g. -frounding-math in gcc). Then it can be
-	used to evaluate an expression for its fenv side-effects only.   */
-
-#ifndef fp_force_evalf
-#define fp_force_evalf fp_force_evalf
-static inline void fp_force_evalf(float x)
-{
-	volatile float y;
-	y = x;
-}
-#endif
-
-#ifndef fp_force_eval
-#define fp_force_eval fp_force_eval
-static inline void fp_force_eval(double x)
-{
-	volatile double y;
-	y = x;
-}
-#endif
-
-#ifndef fp_force_evall
-#define fp_force_evall fp_force_evall
-static inline void fp_force_evall(long double x)
-{
-	volatile long double y;
-	y = x;
-}
-#endif
-
-#define FORCE_EVAL(x) do {                        \
-	if (sizeof(x) == sizeof(float)) {         \
-		fp_force_evalf(x);                \
-	} else if (sizeof(x) == sizeof(double)) { \
-		fp_force_eval(x);                 \
-	} else {                                  \
-		fp_force_evall(x);                \
-	}                                         \
-} while(0)
-
-typedef union {float _f; unsigned int _i;}asuint_union;
-typedef union {unsigned int _i; float _f;}asfloat_union;
-typedef union {double _f; unsigned long long _i;}asuint64_union;
-typedef union {unsigned long long _i; double _f;}asdouble_union;
-#define asuint(f) ((asuint_union){f})._i
-#define asfloat(i) ((asfloat_union){i})._f
-#define asuint64(f) ((asuint64_union){f})._i
-#define asdouble(i) ((asdouble_union){i})._f
-
-#define EXTRACT_WORDS(hi,lo,d)                    \
-do {                                              \
-	unsigned long long __u = asuint64(d);                     \
-	(hi) = __u >> 32;                               \
-	(lo) = (unsigned int)__u;                           \
-} while (0)
-
-#define GET_HIGH_WORD(hi,d)                       \
-do {                                              \
-	(hi) = asuint64(d) >> 32;                       \
-} while (0)
-
-#define GET_LOW_WORD(lo,d)                        \
-do {                                              \
-	(lo) = (unsigned int)asuint64(d);                   \
-} while (0)
-
-#define INSERT_WORDS(d,hi,lo)                     \
-do {                                              \
-	(d) = asdouble(((unsigned long long)(hi)<<32) | (unsigned int)(lo)); \
-} while (0)
-
-#define SET_HIGH_WORD(d,hi)                       \
-	INSERT_WORDS(d, hi, (unsigned int)asuint64(d))
-
-#define SET_LOW_WORD(d,lo)                        \
-	INSERT_WORDS(d, asuint64(d)>>32, lo)
-
-#define GET_FLOAT_WORD(w,d)                       \
-do {                                              \
-	(w) = asuint(d);                                \
-} while (0)
-
-#define SET_FLOAT_WORD(d,w)                       \
-do {                                              \
-	(d) = asfloat(w);                               \
-} while (0)
-   
-int    __rem_pio2_large(double*, double*, int, int, int);
-int    __rem_pio2(double, double*);
-double __sin(double, double, int);
-double __cos(double, double);
-double __tan(double, double, int);
-
-/* error handling functions */
-float __math_xflowf(unsigned int, float);
-float __math_uflowf(unsigned int);
-float __math_oflowf(unsigned int);
-float __math_divzerof(unsigned int);
-float __math_invalidf(float);
-double __math_xflow(unsigned int, double);
-double __math_uflow(unsigned int);
-double __math_oflow(unsigned int);
-double __math_divzero(unsigned int);
-double __math_invalid(double);
-#if LDBL_MANT_DIG != DBL_MANT_DIG
-long double __math_invalidl(long double);
-#endif
-
 double      acos(double);
 double      asin(double);
 double      atan(double);
@@ -332,44 +175,9 @@ double      sqrt(double);
 double      tan(double);
 double      trunc(double);
 
-#define FLT_TRUE_MIN 1.40129846432481707092e-45F
-#define FLT_MIN 1.17549435082228750797e-38F
-#define FLT_MAX 3.40282346638528859812e+38F
-#define FLT_EPSILON 1.1920928955078125e-07F
-
-#define FLT_MANT_DIG 24
-#define FLT_MIN_EXP (-125)
-#define FLT_MAX_EXP 128
-#define FLT_HAS_SUBNORM 1
-
-#define FLT_DIG 6
-#define FLT_DECIMAL_DIG 9
-#define FLT_MIN_10_EXP (-37)
-#define FLT_MAX_10_EXP 38
-
-#define DBL_TRUE_MIN 4.94065645841246544177e-324
-#define DBL_MIN 2.22507385850720138309e-308
-#define DBL_MAX 1.79769313486231570815e+308
-#define DBL_EPSILON 2.22044604925031308085e-16
-
-#define DBL_MANT_DIG 53
-#define DBL_MIN_EXP (-1021)
-#define DBL_MAX_EXP 1024
-#define DBL_HAS_SUBNORM 1
-
-#define DBL_DIG 15
-#define DBL_DECIMAL_DIG 17
-#define DBL_MIN_10_EXP (-307)
-#define DBL_MAX_10_EXP 308
-
-#define LDBL_HAS_SUBNORM 1
-#define LDBL_DECIMAL_DIG DBL_DECIMAL_DIG
-#define LDBL_MANT_DIG    DBL_MANT_DIG            
-#define LDBL_MAX_EXP     DBL_MAX_EXP             
 
 #undef  MAXFLOAT
 #define MAXFLOAT        3.40282346638528859812e+38F
-#define HUGE            3.40282346638528859812e+38F
 
 #define M_DEG2RAD		0.017453292519943295	/* pi/180 */
 #define M_E             2.7182818284590452354   /* e */
@@ -397,6 +205,32 @@ double      trunc(double);
 // double      y1(double);
 // double      yn(int, double);
 
+#define HUGE            3.40282346638528859812e+38F
+
+// double      drem(double, double);
+// float       dremf(float, float);
+
+// int         finite(double);
+// int         finitef(float);
+
+// double      scalb(double, double);
+// float       scalbf(float, float);
+
+// double      significand(double);
+// float       significandf(float);
+
+// double      lgamma_r(double, int*);
+// float       lgammaf_r(float, int*);
+
+// float       j0f(float);
+// float       j1f(float);
+// float       jnf(int, float);
+
+// float       y0f(float);
+// float       y1f(float);
+// float       ynf(int, float);
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/3rd/math/src/__cos.c b/3rd/math/src/__cos.c
index 42cbd314..46cefb38 100644
--- a/3rd/math/src/__cos.c
+++ b/3rd/math/src/__cos.c
@@ -48,7 +48,7 @@
  *         any extra precision in w.
  */
 
-#include "math.h"
+#include "libm.h"
 
 static const double
 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
@@ -60,7 +60,7 @@ C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
 
 double __cos(double x, double y)
 {
-	double hz,z,r,w;
+	double_t hz,z,r,w;
 
 	z  = x*x;
 	w  = z*z;
diff --git a/3rd/math/src/__fpclassify.c b/3rd/math/src/__fpclassify.c
index 4a4abaee..4b36fbad 100644
--- a/3rd/math/src/__fpclassify.c
+++ b/3rd/math/src/__fpclassify.c
@@ -1,8 +1,9 @@
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
 
 int __fpclassify(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	int e = u.i>>52 & 0x7ff;
 	if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
 	if (e==0x7ff) return u.i<<12 ? FP_NAN : FP_INFINITE;
diff --git a/3rd/math/src/__fpclassifyf.c b/3rd/math/src/__fpclassifyf.c
index 4d5f8e06..fa9cb69d 100644
--- a/3rd/math/src/__fpclassifyf.c
+++ b/3rd/math/src/__fpclassifyf.c
@@ -1,8 +1,9 @@
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
 
 int __fpclassifyf(float x)
 {
-	union {float f; unsigned int i;} u = {x};
+	union {float f; uint32_t i;} u = {x};
 	int e = u.i>>23 & 0xff;
 	if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
 	if (e==0xff) return u.i<<9 ? FP_NAN : FP_INFINITE;
diff --git a/3rd/math/src/__fpclassifyl.c b/3rd/math/src/__fpclassifyl.c
index 2bbf3a72..e41781b6 100644
--- a/3rd/math/src/__fpclassifyl.c
+++ b/3rd/math/src/__fpclassifyl.c
@@ -1,4 +1,4 @@
-#include "math.h"
+#include "libm.h"
 
 #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
 int __fpclassifyl(long double x)
diff --git a/3rd/math/src/__math_divzero.c b/3rd/math/src/__math_divzero.c
index b66388bd..59d21350 100644
--- a/3rd/math/src/__math_divzero.c
+++ b/3rd/math/src/__math_divzero.c
@@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
 
-double __math_divzero(unsigned int sign)
+double __math_divzero(uint32_t sign)
 {
 	return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
 }
diff --git a/3rd/math/src/__math_divzerof.c b/3rd/math/src/__math_divzerof.c
index 8d17fe40..ce046f3e 100644
--- a/3rd/math/src/__math_divzerof.c
+++ b/3rd/math/src/__math_divzerof.c
@@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
 
-float __math_divzerof(unsigned int sign)
+float __math_divzerof(uint32_t sign)
 {
 	return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
 }
diff --git a/3rd/math/src/__math_invalid.c b/3rd/math/src/__math_invalid.c
index e0d23fa7..17740490 100644
--- a/3rd/math/src/__math_invalid.c
+++ b/3rd/math/src/__math_invalid.c
@@ -1,4 +1,4 @@
-#include "math.h"
+#include "libm.h"
 
 double __math_invalid(double x)
 {
diff --git a/3rd/math/src/__math_invalidf.c b/3rd/math/src/__math_invalidf.c
index a62de893..357d4b12 100644
--- a/3rd/math/src/__math_invalidf.c
+++ b/3rd/math/src/__math_invalidf.c
@@ -1,4 +1,4 @@
-#include "math.h"
+#include "libm.h"
 
 float __math_invalidf(float x)
 {
diff --git a/3rd/math/src/__math_invalidl.c b/3rd/math/src/__math_invalidl.c
index 4100067d..7c2edc8c 100644
--- a/3rd/math/src/__math_invalidl.c
+++ b/3rd/math/src/__math_invalidl.c
@@ -1,4 +1,5 @@
-#include "math.h"
+#include "float.h"
+#include "libm.h"
 
 #if LDBL_MANT_DIG != DBL_MANT_DIG
 long double __math_invalidl(long double x)
diff --git a/3rd/math/src/__math_oflow.c b/3rd/math/src/__math_oflow.c
index 84567aaf..c85dbf98 100644
--- a/3rd/math/src/__math_oflow.c
+++ b/3rd/math/src/__math_oflow.c
@@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
 
-double __math_oflow(unsigned int sign)
+double __math_oflow(uint32_t sign)
 {
 	return __math_xflow(sign, 0x1p769);
 }
diff --git a/3rd/math/src/__math_oflowf.c b/3rd/math/src/__math_oflowf.c
index 894aa246..fa7d0620 100644
--- a/3rd/math/src/__math_oflowf.c
+++ b/3rd/math/src/__math_oflowf.c
@@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
 
-float __math_oflowf(unsigned int sign)
+float __math_oflowf(uint32_t sign)
 {
 	return __math_xflowf(sign, 0x1p97f);
 }
diff --git a/3rd/math/src/__math_uflow.c b/3rd/math/src/__math_uflow.c
index a57293fd..b90594ae 100644
--- a/3rd/math/src/__math_uflow.c
+++ b/3rd/math/src/__math_uflow.c
@@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
 
-double __math_uflow(unsigned int sign)
+double __math_uflow(uint32_t sign)
 {
 	return __math_xflow(sign, 0x1p-767);
 }
diff --git a/3rd/math/src/__math_uflowf.c b/3rd/math/src/__math_uflowf.c
index e28fe668..94d50f2b 100644
--- a/3rd/math/src/__math_uflowf.c
+++ b/3rd/math/src/__math_uflowf.c
@@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
 
-float __math_uflowf(unsigned int sign)
+float __math_uflowf(uint32_t sign)
 {
 	return __math_xflowf(sign, 0x1p-95f);
 }
diff --git a/3rd/math/src/__math_xflow.c b/3rd/math/src/__math_xflow.c
index 716b7b8e..744203c4 100644
--- a/3rd/math/src/__math_xflow.c
+++ b/3rd/math/src/__math_xflow.c
@@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
 
-double __math_xflow(unsigned int sign, double y)
+double __math_xflow(uint32_t sign, double y)
 {
 	return eval_as_double(fp_barrier(sign ? -y : y) * y);
 }
diff --git a/3rd/math/src/__math_xflowf.c b/3rd/math/src/__math_xflowf.c
index 5a002606..f2c84784 100644
--- a/3rd/math/src/__math_xflowf.c
+++ b/3rd/math/src/__math_xflowf.c
@@ -1,6 +1,6 @@
-#include "math.h"
+#include "libm.h"
 
-float __math_xflowf(unsigned int sign, float y)
+float __math_xflowf(uint32_t sign, float y)
 {
 	return eval_as_float(fp_barrierf(sign ? -y : y) * y);
 }
diff --git a/3rd/math/src/__rem_pio2.c b/3rd/math/src/__rem_pio2.c
index e7c920b4..dcf672fb 100644
--- a/3rd/math/src/__rem_pio2.c
+++ b/3rd/math/src/__rem_pio2.c
@@ -17,7 +17,7 @@
  * use __rem_pio2_large() for large x
  */
 
-#include "math.h"
+#include "libm.h"
 
 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
 #define EPS DBL_EPSILON
@@ -48,10 +48,10 @@ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
 /* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
 int __rem_pio2(double x, double *y)
 {
-	union {double f; unsigned long long i;} u = {x};
-	double z,w,t,r,fn;
+	union {double f; uint64_t i;} u = {x};
+	double_t z,w,t,r,fn;
 	double tx[3],ty[2];
-	unsigned int ix;
+	uint32_t ix;
 	int sign, n, ex, ey, i;
 
 	sign = u.i>>63;
@@ -119,8 +119,8 @@ int __rem_pio2(double x, double *y)
 	if (ix < 0x413921fb) {  /* |x| ~< 2^20*(pi/2), medium size */
 medium:
 		/* rint(x/(pi/2)) */
-		fn = (double)x*invpio2 + toint - toint;
-		n = (int)fn;
+		fn = (double_t)x*invpio2 + toint - toint;
+		n = (int32_t)fn;
 		r = x - fn*pio2_1;
 		w = fn*pio2_1t;  /* 1st round, good to 85 bits */
 		/* Matters with directed rounding. */
@@ -167,11 +167,11 @@ medium:
 	}
 	/* set z = scalbn(|x|,-ilogb(x)+23) */
 	u.f = x;
-	u.i &= (unsigned long long)-1>>12;
-	u.i |= (unsigned long long)(0x3ff + 23)<<52;
+	u.i &= (uint64_t)-1>>12;
+	u.i |= (uint64_t)(0x3ff + 23)<<52;
 	z = u.f;
 	for (i=0; i < 2; i++) {
-		tx[i] = (double)(int)z;
+		tx[i] = (double)(int32_t)z;
 		z     = (z-tx[i])*0x1p24;
 	}
 	tx[i] = z;
diff --git a/3rd/math/src/__rem_pio2_large.c b/3rd/math/src/__rem_pio2_large.c
index d971e905..958f28c2 100644
--- a/3rd/math/src/__rem_pio2_large.c
+++ b/3rd/math/src/__rem_pio2_large.c
@@ -122,7 +122,7 @@
  * to produce the hexadecimal values shown.
  */
 
-#include "math.h"
+#include "libm.h"
 
 static const int init_jk[] = {3,4,4,6}; /* initial value for jk */
 
@@ -138,7 +138,7 @@ static const int init_jk[] = {3,4,4,6}; /* initial value for jk */
  * NB: This table must have at least (e0-3)/24 + jk terms.
  *     For quad precision (e0 <= 16360, jk = 6), this is 686.
  */
-static const int ipio2[] = {
+static const int32_t ipio2[] = {
 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
@@ -272,7 +272,7 @@ static const double PIo2[] = {
 
 int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 {
-	int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
+	int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
 	double z,fw,f[20],fq[20],q[20];
 
 	/* initialize jk*/
@@ -300,15 +300,15 @@ int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 recompute:
 	/* distill q[] into iq[] reversingly */
 	for (i=0,j=jz,z=q[jz]; j>0; i++,j--) {
-		fw    = (double)(int)(0x1p-24*z);
-		iq[i] = (int)(z - 0x1p24*fw);
+		fw    = (double)(int32_t)(0x1p-24*z);
+		iq[i] = (int32_t)(z - 0x1p24*fw);
 		z     = q[j-1]+fw;
 	}
 
 	/* compute n */
 	z  = scalbn(z,q0);       /* actual value of z */
 	z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
-	n  = (int)z;
+	n  = (int32_t)z;
 	z -= (double)n;
 	ih = 0;
 	if (q0 > 0) {  /* need iq[jz-1] to determine n */
@@ -375,13 +375,13 @@ recompute:
 	} else { /* break z into 24-bit if necessary */
 		z = scalbn(z,-q0);
 		if (z >= 0x1p24) {
-			fw = (double)(int)(0x1p-24*z);
-			iq[jz] = (int)(z - 0x1p24*fw);
+			fw = (double)(int32_t)(0x1p-24*z);
+			iq[jz] = (int32_t)(z - 0x1p24*fw);
 			jz += 1;
 			q0 += 24;
-			iq[jz] = (int)fw;
+			iq[jz] = (int32_t)fw;
 		} else
-			iq[jz] = (int)z;
+			iq[jz] = (int32_t)z;
 	}
 
 	/* convert integer "bit" chunk to floating-point value */
@@ -411,7 +411,7 @@ recompute:
 		fw = 0.0;
 		for (i=jz; i>=0; i--)
 			fw += fq[i];
-		// TODO: drop excess precision here once double is used
+		// TODO: drop excess precision here once double_t is used
 		fw = (double)fw;
 		y[0] = ih==0 ? fw : -fw;
 		fw = fq[0]-fw;
diff --git a/3rd/math/src/__signbit.c b/3rd/math/src/__signbit.c
index f8249cfc..e700b6b7 100644
--- a/3rd/math/src/__signbit.c
+++ b/3rd/math/src/__signbit.c
@@ -1,11 +1,11 @@
-#include "math.h"
+#include "libm.h"
 
 // FIXME: macro in math.h
 int __signbit(double x)
 {
 	union {
 		double d;
-		unsigned long long i;
+		uint64_t i;
 	} y = { x };
 	return y.i>>63;
 }
diff --git a/3rd/math/src/__signbitf.c b/3rd/math/src/__signbitf.c
index 86da0563..40ad3cfd 100644
--- a/3rd/math/src/__signbitf.c
+++ b/3rd/math/src/__signbitf.c
@@ -1,11 +1,11 @@
-#include "math.h"
+#include "libm.h"
 
 // FIXME: macro in math.h
 int __signbitf(float x)
 {
 	union {
 		float f;
-		unsigned int i;
+		uint32_t i;
 	} y = { x };
 	return y.i>>31;
 }
diff --git a/3rd/math/src/__signbitl.c b/3rd/math/src/__signbitl.c
index d1239df7..63b3dc5a 100644
--- a/3rd/math/src/__signbitl.c
+++ b/3rd/math/src/__signbitl.c
@@ -1,4 +1,4 @@
-#include "math.h"
+#include "libm.h"
 
 #if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
 int __signbitl(long double x)
diff --git a/3rd/math/src/__sin.c b/3rd/math/src/__sin.c
index 437783f2..40309496 100644
--- a/3rd/math/src/__sin.c
+++ b/3rd/math/src/__sin.c
@@ -39,7 +39,7 @@
  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
  */
 
-#include "math.h"
+#include "libm.h"
 
 static const double
 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
@@ -51,7 +51,7 @@ S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
 
 double __sin(double x, double y, int iy)
 {
-	double z,r,v,w;
+	double_t z,r,v,w;
 
 	z = x*x;
 	w = z*z;
diff --git a/3rd/math/src/__tan.c b/3rd/math/src/__tan.c
index 6e5a938b..8019844d 100644
--- a/3rd/math/src/__tan.c
+++ b/3rd/math/src/__tan.c
@@ -43,7 +43,7 @@
  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
  */
 
-#include "math.h"
+#include "libm.h"
 
 static const double T[] = {
              3.33333333333334091986e-01, /* 3FD55555, 55555563 */
@@ -65,9 +65,9 @@ pio4lo =     3.06161699786838301793e-17; /* 3C81A626, 33145C07 */
 
 double __tan(double x, double y, int odd)
 {
-	double z, r, v, w, s, a;
+	double_t z, r, v, w, s, a;
 	double w0, a0;
-	unsigned int hx;
+	uint32_t hx;
 	int big, sign;
 
 	GET_HIGH_WORD(hx,x);
diff --git a/3rd/math/src/acos.c b/3rd/math/src/acos.c
index 8b85c60e..ea9c87bf 100644
--- a/3rd/math/src/acos.c
+++ b/3rd/math/src/acos.c
@@ -33,7 +33,7 @@
  * Function needed: sqrt
  */
 
-#include "math.h"
+#include "libm.h"
 
 static const double
 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
@@ -51,7 +51,7 @@ qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
 
 static double R(double z)
 {
-	double p, q;
+	double_t p, q;
 	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
 	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
 	return p/q;
@@ -60,13 +60,13 @@ static double R(double z)
 double acos(double x)
 {
 	double z,w,s,c,df;
-	unsigned int hx,ix;
+	uint32_t hx,ix;
 
 	GET_HIGH_WORD(hx, x);
 	ix = hx & 0x7fffffff;
 	/* |x| >= 1 or nan */
 	if (ix >= 0x3ff00000) {
-		unsigned int lx;
+		uint32_t lx;
 
 		GET_LOW_WORD(lx,x);
 		if ((ix-0x3ff00000 | lx) == 0) {
diff --git a/3rd/math/src/asin.c b/3rd/math/src/asin.c
index 61b6c0fc..c926b188 100644
--- a/3rd/math/src/asin.c
+++ b/3rd/math/src/asin.c
@@ -39,7 +39,7 @@
  *
  */
 
-#include "math.h"
+#include "libm.h"
 
 static const double
 pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
@@ -58,7 +58,7 @@ qS4 =  7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
 
 static double R(double z)
 {
-	double p, q;
+	double_t p, q;
 	p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
 	q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
 	return p/q;
@@ -67,13 +67,13 @@ static double R(double z)
 double asin(double x)
 {
 	double z,r,s;
-	unsigned int hx,ix;
+	uint32_t hx,ix;
 
 	GET_HIGH_WORD(hx, x);
 	ix = hx & 0x7fffffff;
 	/* |x| >= 1 or nan */
 	if (ix >= 0x3ff00000) {
-		unsigned int lx;
+		uint32_t lx;
 		GET_LOW_WORD(lx, x);
 		if ((ix-0x3ff00000 | lx) == 0)
 			/* asin(1) = +-pi/2 with inexact */
diff --git a/3rd/math/src/atan.c b/3rd/math/src/atan.c
index fa9ef41e..63b0ab25 100644
--- a/3rd/math/src/atan.c
+++ b/3rd/math/src/atan.c
@@ -30,7 +30,7 @@
  */
 
 
-#include "math.h"
+#include "libm.h"
 
 static const double atanhi[] = {
   4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
@@ -62,8 +62,8 @@ static const double aT[] = {
 
 double atan(double x)
 {
-	double w,s1,s2,z;
-	unsigned int ix,sign;
+	double_t w,s1,s2,z;
+	uint32_t ix,sign;
 	int id;
 
 	GET_HIGH_WORD(ix, x);
diff --git a/3rd/math/src/atan2.c b/3rd/math/src/atan2.c
index 9d7a7a89..5a1903c6 100644
--- a/3rd/math/src/atan2.c
+++ b/3rd/math/src/atan2.c
@@ -37,7 +37,7 @@
  * to produce the hexadecimal values shown.
  */
 
-#include "math.h"
+#include "libm.h"
 
 static const double
 pi     = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */
@@ -46,7 +46,7 @@ pi_lo  = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
 double atan2(double y, double x)
 {
 	double z;
-	unsigned int m,lx,ly,ix,iy;
+	uint32_t m,lx,ly,ix,iy;
 
 	if (isnan(x) || isnan(y))
 		return x+y;
diff --git a/3rd/math/src/cbrt.c b/3rd/math/src/cbrt.c
index a6a00a40..5a15e9c3 100644
--- a/3rd/math/src/cbrt.c
+++ b/3rd/math/src/cbrt.c
@@ -15,9 +15,10 @@
  * Return cube root of x
  */
 
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
 
-static const unsigned int
+static const uint32_t
 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
 B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
 
@@ -31,9 +32,9 @@ P4 =  0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
 
 double cbrt(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
-	double r,s,t,w;
-	unsigned int hx = u.i>>32 & 0x7fffffff;
+	union {double f; uint64_t i;} u = {x};
+	double_t r,s,t,w;
+	uint32_t hx = u.i>>32 & 0x7fffffff;
 
 	if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */
 		return x+x;
@@ -62,7 +63,7 @@ double cbrt(double x)
 	} else
 		hx = hx/3 + B1;
 	u.i &= 1ULL<<63;
-	u.i |= (unsigned long long)hx << 32;
+	u.i |= (uint64_t)hx << 32;
 	t = u.f;
 
 	/*
diff --git a/3rd/math/src/ceil.c b/3rd/math/src/ceil.c
index 077589ea..b13e6f2d 100644
--- a/3rd/math/src/ceil.c
+++ b/3rd/math/src/ceil.c
@@ -1,17 +1,17 @@
-#include "math.h"
+#include "libm.h"
 
 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
 #define EPS DBL_EPSILON
 #elif FLT_EVAL_METHOD==2
 #define EPS LDBL_EPSILON
 #endif
-static const double toint = 1/EPS;
+static const double_t toint = 1/EPS;
 
 double ceil(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	int e = u.i >> 52 & 0x7ff;
-	double y;
+	double_t y;
 
 	if (e >= 0x3ff+52 || x == 0)
 		return x;
diff --git a/3rd/math/src/cos.c b/3rd/math/src/cos.c
index 883cb52c..ee97f68b 100644
--- a/3rd/math/src/cos.c
+++ b/3rd/math/src/cos.c
@@ -40,12 +40,12 @@
  *      TRIG(x) returns trig(x) nearly rounded
  */
 
-#include "math.h"
+#include "libm.h"
 
 double cos(double x)
 {
 	double y[2];
-	unsigned int ix;
+	uint32_t ix;
 	unsigned n;
 
 	GET_HIGH_WORD(ix, x);
diff --git a/3rd/math/src/degrees.c b/3rd/math/src/degrees.c
index 970586d5..32f49d9a 100644
--- a/3rd/math/src/degrees.c
+++ b/3rd/math/src/degrees.c
@@ -1,4 +1,4 @@
-#include <math.h>
+#include "libm.h"
 
 double degrees(double x)
 {
diff --git a/3rd/math/src/exp.c b/3rd/math/src/exp.c
index 52a1276c..c0e0c688 100644
--- a/3rd/math/src/exp.c
+++ b/3rd/math/src/exp.c
@@ -5,7 +5,9 @@
  * SPDX-License-Identifier: MIT
  */
 
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
+#include "libm.h"
 #include "exp_data.h"
 
 #define N (1 << EXP_TABLE_BITS)
@@ -23,12 +25,12 @@
    is scale*(1+TMP) without intermediate rounding.  The bit representation of
    scale is in SBITS, however it has a computed exponent that may have
    overflown into the sign bit so that needs to be adjusted before using it as
-   a double.  (int)KI is the k used in the argument reduction and exponent
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
    adjustment of scale, positive k here means the result may overflow and
    negative k means the result may underflow.  */
-static inline double specialcase(double tmp, unsigned long long sbits, unsigned long long ki)
+static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
 {
-	double scale, y;
+	double_t scale, y;
 
 	if ((ki & 0x80000000) == 0) {
 		/* k > 0, the exponent of scale might have overflowed by <= 460.  */
@@ -46,7 +48,7 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned
 		 range to avoid double rounding that can cause 0.5+E/2 ulp error where
 		 E is the worst-case ulp error outside the subnormal range.  So this
 		 is only useful if the goal is better than 1 ulp worst-case error.  */
-		double hi, lo;
+		double_t hi, lo;
 		lo = scale - y + scale * tmp;
 		hi = 1.0 + y;
 		lo = 1.0 - hi + y + lo;
@@ -62,16 +64,16 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned
 }
 
 /* Top 12 bits of a double (sign and exponent bits).  */
-static inline unsigned int top12(double x)
+static inline uint32_t top12(double x)
 {
 	return asuint64(x) >> 52;
 }
 
 double exp(double x)
 {
-	unsigned int abstop;
-	unsigned long long ki, idx, top, sbits;
-	double kd, z, r, r2, scale, tail, tmp;
+	uint32_t abstop;
+	uint64_t ki, idx, top, sbits;
+	double_t kd, z, r, r2, scale, tail, tmp;
 
 	abstop = top12(x) & 0x7ff;
 	if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
@@ -103,7 +105,7 @@ double exp(double x)
 	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
 	kd = eval_as_double(z + Shift);
 	ki = asuint64(kd) >> 16;
-	kd = (double)(int)ki;
+	kd = (double_t)(int32_t)ki;
 #else
 	/* z - kd is in [-1, 1] in non-nearest rounding modes.  */
 	kd = eval_as_double(z + Shift);
diff --git a/3rd/math/src/exp_data.h b/3rd/math/src/exp_data.h
index 67fae3de..01289c1f 100644
--- a/3rd/math/src/exp_data.h
+++ b/3rd/math/src/exp_data.h
@@ -5,13 +5,14 @@
 #ifndef _EXP_DATA_H
 #define _EXP_DATA_H
 
-#include <math.h>
+#include "features/features.h"
+#include <stdint.h>
 
 #define EXP_TABLE_BITS 7
 #define EXP_POLY_ORDER 5
 #define EXP_USE_TOINT_NARROW 0
 #define EXP2_POLY_ORDER 5
-extern const struct exp_data {
+extern hidden const struct exp_data {
 	double invln2N;
 	double shift;
 	double negln2hiN;
@@ -19,7 +20,7 @@ extern const struct exp_data {
 	double poly[4]; /* Last four coefficients.  */
 	double exp2_shift;
 	double exp2_poly[EXP2_POLY_ORDER];
-	unsigned long long tab[2*(1 << EXP_TABLE_BITS)];
+	uint64_t tab[2*(1 << EXP_TABLE_BITS)];
 } __exp_data;
 
 #endif
diff --git a/3rd/math/src/fabs.c b/3rd/math/src/fabs.c
index fc108938..622e86ef 100644
--- a/3rd/math/src/fabs.c
+++ b/3rd/math/src/fabs.c
@@ -1,8 +1,9 @@
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
 
 double fabs(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	u.i &= ULLONG_NSHIFT/2;
 	return u.f;
 }
diff --git a/3rd/math/src/factorial.c b/3rd/math/src/factorial.c
index 212d1034..096cc44e 100644
--- a/3rd/math/src/factorial.c
+++ b/3rd/math/src/factorial.c
@@ -1,4 +1,4 @@
-#include <math.h>
+#include "libm.h"
 
 int factorial(int n)
 {
diff --git a/3rd/math/src/floor.c b/3rd/math/src/floor.c
index 73f59faa..14a31cd8 100644
--- a/3rd/math/src/floor.c
+++ b/3rd/math/src/floor.c
@@ -1,17 +1,17 @@
-#include "math.h"
+#include "libm.h"
 
 #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
 #define EPS DBL_EPSILON
 #elif FLT_EVAL_METHOD==2
 #define EPS LDBL_EPSILON
 #endif
-static const double toint = 1/EPS;
+static const double_t toint = 1/EPS;
 
 double floor(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	int e = u.i >> 52 & 0x7ff;
-	double y;
+	double_t y;
 
 	if (e >= 0x3ff+52 || x == 0)
 		return x;
diff --git a/3rd/math/src/fmax.c b/3rd/math/src/fmax.c
index 94f0caa1..b6a7d064 100644
--- a/3rd/math/src/fmax.c
+++ b/3rd/math/src/fmax.c
@@ -1,4 +1,4 @@
-#include <math.h>
+#include "math.h"
 
 double fmax(double x, double y)
 {
diff --git a/3rd/math/src/fmin.c b/3rd/math/src/fmin.c
index 08a8fd17..c835abb4 100644
--- a/3rd/math/src/fmin.c
+++ b/3rd/math/src/fmin.c
@@ -1,4 +1,4 @@
-#include <math.h>
+#include "math.h"
 
 double fmin(double x, double y)
 {
diff --git a/3rd/math/src/fmod.c b/3rd/math/src/fmod.c
index 6cf0f9a5..9eff0442 100644
--- a/3rd/math/src/fmod.c
+++ b/3rd/math/src/fmod.c
@@ -1,16 +1,17 @@
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
 
 double fmod(double x, double y)
 {
-	union {double f; unsigned long long i;} ux = {x}, uy = {y};
+	union {double f; uint64_t i;} ux = {x}, uy = {y};
 	int ex = ux.i>>52 & 0x7ff;
 	int ey = uy.i>>52 & 0x7ff;
 	int sx = ux.i>>63;
-	unsigned long long i;
+	uint64_t i;
 
 	/* in the followings uxi should be ux.i, but then gcc wrongly adds */
 	/* float load/store to inner loops ruining performance and code size */
-	unsigned long long uxi = ux.i;
+	uint64_t uxi = ux.i;
 
 	if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
 		return (x*y)/(x*y);
@@ -57,11 +58,11 @@ double fmod(double x, double y)
 	/* scale result */
 	if (ex > 0) {
 		uxi -= 1ULL << 52;
-		uxi |= (unsigned long long)ex << 52;
+		uxi |= (uint64_t)ex << 52;
 	} else {
 		uxi >>= -ex + 1;
 	}
-	uxi |= (unsigned long long)sx << 63;
+	uxi |= (uint64_t)sx << 63;
 	ux.i = uxi;
 	return ux.f;
 }
diff --git a/3rd/math/src/fsum.c b/3rd/math/src/fsum.c
index 3bb83a91..b2cd1e43 100644
--- a/3rd/math/src/fsum.c
+++ b/3rd/math/src/fsum.c
@@ -1,4 +1,4 @@
-#include <math.h>
+#include "libm.h"
 
 double fsum(double* aptr, int n)
 {
diff --git a/3rd/math/src/gcd.c b/3rd/math/src/gcd.c
index 600977c7..47043ab9 100644
--- a/3rd/math/src/gcd.c
+++ b/3rd/math/src/gcd.c
@@ -1,4 +1,4 @@
-#include <math.h>
+#include "libm.h"
 
 int gcd(int a, int b)
 {
diff --git a/3rd/math/src/log.c b/3rd/math/src/log.c
index 80492244..1d370275 100644
--- a/3rd/math/src/log.c
+++ b/3rd/math/src/log.c
@@ -5,7 +5,9 @@
  * SPDX-License-Identifier: MIT
  */
 
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
+#include "libm.h"
 #include "log_data.h"
 
 #define T __log_data.tab
@@ -18,16 +20,16 @@
 #define OFF 0x3fe6000000000000
 
 /* Top 16 bits of a double.  */
-static inline unsigned int top16(double x)
+static inline uint32_t top16(double x)
 {
 	return asuint64(x) >> 48;
 }
 
 double log(double x)
 {
-	double w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
-	unsigned long long ix, iz, tmp;
-	unsigned int top;
+	double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
+	uint64_t ix, iz, tmp;
+	uint32_t top;
 	int k, i;
 
 	ix = asuint64(x);
@@ -48,8 +50,8 @@ double log(double x)
 			   r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
 		/* Worst-case error is around 0.507 ULP.  */
 		w = r * 0x1p27;
-		double rhi = r + w - w;
-		double rlo = r - rhi;
+		double_t rhi = r + w - w;
+		double_t rlo = r - rhi;
 		w = rhi * rhi * B[0]; /* B[0] == -0.5.  */
 		hi = r + w;
 		lo = r - hi + w;
@@ -76,7 +78,7 @@ double log(double x)
 	   The ith subinterval contains z and c is near its center.  */
 	tmp = ix - OFF;
 	i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
-	k = (long long)tmp >> 52; /* arithmetic shift */
+	k = (int64_t)tmp >> 52; /* arithmetic shift */
 	iz = ix - (tmp & 0xfffULL << 52);
 	invc = T[i].invc;
 	logc = T[i].logc;
@@ -91,7 +93,7 @@ double log(double x)
 	/* rounding error: 0x1p-55/N + 0x1p-66.  */
 	r = (z - T2[i].chi - T2[i].clo) * invc;
 #endif
-	kd = (double)k;
+	kd = (double_t)k;
 
 	/* hi + lo = r + log(c) + k*Ln2.  */
 	w = kd * Ln2hi + logc;
diff --git a/3rd/math/src/log10.c b/3rd/math/src/log10.c
index cf77f3c5..a4b8f45d 100644
--- a/3rd/math/src/log10.c
+++ b/3rd/math/src/log10.c
@@ -17,7 +17,8 @@
  *    log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2)
  */
 
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
 
 static const double
 ivln10hi  = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */
@@ -34,9 +35,9 @@ Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
 
 double log10(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
-	double hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo;
-	unsigned int hx;
+	union {double f; uint64_t i;} u = {x};
+	double_t hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo;
+	uint32_t hx;
 	int k;
 
 	hx = u.i>>32;
@@ -60,7 +61,7 @@ double log10(double x)
 	hx += 0x3ff00000 - 0x3fe6a09e;
 	k += (int)(hx>>20) - 0x3ff;
 	hx = (hx&0x000fffff) + 0x3fe6a09e;
-	u.i = (unsigned long long)hx<<32 | (u.i&0xffffffff);
+	u.i = (uint64_t)hx<<32 | (u.i&0xffffffff);
 	x = u.f;
 
 	f = x - 1.0;
@@ -76,7 +77,7 @@ double log10(double x)
 	/* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
 	hi = f - hfsq;
 	u.f = hi;
-	u.i &= (unsigned long long)-1<<32;
+	u.i &= (uint64_t)-1<<32;
 	hi = u.f;
 	lo = f - hi - hfsq + s*(hfsq+R);
 
diff --git a/3rd/math/src/log2.c b/3rd/math/src/log2.c
index fbe56e30..36fb4643 100644
--- a/3rd/math/src/log2.c
+++ b/3rd/math/src/log2.c
@@ -5,7 +5,9 @@
  * SPDX-License-Identifier: MIT
  */
 
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
+#include "libm.h"
 #include "log2_data.h"
 
 #define T __log2_data.tab
@@ -18,16 +20,16 @@
 #define OFF 0x3fe6000000000000
 
 /* Top 16 bits of a double.  */
-static inline unsigned int top16(double x)
+static inline uint32_t top16(double x)
 {
 	return asuint64(x) >> 48;
 }
 
 double log2(double x)
 {
-	double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
-	unsigned long long ix, iz, tmp;
-	unsigned int top;
+	double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
+	uint64_t ix, iz, tmp;
+	uint32_t top;
 	int k, i;
 
 	ix = asuint64(x);
@@ -44,7 +46,7 @@ double log2(double x)
 		hi = r * InvLn2hi;
 		lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
 #else
-		double rhi, rlo;
+		double_t rhi, rlo;
 		rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32);
 		rlo = r - rhi;
 		hi = rhi * InvLn2hi;
@@ -79,12 +81,12 @@ double log2(double x)
 	   The ith subinterval contains z and c is near its center.  */
 	tmp = ix - OFF;
 	i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
-	k = (long long)tmp >> 52; /* arithmetic shift */
+	k = (int64_t)tmp >> 52; /* arithmetic shift */
 	iz = ix - (tmp & 0xfffULL << 52);
 	invc = T[i].invc;
 	logc = T[i].logc;
 	z = asdouble(iz);
-	kd = (double)k;
+	kd = (double_t)k;
 
 	/* log2(x) = log2(z/c) + log2(c) + k.  */
 	/* r ~= z/c - 1, |r| < 1/(2*N).  */
@@ -94,7 +96,7 @@ double log2(double x)
 	t1 = r * InvLn2hi;
 	t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
 #else
-	double rhi, rlo;
+	double_t rhi, rlo;
 	/* rounding error: 0x1p-55/N + 0x1p-65.  */
 	r = (z - T2[i].chi - T2[i].clo) * invc;
 	rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32);
diff --git a/3rd/math/src/log2_data.h b/3rd/math/src/log2_data.h
index 2c047b8f..fb95d2ff 100644
--- a/3rd/math/src/log2_data.h
+++ b/3rd/math/src/log2_data.h
@@ -5,11 +5,12 @@
 #ifndef _LOG2_DATA_H
 #define _LOG2_DATA_H
 
+#include "features/features.h"
 
 #define LOG2_TABLE_BITS 6
 #define LOG2_POLY_ORDER 7
 #define LOG2_POLY1_ORDER 11
-extern const struct log2_data {
+extern hidden const struct log2_data {
 	double invln2hi;
 	double invln2lo;
 	double poly[LOG2_POLY_ORDER - 1];
diff --git a/3rd/math/src/log_data.h b/3rd/math/src/log_data.h
index fd0f8d4c..4ff36084 100644
--- a/3rd/math/src/log_data.h
+++ b/3rd/math/src/log_data.h
@@ -5,11 +5,12 @@
 #ifndef _LOG_DATA_H
 #define _LOG_DATA_H
 
+#include "features/features.h"
 
 #define LOG_TABLE_BITS 7
 #define LOG_POLY_ORDER 6
 #define LOG_POLY1_ORDER 12
-extern const struct log_data {
+extern hidden const struct log_data {
 	double ln2hi;
 	double ln2lo;
 	double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
diff --git a/3rd/math/src/modf.c b/3rd/math/src/modf.c
index 1eca3bea..055fd124 100644
--- a/3rd/math/src/modf.c
+++ b/3rd/math/src/modf.c
@@ -1,9 +1,9 @@
-#include "math.h"
+#include "libm.h"
 
 double modf(double x, double *iptr)
 {
-	union {double f; unsigned long long i;} u = {x};
-	unsigned long long mask;
+	union {double f; uint64_t i;} u = {x};
+	uint64_t mask;
 	int e = (int)(u.i>>52 & 0x7ff) - 0x3ff;
 
 	/* no fractional part */
diff --git a/3rd/math/src/pow.c b/3rd/math/src/pow.c
index 63dbd64a..ed114059 100644
--- a/3rd/math/src/pow.c
+++ b/3rd/math/src/pow.c
@@ -5,7 +5,9 @@
  * SPDX-License-Identifier: MIT
  */
 
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
+#include "libm.h"
 #include "exp_data.h"
 #include "pow_data.h"
 
@@ -23,7 +25,7 @@ ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
 #define OFF 0x3fe6955500000000
 
 /* Top 12 bits of a double (sign and exponent bits).  */
-static inline unsigned int top12(double x)
+static inline uint32_t top12(double x)
 {
 	return asuint64(x) >> 52;
 }
@@ -31,11 +33,11 @@ static inline unsigned int top12(double x)
 /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
    additional 15 bits precision.  IX is the bit representation of x, but
    normalized in the subnormal range using the sign bit for the exponent.  */
-static inline double log_inline(unsigned long long ix, double *tail)
+static inline double_t log_inline(uint64_t ix, double_t *tail)
 {
-	/* double for better performance on targets with FLT_EVAL_METHOD==2.  */
-	double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
-	unsigned long long iz, tmp;
+	/* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+	double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
+	uint64_t iz, tmp;
 	int k, i;
 
 	/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
@@ -43,10 +45,10 @@ static inline double log_inline(unsigned long long ix, double *tail)
 	   The ith subinterval contains z and c is near its center.  */
 	tmp = ix - OFF;
 	i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
-	k = (long long)tmp >> 52; /* arithmetic shift */
+	k = (int64_t)tmp >> 52; /* arithmetic shift */
 	iz = ix - (tmp & 0xfffULL << 52);
 	z = asdouble(iz);
-	kd = (double)k;
+	kd = (double_t)k;
 
 	/* log(x) = k*Ln2 + log(c) + log1p(z/c-1).  */
 	invc = T[i].invc;
@@ -59,10 +61,10 @@ static inline double log_inline(unsigned long long ix, double *tail)
 	r = __builtin_fma(z, invc, -1.0);
 #else
 	/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|.  */
-	double zhi = asdouble((iz + (1ULL << 31)) & (ULLONG_NSHIFT << 32));
-	double zlo = z - zhi;
-	double rhi = zhi * invc - 1.0;
-	double rlo = zlo * invc;
+	double_t zhi = asdouble((iz + (1ULL << 31)) & (ULLONG_NSHIFT << 32));
+	double_t zlo = z - zhi;
+	double_t rhi = zhi * invc - 1.0;
+	double_t rlo = zlo * invc;
 	r = rhi + rlo;
 #endif
 
@@ -73,7 +75,7 @@ static inline double log_inline(unsigned long long ix, double *tail)
 	lo2 = t1 - t2 + r;
 
 	/* Evaluation is optimized assuming superscalar pipelined execution.  */
-	double ar, ar2, ar3, lo3, lo4;
+	double_t ar, ar2, ar3, lo3, lo4;
 	ar = A[0] * r; /* A[0] = -0.5.  */
 	ar2 = r * ar;
 	ar3 = r * ar2;
@@ -83,8 +85,8 @@ static inline double log_inline(unsigned long long ix, double *tail)
 	lo3 = __builtin_fma(ar, r, -ar2);
 	lo4 = t2 - hi + ar2;
 #else
-	double arhi = A[0] * rhi;
-	double arhi2 = rhi * arhi;
+	double_t arhi = A[0] * rhi;
+	double_t arhi2 = rhi * arhi;
 	hi = t2 + arhi2;
 	lo3 = rlo * (ar + arhi);
 	lo4 = t2 - hi + arhi2;
@@ -116,12 +118,12 @@ static inline double log_inline(unsigned long long ix, double *tail)
    is scale*(1+TMP) without intermediate rounding.  The bit representation of
    scale is in SBITS, however it has a computed exponent that may have
    overflown into the sign bit so that needs to be adjusted before using it as
-   a double.  (int)KI is the k used in the argument reduction and exponent
+   a double.  (int32_t)KI is the k used in the argument reduction and exponent
    adjustment of scale, positive k here means the result may overflow and
    negative k means the result may underflow.  */
-static inline double specialcase(double tmp, unsigned long long sbits, unsigned long long ki)
+static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
 {
-	double scale, y;
+	double_t scale, y;
 
 	if ((ki & 0x80000000) == 0) {
 		/* k > 0, the exponent of scale might have overflowed by <= 460.  */
@@ -140,7 +142,7 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned
 		   range to avoid double rounding that can cause 0.5+E/2 ulp error where
 		   E is the worst-case ulp error outside the subnormal range.  So this
 		   is only useful if the goal is better than 1 ulp worst-case error.  */
-		double hi, lo, one = 1.0;
+		double_t hi, lo, one = 1.0;
 		if (y < 0.0)
 			one = -1.0;
 		lo = scale - y + scale * tmp;
@@ -161,12 +163,12 @@ static inline double specialcase(double tmp, unsigned long long sbits, unsigned
 
 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
    The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.  */
-static inline double exp_inline(double x, double xtail, unsigned int sign_bias)
+static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
 {
-	unsigned int abstop;
-	unsigned long long ki, idx, top, sbits;
-	/* double for better performance on targets with FLT_EVAL_METHOD==2.  */
-	double kd, z, r, r2, scale, tail, tmp;
+	uint32_t abstop;
+	uint64_t ki, idx, top, sbits;
+	/* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+	double_t kd, z, r, r2, scale, tail, tmp;
 
 	abstop = top12(x) & 0x7ff;
 	if (predict_false(abstop - top12(0x1p-54) >=
@@ -174,7 +176,7 @@ static inline double exp_inline(double x, double xtail, unsigned int sign_bias)
 		if (abstop - top12(0x1p-54) >= 0x80000000) {
 			/* Avoid spurious underflow for tiny x.  */
 			/* Note: 0 is common input.  */
-			double one = WANT_ROUNDING ? 1.0 + x : 1.0;
+			double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
 			return sign_bias ? -one : one;
 		}
 		if (abstop >= top12(1024.0)) {
@@ -198,7 +200,7 @@ static inline double exp_inline(double x, double xtail, unsigned int sign_bias)
 	/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes.  */
 	kd = eval_as_double(z + Shift);
 	ki = asuint64(kd) >> 16;
-	kd = (double)(int)ki;
+	kd = (double_t)(int32_t)ki;
 #else
 	/* z - kd is in [-1, 1] in non-nearest rounding modes.  */
 	kd = eval_as_double(z + Shift);
@@ -230,7 +232,7 @@ static inline double exp_inline(double x, double xtail, unsigned int sign_bias)
 
 /* Returns 0 if not int, 1 if odd int, 2 if even int.  The argument is
    the bit representation of a non-zero finite floating-point value.  */
-static inline int checkint(unsigned long long iy)
+static inline int checkint(uint64_t iy)
 {
 	int e = iy >> 52 & 0x7ff;
 	if (e < 0x3ff)
@@ -245,16 +247,16 @@ static inline int checkint(unsigned long long iy)
 }
 
 /* Returns 1 if input is the bit representation of 0, infinity or nan.  */
-static inline int zeroinfnan(unsigned long long i)
+static inline int zeroinfnan(uint64_t i)
 {
 	return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
 }
 
 double pow(double x, double y)
 {
-	unsigned int sign_bias = 0;
-	unsigned long long ix, iy;
-	unsigned int topx, topy;
+	uint32_t sign_bias = 0;
+	uint64_t ix, iy;
+	uint32_t topx, topy;
 
 	ix = asuint64(x);
 	iy = asuint64(y);
@@ -281,7 +283,7 @@ double pow(double x, double y)
 			return y * y;
 		}
 		if (predict_false(zeroinfnan(ix))) {
-			double x2 = x * x;
+			double_t x2 = x * x;
 			if (ix >> 63 && checkint(iy) == 1)
 				x2 = -x2;
 			/* Without the barrier some versions of clang hoist the 1/x2 and
@@ -323,17 +325,17 @@ double pow(double x, double y)
 		}
 	}
 
-	double lo;
-	double hi = log_inline(ix, &lo);
-	double ehi, elo;
+	double_t lo;
+	double_t hi = log_inline(ix, &lo);
+	double_t ehi, elo;
 #if __FP_FAST_FMA
 	ehi = y * hi;
 	elo = y * lo + __builtin_fma(y, hi, -ehi);
 #else
-	double yhi = asdouble(iy & ULLONG_NSHIFT << 27);
-	double ylo = y - yhi;
-	double lhi = asdouble(asuint64(hi) & ULLONG_NSHIFT << 27);
-	double llo = hi - lhi + lo;
+	double_t yhi = asdouble(iy & ULLONG_NSHIFT << 27);
+	double_t ylo = y - yhi;
+	double_t lhi = asdouble(asuint64(hi) & ULLONG_NSHIFT << 27);
+	double_t llo = hi - lhi + lo;
 	ehi = yhi * lhi;
 	elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25.  */
 #endif
diff --git a/3rd/math/src/pow_data.h b/3rd/math/src/pow_data.h
index ab23e059..18bd1099 100644
--- a/3rd/math/src/pow_data.h
+++ b/3rd/math/src/pow_data.h
@@ -5,10 +5,11 @@
 #ifndef _POW_DATA_H
 #define _POW_DATA_H
 
+#include "features/features.h"
 
 #define POW_LOG_TABLE_BITS 7
 #define POW_LOG_POLY_ORDER 8
-extern const struct pow_log_data {
+extern hidden const struct pow_log_data {
 	double ln2hi;
 	double ln2lo;
 	double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1.  */
diff --git a/3rd/math/src/radians.c b/3rd/math/src/radians.c
index 06268cde..f8ca0c19 100644
--- a/3rd/math/src/radians.c
+++ b/3rd/math/src/radians.c
@@ -1,4 +1,4 @@
-#include <math.h>
+#include "libm.h"
 
 double radians(double x)
 {
diff --git a/3rd/math/src/scalbn.c b/3rd/math/src/scalbn.c
index fc2407c5..976bfe05 100644
--- a/3rd/math/src/scalbn.c
+++ b/3rd/math/src/scalbn.c
@@ -1,9 +1,10 @@
-#include <math.h>
+#include "math.h"
+#include <stdint.h>
 
 double scalbn(double x, int n)
 {
-	union {double f; unsigned long long i;} u;
-	double y = x;
+	union {double f; uint64_t i;} u;
+	double_t y = x;
 
 	if (n > 1023) {
 		y *= 0x1p1023;
@@ -26,7 +27,7 @@ double scalbn(double x, int n)
 				n = -1022;
 		}
 	}
-	u.i = (unsigned long long)(0x3ff+n)<<52;
+	u.i = (uint64_t)(0x3ff+n)<<52;
 	x = y * u.f;
 	return x;
 }
diff --git a/3rd/math/src/sin.c b/3rd/math/src/sin.c
index 7d905b07..055e215b 100644
--- a/3rd/math/src/sin.c
+++ b/3rd/math/src/sin.c
@@ -40,12 +40,12 @@
  *      TRIG(x) returns trig(x) nearly rounded
  */
 
-#include "math.h"
+#include "libm.h"
 
 double sin(double x)
 {
 	double y[2];
-	unsigned int ix;
+	uint32_t ix;
 	unsigned n;
 
 	/* High word of x. */
diff --git a/3rd/math/src/sqrt.c b/3rd/math/src/sqrt.c
index e1a3c9d8..5f13efe0 100644
--- a/3rd/math/src/sqrt.c
+++ b/3rd/math/src/sqrt.c
@@ -1,27 +1,29 @@
-#include <math.h>
+#include <stdint.h>
+#include "math.h"
+#include "libm.h"
 #include "sqrt_data.h"
 
 #define FENV_SUPPORT 1
 
 /* returns a*b*2^-32 - e, with error 0 <= e < 1.  */
-static inline unsigned int mul32(unsigned int a, unsigned int b)
+static inline uint32_t mul32(uint32_t a, uint32_t b)
 {
-	return (unsigned long long)a*b >> 32;
+	return (uint64_t)a*b >> 32;
 }
 
 /* returns a*b*2^-64 - e, with error 0 <= e < 3.  */
-static inline unsigned long long mul64(unsigned long long a, unsigned long long b)
+static inline uint64_t mul64(uint64_t a, uint64_t b)
 {
-	unsigned long long ahi = a>>32;
-	unsigned long long alo = a&0xffffffff;
-	unsigned long long bhi = b>>32;
-	unsigned long long blo = b&0xffffffff;
+	uint64_t ahi = a>>32;
+	uint64_t alo = a&0xffffffff;
+	uint64_t bhi = b>>32;
+	uint64_t blo = b&0xffffffff;
 	return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
 }
 
 double sqrt(double x)
 {
-	unsigned long long ix, top, m;
+	uint64_t ix, top, m;
 
 	/* special case handling.  */
 	ix = asuint64(x);
@@ -103,11 +105,11 @@ double sqrt(double x)
 	   and after switching to 64 bit
 	     m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62  */
 
-	static const unsigned long long three = 0xc0000000;
-	unsigned long long r, s, d, u, i;
+	static const uint64_t three = 0xc0000000;
+	uint64_t r, s, d, u, i;
 
 	i = (ix >> 46) % 128;
-	r = (unsigned int)__rsqrt_tab[i] << 16;
+	r = (uint32_t)__rsqrt_tab[i] << 16;
 	/* |r sqrt(m) - 1| < 0x1.fdp-9 */
 	s = mul32(m>>32, r);
 	/* |s/sqrt(m) - 1| < 0x1.fdp-9 */
@@ -134,7 +136,7 @@ double sqrt(double x)
 	   compute nearest rounded result:
 	   the nearest result to 52 bits is either s or s+0x1p-52,
 	   we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m.  */
-	unsigned long long d0, d1, d2;
+	uint64_t d0, d1, d2;
 	double y, t;
 	d0 = (m << 42) - s*s;
 	d1 = s - d0;
@@ -147,7 +149,7 @@ double sqrt(double x)
 		/* handle rounding modes and inexact exception:
 		   only (s+1)^2 == 2^42 m case is exact otherwise
 		   add a tiny value to cause the fenv effects.  */
-		unsigned long long tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
+		uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
 		tiny |= (d1^d2) & 0x8000000000000000;
 		t = asdouble(tiny);
 		y = eval_as_double(y + t);
diff --git a/3rd/math/src/sqrt_data.c b/3rd/math/src/sqrt_data.c
index 013938fb..61bc22f4 100644
--- a/3rd/math/src/sqrt_data.c
+++ b/3rd/math/src/sqrt_data.c
@@ -1,5 +1,5 @@
 #include "sqrt_data.h"
-const unsigned short __rsqrt_tab[128] = {
+const uint16_t __rsqrt_tab[128] = {
 0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43,
 0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b,
 0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1,
diff --git a/3rd/math/src/sqrt_data.h b/3rd/math/src/sqrt_data.h
index 459004b9..c1f4e820 100644
--- a/3rd/math/src/sqrt_data.h
+++ b/3rd/math/src/sqrt_data.h
@@ -1,12 +1,13 @@
 #ifndef _SQRT_DATA_H
 #define _SQRT_DATA_H
 
-#include <math.h>
+#include "features/features.h"
+#include <stdint.h>
 
 /* if x in [1,2): i = (int)(64*x);
    if x in [2,4): i = (int)(32*x-64);
    __rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error:
    |__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */
-extern const unsigned short __rsqrt_tab[128];
+extern hidden const uint16_t __rsqrt_tab[128];
 
 #endif
diff --git a/3rd/math/src/tan.c b/3rd/math/src/tan.c
index 89689762..9c724a45 100644
--- a/3rd/math/src/tan.c
+++ b/3rd/math/src/tan.c
@@ -39,12 +39,12 @@
  *      TRIG(x) returns trig(x) nearly rounded
  */
 
-#include "math.h"
+#include "libm.h"
 
 double tan(double x)
 {
 	double y[2];
-	unsigned int ix;
+	uint32_t ix;
 	unsigned n;
 
 	GET_HIGH_WORD(ix, x);
diff --git a/3rd/math/src/trunc.c b/3rd/math/src/trunc.c
index a7d397fb..5186ce3c 100644
--- a/3rd/math/src/trunc.c
+++ b/3rd/math/src/trunc.c
@@ -1,10 +1,10 @@
-#include "math.h"
+#include "libm.h"
 
 double trunc(double x)
 {
-	union {double f; unsigned long long i;} u = {x};
+	union {double f; uint64_t i;} u = {x};
 	int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff + 12;
-	unsigned long long m;
+	uint64_t m;
 
 	if (e >= 52 + 12)
 		return x;
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 105dde48..ac1e231f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,12 +50,8 @@ if(PK_ENABLE_DETERMINISTIC_FLOAT)
     add_subdirectory(3rd/math)
     include_directories(3rd/math)
     add_definitions(-DPK_ENABLE_DETERMINISTIC_FLOAT)
-    if(MSVC)
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:precise")
-    elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang")
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffp-model=strict -O2")
-    else()
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2")
+    if(NOT MSVC)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexcess-precision=standard -ffp-contract=off")
     endif()
 endif()
 
@@ -124,6 +120,7 @@ endif()
 if(NOT MSVC)
     target_link_libraries(${PROJECT_NAME} pthread)
 endif()
+
 if(PK_BUILD_MODULE_LZ4)
     target_link_libraries(${PROJECT_NAME} lz4)
 endif()