mirror of
https://github.com/pocketpy/pocketpy
synced 2025-10-20 19:40:18 +00:00
added musl libc as 3rd deterministic math lib
This commit is contained in:
parent
1f91a6ef1b
commit
27e32fca2c
29
3rd/math/CMakeLists.txt
Normal file
29
3rd/math/CMakeLists.txt
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.10)
|
||||||
|
|
||||||
|
project("musl_math")
|
||||||
|
|
||||||
|
set(CMAKE_C_STANDARD 11)
|
||||||
|
set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
|
include_directories(${CMAKE_CURRENT_LIST_DIR}/include)
|
||||||
|
AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_LIST_DIR}/src MUSL_LIBC_MATH_SRC)
|
||||||
|
if(MSVC)
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /fp:precise /Od /Oi-")
|
||||||
|
# mute warning C4723: div by 0 because some function returns nan when a input is nan.
|
||||||
|
add_compile_options( /wd4723)
|
||||||
|
elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
||||||
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffp-model=strict")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_library(${PROJECT_NAME} STATIC ${MUSL_LIBC_MATH_SRC})
|
||||||
|
|
||||||
|
|
||||||
|
# if(MSVC)
|
||||||
|
# target_compile_options(${PROJECT_NAME} PRIVATE /fp:precise /Od)
|
||||||
|
# message("MUSL_LIBC_MATH: MSVC set")
|
||||||
|
# elseif(CMAKE_C_COMPILER_ID STREQUAL "Clang")
|
||||||
|
# target_compile_options(${PROJECT_NAME} PRIVATE -ffp-model=strict)
|
||||||
|
# message("MUSL_LIBC_MATH: CLANG set")
|
||||||
|
# elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU")
|
||||||
|
# message("MUSL_LIBC_MATH: gcc set")
|
||||||
|
# endif()
|
417
3rd/math/include/math.h
Normal file
417
3rd/math/include/math.h
Normal file
@ -0,0 +1,417 @@
|
|||||||
|
#ifndef _MATH_H
|
||||||
|
#define _MATH_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define __NEED_float_t
|
||||||
|
#define __NEED_double_t
|
||||||
|
|
||||||
|
#ifndef _HUGE_ENUF
|
||||||
|
#define _HUGE_ENUF 1e+300 // _HUGE_ENUF*_HUGE_ENUF must overflow
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define INFINITY ((float)(_HUGE_ENUF * _HUGE_ENUF))
|
||||||
|
#define HUGE_VALF INFINITY
|
||||||
|
#define HUGE_VAL ((double)INFINITY)
|
||||||
|
#define HUGE_VALL ((long double)INFINITY)
|
||||||
|
#define NAN ((float)(INFINITY * 0.0F))
|
||||||
|
|
||||||
|
#define MATH_ERRNO 1
|
||||||
|
#define MATH_ERREXCEPT 2
|
||||||
|
#define math_errhandling 2
|
||||||
|
|
||||||
|
#define FP_ILOGBNAN (-1-0x7fffffff)
|
||||||
|
#define FP_ILOGB0 FP_ILOGBNAN
|
||||||
|
#define ULLONG_NSHIFT 0xFFFFFFFFFFFFFFFF
|
||||||
|
#define ULLONG_SHIFT1 0x7FFFFFFFFFFFFFFF
|
||||||
|
|
||||||
|
#define FP_NAN 0
|
||||||
|
#define FP_INFINITE 1
|
||||||
|
#define FP_ZERO 2
|
||||||
|
#define FP_SUBNORMAL 3
|
||||||
|
#define FP_NORMAL 4
|
||||||
|
|
||||||
|
#ifdef __FP_FAST_FMA
|
||||||
|
#define FP_FAST_FMA 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __FP_FAST_FMAF
|
||||||
|
#define FP_FAST_FMAF 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __FP_FAST_FMAL
|
||||||
|
#define FP_FAST_FMAL 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define FLT_EVAL_METHOD 0
|
||||||
|
|
||||||
|
/* Support non-nearest rounding mode. */
|
||||||
|
#define WANT_ROUNDING 1
|
||||||
|
/* Support signaling NaNs. */
|
||||||
|
#define WANT_SNAN 0
|
||||||
|
|
||||||
|
#if WANT_SNAN
|
||||||
|
#error SNaN is unsupported
|
||||||
|
#else
|
||||||
|
#define issignalingf_inline(x) 0
|
||||||
|
#define issignaling_inline(x) 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define predict_true(x) (x)
|
||||||
|
#define predict_false(x) (x)
|
||||||
|
|
||||||
|
typedef signed char int8_t;
|
||||||
|
typedef short int16_t;
|
||||||
|
typedef int int32_t;
|
||||||
|
typedef long long int64_t;
|
||||||
|
typedef unsigned char uint8_t;
|
||||||
|
typedef unsigned short uint16_t;
|
||||||
|
typedef unsigned int uint32_t;
|
||||||
|
typedef unsigned long long uint64_t;
|
||||||
|
typedef float float_t;
|
||||||
|
typedef double double_t;
|
||||||
|
|
||||||
|
int __fpclassify(double);
|
||||||
|
int __fpclassifyf(float);
|
||||||
|
int __fpclassifyl(long double);
|
||||||
|
|
||||||
|
static __inline unsigned __FLOAT_BITS(float __f)
|
||||||
|
{
|
||||||
|
union {float __f; unsigned __i;} __u;
|
||||||
|
__u.__f = __f;
|
||||||
|
return __u.__i;
|
||||||
|
}
|
||||||
|
static __inline unsigned long long __DOUBLE_BITS(double __f)
|
||||||
|
{
|
||||||
|
union {double __f; unsigned long long __i;} __u;
|
||||||
|
__u.__f = __f;
|
||||||
|
return __u.__i;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define fpclassify(x) ( \
|
||||||
|
sizeof(x) == sizeof(float) ? __fpclassifyf(x) : \
|
||||||
|
sizeof(x) == sizeof(double) ? __fpclassify(x) : \
|
||||||
|
__fpclassifyl(x) )
|
||||||
|
|
||||||
|
#define isinf(x) ( \
|
||||||
|
sizeof(x) == sizeof(float) ? (__FLOAT_BITS(x) & 0x7fffffff) == 0x7f800000 : \
|
||||||
|
sizeof(x) == sizeof(double) ? (__DOUBLE_BITS(x) & ULLONG_SHIFT1) == 0x7ffULL<<52 : \
|
||||||
|
__fpclassifyl(x) == FP_INFINITE)
|
||||||
|
|
||||||
|
#define isnan(x) ( \
|
||||||
|
sizeof(x) == sizeof(float) ? (__FLOAT_BITS(x) & 0x7fffffff) > 0x7f800000 : \
|
||||||
|
sizeof(x) == sizeof(double) ? (__DOUBLE_BITS(x) & ULLONG_SHIFT1) > 0x7ffULL<<52 : \
|
||||||
|
__fpclassifyl(x) == FP_NAN)
|
||||||
|
|
||||||
|
#define isnormal(x) ( \
|
||||||
|
sizeof(x) == sizeof(float) ? ((__FLOAT_BITS(x)+0x00800000) & 0x7fffffff) >= 0x01000000 : \
|
||||||
|
sizeof(x) == sizeof(double) ? ((__DOUBLE_BITS(x)+(1ULL<<52)) & -1ULL>>1) >= 1ULL<<53 : \
|
||||||
|
__fpclassifyl(x) == FP_NORMAL)
|
||||||
|
|
||||||
|
#define isfinite(x) ( \
|
||||||
|
sizeof(x) == sizeof(float) ? (__FLOAT_BITS(x) & 0x7fffffff) < 0x7f800000 : \
|
||||||
|
sizeof(x) == sizeof(double) ? (__DOUBLE_BITS(x) & ULLONG_SHIFT1) < 0x7ffULL<<52 : \
|
||||||
|
__fpclassifyl(x) > FP_INFINITE)
|
||||||
|
|
||||||
|
int __signbit(double);
|
||||||
|
int __signbitf(float);
|
||||||
|
int __signbitl(long double);
|
||||||
|
|
||||||
|
#define signbit(x) ( \
|
||||||
|
sizeof(x) == sizeof(float) ? (int)(__FLOAT_BITS(x)>>31) : \
|
||||||
|
sizeof(x) == sizeof(double) ? (int)(__DOUBLE_BITS(x)>>63) : \
|
||||||
|
__signbitl(x) )
|
||||||
|
|
||||||
|
#define isunordered(x,y) (isnan((x)) ? ((void)(y),1) : isnan((y)))
|
||||||
|
|
||||||
|
#define __ISREL_DEF(rel, op, type) \
|
||||||
|
static __inline int __is##rel(type __x, type __y) \
|
||||||
|
{ return !isunordered(__x,__y) && __x op __y; }
|
||||||
|
|
||||||
|
__ISREL_DEF(lessf, <, float_t)
|
||||||
|
__ISREL_DEF(less, <, double_t)
|
||||||
|
__ISREL_DEF(lessl, <, long double)
|
||||||
|
__ISREL_DEF(lessequalf, <=, float_t)
|
||||||
|
__ISREL_DEF(lessequal, <=, double_t)
|
||||||
|
__ISREL_DEF(lessequall, <=, long double)
|
||||||
|
__ISREL_DEF(lessgreaterf, !=, float_t)
|
||||||
|
__ISREL_DEF(lessgreater, !=, double_t)
|
||||||
|
__ISREL_DEF(lessgreaterl, !=, long double)
|
||||||
|
__ISREL_DEF(greaterf, >, float_t)
|
||||||
|
__ISREL_DEF(greater, >, double_t)
|
||||||
|
__ISREL_DEF(greaterl, >, long double)
|
||||||
|
__ISREL_DEF(greaterequalf, >=, float_t)
|
||||||
|
__ISREL_DEF(greaterequal, >=, double_t)
|
||||||
|
__ISREL_DEF(greaterequall, >=, long double)
|
||||||
|
|
||||||
|
#define __tg_pred_2(x, y, p) ( \
|
||||||
|
sizeof((x)+(y)) == sizeof(float) ? p##f(x, y) : \
|
||||||
|
sizeof((x)+(y)) == sizeof(double) ? p(x, y) : \
|
||||||
|
p##l(x, y) )
|
||||||
|
|
||||||
|
#define isless(x, y) __tg_pred_2(x, y, __isless)
|
||||||
|
#define islessequal(x, y) __tg_pred_2(x, y, __islessequal)
|
||||||
|
#define islessgreater(x, y) __tg_pred_2(x, y, __islessgreater)
|
||||||
|
#define isgreater(x, y) __tg_pred_2(x, y, __isgreater)
|
||||||
|
#define isgreaterequal(x, y) __tg_pred_2(x, y, __isgreaterequal)
|
||||||
|
|
||||||
|
/* Evaluate an expression as the specified type. With standard excess
|
||||||
|
precision handling a type cast or assignment is enough (with
|
||||||
|
-ffloat-store an assignment is required, in old compilers argument
|
||||||
|
passing and return statement may not drop excess precision). */
|
||||||
|
|
||||||
|
static inline float eval_as_float(float x)
|
||||||
|
{
|
||||||
|
float y = x;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline double eval_as_double(double x)
|
||||||
|
{
|
||||||
|
double y = x;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fp_barrier returns its input, but limits code transformations
|
||||||
|
as if it had a side-effect (e.g. observable io) and returned
|
||||||
|
an arbitrary value. */
|
||||||
|
|
||||||
|
#ifndef fp_barrierf
|
||||||
|
#define fp_barrierf fp_barrierf
|
||||||
|
static inline float fp_barrierf(float x)
|
||||||
|
{
|
||||||
|
volatile float y = x;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef fp_barrier
|
||||||
|
#define fp_barrier fp_barrier
|
||||||
|
static inline double fp_barrier(double x)
|
||||||
|
{
|
||||||
|
volatile double y = x;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef fp_barrierl
|
||||||
|
#define fp_barrierl fp_barrierl
|
||||||
|
static inline long double fp_barrierl(long double x)
|
||||||
|
{
|
||||||
|
volatile long double y = x;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* fp_force_eval ensures that the input value is computed when that's
|
||||||
|
otherwise unused. To prevent the constant folding of the input
|
||||||
|
expression, an additional fp_barrier may be needed or a compilation
|
||||||
|
mode that does so (e.g. -frounding-math in gcc). Then it can be
|
||||||
|
used to evaluate an expression for its fenv side-effects only. */
|
||||||
|
|
||||||
|
#ifndef fp_force_evalf
|
||||||
|
#define fp_force_evalf fp_force_evalf
|
||||||
|
static inline void fp_force_evalf(float x)
|
||||||
|
{
|
||||||
|
volatile float y;
|
||||||
|
y = x;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef fp_force_eval
|
||||||
|
#define fp_force_eval fp_force_eval
|
||||||
|
static inline void fp_force_eval(double x)
|
||||||
|
{
|
||||||
|
volatile double y;
|
||||||
|
y = x;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef fp_force_evall
|
||||||
|
#define fp_force_evall fp_force_evall
|
||||||
|
static inline void fp_force_evall(long double x)
|
||||||
|
{
|
||||||
|
volatile long double y;
|
||||||
|
y = x;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define FORCE_EVAL(x) do { \
|
||||||
|
if (sizeof(x) == sizeof(float)) { \
|
||||||
|
fp_force_evalf(x); \
|
||||||
|
} else if (sizeof(x) == sizeof(double)) { \
|
||||||
|
fp_force_eval(x); \
|
||||||
|
} else { \
|
||||||
|
fp_force_evall(x); \
|
||||||
|
} \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
typedef union {float _f; uint32_t _i;}asuint_union;
|
||||||
|
typedef union {uint32_t _i; float _f;}asfloat_union;
|
||||||
|
typedef union {double _f; uint64_t _i;}asuint64_union;
|
||||||
|
typedef union {uint64_t _i; double _f;}asdouble_union;
|
||||||
|
#define asuint(f) ((asuint_union){f})._i
|
||||||
|
#define asfloat(i) ((asfloat_union){i})._f
|
||||||
|
#define asuint64(f) ((asuint64_union){f})._i
|
||||||
|
#define asdouble(i) ((asdouble_union){i})._f
|
||||||
|
|
||||||
|
#define EXTRACT_WORDS(hi,lo,d) \
|
||||||
|
do { \
|
||||||
|
uint64_t __u = asuint64(d); \
|
||||||
|
(hi) = __u >> 32; \
|
||||||
|
(lo) = (uint32_t)__u; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define GET_HIGH_WORD(hi,d) \
|
||||||
|
do { \
|
||||||
|
(hi) = asuint64(d) >> 32; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define GET_LOW_WORD(lo,d) \
|
||||||
|
do { \
|
||||||
|
(lo) = (uint32_t)asuint64(d); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define INSERT_WORDS(d,hi,lo) \
|
||||||
|
do { \
|
||||||
|
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define SET_HIGH_WORD(d,hi) \
|
||||||
|
INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
|
||||||
|
|
||||||
|
#define SET_LOW_WORD(d,lo) \
|
||||||
|
INSERT_WORDS(d, asuint64(d)>>32, lo)
|
||||||
|
|
||||||
|
#define GET_FLOAT_WORD(w,d) \
|
||||||
|
do { \
|
||||||
|
(w) = asuint(d); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define SET_FLOAT_WORD(d,w) \
|
||||||
|
do { \
|
||||||
|
(d) = asfloat(w); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
int __rem_pio2_large(double*, double*, int, int, int);
|
||||||
|
int __rem_pio2(double, double*);
|
||||||
|
double __sin(double, double, int);
|
||||||
|
double __cos(double, double);
|
||||||
|
double __tan(double, double, int);
|
||||||
|
|
||||||
|
/* error handling functions */
|
||||||
|
float __math_xflowf(uint32_t, float);
|
||||||
|
float __math_uflowf(uint32_t);
|
||||||
|
float __math_oflowf(uint32_t);
|
||||||
|
float __math_divzerof(uint32_t);
|
||||||
|
float __math_invalidf(float);
|
||||||
|
double __math_xflow(uint32_t, double);
|
||||||
|
double __math_uflow(uint32_t);
|
||||||
|
double __math_oflow(uint32_t);
|
||||||
|
double __math_divzero(uint32_t);
|
||||||
|
double __math_invalid(double);
|
||||||
|
#if LDBL_MANT_DIG != DBL_MANT_DIG
|
||||||
|
long double __math_invalidl(long double);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
double acos(double);
|
||||||
|
double asin(double);
|
||||||
|
double atan(double);
|
||||||
|
double atan2(double, double);
|
||||||
|
double cbrt(double);
|
||||||
|
double ceil(double);
|
||||||
|
double cos(double);
|
||||||
|
double degrees(double);
|
||||||
|
double exp(double);
|
||||||
|
double fabs(double);
|
||||||
|
int factorial(int);
|
||||||
|
double floor(double);
|
||||||
|
double fmax(double, double);
|
||||||
|
double fmin(double, double);
|
||||||
|
double fmod(double, double);
|
||||||
|
double fsum(double*, int);
|
||||||
|
int gcd(int, int);
|
||||||
|
double log(double);
|
||||||
|
double log10(double);
|
||||||
|
double log2(double);
|
||||||
|
double modf(double, double *);
|
||||||
|
double pow(double, double);
|
||||||
|
double radians(double);
|
||||||
|
double scalbn(double, int);
|
||||||
|
double sin(double);
|
||||||
|
double sqrt(double);
|
||||||
|
double tan(double);
|
||||||
|
double trunc(double);
|
||||||
|
|
||||||
|
#define FLT_TRUE_MIN 1.40129846432481707092e-45F
|
||||||
|
#define FLT_MIN 1.17549435082228750797e-38F
|
||||||
|
#define FLT_MAX 3.40282346638528859812e+38F
|
||||||
|
#define FLT_EPSILON 1.1920928955078125e-07F
|
||||||
|
|
||||||
|
#define FLT_MANT_DIG 24
|
||||||
|
#define FLT_MIN_EXP (-125)
|
||||||
|
#define FLT_MAX_EXP 128
|
||||||
|
#define FLT_HAS_SUBNORM 1
|
||||||
|
|
||||||
|
#define FLT_DIG 6
|
||||||
|
#define FLT_DECIMAL_DIG 9
|
||||||
|
#define FLT_MIN_10_EXP (-37)
|
||||||
|
#define FLT_MAX_10_EXP 38
|
||||||
|
|
||||||
|
#define DBL_TRUE_MIN 4.94065645841246544177e-324
|
||||||
|
#define DBL_MIN 2.22507385850720138309e-308
|
||||||
|
#define DBL_MAX 1.79769313486231570815e+308
|
||||||
|
#define DBL_EPSILON 2.22044604925031308085e-16
|
||||||
|
|
||||||
|
#define DBL_MANT_DIG 53
|
||||||
|
#define DBL_MIN_EXP (-1021)
|
||||||
|
#define DBL_MAX_EXP 1024
|
||||||
|
#define DBL_HAS_SUBNORM 1
|
||||||
|
|
||||||
|
#define DBL_DIG 15
|
||||||
|
#define DBL_DECIMAL_DIG 17
|
||||||
|
#define DBL_MIN_10_EXP (-307)
|
||||||
|
#define DBL_MAX_10_EXP 308
|
||||||
|
|
||||||
|
#define LDBL_HAS_SUBNORM 1
|
||||||
|
#define LDBL_DECIMAL_DIG DBL_DECIMAL_DIG
|
||||||
|
#define LDBL_MANT_DIG DBL_MANT_DIG
|
||||||
|
#define LDBL_MAX_EXP DBL_MAX_EXP
|
||||||
|
|
||||||
|
#undef MAXFLOAT
|
||||||
|
#define MAXFLOAT 3.40282346638528859812e+38F
|
||||||
|
#define HUGE 3.40282346638528859812e+38F
|
||||||
|
|
||||||
|
#define M_DEG2RAD 0.017453292519943295 /* pi/180 */
|
||||||
|
#define M_E 2.7182818284590452354 /* e */
|
||||||
|
#define M_LOG2E 1.4426950408889634074 /* log_2 e */
|
||||||
|
#define M_LOG10E 0.43429448190325182765 /* log_10 e */
|
||||||
|
#define M_LN2 0.69314718055994530942 /* log_e 2 */
|
||||||
|
#define M_LN10 2.30258509299404568402 /* log_e 10 */
|
||||||
|
#define M_PI 3.14159265358979323846 /* pi */
|
||||||
|
#define M_PI_2 1.57079632679489661923 /* pi/2 */
|
||||||
|
#define M_PI_4 0.78539816339744830962 /* pi/4 */
|
||||||
|
#define M_1_PI 0.31830988618379067154 /* 1/pi */
|
||||||
|
#define M_2_PI 0.63661977236758134308 /* 2/pi */
|
||||||
|
#define M_RAD2DEG 57.29577951308232 /* 180/pi */
|
||||||
|
#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
|
||||||
|
#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
|
||||||
|
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
|
||||||
|
|
||||||
|
// extern int signgam;
|
||||||
|
|
||||||
|
// double j0(double);
|
||||||
|
// double j1(double);
|
||||||
|
// double jn(int, double);
|
||||||
|
|
||||||
|
// double y0(double);
|
||||||
|
// double y1(double);
|
||||||
|
// double yn(int, double);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
71
3rd/math/src/__cos.c
Normal file
71
3rd/math/src/__cos.c
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* __cos( x, y )
|
||||||
|
* kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
|
||||||
|
* Input x is assumed to be bounded by ~pi/4 in magnitude.
|
||||||
|
* Input y is the tail of x.
|
||||||
|
*
|
||||||
|
* Algorithm
|
||||||
|
* 1. Since cos(-x) = cos(x), we need only to consider positive x.
|
||||||
|
* 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
|
||||||
|
* 3. cos(x) is approximated by a polynomial of degree 14 on
|
||||||
|
* [0,pi/4]
|
||||||
|
* 4 14
|
||||||
|
* cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
|
||||||
|
* where the remez error is
|
||||||
|
*
|
||||||
|
* | 2 4 6 8 10 12 14 | -58
|
||||||
|
* |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
|
||||||
|
* | |
|
||||||
|
*
|
||||||
|
* 4 6 8 10 12 14
|
||||||
|
* 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
|
||||||
|
* cos(x) ~ 1 - x*x/2 + r
|
||||||
|
* since cos(x+y) ~ cos(x) - sin(x)*y
|
||||||
|
* ~ cos(x) - x*y,
|
||||||
|
* a correction term is necessary in cos(x) and hence
|
||||||
|
* cos(x+y) = 1 - (x*x/2 - (r - x*y))
|
||||||
|
* For better accuracy, rearrange to
|
||||||
|
* cos(x+y) ~ w + (tmp + (r-x*y))
|
||||||
|
* where w = 1 - x*x/2 and tmp is a tiny correction term
|
||||||
|
* (1 - x*x/2 == w + tmp exactly in infinite precision).
|
||||||
|
* The exactness of w + tmp in infinite precision depends on w
|
||||||
|
* and tmp having the same precision as x. If they have extra
|
||||||
|
* precision due to compiler bugs, then the extra precision is
|
||||||
|
* only good provided it is retained in all terms of the final
|
||||||
|
* expression for cos(). Retention happens in all cases tested
|
||||||
|
* under FreeBSD, so don't pessimize things by forcibly clipping
|
||||||
|
* any extra precision in w.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
static const double
|
||||||
|
C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
|
||||||
|
C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
|
||||||
|
C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
|
||||||
|
C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
|
||||||
|
C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
|
||||||
|
C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
|
||||||
|
|
||||||
|
double __cos(double x, double y)
|
||||||
|
{
|
||||||
|
double_t hz,z,r,w;
|
||||||
|
|
||||||
|
z = x*x;
|
||||||
|
w = z*z;
|
||||||
|
r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6));
|
||||||
|
hz = 0.5*z;
|
||||||
|
w = 1.0-hz;
|
||||||
|
return w + (((1.0-w)-hz) + (z*r-x*y));
|
||||||
|
}
|
10
3rd/math/src/__fpclassify.c
Normal file
10
3rd/math/src/__fpclassify.c
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
int __fpclassify(double x)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} u = {x};
|
||||||
|
int e = u.i>>52 & 0x7ff;
|
||||||
|
if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
|
||||||
|
if (e==0x7ff) return u.i<<12 ? FP_NAN : FP_INFINITE;
|
||||||
|
return FP_NORMAL;
|
||||||
|
}
|
10
3rd/math/src/__fpclassifyf.c
Normal file
10
3rd/math/src/__fpclassifyf.c
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
int __fpclassifyf(float x)
|
||||||
|
{
|
||||||
|
union {float f; uint32_t i;} u = {x};
|
||||||
|
int e = u.i>>23 & 0xff;
|
||||||
|
if (!e) return u.i<<1 ? FP_SUBNORMAL : FP_ZERO;
|
||||||
|
if (e==0xff) return u.i<<9 ? FP_NAN : FP_INFINITE;
|
||||||
|
return FP_NORMAL;
|
||||||
|
}
|
42
3rd/math/src/__fpclassifyl.c
Normal file
42
3rd/math/src/__fpclassifyl.c
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
||||||
|
int __fpclassifyl(long double x)
|
||||||
|
{
|
||||||
|
return __fpclassify(x);
|
||||||
|
}
|
||||||
|
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
|
||||||
|
int __fpclassifyl(long double x)
|
||||||
|
{
|
||||||
|
union ldshape u = {x};
|
||||||
|
int e = u.i.se & 0x7fff;
|
||||||
|
int msb = u.i.m>>63;
|
||||||
|
if (!e && !msb)
|
||||||
|
return u.i.m ? FP_SUBNORMAL : FP_ZERO;
|
||||||
|
if (e == 0x7fff) {
|
||||||
|
/* The x86 variant of 80-bit extended precision only admits
|
||||||
|
* one representation of each infinity, with the mantissa msb
|
||||||
|
* necessarily set. The version with it clear is invalid/nan.
|
||||||
|
* The m68k variant, however, allows either, and tooling uses
|
||||||
|
* the version with it clear. */
|
||||||
|
if (__BYTE_ORDER == __LITTLE_ENDIAN && !msb)
|
||||||
|
return FP_NAN;
|
||||||
|
return u.i.m << 1 ? FP_NAN : FP_INFINITE;
|
||||||
|
}
|
||||||
|
if (!msb)
|
||||||
|
return FP_NAN;
|
||||||
|
return FP_NORMAL;
|
||||||
|
}
|
||||||
|
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
|
||||||
|
int __fpclassifyl(long double x)
|
||||||
|
{
|
||||||
|
union ldshape u = {x};
|
||||||
|
int e = u.i.se & 0x7fff;
|
||||||
|
u.i.se = 0;
|
||||||
|
if (!e)
|
||||||
|
return u.i2.lo | u.i2.hi ? FP_SUBNORMAL : FP_ZERO;
|
||||||
|
if (e == 0x7fff)
|
||||||
|
return u.i2.lo | u.i2.hi ? FP_NAN : FP_INFINITE;
|
||||||
|
return FP_NORMAL;
|
||||||
|
}
|
||||||
|
#endif
|
6
3rd/math/src/__math_divzero.c
Normal file
6
3rd/math/src/__math_divzero.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
double __math_divzero(uint32_t sign)
|
||||||
|
{
|
||||||
|
return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
|
||||||
|
}
|
6
3rd/math/src/__math_divzerof.c
Normal file
6
3rd/math/src/__math_divzerof.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
float __math_divzerof(uint32_t sign)
|
||||||
|
{
|
||||||
|
return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
|
||||||
|
}
|
6
3rd/math/src/__math_invalid.c
Normal file
6
3rd/math/src/__math_invalid.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
double __math_invalid(double x)
|
||||||
|
{
|
||||||
|
return (x - x) / (x - x);
|
||||||
|
}
|
6
3rd/math/src/__math_invalidf.c
Normal file
6
3rd/math/src/__math_invalidf.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
float __math_invalidf(float x)
|
||||||
|
{
|
||||||
|
return (x - x) / (x - x);
|
||||||
|
}
|
8
3rd/math/src/__math_invalidl.c
Normal file
8
3rd/math/src/__math_invalidl.c
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
#if LDBL_MANT_DIG != DBL_MANT_DIG
|
||||||
|
long double __math_invalidl(long double x)
|
||||||
|
{
|
||||||
|
return (x - x) / (x - x);
|
||||||
|
}
|
||||||
|
#endif
|
6
3rd/math/src/__math_oflow.c
Normal file
6
3rd/math/src/__math_oflow.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
double __math_oflow(uint32_t sign)
|
||||||
|
{
|
||||||
|
return __math_xflow(sign, 0x1p769);
|
||||||
|
}
|
6
3rd/math/src/__math_oflowf.c
Normal file
6
3rd/math/src/__math_oflowf.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
float __math_oflowf(uint32_t sign)
|
||||||
|
{
|
||||||
|
return __math_xflowf(sign, 0x1p97f);
|
||||||
|
}
|
6
3rd/math/src/__math_uflow.c
Normal file
6
3rd/math/src/__math_uflow.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
double __math_uflow(uint32_t sign)
|
||||||
|
{
|
||||||
|
return __math_xflow(sign, 0x1p-767);
|
||||||
|
}
|
6
3rd/math/src/__math_uflowf.c
Normal file
6
3rd/math/src/__math_uflowf.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
float __math_uflowf(uint32_t sign)
|
||||||
|
{
|
||||||
|
return __math_xflowf(sign, 0x1p-95f);
|
||||||
|
}
|
6
3rd/math/src/__math_xflow.c
Normal file
6
3rd/math/src/__math_xflow.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
double __math_xflow(uint32_t sign, double y)
|
||||||
|
{
|
||||||
|
return eval_as_double(fp_barrier(sign ? -y : y) * y);
|
||||||
|
}
|
6
3rd/math/src/__math_xflowf.c
Normal file
6
3rd/math/src/__math_xflowf.c
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
float __math_xflowf(uint32_t sign, float y)
|
||||||
|
{
|
||||||
|
return eval_as_float(fp_barrierf(sign ? -y : y) * y);
|
||||||
|
}
|
190
3rd/math/src/__rem_pio2.c
Normal file
190
3rd/math/src/__rem_pio2.c
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*
|
||||||
|
* Optimized by Bruce D. Evans.
|
||||||
|
*/
|
||||||
|
/* __rem_pio2(x,y)
|
||||||
|
*
|
||||||
|
* return the remainder of x rem pi/2 in y[0]+y[1]
|
||||||
|
* use __rem_pio2_large() for large x
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
|
||||||
|
#define EPS DBL_EPSILON
|
||||||
|
#elif FLT_EVAL_METHOD==2
|
||||||
|
#define EPS LDBL_EPSILON
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* invpio2: 53 bits of 2/pi
|
||||||
|
* pio2_1: first 33 bit of pi/2
|
||||||
|
* pio2_1t: pi/2 - pio2_1
|
||||||
|
* pio2_2: second 33 bit of pi/2
|
||||||
|
* pio2_2t: pi/2 - (pio2_1+pio2_2)
|
||||||
|
* pio2_3: third 33 bit of pi/2
|
||||||
|
* pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
|
||||||
|
*/
|
||||||
|
static const double
|
||||||
|
toint = 1.5/EPS,
|
||||||
|
pio4 = 0x1.921fb54442d18p-1,
|
||||||
|
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
|
||||||
|
pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
|
||||||
|
pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
|
||||||
|
pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
|
||||||
|
pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
|
||||||
|
pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
|
||||||
|
pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
|
||||||
|
|
||||||
|
/* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
|
||||||
|
int __rem_pio2(double x, double *y)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} u = {x};
|
||||||
|
double_t z,w,t,r,fn;
|
||||||
|
double tx[3],ty[2];
|
||||||
|
uint32_t ix;
|
||||||
|
int sign, n, ex, ey, i;
|
||||||
|
|
||||||
|
sign = u.i>>63;
|
||||||
|
ix = u.i>>32 & 0x7fffffff;
|
||||||
|
if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
|
||||||
|
if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
|
||||||
|
goto medium; /* cancellation -- use medium case */
|
||||||
|
if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
|
||||||
|
if (!sign) {
|
||||||
|
z = x - pio2_1; /* one round good to 85 bits */
|
||||||
|
y[0] = z - pio2_1t;
|
||||||
|
y[1] = (z-y[0]) - pio2_1t;
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
z = x + pio2_1;
|
||||||
|
y[0] = z + pio2_1t;
|
||||||
|
y[1] = (z-y[0]) + pio2_1t;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!sign) {
|
||||||
|
z = x - 2*pio2_1;
|
||||||
|
y[0] = z - 2*pio2_1t;
|
||||||
|
y[1] = (z-y[0]) - 2*pio2_1t;
|
||||||
|
return 2;
|
||||||
|
} else {
|
||||||
|
z = x + 2*pio2_1;
|
||||||
|
y[0] = z + 2*pio2_1t;
|
||||||
|
y[1] = (z-y[0]) + 2*pio2_1t;
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
|
||||||
|
if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
|
||||||
|
if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
|
||||||
|
goto medium;
|
||||||
|
if (!sign) {
|
||||||
|
z = x - 3*pio2_1;
|
||||||
|
y[0] = z - 3*pio2_1t;
|
||||||
|
y[1] = (z-y[0]) - 3*pio2_1t;
|
||||||
|
return 3;
|
||||||
|
} else {
|
||||||
|
z = x + 3*pio2_1;
|
||||||
|
y[0] = z + 3*pio2_1t;
|
||||||
|
y[1] = (z-y[0]) + 3*pio2_1t;
|
||||||
|
return -3;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
|
||||||
|
goto medium;
|
||||||
|
if (!sign) {
|
||||||
|
z = x - 4*pio2_1;
|
||||||
|
y[0] = z - 4*pio2_1t;
|
||||||
|
y[1] = (z-y[0]) - 4*pio2_1t;
|
||||||
|
return 4;
|
||||||
|
} else {
|
||||||
|
z = x + 4*pio2_1;
|
||||||
|
y[0] = z + 4*pio2_1t;
|
||||||
|
y[1] = (z-y[0]) + 4*pio2_1t;
|
||||||
|
return -4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
|
||||||
|
medium:
|
||||||
|
/* rint(x/(pi/2)) */
|
||||||
|
fn = (double_t)x*invpio2 + toint - toint;
|
||||||
|
n = (int32_t)fn;
|
||||||
|
r = x - fn*pio2_1;
|
||||||
|
w = fn*pio2_1t; /* 1st round, good to 85 bits */
|
||||||
|
/* Matters with directed rounding. */
|
||||||
|
if (predict_false(r - w < -pio4)) {
|
||||||
|
n--;
|
||||||
|
fn--;
|
||||||
|
r = x - fn*pio2_1;
|
||||||
|
w = fn*pio2_1t;
|
||||||
|
} else if (predict_false(r - w > pio4)) {
|
||||||
|
n++;
|
||||||
|
fn++;
|
||||||
|
r = x - fn*pio2_1;
|
||||||
|
w = fn*pio2_1t;
|
||||||
|
}
|
||||||
|
y[0] = r - w;
|
||||||
|
u.f = y[0];
|
||||||
|
ey = u.i>>52 & 0x7ff;
|
||||||
|
ex = ix>>20;
|
||||||
|
if (ex - ey > 16) { /* 2nd round, good to 118 bits */
|
||||||
|
t = r;
|
||||||
|
w = fn*pio2_2;
|
||||||
|
r = t - w;
|
||||||
|
w = fn*pio2_2t - ((t-r)-w);
|
||||||
|
y[0] = r - w;
|
||||||
|
u.f = y[0];
|
||||||
|
ey = u.i>>52 & 0x7ff;
|
||||||
|
if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
|
||||||
|
t = r;
|
||||||
|
w = fn*pio2_3;
|
||||||
|
r = t - w;
|
||||||
|
w = fn*pio2_3t - ((t-r)-w);
|
||||||
|
y[0] = r - w;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
y[1] = (r - y[0]) - w;
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* all other (large) arguments
|
||||||
|
*/
|
||||||
|
if (ix >= 0x7ff00000) { /* x is inf or NaN */
|
||||||
|
y[0] = y[1] = x - x;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
/* set z = scalbn(|x|,-ilogb(x)+23) */
|
||||||
|
u.f = x;
|
||||||
|
u.i &= (uint64_t)-1>>12;
|
||||||
|
u.i |= (uint64_t)(0x3ff + 23)<<52;
|
||||||
|
z = u.f;
|
||||||
|
for (i=0; i < 2; i++) {
|
||||||
|
tx[i] = (double)(int32_t)z;
|
||||||
|
z = (z-tx[i])*0x1p24;
|
||||||
|
}
|
||||||
|
tx[i] = z;
|
||||||
|
/* skip zero terms, first term is non-zero */
|
||||||
|
while (tx[i] == 0.0)
|
||||||
|
i--;
|
||||||
|
n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1);
|
||||||
|
if (sign) {
|
||||||
|
y[0] = -ty[0];
|
||||||
|
y[1] = -ty[1];
|
||||||
|
return -n;
|
||||||
|
}
|
||||||
|
y[0] = ty[0];
|
||||||
|
y[1] = ty[1];
|
||||||
|
return n;
|
||||||
|
}
|
442
3rd/math/src/__rem_pio2_large.c
Normal file
442
3rd/math/src/__rem_pio2_large.c
Normal file
@ -0,0 +1,442 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* __rem_pio2_large(x,y,e0,nx,prec)
|
||||||
|
* double x[],y[]; int e0,nx,prec;
|
||||||
|
*
|
||||||
|
* __rem_pio2_large return the last three digits of N with
|
||||||
|
* y = x - N*pi/2
|
||||||
|
* so that |y| < pi/2.
|
||||||
|
*
|
||||||
|
* The method is to compute the integer (mod 8) and fraction parts of
|
||||||
|
* (2/pi)*x without doing the full multiplication. In general we
|
||||||
|
* skip the part of the product that are known to be a huge integer (
|
||||||
|
* more accurately, = 0 mod 8 ). Thus the number of operations are
|
||||||
|
* independent of the exponent of the input.
|
||||||
|
*
|
||||||
|
* (2/pi) is represented by an array of 24-bit integers in ipio2[].
|
||||||
|
*
|
||||||
|
* Input parameters:
|
||||||
|
* x[] The input value (must be positive) is broken into nx
|
||||||
|
* pieces of 24-bit integers in double precision format.
|
||||||
|
* x[i] will be the i-th 24 bit of x. The scaled exponent
|
||||||
|
* of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
|
||||||
|
* match x's up to 24 bits.
|
||||||
|
*
|
||||||
|
* Example of breaking a double positive z into x[0]+x[1]+x[2]:
|
||||||
|
* e0 = ilogb(z)-23
|
||||||
|
* z = scalbn(z,-e0)
|
||||||
|
* for i = 0,1,2
|
||||||
|
* x[i] = floor(z)
|
||||||
|
* z = (z-x[i])*2**24
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* y[] ouput result in an array of double precision numbers.
|
||||||
|
* The dimension of y[] is:
|
||||||
|
* 24-bit precision 1
|
||||||
|
* 53-bit precision 2
|
||||||
|
* 64-bit precision 2
|
||||||
|
* 113-bit precision 3
|
||||||
|
* The actual value is the sum of them. Thus for 113-bit
|
||||||
|
* precison, one may have to do something like:
|
||||||
|
*
|
||||||
|
* long double t,w,r_head, r_tail;
|
||||||
|
* t = (long double)y[2] + (long double)y[1];
|
||||||
|
* w = (long double)y[0];
|
||||||
|
* r_head = t+w;
|
||||||
|
* r_tail = w - (r_head - t);
|
||||||
|
*
|
||||||
|
* e0 The exponent of x[0]. Must be <= 16360 or you need to
|
||||||
|
* expand the ipio2 table.
|
||||||
|
*
|
||||||
|
* nx dimension of x[]
|
||||||
|
*
|
||||||
|
* prec an integer indicating the precision:
|
||||||
|
* 0 24 bits (single)
|
||||||
|
* 1 53 bits (double)
|
||||||
|
* 2 64 bits (extended)
|
||||||
|
* 3 113 bits (quad)
|
||||||
|
*
|
||||||
|
* External function:
|
||||||
|
* double scalbn(), floor();
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Here is the description of some local variables:
|
||||||
|
*
|
||||||
|
* jk jk+1 is the initial number of terms of ipio2[] needed
|
||||||
|
* in the computation. The minimum and recommended value
|
||||||
|
* for jk is 3,4,4,6 for single, double, extended, and quad.
|
||||||
|
* jk+1 must be 2 larger than you might expect so that our
|
||||||
|
* recomputation test works. (Up to 24 bits in the integer
|
||||||
|
* part (the 24 bits of it that we compute) and 23 bits in
|
||||||
|
* the fraction part may be lost to cancelation before we
|
||||||
|
* recompute.)
|
||||||
|
*
|
||||||
|
* jz local integer variable indicating the number of
|
||||||
|
* terms of ipio2[] used.
|
||||||
|
*
|
||||||
|
* jx nx - 1
|
||||||
|
*
|
||||||
|
* jv index for pointing to the suitable ipio2[] for the
|
||||||
|
* computation. In general, we want
|
||||||
|
* ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
|
||||||
|
* is an integer. Thus
|
||||||
|
* e0-3-24*jv >= 0 or (e0-3)/24 >= jv
|
||||||
|
* Hence jv = max(0,(e0-3)/24).
|
||||||
|
*
|
||||||
|
* jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
|
||||||
|
*
|
||||||
|
* q[] double array with integral value, representing the
|
||||||
|
* 24-bits chunk of the product of x and 2/pi.
|
||||||
|
*
|
||||||
|
* q0 the corresponding exponent of q[0]. Note that the
|
||||||
|
* exponent for q[i] would be q0-24*i.
|
||||||
|
*
|
||||||
|
* PIo2[] double precision array, obtained by cutting pi/2
|
||||||
|
* into 24 bits chunks.
|
||||||
|
*
|
||||||
|
* f[] ipio2[] in floating point
|
||||||
|
*
|
||||||
|
* iq[] integer array by breaking up q[] in 24-bits chunk.
|
||||||
|
*
|
||||||
|
* fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
|
||||||
|
*
|
||||||
|
* ih integer. If >0 it indicates q[] is >= 0.5, hence
|
||||||
|
* it also indicates the *sign* of the result.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Constants:
|
||||||
|
* The hexadecimal values are the intended ones for the following
|
||||||
|
* constants. The decimal values may be used, provided that the
|
||||||
|
* compiler will convert from decimal to binary accurately enough
|
||||||
|
* to produce the hexadecimal values shown.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
static const int init_jk[] = {3,4,4,6}; /* initial value for jk */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
|
||||||
|
*
|
||||||
|
* integer array, contains the (24*i)-th to (24*i+23)-th
|
||||||
|
* bit of 2/pi after binary point. The corresponding
|
||||||
|
* floating value is
|
||||||
|
*
|
||||||
|
* ipio2[i] * 2^(-24(i+1)).
|
||||||
|
*
|
||||||
|
* NB: This table must have at least (e0-3)/24 + jk terms.
|
||||||
|
* For quad precision (e0 <= 16360, jk = 6), this is 686.
|
||||||
|
*/
|
||||||
|
static const int32_t ipio2[] = {
|
||||||
|
0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
|
||||||
|
0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
|
||||||
|
0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
|
||||||
|
0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
|
||||||
|
0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
|
||||||
|
0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
|
||||||
|
0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
|
||||||
|
0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
|
||||||
|
0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
|
||||||
|
0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
|
||||||
|
0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
|
||||||
|
|
||||||
|
#if LDBL_MAX_EXP > 1024
|
||||||
|
0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6,
|
||||||
|
0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2,
|
||||||
|
0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35,
|
||||||
|
0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30,
|
||||||
|
0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C,
|
||||||
|
0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4,
|
||||||
|
0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770,
|
||||||
|
0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7,
|
||||||
|
0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19,
|
||||||
|
0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522,
|
||||||
|
0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16,
|
||||||
|
0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6,
|
||||||
|
0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E,
|
||||||
|
0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48,
|
||||||
|
0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3,
|
||||||
|
0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF,
|
||||||
|
0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55,
|
||||||
|
0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612,
|
||||||
|
0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929,
|
||||||
|
0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC,
|
||||||
|
0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B,
|
||||||
|
0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C,
|
||||||
|
0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4,
|
||||||
|
0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB,
|
||||||
|
0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC,
|
||||||
|
0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C,
|
||||||
|
0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F,
|
||||||
|
0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5,
|
||||||
|
0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437,
|
||||||
|
0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B,
|
||||||
|
0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA,
|
||||||
|
0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD,
|
||||||
|
0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3,
|
||||||
|
0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3,
|
||||||
|
0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717,
|
||||||
|
0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F,
|
||||||
|
0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61,
|
||||||
|
0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB,
|
||||||
|
0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51,
|
||||||
|
0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0,
|
||||||
|
0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C,
|
||||||
|
0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6,
|
||||||
|
0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC,
|
||||||
|
0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED,
|
||||||
|
0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328,
|
||||||
|
0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D,
|
||||||
|
0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0,
|
||||||
|
0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B,
|
||||||
|
0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4,
|
||||||
|
0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3,
|
||||||
|
0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F,
|
||||||
|
0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD,
|
||||||
|
0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B,
|
||||||
|
0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4,
|
||||||
|
0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761,
|
||||||
|
0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31,
|
||||||
|
0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30,
|
||||||
|
0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262,
|
||||||
|
0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E,
|
||||||
|
0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1,
|
||||||
|
0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C,
|
||||||
|
0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4,
|
||||||
|
0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08,
|
||||||
|
0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196,
|
||||||
|
0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9,
|
||||||
|
0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4,
|
||||||
|
0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC,
|
||||||
|
0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C,
|
||||||
|
0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0,
|
||||||
|
0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C,
|
||||||
|
0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0,
|
||||||
|
0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC,
|
||||||
|
0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22,
|
||||||
|
0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893,
|
||||||
|
0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7,
|
||||||
|
0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5,
|
||||||
|
0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F,
|
||||||
|
0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4,
|
||||||
|
0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF,
|
||||||
|
0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B,
|
||||||
|
0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2,
|
||||||
|
0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138,
|
||||||
|
0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E,
|
||||||
|
0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569,
|
||||||
|
0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34,
|
||||||
|
0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9,
|
||||||
|
0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D,
|
||||||
|
0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F,
|
||||||
|
0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855,
|
||||||
|
0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569,
|
||||||
|
0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B,
|
||||||
|
0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE,
|
||||||
|
0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41,
|
||||||
|
0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49,
|
||||||
|
0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F,
|
||||||
|
0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110,
|
||||||
|
0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8,
|
||||||
|
0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365,
|
||||||
|
0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A,
|
||||||
|
0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270,
|
||||||
|
0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5,
|
||||||
|
0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616,
|
||||||
|
0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B,
|
||||||
|
0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0,
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
static const double PIo2[] = {
|
||||||
|
1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
|
||||||
|
7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
|
||||||
|
5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
|
||||||
|
3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
|
||||||
|
1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
|
||||||
|
1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
|
||||||
|
2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
|
||||||
|
2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
|
||||||
|
};
|
||||||
|
|
||||||
|
int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
|
||||||
|
{
|
||||||
|
int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
|
||||||
|
double z,fw,f[20],fq[20],q[20];
|
||||||
|
|
||||||
|
/* initialize jk*/
|
||||||
|
jk = init_jk[prec];
|
||||||
|
jp = jk;
|
||||||
|
|
||||||
|
/* determine jx,jv,q0, note that 3>q0 */
|
||||||
|
jx = nx-1;
|
||||||
|
jv = (e0-3)/24; if(jv<0) jv=0;
|
||||||
|
q0 = e0-24*(jv+1);
|
||||||
|
|
||||||
|
/* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
|
||||||
|
j = jv-jx; m = jx+jk;
|
||||||
|
for (i=0; i<=m; i++,j++)
|
||||||
|
f[i] = j<0 ? 0.0 : (double)ipio2[j];
|
||||||
|
|
||||||
|
/* compute q[0],q[1],...q[jk] */
|
||||||
|
for (i=0; i<=jk; i++) {
|
||||||
|
for (j=0,fw=0.0; j<=jx; j++)
|
||||||
|
fw += x[j]*f[jx+i-j];
|
||||||
|
q[i] = fw;
|
||||||
|
}
|
||||||
|
|
||||||
|
jz = jk;
|
||||||
|
recompute:
|
||||||
|
/* distill q[] into iq[] reversingly */
|
||||||
|
for (i=0,j=jz,z=q[jz]; j>0; i++,j--) {
|
||||||
|
fw = (double)(int32_t)(0x1p-24*z);
|
||||||
|
iq[i] = (int32_t)(z - 0x1p24*fw);
|
||||||
|
z = q[j-1]+fw;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* compute n */
|
||||||
|
z = scalbn(z,q0); /* actual value of z */
|
||||||
|
z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
|
||||||
|
n = (int32_t)z;
|
||||||
|
z -= (double)n;
|
||||||
|
ih = 0;
|
||||||
|
if (q0 > 0) { /* need iq[jz-1] to determine n */
|
||||||
|
i = iq[jz-1]>>(24-q0); n += i;
|
||||||
|
iq[jz-1] -= i<<(24-q0);
|
||||||
|
ih = iq[jz-1]>>(23-q0);
|
||||||
|
}
|
||||||
|
else if (q0 == 0) ih = iq[jz-1]>>23;
|
||||||
|
else if (z >= 0.5) ih = 2;
|
||||||
|
|
||||||
|
if (ih > 0) { /* q > 0.5 */
|
||||||
|
n += 1; carry = 0;
|
||||||
|
for (i=0; i<jz; i++) { /* compute 1-q */
|
||||||
|
j = iq[i];
|
||||||
|
if (carry == 0) {
|
||||||
|
if (j != 0) {
|
||||||
|
carry = 1;
|
||||||
|
iq[i] = 0x1000000 - j;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
iq[i] = 0xffffff - j;
|
||||||
|
}
|
||||||
|
if (q0 > 0) { /* rare case: chance is 1 in 12 */
|
||||||
|
switch(q0) {
|
||||||
|
case 1:
|
||||||
|
iq[jz-1] &= 0x7fffff; break;
|
||||||
|
case 2:
|
||||||
|
iq[jz-1] &= 0x3fffff; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ih == 2) {
|
||||||
|
z = 1.0 - z;
|
||||||
|
if (carry != 0)
|
||||||
|
z -= scalbn(1.0,q0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check if recomputation is needed */
|
||||||
|
if (z == 0.0) {
|
||||||
|
j = 0;
|
||||||
|
for (i=jz-1; i>=jk; i--) j |= iq[i];
|
||||||
|
if (j == 0) { /* need recomputation */
|
||||||
|
for (k=1; iq[jk-k]==0; k++); /* k = no. of terms needed */
|
||||||
|
|
||||||
|
for (i=jz+1; i<=jz+k; i++) { /* add q[jz+1] to q[jz+k] */
|
||||||
|
f[jx+i] = (double)ipio2[jv+i];
|
||||||
|
for (j=0,fw=0.0; j<=jx; j++)
|
||||||
|
fw += x[j]*f[jx+i-j];
|
||||||
|
q[i] = fw;
|
||||||
|
}
|
||||||
|
jz += k;
|
||||||
|
goto recompute;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* chop off zero terms */
|
||||||
|
if (z == 0.0) {
|
||||||
|
jz -= 1;
|
||||||
|
q0 -= 24;
|
||||||
|
while (iq[jz] == 0) {
|
||||||
|
jz--;
|
||||||
|
q0 -= 24;
|
||||||
|
}
|
||||||
|
} else { /* break z into 24-bit if necessary */
|
||||||
|
z = scalbn(z,-q0);
|
||||||
|
if (z >= 0x1p24) {
|
||||||
|
fw = (double)(int32_t)(0x1p-24*z);
|
||||||
|
iq[jz] = (int32_t)(z - 0x1p24*fw);
|
||||||
|
jz += 1;
|
||||||
|
q0 += 24;
|
||||||
|
iq[jz] = (int32_t)fw;
|
||||||
|
} else
|
||||||
|
iq[jz] = (int32_t)z;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* convert integer "bit" chunk to floating-point value */
|
||||||
|
fw = scalbn(1.0,q0);
|
||||||
|
for (i=jz; i>=0; i--) {
|
||||||
|
q[i] = fw*(double)iq[i];
|
||||||
|
fw *= 0x1p-24;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* compute PIo2[0,...,jp]*q[jz,...,0] */
|
||||||
|
for(i=jz; i>=0; i--) {
|
||||||
|
for (fw=0.0,k=0; k<=jp && k<=jz-i; k++)
|
||||||
|
fw += PIo2[k]*q[i+k];
|
||||||
|
fq[jz-i] = fw;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* compress fq[] into y[] */
|
||||||
|
switch(prec) {
|
||||||
|
case 0:
|
||||||
|
fw = 0.0;
|
||||||
|
for (i=jz; i>=0; i--)
|
||||||
|
fw += fq[i];
|
||||||
|
y[0] = ih==0 ? fw : -fw;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
case 2:
|
||||||
|
fw = 0.0;
|
||||||
|
for (i=jz; i>=0; i--)
|
||||||
|
fw += fq[i];
|
||||||
|
// TODO: drop excess precision here once double_t is used
|
||||||
|
fw = (double)fw;
|
||||||
|
y[0] = ih==0 ? fw : -fw;
|
||||||
|
fw = fq[0]-fw;
|
||||||
|
for (i=1; i<=jz; i++)
|
||||||
|
fw += fq[i];
|
||||||
|
y[1] = ih==0 ? fw : -fw;
|
||||||
|
break;
|
||||||
|
case 3: /* painful */
|
||||||
|
for (i=jz; i>0; i--) {
|
||||||
|
fw = fq[i-1]+fq[i];
|
||||||
|
fq[i] += fq[i-1]-fw;
|
||||||
|
fq[i-1] = fw;
|
||||||
|
}
|
||||||
|
for (i=jz; i>1; i--) {
|
||||||
|
fw = fq[i-1]+fq[i];
|
||||||
|
fq[i] += fq[i-1]-fw;
|
||||||
|
fq[i-1] = fw;
|
||||||
|
}
|
||||||
|
for (fw=0.0,i=jz; i>=2; i--)
|
||||||
|
fw += fq[i];
|
||||||
|
if (ih==0) {
|
||||||
|
y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
|
||||||
|
} else {
|
||||||
|
y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return n&7;
|
||||||
|
}
|
13
3rd/math/src/__signbit.c
Normal file
13
3rd/math/src/__signbit.c
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
// FIXME: macro in math.h
|
||||||
|
int __signbit(double x)
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
double d;
|
||||||
|
uint64_t i;
|
||||||
|
} y = { x };
|
||||||
|
return y.i>>63;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
11
3rd/math/src/__signbitf.c
Normal file
11
3rd/math/src/__signbitf.c
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
// FIXME: macro in math.h
|
||||||
|
int __signbitf(float x)
|
||||||
|
{
|
||||||
|
union {
|
||||||
|
float f;
|
||||||
|
uint32_t i;
|
||||||
|
} y = { x };
|
||||||
|
return y.i>>31;
|
||||||
|
}
|
14
3rd/math/src/__signbitl.c
Normal file
14
3rd/math/src/__signbitl.c
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
#if (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
|
||||||
|
int __signbitl(long double x)
|
||||||
|
{
|
||||||
|
union ldshape u = {x};
|
||||||
|
return u.i.se >> 15;
|
||||||
|
}
|
||||||
|
#elif LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
||||||
|
int __signbitl(long double x)
|
||||||
|
{
|
||||||
|
return __signbit(x);
|
||||||
|
}
|
||||||
|
#endif
|
64
3rd/math/src/__sin.c
Normal file
64
3rd/math/src/__sin.c
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/* __sin( x, y, iy)
|
||||||
|
* kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
|
||||||
|
* Input x is assumed to be bounded by ~pi/4 in magnitude.
|
||||||
|
* Input y is the tail of x.
|
||||||
|
* Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
|
||||||
|
*
|
||||||
|
* Algorithm
|
||||||
|
* 1. Since sin(-x) = -sin(x), we need only to consider positive x.
|
||||||
|
* 2. Callers must return sin(-0) = -0 without calling here since our
|
||||||
|
* odd polynomial is not evaluated in a way that preserves -0.
|
||||||
|
* Callers may do the optimization sin(x) ~ x for tiny x.
|
||||||
|
* 3. sin(x) is approximated by a polynomial of degree 13 on
|
||||||
|
* [0,pi/4]
|
||||||
|
* 3 13
|
||||||
|
* sin(x) ~ x + S1*x + ... + S6*x
|
||||||
|
* where
|
||||||
|
*
|
||||||
|
* |sin(x) 2 4 6 8 10 12 | -58
|
||||||
|
* |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
|
||||||
|
* | x |
|
||||||
|
*
|
||||||
|
* 4. sin(x+y) = sin(x) + sin'(x')*y
|
||||||
|
* ~ sin(x) + (1-x*x/2)*y
|
||||||
|
* For better accuracy, let
|
||||||
|
* 3 2 2 2 2
|
||||||
|
* r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
|
||||||
|
* then 3 2
|
||||||
|
* sin(x) = x + (S1*x + (x *(r-y/2)+y))
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
static const double
|
||||||
|
S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
|
||||||
|
S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
|
||||||
|
S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
|
||||||
|
S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
|
||||||
|
S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
|
||||||
|
S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
|
||||||
|
|
||||||
|
double __sin(double x, double y, int iy)
|
||||||
|
{
|
||||||
|
double_t z,r,v,w;
|
||||||
|
|
||||||
|
z = x*x;
|
||||||
|
w = z*z;
|
||||||
|
r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6);
|
||||||
|
v = z*x;
|
||||||
|
if (iy == 0)
|
||||||
|
return x + v*(S1 + z*r);
|
||||||
|
else
|
||||||
|
return x - ((z*(0.5*y - v*r) - y) - v*S1);
|
||||||
|
}
|
110
3rd/math/src/__tan.c
Normal file
110
3rd/math/src/__tan.c
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright 2004 Sun Microsystems, Inc. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/* __tan( x, y, k )
|
||||||
|
* kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
|
||||||
|
* Input x is assumed to be bounded by ~pi/4 in magnitude.
|
||||||
|
* Input y is the tail of x.
|
||||||
|
* Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned.
|
||||||
|
*
|
||||||
|
* Algorithm
|
||||||
|
* 1. Since tan(-x) = -tan(x), we need only to consider positive x.
|
||||||
|
* 2. Callers must return tan(-0) = -0 without calling here since our
|
||||||
|
* odd polynomial is not evaluated in a way that preserves -0.
|
||||||
|
* Callers may do the optimization tan(x) ~ x for tiny x.
|
||||||
|
* 3. tan(x) is approximated by a odd polynomial of degree 27 on
|
||||||
|
* [0,0.67434]
|
||||||
|
* 3 27
|
||||||
|
* tan(x) ~ x + T1*x + ... + T13*x
|
||||||
|
* where
|
||||||
|
*
|
||||||
|
* |tan(x) 2 4 26 | -59.2
|
||||||
|
* |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
|
||||||
|
* | x |
|
||||||
|
*
|
||||||
|
* Note: tan(x+y) = tan(x) + tan'(x)*y
|
||||||
|
* ~ tan(x) + (1+x*x)*y
|
||||||
|
* Therefore, for better accuracy in computing tan(x+y), let
|
||||||
|
* 3 2 2 2 2
|
||||||
|
* r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
|
||||||
|
* then
|
||||||
|
* 3 2
|
||||||
|
* tan(x+y) = x + (T1*x + (x *(r+y)+y))
|
||||||
|
*
|
||||||
|
* 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
|
||||||
|
* tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
|
||||||
|
* = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
static const double T[] = {
|
||||||
|
3.33333333333334091986e-01, /* 3FD55555, 55555563 */
|
||||||
|
1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */
|
||||||
|
5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */
|
||||||
|
2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */
|
||||||
|
8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */
|
||||||
|
3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */
|
||||||
|
1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */
|
||||||
|
5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */
|
||||||
|
2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */
|
||||||
|
7.81794442939557092300e-05, /* 3F147E88, A03792A6 */
|
||||||
|
7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */
|
||||||
|
-1.85586374855275456654e-05, /* BEF375CB, DB605373 */
|
||||||
|
2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */
|
||||||
|
},
|
||||||
|
pio4 = 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */
|
||||||
|
pio4lo = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */
|
||||||
|
|
||||||
|
double __tan(double x, double y, int odd)
|
||||||
|
{
|
||||||
|
double_t z, r, v, w, s, a;
|
||||||
|
double w0, a0;
|
||||||
|
uint32_t hx;
|
||||||
|
int big, sign;
|
||||||
|
|
||||||
|
GET_HIGH_WORD(hx,x);
|
||||||
|
big = (hx&0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
|
||||||
|
if (big) {
|
||||||
|
sign = hx>>31;
|
||||||
|
if (sign) {
|
||||||
|
x = -x;
|
||||||
|
y = -y;
|
||||||
|
}
|
||||||
|
x = (pio4 - x) + (pio4lo - y);
|
||||||
|
y = 0.0;
|
||||||
|
}
|
||||||
|
z = x * x;
|
||||||
|
w = z * z;
|
||||||
|
/*
|
||||||
|
* Break x^5*(T[1]+x^2*T[2]+...) into
|
||||||
|
* x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
|
||||||
|
* x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
|
||||||
|
*/
|
||||||
|
r = T[1] + w*(T[3] + w*(T[5] + w*(T[7] + w*(T[9] + w*T[11]))));
|
||||||
|
v = z*(T[2] + w*(T[4] + w*(T[6] + w*(T[8] + w*(T[10] + w*T[12])))));
|
||||||
|
s = z * x;
|
||||||
|
r = y + z*(s*(r + v) + y) + s*T[0];
|
||||||
|
w = x + r;
|
||||||
|
if (big) {
|
||||||
|
s = 1 - 2*odd;
|
||||||
|
v = s - 2.0 * (x + (r - w*w/(w + s)));
|
||||||
|
return sign ? -v : v;
|
||||||
|
}
|
||||||
|
if (!odd)
|
||||||
|
return w;
|
||||||
|
/* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
|
||||||
|
w0 = w;
|
||||||
|
SET_LOW_WORD(w0, 0);
|
||||||
|
v = r - (w0 - x); /* w0+v = r+x */
|
||||||
|
a0 = a = -1.0 / w;
|
||||||
|
SET_LOW_WORD(a0, 0);
|
||||||
|
return a0 + a*(1.0 + a0*w0 + a0*v);
|
||||||
|
}
|
101
3rd/math/src/acos.c
Normal file
101
3rd/math/src/acos.c
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/e_acos.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/* acos(x)
|
||||||
|
* Method :
|
||||||
|
* acos(x) = pi/2 - asin(x)
|
||||||
|
* acos(-x) = pi/2 + asin(x)
|
||||||
|
* For |x|<=0.5
|
||||||
|
* acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c)
|
||||||
|
* For x>0.5
|
||||||
|
* acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2)))
|
||||||
|
* = 2asin(sqrt((1-x)/2))
|
||||||
|
* = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z)
|
||||||
|
* = 2f + (2c + 2s*z*R(z))
|
||||||
|
* where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term
|
||||||
|
* for f so that f+c ~ sqrt(z).
|
||||||
|
* For x<-0.5
|
||||||
|
* acos(x) = pi - 2asin(sqrt((1-|x|)/2))
|
||||||
|
* = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z)
|
||||||
|
*
|
||||||
|
* Special cases:
|
||||||
|
* if x is NaN, return x itself;
|
||||||
|
* if |x|>1, return NaN with invalid signal.
|
||||||
|
*
|
||||||
|
* Function needed: sqrt
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
static const double
|
||||||
|
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
|
||||||
|
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
|
||||||
|
pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
|
||||||
|
pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
|
||||||
|
pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
|
||||||
|
pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
|
||||||
|
pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
|
||||||
|
pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
|
||||||
|
qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
|
||||||
|
qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
|
||||||
|
qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
|
||||||
|
qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
|
||||||
|
|
||||||
|
static double R(double z)
|
||||||
|
{
|
||||||
|
double_t p, q;
|
||||||
|
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
|
||||||
|
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
|
||||||
|
return p/q;
|
||||||
|
}
|
||||||
|
|
||||||
|
double acos(double x)
|
||||||
|
{
|
||||||
|
double z,w,s,c,df;
|
||||||
|
uint32_t hx,ix;
|
||||||
|
|
||||||
|
GET_HIGH_WORD(hx, x);
|
||||||
|
ix = hx & 0x7fffffff;
|
||||||
|
/* |x| >= 1 or nan */
|
||||||
|
if (ix >= 0x3ff00000) {
|
||||||
|
uint32_t lx;
|
||||||
|
|
||||||
|
GET_LOW_WORD(lx,x);
|
||||||
|
if ((ix-0x3ff00000 | lx) == 0) {
|
||||||
|
/* acos(1)=0, acos(-1)=pi */
|
||||||
|
if (hx >> 31)
|
||||||
|
return 2*pio2_hi + 0x1p-120f;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return 0/(x-x);
|
||||||
|
}
|
||||||
|
/* |x| < 0.5 */
|
||||||
|
if (ix < 0x3fe00000) {
|
||||||
|
if (ix <= 0x3c600000) /* |x| < 2**-57 */
|
||||||
|
return pio2_hi + 0x1p-120f;
|
||||||
|
return pio2_hi - (x - (pio2_lo-x*R(x*x)));
|
||||||
|
}
|
||||||
|
/* x < -0.5 */
|
||||||
|
if (hx >> 31) {
|
||||||
|
z = (1.0+x)*0.5;
|
||||||
|
s = sqrt(z);
|
||||||
|
w = R(z)*s-pio2_lo;
|
||||||
|
return 2*(pio2_hi - (s+w));
|
||||||
|
}
|
||||||
|
/* x > 0.5 */
|
||||||
|
z = (1.0-x)*0.5;
|
||||||
|
s = sqrt(z);
|
||||||
|
df = s;
|
||||||
|
SET_LOW_WORD(df,0);
|
||||||
|
c = (z-df*df)/(s+df);
|
||||||
|
w = R(z)*s+c;
|
||||||
|
return 2*(df+w);
|
||||||
|
}
|
107
3rd/math/src/asin.c
Normal file
107
3rd/math/src/asin.c
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/e_asin.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/* asin(x)
|
||||||
|
* Method :
|
||||||
|
* Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
|
||||||
|
* we approximate asin(x) on [0,0.5] by
|
||||||
|
* asin(x) = x + x*x^2*R(x^2)
|
||||||
|
* where
|
||||||
|
* R(x^2) is a rational approximation of (asin(x)-x)/x^3
|
||||||
|
* and its remez error is bounded by
|
||||||
|
* |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75)
|
||||||
|
*
|
||||||
|
* For x in [0.5,1]
|
||||||
|
* asin(x) = pi/2-2*asin(sqrt((1-x)/2))
|
||||||
|
* Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2;
|
||||||
|
* then for x>0.98
|
||||||
|
* asin(x) = pi/2 - 2*(s+s*z*R(z))
|
||||||
|
* = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo)
|
||||||
|
* For x<=0.98, let pio4_hi = pio2_hi/2, then
|
||||||
|
* f = hi part of s;
|
||||||
|
* c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z)
|
||||||
|
* and
|
||||||
|
* asin(x) = pi/2 - 2*(s+s*z*R(z))
|
||||||
|
* = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo)
|
||||||
|
* = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c))
|
||||||
|
*
|
||||||
|
* Special cases:
|
||||||
|
* if x is NaN, return x itself;
|
||||||
|
* if |x|>1, return NaN with invalid signal.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
static const double
|
||||||
|
pio2_hi = 1.57079632679489655800e+00, /* 0x3FF921FB, 0x54442D18 */
|
||||||
|
pio2_lo = 6.12323399573676603587e-17, /* 0x3C91A626, 0x33145C07 */
|
||||||
|
/* coefficients for R(x^2) */
|
||||||
|
pS0 = 1.66666666666666657415e-01, /* 0x3FC55555, 0x55555555 */
|
||||||
|
pS1 = -3.25565818622400915405e-01, /* 0xBFD4D612, 0x03EB6F7D */
|
||||||
|
pS2 = 2.01212532134862925881e-01, /* 0x3FC9C155, 0x0E884455 */
|
||||||
|
pS3 = -4.00555345006794114027e-02, /* 0xBFA48228, 0xB5688F3B */
|
||||||
|
pS4 = 7.91534994289814532176e-04, /* 0x3F49EFE0, 0x7501B288 */
|
||||||
|
pS5 = 3.47933107596021167570e-05, /* 0x3F023DE1, 0x0DFDF709 */
|
||||||
|
qS1 = -2.40339491173441421878e+00, /* 0xC0033A27, 0x1C8A2D4B */
|
||||||
|
qS2 = 2.02094576023350569471e+00, /* 0x40002AE5, 0x9C598AC8 */
|
||||||
|
qS3 = -6.88283971605453293030e-01, /* 0xBFE6066C, 0x1B8D0159 */
|
||||||
|
qS4 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */
|
||||||
|
|
||||||
|
static double R(double z)
|
||||||
|
{
|
||||||
|
double_t p, q;
|
||||||
|
p = z*(pS0+z*(pS1+z*(pS2+z*(pS3+z*(pS4+z*pS5)))));
|
||||||
|
q = 1.0+z*(qS1+z*(qS2+z*(qS3+z*qS4)));
|
||||||
|
return p/q;
|
||||||
|
}
|
||||||
|
|
||||||
|
double asin(double x)
|
||||||
|
{
|
||||||
|
double z,r,s;
|
||||||
|
uint32_t hx,ix;
|
||||||
|
|
||||||
|
GET_HIGH_WORD(hx, x);
|
||||||
|
ix = hx & 0x7fffffff;
|
||||||
|
/* |x| >= 1 or nan */
|
||||||
|
if (ix >= 0x3ff00000) {
|
||||||
|
uint32_t lx;
|
||||||
|
GET_LOW_WORD(lx, x);
|
||||||
|
if ((ix-0x3ff00000 | lx) == 0)
|
||||||
|
/* asin(1) = +-pi/2 with inexact */
|
||||||
|
return x*pio2_hi + 0x1p-120f;
|
||||||
|
return 0/(x-x);
|
||||||
|
}
|
||||||
|
/* |x| < 0.5 */
|
||||||
|
if (ix < 0x3fe00000) {
|
||||||
|
/* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
|
||||||
|
if (ix < 0x3e500000 && ix >= 0x00100000)
|
||||||
|
return x;
|
||||||
|
return x + x*R(x*x);
|
||||||
|
}
|
||||||
|
/* 1 > |x| >= 0.5 */
|
||||||
|
z = (1 - fabs(x))*0.5;
|
||||||
|
s = sqrt(z);
|
||||||
|
r = R(z);
|
||||||
|
if (ix >= 0x3fef3333) { /* if |x| > 0.975 */
|
||||||
|
x = pio2_hi-(2*(s+s*r)-pio2_lo);
|
||||||
|
} else {
|
||||||
|
double f,c;
|
||||||
|
/* f+c = sqrt(z) */
|
||||||
|
f = s;
|
||||||
|
SET_LOW_WORD(f,0);
|
||||||
|
c = (z-f*f)/(s+f);
|
||||||
|
x = 0.5*pio2_hi - (2*s*r - (pio2_lo-2*c) - (0.5*pio2_hi-2*f));
|
||||||
|
}
|
||||||
|
if (hx >> 31)
|
||||||
|
return -x;
|
||||||
|
return x;
|
||||||
|
}
|
116
3rd/math/src/atan.c
Normal file
116
3rd/math/src/atan.c
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/s_atan.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/* atan(x)
|
||||||
|
* Method
|
||||||
|
* 1. Reduce x to positive by atan(x) = -atan(-x).
|
||||||
|
* 2. According to the integer k=4t+0.25 chopped, t=x, the argument
|
||||||
|
* is further reduced to one of the following intervals and the
|
||||||
|
* arctangent of t is evaluated by the corresponding formula:
|
||||||
|
*
|
||||||
|
* [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
|
||||||
|
* [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
|
||||||
|
* [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
|
||||||
|
* [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
|
||||||
|
* [39/16,INF] atan(x) = atan(INF) + atan( -1/t )
|
||||||
|
*
|
||||||
|
* Constants:
|
||||||
|
* The hexadecimal values are the intended ones for the following
|
||||||
|
* constants. The decimal values may be used, provided that the
|
||||||
|
* compiler will convert from decimal to binary accurately enough
|
||||||
|
* to produce the hexadecimal values shown.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
static const double atanhi[] = {
|
||||||
|
4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
|
||||||
|
7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
|
||||||
|
9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
|
||||||
|
1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
|
||||||
|
};
|
||||||
|
|
||||||
|
static const double atanlo[] = {
|
||||||
|
2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
|
||||||
|
3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
|
||||||
|
1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
|
||||||
|
6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
|
||||||
|
};
|
||||||
|
|
||||||
|
static const double aT[] = {
|
||||||
|
3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */
|
||||||
|
-1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
|
||||||
|
1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */
|
||||||
|
-1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
|
||||||
|
9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */
|
||||||
|
-7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
|
||||||
|
6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */
|
||||||
|
-5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
|
||||||
|
4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */
|
||||||
|
-3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
|
||||||
|
1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */
|
||||||
|
};
|
||||||
|
|
||||||
|
double atan(double x)
|
||||||
|
{
|
||||||
|
double_t w,s1,s2,z;
|
||||||
|
uint32_t ix,sign;
|
||||||
|
int id;
|
||||||
|
|
||||||
|
GET_HIGH_WORD(ix, x);
|
||||||
|
sign = ix >> 31;
|
||||||
|
ix &= 0x7fffffff;
|
||||||
|
if (ix >= 0x44100000) { /* if |x| >= 2^66 */
|
||||||
|
if (isnan(x))
|
||||||
|
return x;
|
||||||
|
z = atanhi[3] + 0x1p-120f;
|
||||||
|
return sign ? -z : z;
|
||||||
|
}
|
||||||
|
if (ix < 0x3fdc0000) { /* |x| < 0.4375 */
|
||||||
|
if (ix < 0x3e400000) { /* |x| < 2^-27 */
|
||||||
|
if (ix < 0x00100000)
|
||||||
|
/* raise underflow for subnormal x */
|
||||||
|
FORCE_EVAL((float)x);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
id = -1;
|
||||||
|
} else {
|
||||||
|
x = fabs(x);
|
||||||
|
if (ix < 0x3ff30000) { /* |x| < 1.1875 */
|
||||||
|
if (ix < 0x3fe60000) { /* 7/16 <= |x| < 11/16 */
|
||||||
|
id = 0;
|
||||||
|
x = (2.0*x-1.0)/(2.0+x);
|
||||||
|
} else { /* 11/16 <= |x| < 19/16 */
|
||||||
|
id = 1;
|
||||||
|
x = (x-1.0)/(x+1.0);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (ix < 0x40038000) { /* |x| < 2.4375 */
|
||||||
|
id = 2;
|
||||||
|
x = (x-1.5)/(1.0+1.5*x);
|
||||||
|
} else { /* 2.4375 <= |x| < 2^66 */
|
||||||
|
id = 3;
|
||||||
|
x = -1.0/x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* end of argument reduction */
|
||||||
|
z = x*x;
|
||||||
|
w = z*z;
|
||||||
|
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
|
||||||
|
s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
|
||||||
|
s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
|
||||||
|
if (id < 0)
|
||||||
|
return x - x*(s1+s2);
|
||||||
|
z = atanhi[id] - (x*(s1+s2) - atanlo[id] - x);
|
||||||
|
return sign ? -z : z;
|
||||||
|
}
|
107
3rd/math/src/atan2.c
Normal file
107
3rd/math/src/atan2.c
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
/* atan2(y,x)
|
||||||
|
* Method :
|
||||||
|
* 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
|
||||||
|
* 2. Reduce x to positive by (if x and y are unexceptional):
|
||||||
|
* ARG (x+iy) = arctan(y/x) ... if x > 0,
|
||||||
|
* ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0,
|
||||||
|
*
|
||||||
|
* Special cases:
|
||||||
|
*
|
||||||
|
* ATAN2((anything), NaN ) is NaN;
|
||||||
|
* ATAN2(NAN , (anything) ) is NaN;
|
||||||
|
* ATAN2(+-0, +(anything but NaN)) is +-0 ;
|
||||||
|
* ATAN2(+-0, -(anything but NaN)) is +-pi ;
|
||||||
|
* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2;
|
||||||
|
* ATAN2(+-(anything but INF and NaN), +INF) is +-0 ;
|
||||||
|
* ATAN2(+-(anything but INF and NaN), -INF) is +-pi;
|
||||||
|
* ATAN2(+-INF,+INF ) is +-pi/4 ;
|
||||||
|
* ATAN2(+-INF,-INF ) is +-3pi/4;
|
||||||
|
* ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2;
|
||||||
|
*
|
||||||
|
* Constants:
|
||||||
|
* The hexadecimal values are the intended ones for the following
|
||||||
|
* constants. The decimal values may be used, provided that the
|
||||||
|
* compiler will convert from decimal to binary accurately enough
|
||||||
|
* to produce the hexadecimal values shown.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
static const double
|
||||||
|
pi = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */
|
||||||
|
pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
|
||||||
|
|
||||||
|
double atan2(double y, double x)
|
||||||
|
{
|
||||||
|
double z;
|
||||||
|
uint32_t m,lx,ly,ix,iy;
|
||||||
|
|
||||||
|
if (isnan(x) || isnan(y))
|
||||||
|
return x+y;
|
||||||
|
EXTRACT_WORDS(ix, lx, x);
|
||||||
|
EXTRACT_WORDS(iy, ly, y);
|
||||||
|
if ((ix-0x3ff00000 | lx) == 0) /* x = 1.0 */
|
||||||
|
return atan(y);
|
||||||
|
m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */
|
||||||
|
ix = ix & 0x7fffffff;
|
||||||
|
iy = iy & 0x7fffffff;
|
||||||
|
|
||||||
|
/* when y = 0 */
|
||||||
|
if ((iy|ly) == 0) {
|
||||||
|
switch(m) {
|
||||||
|
case 0:
|
||||||
|
case 1: return y; /* atan(+-0,+anything)=+-0 */
|
||||||
|
case 2: return pi; /* atan(+0,-anything) = pi */
|
||||||
|
case 3: return -pi; /* atan(-0,-anything) =-pi */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* when x = 0 */
|
||||||
|
if ((ix|lx) == 0)
|
||||||
|
return m&1 ? -pi/2 : pi/2;
|
||||||
|
/* when x is INF */
|
||||||
|
if (ix == 0x7ff00000) {
|
||||||
|
if (iy == 0x7ff00000) {
|
||||||
|
switch(m) {
|
||||||
|
case 0: return pi/4; /* atan(+INF,+INF) */
|
||||||
|
case 1: return -pi/4; /* atan(-INF,+INF) */
|
||||||
|
case 2: return 3*pi/4; /* atan(+INF,-INF) */
|
||||||
|
case 3: return -3*pi/4; /* atan(-INF,-INF) */
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
switch(m) {
|
||||||
|
case 0: return 0.0; /* atan(+...,+INF) */
|
||||||
|
case 1: return -0.0; /* atan(-...,+INF) */
|
||||||
|
case 2: return pi; /* atan(+...,-INF) */
|
||||||
|
case 3: return -pi; /* atan(-...,-INF) */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* |y/x| > 0x1p64 */
|
||||||
|
if (ix+(64<<20) < iy || iy == 0x7ff00000)
|
||||||
|
return m&1 ? -pi/2 : pi/2;
|
||||||
|
|
||||||
|
/* z = atan(|y/x|) without spurious underflow */
|
||||||
|
if ((m&2) && iy+(64<<20) < ix) /* |y/x| < 0x1p-64, x<0 */
|
||||||
|
z = 0;
|
||||||
|
else
|
||||||
|
z = atan(fabs(y/x));
|
||||||
|
switch (m) {
|
||||||
|
case 0: return z; /* atan(+,+) */
|
||||||
|
case 1: return -z; /* atan(-,+) */
|
||||||
|
case 2: return pi - (z-pi_lo); /* atan(+,-) */
|
||||||
|
default: /* case 3 */
|
||||||
|
return (z-pi_lo) - pi; /* atan(-,-) */
|
||||||
|
}
|
||||||
|
}
|
102
3rd/math/src/cbrt.c
Normal file
102
3rd/math/src/cbrt.c
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*
|
||||||
|
* Optimized by Bruce D. Evans.
|
||||||
|
*/
|
||||||
|
/* cbrt(x)
|
||||||
|
* Return cube root of x
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
static const uint32_t
|
||||||
|
B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
|
||||||
|
B2 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
|
||||||
|
|
||||||
|
/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
|
||||||
|
static const double
|
||||||
|
P0 = 1.87595182427177009643, /* 0x3ffe03e6, 0x0f61e692 */
|
||||||
|
P1 = -1.88497979543377169875, /* 0xbffe28e0, 0x92f02420 */
|
||||||
|
P2 = 1.621429720105354466140, /* 0x3ff9f160, 0x4a49d6c2 */
|
||||||
|
P3 = -0.758397934778766047437, /* 0xbfe844cb, 0xbee751d9 */
|
||||||
|
P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
|
||||||
|
|
||||||
|
double cbrt(double x)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} u = {x};
|
||||||
|
double_t r,s,t,w;
|
||||||
|
uint32_t hx = u.i>>32 & 0x7fffffff;
|
||||||
|
|
||||||
|
if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */
|
||||||
|
return x+x;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Rough cbrt to 5 bits:
|
||||||
|
* cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
|
||||||
|
* where e is integral and >= 0, m is real and in [0, 1), and "/" and
|
||||||
|
* "%" are integer division and modulus with rounding towards minus
|
||||||
|
* infinity. The RHS is always >= the LHS and has a maximum relative
|
||||||
|
* error of about 1 in 16. Adding a bias of -0.03306235651 to the
|
||||||
|
* (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
|
||||||
|
* floating point representation, for finite positive normal values,
|
||||||
|
* ordinary integer divison of the value in bits magically gives
|
||||||
|
* almost exactly the RHS of the above provided we first subtract the
|
||||||
|
* exponent bias (1023 for doubles) and later add it back. We do the
|
||||||
|
* subtraction virtually to keep e >= 0 so that ordinary integer
|
||||||
|
* division rounds towards minus infinity; this is also efficient.
|
||||||
|
*/
|
||||||
|
if (hx < 0x00100000) { /* zero or subnormal? */
|
||||||
|
u.f = x*0x1p54;
|
||||||
|
hx = u.i>>32 & 0x7fffffff;
|
||||||
|
if (hx == 0)
|
||||||
|
return x; /* cbrt(0) is itself */
|
||||||
|
hx = hx/3 + B2;
|
||||||
|
} else
|
||||||
|
hx = hx/3 + B1;
|
||||||
|
u.i &= 1ULL<<63;
|
||||||
|
u.i |= (uint64_t)hx << 32;
|
||||||
|
t = u.f;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* New cbrt to 23 bits:
|
||||||
|
* cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
|
||||||
|
* where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
|
||||||
|
* to within 2**-23.5 when |r - 1| < 1/10. The rough approximation
|
||||||
|
* has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
|
||||||
|
* gives us bounds for r = t**3/x.
|
||||||
|
*
|
||||||
|
* Try to optimize for parallel evaluation as in __tanf.c.
|
||||||
|
*/
|
||||||
|
r = (t*t)*(t/x);
|
||||||
|
t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Round t away from zero to 23 bits (sloppily except for ensuring that
|
||||||
|
* the result is larger in magnitude than cbrt(x) but not much more than
|
||||||
|
* 2 23-bit ulps larger). With rounding towards zero, the error bound
|
||||||
|
* would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps
|
||||||
|
* in the rounded t, the infinite-precision error in the Newton
|
||||||
|
* approximation barely affects third digit in the final error
|
||||||
|
* 0.667; the error in the rounded t can be up to about 3 23-bit ulps
|
||||||
|
* before the final error is larger than 0.667 ulps.
|
||||||
|
*/
|
||||||
|
u.f = t;
|
||||||
|
u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
|
||||||
|
t = u.f;
|
||||||
|
|
||||||
|
/* one step Newton iteration to 53 bits with error < 0.667 ulps */
|
||||||
|
s = t*t; /* t*t is exact */
|
||||||
|
r = x/s; /* error <= 0.5 ulps; |r| < |t| */
|
||||||
|
w = t+t; /* t+t is exact */
|
||||||
|
r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
|
||||||
|
t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */
|
||||||
|
return t;
|
||||||
|
}
|
31
3rd/math/src/ceil.c
Normal file
31
3rd/math/src/ceil.c
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
|
||||||
|
#define EPS DBL_EPSILON
|
||||||
|
#elif FLT_EVAL_METHOD==2
|
||||||
|
#define EPS LDBL_EPSILON
|
||||||
|
#endif
|
||||||
|
static const double_t toint = 1/EPS;
|
||||||
|
|
||||||
|
double ceil(double x)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} u = {x};
|
||||||
|
int e = u.i >> 52 & 0x7ff;
|
||||||
|
double_t y;
|
||||||
|
|
||||||
|
if (e >= 0x3ff+52 || x == 0)
|
||||||
|
return x;
|
||||||
|
/* y = int(x) - x, where int(x) is an integer neighbor of x */
|
||||||
|
if (u.i >> 63)
|
||||||
|
y = x - toint + toint - x;
|
||||||
|
else
|
||||||
|
y = x + toint - toint - x;
|
||||||
|
/* special case because of non-nearest rounding modes */
|
||||||
|
if (e <= 0x3ff-1) {
|
||||||
|
FORCE_EVAL(y);
|
||||||
|
return u.i >> 63 ? -0.0 : 1;
|
||||||
|
}
|
||||||
|
if (y < 0)
|
||||||
|
return x + y + 1;
|
||||||
|
return x + y;
|
||||||
|
}
|
77
3rd/math/src/cos.c
Normal file
77
3rd/math/src/cos.c
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/* cos(x)
|
||||||
|
* Return cosine function of x.
|
||||||
|
*
|
||||||
|
* kernel function:
|
||||||
|
* __sin ... sine function on [-pi/4,pi/4]
|
||||||
|
* __cos ... cosine function on [-pi/4,pi/4]
|
||||||
|
* __rem_pio2 ... argument reduction routine
|
||||||
|
*
|
||||||
|
* Method.
|
||||||
|
* Let S,C and T denote the sin, cos and tan respectively on
|
||||||
|
* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
|
||||||
|
* in [-pi/4 , +pi/4], and let n = k mod 4.
|
||||||
|
* We have
|
||||||
|
*
|
||||||
|
* n sin(x) cos(x) tan(x)
|
||||||
|
* ----------------------------------------------------------
|
||||||
|
* 0 S C T
|
||||||
|
* 1 C -S -1/T
|
||||||
|
* 2 -S -C T
|
||||||
|
* 3 -C S -1/T
|
||||||
|
* ----------------------------------------------------------
|
||||||
|
*
|
||||||
|
* Special cases:
|
||||||
|
* Let trig be any of sin, cos, or tan.
|
||||||
|
* trig(+-INF) is NaN, with signals;
|
||||||
|
* trig(NaN) is that NaN;
|
||||||
|
*
|
||||||
|
* Accuracy:
|
||||||
|
* TRIG(x) returns trig(x) nearly rounded
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
double cos(double x)
|
||||||
|
{
|
||||||
|
double y[2];
|
||||||
|
uint32_t ix;
|
||||||
|
unsigned n;
|
||||||
|
|
||||||
|
GET_HIGH_WORD(ix, x);
|
||||||
|
ix &= 0x7fffffff;
|
||||||
|
|
||||||
|
/* |x| ~< pi/4 */
|
||||||
|
if (ix <= 0x3fe921fb) {
|
||||||
|
if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
|
||||||
|
/* raise inexact if x!=0 */
|
||||||
|
FORCE_EVAL(x + 0x1p120f);
|
||||||
|
return 1.0;
|
||||||
|
}
|
||||||
|
return __cos(x, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* cos(Inf or NaN) is NaN */
|
||||||
|
if (ix >= 0x7ff00000)
|
||||||
|
return x-x;
|
||||||
|
|
||||||
|
/* argument reduction */
|
||||||
|
n = __rem_pio2(x, y);
|
||||||
|
switch (n&3) {
|
||||||
|
case 0: return __cos(y[0], y[1]);
|
||||||
|
case 1: return -__sin(y[0], y[1], 1);
|
||||||
|
case 2: return -__cos(y[0], y[1]);
|
||||||
|
default:
|
||||||
|
return __sin(y[0], y[1], 1);
|
||||||
|
}
|
||||||
|
}
|
8
3rd/math/src/degrees.c
Normal file
8
3rd/math/src/degrees.c
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
double degrees(double x)
|
||||||
|
{
|
||||||
|
if (isinf(x) || isnan(x)) return x;
|
||||||
|
double r = x * M_RAD2DEG;
|
||||||
|
return r;
|
||||||
|
}
|
132
3rd/math/src/exp.c
Normal file
132
3rd/math/src/exp.c
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
/*
|
||||||
|
* Double-precision e^x function.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include "exp_data.h"
|
||||||
|
|
||||||
|
#define N (1 << EXP_TABLE_BITS)
|
||||||
|
#define InvLn2N __exp_data.invln2N
|
||||||
|
#define NegLn2hiN __exp_data.negln2hiN
|
||||||
|
#define NegLn2loN __exp_data.negln2loN
|
||||||
|
#define Shift __exp_data.shift
|
||||||
|
#define T __exp_data.tab
|
||||||
|
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
|
||||||
|
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
|
||||||
|
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
|
||||||
|
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
|
||||||
|
|
||||||
|
/* Handle cases that may overflow or underflow when computing the result that
|
||||||
|
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
||||||
|
scale is in SBITS, however it has a computed exponent that may have
|
||||||
|
overflown into the sign bit so that needs to be adjusted before using it as
|
||||||
|
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||||
|
adjustment of scale, positive k here means the result may overflow and
|
||||||
|
negative k means the result may underflow. */
|
||||||
|
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
|
||||||
|
{
|
||||||
|
double_t scale, y;
|
||||||
|
|
||||||
|
if ((ki & 0x80000000) == 0) {
|
||||||
|
/* k > 0, the exponent of scale might have overflowed by <= 460. */
|
||||||
|
sbits -= 1009ull << 52;
|
||||||
|
scale = asdouble(sbits);
|
||||||
|
y = 0x1p1009 * (scale + scale * tmp);
|
||||||
|
return eval_as_double(y);
|
||||||
|
}
|
||||||
|
/* k < 0, need special care in the subnormal range. */
|
||||||
|
sbits += 1022ull << 52;
|
||||||
|
scale = asdouble(sbits);
|
||||||
|
y = scale + scale * tmp;
|
||||||
|
if (y < 1.0) {
|
||||||
|
/* Round y to the right precision before scaling it into the subnormal
|
||||||
|
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||||
|
E is the worst-case ulp error outside the subnormal range. So this
|
||||||
|
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||||
|
double_t hi, lo;
|
||||||
|
lo = scale - y + scale * tmp;
|
||||||
|
hi = 1.0 + y;
|
||||||
|
lo = 1.0 - hi + y + lo;
|
||||||
|
y = eval_as_double(hi + lo) - 1.0;
|
||||||
|
/* Avoid -0.0 with downward rounding. */
|
||||||
|
if (WANT_ROUNDING && y == 0.0)
|
||||||
|
y = 0.0;
|
||||||
|
/* The underflow exception needs to be signaled explicitly. */
|
||||||
|
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
|
||||||
|
}
|
||||||
|
y = 0x1p-1022 * y;
|
||||||
|
return eval_as_double(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Top 12 bits of a double (sign and exponent bits). */
|
||||||
|
static inline uint32_t top12(double x)
|
||||||
|
{
|
||||||
|
return asuint64(x) >> 52;
|
||||||
|
}
|
||||||
|
|
||||||
|
double exp(double x)
|
||||||
|
{
|
||||||
|
uint32_t abstop;
|
||||||
|
uint64_t ki, idx, top, sbits;
|
||||||
|
double_t kd, z, r, r2, scale, tail, tmp;
|
||||||
|
|
||||||
|
abstop = top12(x) & 0x7ff;
|
||||||
|
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
|
||||||
|
if (abstop - top12(0x1p-54) >= 0x80000000)
|
||||||
|
/* Avoid spurious underflow for tiny x. */
|
||||||
|
/* Note: 0 is common input. */
|
||||||
|
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||||
|
if (abstop >= top12(1024.0)) {
|
||||||
|
if (asuint64(x) == asuint64(-INFINITY))
|
||||||
|
return 0.0;
|
||||||
|
if (abstop >= top12(INFINITY))
|
||||||
|
return 1.0 + x;
|
||||||
|
if (asuint64(x) >> 63)
|
||||||
|
return __math_uflow(0);
|
||||||
|
else
|
||||||
|
return __math_oflow(0);
|
||||||
|
}
|
||||||
|
/* Large x is special cased below. */
|
||||||
|
abstop = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
||||||
|
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
||||||
|
z = InvLn2N * x;
|
||||||
|
#if TOINT_INTRINSICS
|
||||||
|
kd = roundtoint(z);
|
||||||
|
ki = converttoint(z);
|
||||||
|
#elif EXP_USE_TOINT_NARROW
|
||||||
|
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
||||||
|
kd = eval_as_double(z + Shift);
|
||||||
|
ki = asuint64(kd) >> 16;
|
||||||
|
kd = (double_t)(int32_t)ki;
|
||||||
|
#else
|
||||||
|
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
||||||
|
kd = eval_as_double(z + Shift);
|
||||||
|
ki = asuint64(kd);
|
||||||
|
kd -= Shift;
|
||||||
|
#endif
|
||||||
|
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
||||||
|
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||||
|
idx = 2 * (ki % N);
|
||||||
|
top = ki << (52 - EXP_TABLE_BITS);
|
||||||
|
tail = asdouble(T[idx]);
|
||||||
|
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||||
|
sbits = T[idx + 1] + top;
|
||||||
|
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
||||||
|
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||||
|
r2 = r * r;
|
||||||
|
/* Without fma the worst case error is 0.25/N ulp larger. */
|
||||||
|
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
||||||
|
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||||
|
if (predict_false(abstop == 0))
|
||||||
|
return specialcase(tmp, sbits, ki);
|
||||||
|
scale = asdouble(sbits);
|
||||||
|
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
||||||
|
is no spurious underflow here even without fma. */
|
||||||
|
return eval_as_double(scale + scale * tmp);
|
||||||
|
}
|
182
3rd/math/src/exp_data.c
Normal file
182
3rd/math/src/exp_data.c
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
/*
|
||||||
|
* Shared data between exp, exp2 and pow.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "exp_data.h"
|
||||||
|
|
||||||
|
#define N (1 << EXP_TABLE_BITS)
|
||||||
|
|
||||||
|
const struct exp_data __exp_data = {
|
||||||
|
// N/ln2
|
||||||
|
.invln2N = 0x1.71547652b82fep0 * N,
|
||||||
|
// -ln2/N
|
||||||
|
.negln2hiN = -0x1.62e42fefa0000p-8,
|
||||||
|
.negln2loN = -0x1.cf79abc9e3b3ap-47,
|
||||||
|
// Used for rounding when !TOINT_INTRINSICS
|
||||||
|
#if EXP_USE_TOINT_NARROW
|
||||||
|
.shift = 0x1800000000.8p0,
|
||||||
|
#else
|
||||||
|
.shift = 0x1.8p52,
|
||||||
|
#endif
|
||||||
|
// exp polynomial coefficients.
|
||||||
|
.poly = {
|
||||||
|
// abs error: 1.555*2^-66
|
||||||
|
// ulp error: 0.509 (0.511 without fma)
|
||||||
|
// if |x| < ln2/256+eps
|
||||||
|
// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
|
||||||
|
// abs error if |x| < ln2/128: 1.7145*2^-56
|
||||||
|
0x1.ffffffffffdbdp-2,
|
||||||
|
0x1.555555555543cp-3,
|
||||||
|
0x1.55555cf172b91p-5,
|
||||||
|
0x1.1111167a4d017p-7,
|
||||||
|
},
|
||||||
|
.exp2_shift = 0x1.8p52 / N,
|
||||||
|
// exp2 polynomial coefficients.
|
||||||
|
.exp2_poly = {
|
||||||
|
// abs error: 1.2195*2^-65
|
||||||
|
// ulp error: 0.507 (0.511 without fma)
|
||||||
|
// if |x| < 1/256
|
||||||
|
// abs error if |x| < 1/128: 1.9941*2^-56
|
||||||
|
0x1.62e42fefa39efp-1,
|
||||||
|
0x1.ebfbdff82c424p-3,
|
||||||
|
0x1.c6b08d70cf4b5p-5,
|
||||||
|
0x1.3b2abd24650ccp-7,
|
||||||
|
0x1.5d7e09b4e3a84p-10,
|
||||||
|
},
|
||||||
|
// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
|
||||||
|
// tab[2*k] = asuint64(T[k])
|
||||||
|
// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
|
||||||
|
.tab = {
|
||||||
|
0x0, 0x3ff0000000000000,
|
||||||
|
0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
|
||||||
|
0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
|
||||||
|
0xbc905e7a108766d1, 0x3fefe315e86e7f85,
|
||||||
|
0x3c8cd2523567f613, 0x3fefd9b0d3158574,
|
||||||
|
0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
|
||||||
|
0x3c60f74e61e6c861, 0x3fefc74518759bc8,
|
||||||
|
0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
|
||||||
|
0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
|
||||||
|
0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
|
||||||
|
0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
|
||||||
|
0xbc6a033489906e0b, 0x3fef9b66affed31b,
|
||||||
|
0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
|
||||||
|
0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
|
||||||
|
0xbc91c923b9d5f416, 0x3fef829aaea92de0,
|
||||||
|
0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
|
||||||
|
0xbc801b15eaa59348, 0x3fef72b83c7d517b,
|
||||||
|
0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
|
||||||
|
0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
|
||||||
|
0xbc96d99c7611eb26, 0x3fef5be084045cd4,
|
||||||
|
0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
|
||||||
|
0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
|
||||||
|
0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
|
||||||
|
0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
|
||||||
|
0x3c968efde3a8a894, 0x3fef387a6e756238,
|
||||||
|
0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
|
||||||
|
0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
|
||||||
|
0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
|
||||||
|
0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
|
||||||
|
0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
|
||||||
|
0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
|
||||||
|
0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
|
||||||
|
0x3c834d754db0abb6, 0x3fef06fe0a31b715,
|
||||||
|
0x3c864201e2ac744c, 0x3fef0170fc4cd831,
|
||||||
|
0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
|
||||||
|
0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
|
||||||
|
0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
|
||||||
|
0xbc9907f81b512d8e, 0x3feeecae6d05d866,
|
||||||
|
0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
|
||||||
|
0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
|
||||||
|
0x3c859f48a72a4c6d, 0x3feedea64c123422,
|
||||||
|
0xbc9312607a28698a, 0x3feeda4504ac801c,
|
||||||
|
0xbc58a78f4817895b, 0x3feed60a21f72e2a,
|
||||||
|
0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
|
||||||
|
0x3c4363ed60c2ac11, 0x3feece086061892d,
|
||||||
|
0x3c9666093b0664ef, 0x3feeca41ed1d0057,
|
||||||
|
0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
|
||||||
|
0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
|
||||||
|
0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
|
||||||
|
0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
|
||||||
|
0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
|
||||||
|
0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
|
||||||
|
0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
|
||||||
|
0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
|
||||||
|
0x3c93350518fdd78e, 0x3feeaf4736b527da,
|
||||||
|
0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
|
||||||
|
0x3c9063e1e21c5409, 0x3feeab07dd485429,
|
||||||
|
0x3c34c7855019c6ea, 0x3feea9268a5946b7,
|
||||||
|
0x3c9432e62b64c035, 0x3feea76f15ad2148,
|
||||||
|
0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
|
||||||
|
0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
|
||||||
|
0xbc845378892be9ae, 0x3feea34634ccc320,
|
||||||
|
0xbc93cedd78565858, 0x3feea23882552225,
|
||||||
|
0x3c5710aa807e1964, 0x3feea155d44ca973,
|
||||||
|
0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
|
||||||
|
0xbc6a12ad8734b982, 0x3feea012750bdabf,
|
||||||
|
0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
|
||||||
|
0xbc80dc3d54e08851, 0x3fee9f7df9519484,
|
||||||
|
0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
|
||||||
|
0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
|
||||||
|
0xbc8619321e55e68a, 0x3fee9feb564267c9,
|
||||||
|
0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
|
||||||
|
0xbc7b32dcb94da51d, 0x3feea11473eb0187,
|
||||||
|
0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
|
||||||
|
0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
|
||||||
|
0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
|
||||||
|
0xbc9369b6f13b3734, 0x3feea589994cce13,
|
||||||
|
0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
|
||||||
|
0xbc94d450d872576e, 0x3feea8d99b4492ed,
|
||||||
|
0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
|
||||||
|
0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
|
||||||
|
0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
|
||||||
|
0x3c7bf68359f35f44, 0x3feeb1ae99157736,
|
||||||
|
0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
|
||||||
|
0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
|
||||||
|
0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
|
||||||
|
0xbc92434322f4f9aa, 0x3feebd829fde4e50,
|
||||||
|
0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
|
||||||
|
0x3c71affc2b91ce27, 0x3feec49182a3f090,
|
||||||
|
0x3c6dd235e10a73bb, 0x3feec86319e32323,
|
||||||
|
0xbc87c50422622263, 0x3feecc667b5de565,
|
||||||
|
0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
|
||||||
|
0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
|
||||||
|
0x3c90cc319cee31d2, 0x3feed99e1330b358,
|
||||||
|
0x3c8469846e735ab3, 0x3feede6b5579fdbf,
|
||||||
|
0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
|
||||||
|
0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
|
||||||
|
0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
|
||||||
|
0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
|
||||||
|
0xbc90a40e3da6f640, 0x3feef9728de5593a,
|
||||||
|
0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
|
||||||
|
0xbc91eee26b588a35, 0x3fef05b030a1064a,
|
||||||
|
0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
|
||||||
|
0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
|
||||||
|
0x3c736eae30af0cb3, 0x3fef199bdd85529c,
|
||||||
|
0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
|
||||||
|
0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
|
||||||
|
0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
|
||||||
|
0x3c676b2c6c921968, 0x3fef3720dcef9069,
|
||||||
|
0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
|
||||||
|
0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
|
||||||
|
0xbc900dae3875a949, 0x3fef4f87080d89f2,
|
||||||
|
0x3c74a385a63d07a7, 0x3fef5818dcfba487,
|
||||||
|
0xbc82919e2040220f, 0x3fef60e316c98398,
|
||||||
|
0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
|
||||||
|
0x3c843a59ac016b4b, 0x3fef7321f301b460,
|
||||||
|
0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
|
||||||
|
0xbc892ab93b470dc9, 0x3fef864614f5a129,
|
||||||
|
0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
|
||||||
|
0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
|
||||||
|
0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
|
||||||
|
0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
|
||||||
|
0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
|
||||||
|
0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
|
||||||
|
0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
|
||||||
|
0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
|
||||||
|
0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
|
||||||
|
0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
|
||||||
|
},
|
||||||
|
};
|
25
3rd/math/src/exp_data.h
Normal file
25
3rd/math/src/exp_data.h
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
#ifndef _EXP_DATA_H
|
||||||
|
#define _EXP_DATA_H
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#define EXP_TABLE_BITS 7
|
||||||
|
#define EXP_POLY_ORDER 5
|
||||||
|
#define EXP_USE_TOINT_NARROW 0
|
||||||
|
#define EXP2_POLY_ORDER 5
|
||||||
|
extern const struct exp_data {
|
||||||
|
double invln2N;
|
||||||
|
double shift;
|
||||||
|
double negln2hiN;
|
||||||
|
double negln2loN;
|
||||||
|
double poly[4]; /* Last four coefficients. */
|
||||||
|
double exp2_shift;
|
||||||
|
double exp2_poly[EXP2_POLY_ORDER];
|
||||||
|
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
|
||||||
|
} __exp_data;
|
||||||
|
|
||||||
|
#endif
|
8
3rd/math/src/fabs.c
Normal file
8
3rd/math/src/fabs.c
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
double fabs(double x)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} u = {x};
|
||||||
|
u.i &= ULLONG_NSHIFT/2;
|
||||||
|
return u.f;
|
||||||
|
}
|
12
3rd/math/src/factorial.c
Normal file
12
3rd/math/src/factorial.c
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
int factorial(int n)
|
||||||
|
{
|
||||||
|
if (n < 0) return (int)__math_invalid(-1.0f);
|
||||||
|
int32_t r = 1;
|
||||||
|
for (int32_t i = 2; i <= n; i++)
|
||||||
|
{
|
||||||
|
r = r * i;
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
31
3rd/math/src/floor.c
Normal file
31
3rd/math/src/floor.c
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
#if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
|
||||||
|
#define EPS DBL_EPSILON
|
||||||
|
#elif FLT_EVAL_METHOD==2
|
||||||
|
#define EPS LDBL_EPSILON
|
||||||
|
#endif
|
||||||
|
static const double_t toint = 1/EPS;
|
||||||
|
|
||||||
|
double floor(double x)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} u = {x};
|
||||||
|
int e = u.i >> 52 & 0x7ff;
|
||||||
|
double_t y;
|
||||||
|
|
||||||
|
if (e >= 0x3ff+52 || x == 0)
|
||||||
|
return x;
|
||||||
|
/* y = int(x) - x, where int(x) is an integer neighbor of x */
|
||||||
|
if (u.i >> 63)
|
||||||
|
y = x - toint + toint - x;
|
||||||
|
else
|
||||||
|
y = x + toint - toint - x;
|
||||||
|
/* special case because of non-nearest rounding modes */
|
||||||
|
if (e <= 0x3ff-1) {
|
||||||
|
FORCE_EVAL(y);
|
||||||
|
return u.i >> 63 ? -1 : 0;
|
||||||
|
}
|
||||||
|
if (y > 0)
|
||||||
|
return x + y - 1;
|
||||||
|
return x + y;
|
||||||
|
}
|
13
3rd/math/src/fmax.c
Normal file
13
3rd/math/src/fmax.c
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
double fmax(double x, double y)
|
||||||
|
{
|
||||||
|
if (isnan(x))
|
||||||
|
return y;
|
||||||
|
if (isnan(y))
|
||||||
|
return x;
|
||||||
|
/* handle signed zeros, see C99 Annex F.9.9.2 */
|
||||||
|
if (signbit(x) != signbit(y))
|
||||||
|
return signbit(x) ? y : x;
|
||||||
|
return x < y ? y : x;
|
||||||
|
}
|
13
3rd/math/src/fmin.c
Normal file
13
3rd/math/src/fmin.c
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
double fmin(double x, double y)
|
||||||
|
{
|
||||||
|
if (isnan(x))
|
||||||
|
return y;
|
||||||
|
if (isnan(y))
|
||||||
|
return x;
|
||||||
|
/* handle signed zeros, see C99 Annex F.9.9.2 */
|
||||||
|
if (signbit(x) != signbit(y))
|
||||||
|
return signbit(x) ? x : y;
|
||||||
|
return x < y ? x : y;
|
||||||
|
}
|
67
3rd/math/src/fmod.c
Normal file
67
3rd/math/src/fmod.c
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
double fmod(double x, double y)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} ux = {x}, uy = {y};
|
||||||
|
int ex = ux.i>>52 & 0x7ff;
|
||||||
|
int ey = uy.i>>52 & 0x7ff;
|
||||||
|
int sx = ux.i>>63;
|
||||||
|
uint64_t i;
|
||||||
|
|
||||||
|
/* in the followings uxi should be ux.i, but then gcc wrongly adds */
|
||||||
|
/* float load/store to inner loops ruining performance and code size */
|
||||||
|
uint64_t uxi = ux.i;
|
||||||
|
|
||||||
|
if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
|
||||||
|
return (x*y)/(x*y);
|
||||||
|
if (uxi<<1 <= uy.i<<1) {
|
||||||
|
if (uxi<<1 == uy.i<<1)
|
||||||
|
return 0*x;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* normalize x and y */
|
||||||
|
if (!ex) {
|
||||||
|
for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
|
||||||
|
uxi <<= -ex + 1;
|
||||||
|
} else {
|
||||||
|
uxi &= ULLONG_NSHIFT >> 12;
|
||||||
|
uxi |= 1ULL << 52;
|
||||||
|
}
|
||||||
|
if (!ey) {
|
||||||
|
for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
|
||||||
|
uy.i <<= -ey + 1;
|
||||||
|
} else {
|
||||||
|
uy.i &= ULLONG_NSHIFT >> 12;
|
||||||
|
uy.i |= 1ULL << 52;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* x mod y */
|
||||||
|
for (; ex > ey; ex--) {
|
||||||
|
i = uxi - uy.i;
|
||||||
|
if (i >> 63 == 0) {
|
||||||
|
if (i == 0)
|
||||||
|
return 0*x;
|
||||||
|
uxi = i;
|
||||||
|
}
|
||||||
|
uxi <<= 1;
|
||||||
|
}
|
||||||
|
i = uxi - uy.i;
|
||||||
|
if (i >> 63 == 0) {
|
||||||
|
if (i == 0)
|
||||||
|
return 0*x;
|
||||||
|
uxi = i;
|
||||||
|
}
|
||||||
|
for (; uxi>>52 == 0; uxi <<= 1, ex--);
|
||||||
|
|
||||||
|
/* scale result */
|
||||||
|
if (ex > 0) {
|
||||||
|
uxi -= 1ULL << 52;
|
||||||
|
uxi |= (uint64_t)ex << 52;
|
||||||
|
} else {
|
||||||
|
uxi >>= -ex + 1;
|
||||||
|
}
|
||||||
|
uxi |= (uint64_t)sx << 63;
|
||||||
|
ux.i = uxi;
|
||||||
|
return ux.f;
|
||||||
|
}
|
16
3rd/math/src/fsum.c
Normal file
16
3rd/math/src/fsum.c
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
double fsum(double* aptr, int n)
|
||||||
|
{
|
||||||
|
|
||||||
|
/*Kahan sum for float addition*/
|
||||||
|
double sum = 0, C = 0, Y, T;
|
||||||
|
for (int i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
Y = aptr[i] - C;
|
||||||
|
T = sum + Y;
|
||||||
|
C = T - sum - Y;
|
||||||
|
sum = Y;
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
14
3rd/math/src/gcd.c
Normal file
14
3rd/math/src/gcd.c
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
int gcd(int a, int b)
|
||||||
|
{
|
||||||
|
if (a < 0) a = -a;
|
||||||
|
if (b < 0) b = -b;
|
||||||
|
while (b != 0)
|
||||||
|
{
|
||||||
|
int32_t t = b;
|
||||||
|
b = a % b;
|
||||||
|
a = t;
|
||||||
|
}
|
||||||
|
return a;
|
||||||
|
}
|
110
3rd/math/src/log.c
Normal file
110
3rd/math/src/log.c
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
/*
|
||||||
|
* Double-precision log(x) function.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include "log_data.h"
|
||||||
|
|
||||||
|
#define T __log_data.tab
|
||||||
|
#define T2 __log_data.tab2
|
||||||
|
#define B __log_data.poly1
|
||||||
|
#define A __log_data.poly
|
||||||
|
#define Ln2hi __log_data.ln2hi
|
||||||
|
#define Ln2lo __log_data.ln2lo
|
||||||
|
#define N (1 << LOG_TABLE_BITS)
|
||||||
|
#define OFF 0x3fe6000000000000
|
||||||
|
|
||||||
|
/* Top 16 bits of a double. */
|
||||||
|
static inline uint32_t top16(double x)
|
||||||
|
{
|
||||||
|
return asuint64(x) >> 48;
|
||||||
|
}
|
||||||
|
|
||||||
|
double log(double x)
|
||||||
|
{
|
||||||
|
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
|
||||||
|
uint64_t ix, iz, tmp;
|
||||||
|
uint32_t top;
|
||||||
|
int k, i;
|
||||||
|
|
||||||
|
ix = asuint64(x);
|
||||||
|
top = top16(x);
|
||||||
|
#define LO asuint64(1.0 - 0x1p-4)
|
||||||
|
#define HI asuint64(1.0 + 0x1.09p-4)
|
||||||
|
if (predict_false(ix - LO < HI - LO)) {
|
||||||
|
/* Handle close to 1.0 inputs separately. */
|
||||||
|
/* Fix sign of zero with downward rounding when x==1. */
|
||||||
|
if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
|
||||||
|
return 0;
|
||||||
|
r = x - 1.0;
|
||||||
|
r2 = r * r;
|
||||||
|
r3 = r * r2;
|
||||||
|
y = r3 *
|
||||||
|
(B[1] + r * B[2] + r2 * B[3] +
|
||||||
|
r3 * (B[4] + r * B[5] + r2 * B[6] +
|
||||||
|
r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
|
||||||
|
/* Worst-case error is around 0.507 ULP. */
|
||||||
|
w = r * 0x1p27;
|
||||||
|
double_t rhi = r + w - w;
|
||||||
|
double_t rlo = r - rhi;
|
||||||
|
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
|
||||||
|
hi = r + w;
|
||||||
|
lo = r - hi + w;
|
||||||
|
lo += B[0] * rlo * (rhi + r);
|
||||||
|
y += lo;
|
||||||
|
y += hi;
|
||||||
|
return eval_as_double(y);
|
||||||
|
}
|
||||||
|
if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
|
||||||
|
/* x < 0x1p-1022 or inf or nan. */
|
||||||
|
if (ix * 2 == 0)
|
||||||
|
return __math_divzero(1);
|
||||||
|
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
|
||||||
|
return x;
|
||||||
|
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
|
||||||
|
return __math_invalid(x);
|
||||||
|
/* x is subnormal, normalize it. */
|
||||||
|
ix = asuint64(x * 0x1p52);
|
||||||
|
ix -= 52ULL << 52;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||||
|
The range is split into N subintervals.
|
||||||
|
The ith subinterval contains z and c is near its center. */
|
||||||
|
tmp = ix - OFF;
|
||||||
|
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
|
||||||
|
k = (int64_t)tmp >> 52; /* arithmetic shift */
|
||||||
|
iz = ix - (tmp & 0xfffULL << 52);
|
||||||
|
invc = T[i].invc;
|
||||||
|
logc = T[i].logc;
|
||||||
|
z = asdouble(iz);
|
||||||
|
|
||||||
|
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
|
||||||
|
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
||||||
|
#if __FP_FAST_FMA
|
||||||
|
/* rounding error: 0x1p-55/N. */
|
||||||
|
r = __builtin_fma(z, invc, -1.0);
|
||||||
|
#else
|
||||||
|
/* rounding error: 0x1p-55/N + 0x1p-66. */
|
||||||
|
r = (z - T2[i].chi - T2[i].clo) * invc;
|
||||||
|
#endif
|
||||||
|
kd = (double_t)k;
|
||||||
|
|
||||||
|
/* hi + lo = r + log(c) + k*Ln2. */
|
||||||
|
w = kd * Ln2hi + logc;
|
||||||
|
hi = w + r;
|
||||||
|
lo = w - hi + r + kd * Ln2lo;
|
||||||
|
|
||||||
|
/* log(x) = lo + (log1p(r) - r) + hi. */
|
||||||
|
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
|
||||||
|
/* Worst case error if |y| > 0x1p-5:
|
||||||
|
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
|
||||||
|
Worst case error if |y| > 0x1p-4:
|
||||||
|
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
|
||||||
|
y = lo + r2 * A[0] +
|
||||||
|
r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
|
||||||
|
return eval_as_double(y);
|
||||||
|
}
|
100
3rd/math/src/log10.c
Normal file
100
3rd/math/src/log10.c
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Return the base 10 logarithm of x. See log.c for most comments.
|
||||||
|
*
|
||||||
|
* Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2
|
||||||
|
* as in log.c, then combine and scale in extra precision:
|
||||||
|
* log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
static const double
|
||||||
|
ivln10hi = 4.34294481878168880939e-01, /* 0x3fdbcb7b, 0x15200000 */
|
||||||
|
ivln10lo = 2.50829467116452752298e-11, /* 0x3dbb9438, 0xca9aadd5 */
|
||||||
|
log10_2hi = 3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
|
||||||
|
log10_2lo = 3.69423907715893078616e-13, /* 0x3D59FEF3, 0x11F12B36 */
|
||||||
|
Lg1 = 6.666666666666735130e-01, /* 3FE55555 55555593 */
|
||||||
|
Lg2 = 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
|
||||||
|
Lg3 = 2.857142874366239149e-01, /* 3FD24924 94229359 */
|
||||||
|
Lg4 = 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
|
||||||
|
Lg5 = 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
|
||||||
|
Lg6 = 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
|
||||||
|
Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
|
||||||
|
|
||||||
|
double log10(double x)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} u = {x};
|
||||||
|
double_t hfsq,f,s,z,R,w,t1,t2,dk,y,hi,lo,val_hi,val_lo;
|
||||||
|
uint32_t hx;
|
||||||
|
int k;
|
||||||
|
|
||||||
|
hx = u.i>>32;
|
||||||
|
k = 0;
|
||||||
|
if (hx < 0x00100000 || hx>>31) {
|
||||||
|
if (u.i<<1 == 0)
|
||||||
|
return -1/(x*x); /* log(+-0)=-inf */
|
||||||
|
if (hx>>31)
|
||||||
|
return (x-x)/0.0; /* log(-#) = NaN */
|
||||||
|
/* subnormal number, scale x up */
|
||||||
|
k -= 54;
|
||||||
|
x *= 0x1p54;
|
||||||
|
u.f = x;
|
||||||
|
hx = u.i>>32;
|
||||||
|
} else if (hx >= 0x7ff00000) {
|
||||||
|
return x;
|
||||||
|
} else if (hx == 0x3ff00000 && u.i<<32 == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* reduce x into [sqrt(2)/2, sqrt(2)] */
|
||||||
|
hx += 0x3ff00000 - 0x3fe6a09e;
|
||||||
|
k += (int)(hx>>20) - 0x3ff;
|
||||||
|
hx = (hx&0x000fffff) + 0x3fe6a09e;
|
||||||
|
u.i = (uint64_t)hx<<32 | (u.i&0xffffffff);
|
||||||
|
x = u.f;
|
||||||
|
|
||||||
|
f = x - 1.0;
|
||||||
|
hfsq = 0.5*f*f;
|
||||||
|
s = f/(2.0+f);
|
||||||
|
z = s*s;
|
||||||
|
w = z*z;
|
||||||
|
t1 = w*(Lg2+w*(Lg4+w*Lg6));
|
||||||
|
t2 = z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
|
||||||
|
R = t2 + t1;
|
||||||
|
|
||||||
|
/* See log2.c for details. */
|
||||||
|
/* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
|
||||||
|
hi = f - hfsq;
|
||||||
|
u.f = hi;
|
||||||
|
u.i &= (uint64_t)-1<<32;
|
||||||
|
hi = u.f;
|
||||||
|
lo = f - hi - hfsq + s*(hfsq+R);
|
||||||
|
|
||||||
|
/* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
|
||||||
|
val_hi = hi*ivln10hi;
|
||||||
|
dk = k;
|
||||||
|
y = dk*log10_2hi;
|
||||||
|
val_lo = dk*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Extra precision in for adding y is not strictly needed
|
||||||
|
* since there is no very large cancellation near x = sqrt(2) or
|
||||||
|
* x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
|
||||||
|
* with some parallelism and it reduces the error for many args.
|
||||||
|
*/
|
||||||
|
w = y + val_hi;
|
||||||
|
val_lo += (y - w) + val_hi;
|
||||||
|
val_hi = w;
|
||||||
|
|
||||||
|
return val_lo + val_hi;
|
||||||
|
}
|
120
3rd/math/src/log2.c
Normal file
120
3rd/math/src/log2.c
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
/*
|
||||||
|
* Double-precision log2(x) function.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include "log2_data.h"
|
||||||
|
|
||||||
|
#define T __log2_data.tab
|
||||||
|
#define T2 __log2_data.tab2
|
||||||
|
#define B __log2_data.poly1
|
||||||
|
#define A __log2_data.poly
|
||||||
|
#define InvLn2hi __log2_data.invln2hi
|
||||||
|
#define InvLn2lo __log2_data.invln2lo
|
||||||
|
#define N (1 << LOG2_TABLE_BITS)
|
||||||
|
#define OFF 0x3fe6000000000000
|
||||||
|
|
||||||
|
/* Top 16 bits of a double. */
|
||||||
|
static inline uint32_t top16(double x)
|
||||||
|
{
|
||||||
|
return asuint64(x) >> 48;
|
||||||
|
}
|
||||||
|
|
||||||
|
double log2(double x)
|
||||||
|
{
|
||||||
|
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
|
||||||
|
uint64_t ix, iz, tmp;
|
||||||
|
uint32_t top;
|
||||||
|
int k, i;
|
||||||
|
|
||||||
|
ix = asuint64(x);
|
||||||
|
top = top16(x);
|
||||||
|
#define LO asuint64(1.0 - 0x1.5b51p-5)
|
||||||
|
#define HI asuint64(1.0 + 0x1.6ab2p-5)
|
||||||
|
if (predict_false(ix - LO < HI - LO)) {
|
||||||
|
/* Handle close to 1.0 inputs separately. */
|
||||||
|
/* Fix sign of zero with downward rounding when x==1. */
|
||||||
|
if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
|
||||||
|
return 0;
|
||||||
|
r = x - 1.0;
|
||||||
|
#if __FP_FAST_FMA
|
||||||
|
hi = r * InvLn2hi;
|
||||||
|
lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
|
||||||
|
#else
|
||||||
|
double_t rhi, rlo;
|
||||||
|
rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32);
|
||||||
|
rlo = r - rhi;
|
||||||
|
hi = rhi * InvLn2hi;
|
||||||
|
lo = rlo * InvLn2hi + r * InvLn2lo;
|
||||||
|
#endif
|
||||||
|
r2 = r * r; /* rounding error: 0x1p-62. */
|
||||||
|
r4 = r2 * r2;
|
||||||
|
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
|
||||||
|
p = r2 * (B[0] + r * B[1]);
|
||||||
|
y = hi + p;
|
||||||
|
lo += hi - y + p;
|
||||||
|
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
|
||||||
|
r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
|
||||||
|
y += lo;
|
||||||
|
return eval_as_double(y);
|
||||||
|
}
|
||||||
|
if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
|
||||||
|
/* x < 0x1p-1022 or inf or nan. */
|
||||||
|
if (ix * 2 == 0)
|
||||||
|
return __math_divzero(1);
|
||||||
|
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
|
||||||
|
return x;
|
||||||
|
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
|
||||||
|
return __math_invalid(x);
|
||||||
|
/* x is subnormal, normalize it. */
|
||||||
|
ix = asuint64(x * 0x1p52);
|
||||||
|
ix -= 52ULL << 52;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||||
|
The range is split into N subintervals.
|
||||||
|
The ith subinterval contains z and c is near its center. */
|
||||||
|
tmp = ix - OFF;
|
||||||
|
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
|
||||||
|
k = (int64_t)tmp >> 52; /* arithmetic shift */
|
||||||
|
iz = ix - (tmp & 0xfffULL << 52);
|
||||||
|
invc = T[i].invc;
|
||||||
|
logc = T[i].logc;
|
||||||
|
z = asdouble(iz);
|
||||||
|
kd = (double_t)k;
|
||||||
|
|
||||||
|
/* log2(x) = log2(z/c) + log2(c) + k. */
|
||||||
|
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
||||||
|
#if __FP_FAST_FMA
|
||||||
|
/* rounding error: 0x1p-55/N. */
|
||||||
|
r = __builtin_fma(z, invc, -1.0);
|
||||||
|
t1 = r * InvLn2hi;
|
||||||
|
t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
|
||||||
|
#else
|
||||||
|
double_t rhi, rlo;
|
||||||
|
/* rounding error: 0x1p-55/N + 0x1p-65. */
|
||||||
|
r = (z - T2[i].chi - T2[i].clo) * invc;
|
||||||
|
rhi = asdouble(asuint64(r) & ULLONG_NSHIFT << 32);
|
||||||
|
rlo = r - rhi;
|
||||||
|
t1 = rhi * InvLn2hi;
|
||||||
|
t2 = rlo * InvLn2hi + r * InvLn2lo;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* hi + lo = r/ln2 + log2(c) + k. */
|
||||||
|
t3 = kd + logc;
|
||||||
|
hi = t3 + t1;
|
||||||
|
lo = t3 - hi + t1 + t2;
|
||||||
|
|
||||||
|
/* log2(r+1) = r/ln2 + r^2*poly(r). */
|
||||||
|
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||||
|
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
|
||||||
|
r4 = r2 * r2;
|
||||||
|
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
|
||||||
|
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
|
||||||
|
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
|
||||||
|
y = lo + r2 * p + hi;
|
||||||
|
return eval_as_double(y);
|
||||||
|
}
|
201
3rd/math/src/log2_data.c
Normal file
201
3rd/math/src/log2_data.c
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
/*
|
||||||
|
* Data for log2.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "log2_data.h"
|
||||||
|
|
||||||
|
#define N (1 << LOG2_TABLE_BITS)
|
||||||
|
|
||||||
|
const struct log2_data __log2_data = {
|
||||||
|
// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0
|
||||||
|
.invln2hi = 0x1.7154765200000p+0,
|
||||||
|
.invln2lo = 0x1.705fc2eefa200p-33,
|
||||||
|
.poly1 = {
|
||||||
|
// relative error: 0x1.2fad8188p-63
|
||||||
|
// in -0x1.5b51p-5 0x1.6ab2p-5
|
||||||
|
-0x1.71547652b82fep-1,
|
||||||
|
0x1.ec709dc3a03f7p-2,
|
||||||
|
-0x1.71547652b7c3fp-2,
|
||||||
|
0x1.2776c50f05be4p-2,
|
||||||
|
-0x1.ec709dd768fe5p-3,
|
||||||
|
0x1.a61761ec4e736p-3,
|
||||||
|
-0x1.7153fbc64a79bp-3,
|
||||||
|
0x1.484d154f01b4ap-3,
|
||||||
|
-0x1.289e4a72c383cp-3,
|
||||||
|
0x1.0b32f285aee66p-3,
|
||||||
|
},
|
||||||
|
.poly = {
|
||||||
|
// relative error: 0x1.a72c2bf8p-58
|
||||||
|
// abs error: 0x1.67a552c8p-66
|
||||||
|
// in -0x1.f45p-8 0x1.f45p-8
|
||||||
|
-0x1.71547652b8339p-1,
|
||||||
|
0x1.ec709dc3a04bep-2,
|
||||||
|
-0x1.7154764702ffbp-2,
|
||||||
|
0x1.2776c50034c48p-2,
|
||||||
|
-0x1.ec7b328ea92bcp-3,
|
||||||
|
0x1.a6225e117f92ep-3,
|
||||||
|
},
|
||||||
|
/* Algorithm:
|
||||||
|
|
||||||
|
x = 2^k z
|
||||||
|
log2(x) = k + log2(c) + log2(z/c)
|
||||||
|
log2(z/c) = poly(z/c - 1)
|
||||||
|
|
||||||
|
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
|
||||||
|
into the ith one, then table entries are computed as
|
||||||
|
|
||||||
|
tab[i].invc = 1/c
|
||||||
|
tab[i].logc = (double)log2(c)
|
||||||
|
tab2[i].chi = (double)c
|
||||||
|
tab2[i].clo = (double)(c - (double)c)
|
||||||
|
|
||||||
|
where c is near the center of the subinterval and is chosen by trying +-2^29
|
||||||
|
floating point invc candidates around 1/center and selecting one for which
|
||||||
|
|
||||||
|
1) the rounding error in 0x1.8p10 + logc is 0,
|
||||||
|
2) the rounding error in z - chi - clo is < 0x1p-64 and
|
||||||
|
3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
|
||||||
|
|
||||||
|
Note: 1) ensures that k + logc can be computed without rounding error, 2)
|
||||||
|
ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
|
||||||
|
single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
||||||
|
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||||
|
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
|
||||||
|
.tab = {
|
||||||
|
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
|
||||||
|
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
|
||||||
|
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
|
||||||
|
{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
|
||||||
|
{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
|
||||||
|
{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
|
||||||
|
{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
|
||||||
|
{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
|
||||||
|
{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
|
||||||
|
{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
|
||||||
|
{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
|
||||||
|
{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
|
||||||
|
{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
|
||||||
|
{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
|
||||||
|
{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
|
||||||
|
{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
|
||||||
|
{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
|
||||||
|
{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
|
||||||
|
{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
|
||||||
|
{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
|
||||||
|
{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
|
||||||
|
{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
|
||||||
|
{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
|
||||||
|
{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
|
||||||
|
{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
|
||||||
|
{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
|
||||||
|
{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
|
||||||
|
{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
|
||||||
|
{0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
|
||||||
|
{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
|
||||||
|
{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
|
||||||
|
{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
|
||||||
|
{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
|
||||||
|
{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
|
||||||
|
{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
|
||||||
|
{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
|
||||||
|
{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
|
||||||
|
{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
|
||||||
|
{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
|
||||||
|
{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
|
||||||
|
{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
|
||||||
|
{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
|
||||||
|
{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
|
||||||
|
{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
|
||||||
|
{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
|
||||||
|
{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
|
||||||
|
{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
|
||||||
|
{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
|
||||||
|
{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
|
||||||
|
{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
|
||||||
|
{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
|
||||||
|
{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
|
||||||
|
{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
|
||||||
|
{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
|
||||||
|
{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
|
||||||
|
{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
|
||||||
|
{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
|
||||||
|
{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
|
||||||
|
{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
|
||||||
|
{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
|
||||||
|
{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
|
||||||
|
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
|
||||||
|
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
|
||||||
|
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
|
||||||
|
},
|
||||||
|
#if !__FP_FAST_FMA
|
||||||
|
.tab2 = {
|
||||||
|
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
|
||||||
|
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
|
||||||
|
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
|
||||||
|
{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
|
||||||
|
{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
|
||||||
|
{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
|
||||||
|
{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
|
||||||
|
{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
|
||||||
|
{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
|
||||||
|
{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
|
||||||
|
{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
|
||||||
|
{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
|
||||||
|
{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
|
||||||
|
{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
|
||||||
|
{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
|
||||||
|
{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
|
||||||
|
{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
|
||||||
|
{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
|
||||||
|
{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
|
||||||
|
{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
|
||||||
|
{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
|
||||||
|
{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
|
||||||
|
{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
|
||||||
|
{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
|
||||||
|
{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
|
||||||
|
{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
|
||||||
|
{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
|
||||||
|
{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
|
||||||
|
{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
|
||||||
|
{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
|
||||||
|
{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
|
||||||
|
{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
|
||||||
|
{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
|
||||||
|
{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
|
||||||
|
{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
|
||||||
|
{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
|
||||||
|
{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
|
||||||
|
{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
|
||||||
|
{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
|
||||||
|
{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
|
||||||
|
{0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
|
||||||
|
{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
|
||||||
|
{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
|
||||||
|
{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
|
||||||
|
{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
|
||||||
|
{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
|
||||||
|
{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
|
||||||
|
{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
|
||||||
|
{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
|
||||||
|
{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
|
||||||
|
{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
|
||||||
|
{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
|
||||||
|
{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
|
||||||
|
{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
|
||||||
|
{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
|
||||||
|
{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
|
||||||
|
{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
|
||||||
|
{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
|
||||||
|
{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
|
||||||
|
{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
|
||||||
|
{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
|
||||||
|
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
|
||||||
|
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
|
||||||
|
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
|
||||||
|
},
|
||||||
|
#endif
|
||||||
|
};
|
27
3rd/math/src/log2_data.h
Normal file
27
3rd/math/src/log2_data.h
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
#ifndef _LOG2_DATA_H
|
||||||
|
#define _LOG2_DATA_H
|
||||||
|
|
||||||
|
|
||||||
|
#define LOG2_TABLE_BITS 6
|
||||||
|
#define LOG2_POLY_ORDER 7
|
||||||
|
#define LOG2_POLY1_ORDER 11
|
||||||
|
extern const struct log2_data {
|
||||||
|
double invln2hi;
|
||||||
|
double invln2lo;
|
||||||
|
double poly[LOG2_POLY_ORDER - 1];
|
||||||
|
double poly1[LOG2_POLY1_ORDER - 1];
|
||||||
|
struct {
|
||||||
|
double invc, logc;
|
||||||
|
} tab[1 << LOG2_TABLE_BITS];
|
||||||
|
#if !__FP_FAST_FMA
|
||||||
|
struct {
|
||||||
|
double chi, clo;
|
||||||
|
} tab2[1 << LOG2_TABLE_BITS];
|
||||||
|
#endif
|
||||||
|
} __log2_data;
|
||||||
|
|
||||||
|
#endif
|
328
3rd/math/src/log_data.c
Normal file
328
3rd/math/src/log_data.c
Normal file
@ -0,0 +1,328 @@
|
|||||||
|
/*
|
||||||
|
* Data for log.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "log_data.h"
|
||||||
|
|
||||||
|
#define N (1 << LOG_TABLE_BITS)
|
||||||
|
|
||||||
|
const struct log_data __log_data = {
|
||||||
|
.ln2hi = 0x1.62e42fefa3800p-1,
|
||||||
|
.ln2lo = 0x1.ef35793c76730p-45,
|
||||||
|
.poly1 = {
|
||||||
|
// relative error: 0x1.c04d76cp-63
|
||||||
|
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
|
||||||
|
-0x1p-1,
|
||||||
|
0x1.5555555555577p-2,
|
||||||
|
-0x1.ffffffffffdcbp-3,
|
||||||
|
0x1.999999995dd0cp-3,
|
||||||
|
-0x1.55555556745a7p-3,
|
||||||
|
0x1.24924a344de3p-3,
|
||||||
|
-0x1.fffffa4423d65p-4,
|
||||||
|
0x1.c7184282ad6cap-4,
|
||||||
|
-0x1.999eb43b068ffp-4,
|
||||||
|
0x1.78182f7afd085p-4,
|
||||||
|
-0x1.5521375d145cdp-4,
|
||||||
|
},
|
||||||
|
.poly = {
|
||||||
|
// relative error: 0x1.926199e8p-56
|
||||||
|
// abs error: 0x1.882ff33p-65
|
||||||
|
// in -0x1.fp-9 0x1.fp-9
|
||||||
|
-0x1.0000000000001p-1,
|
||||||
|
0x1.555555551305bp-2,
|
||||||
|
-0x1.fffffffeb459p-3,
|
||||||
|
0x1.999b324f10111p-3,
|
||||||
|
-0x1.55575e506c89fp-3,
|
||||||
|
},
|
||||||
|
/* Algorithm:
|
||||||
|
|
||||||
|
x = 2^k z
|
||||||
|
log(x) = k ln2 + log(c) + log(z/c)
|
||||||
|
log(z/c) = poly(z/c - 1)
|
||||||
|
|
||||||
|
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
|
||||||
|
into the ith one, then table entries are computed as
|
||||||
|
|
||||||
|
tab[i].invc = 1/c
|
||||||
|
tab[i].logc = (double)log(c)
|
||||||
|
tab2[i].chi = (double)c
|
||||||
|
tab2[i].clo = (double)(c - (double)c)
|
||||||
|
|
||||||
|
where c is near the center of the subinterval and is chosen by trying +-2^29
|
||||||
|
floating point invc candidates around 1/center and selecting one for which
|
||||||
|
|
||||||
|
1) the rounding error in 0x1.8p9 + logc is 0,
|
||||||
|
2) the rounding error in z - chi - clo is < 0x1p-66 and
|
||||||
|
3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
|
||||||
|
|
||||||
|
Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
|
||||||
|
2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
|
||||||
|
a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
||||||
|
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||||
|
|log(x)| < 0x1p-4, this is not enough so that is special cased. */
|
||||||
|
.tab = {
|
||||||
|
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
|
||||||
|
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
|
||||||
|
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
|
||||||
|
{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
|
||||||
|
{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
|
||||||
|
{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
|
||||||
|
{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
|
||||||
|
{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
|
||||||
|
{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
|
||||||
|
{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
|
||||||
|
{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
|
||||||
|
{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
|
||||||
|
{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
|
||||||
|
{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
|
||||||
|
{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
|
||||||
|
{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
|
||||||
|
{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
|
||||||
|
{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
|
||||||
|
{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
|
||||||
|
{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
|
||||||
|
{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
|
||||||
|
{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
|
||||||
|
{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
|
||||||
|
{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
|
||||||
|
{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
|
||||||
|
{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
|
||||||
|
{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
|
||||||
|
{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
|
||||||
|
{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
|
||||||
|
{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
|
||||||
|
{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
|
||||||
|
{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
|
||||||
|
{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
|
||||||
|
{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
|
||||||
|
{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
|
||||||
|
{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
|
||||||
|
{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
|
||||||
|
{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
|
||||||
|
{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
|
||||||
|
{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
|
||||||
|
{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
|
||||||
|
{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
|
||||||
|
{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
|
||||||
|
{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
|
||||||
|
{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
|
||||||
|
{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
|
||||||
|
{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
|
||||||
|
{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
|
||||||
|
{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
|
||||||
|
{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
|
||||||
|
{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
|
||||||
|
{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
|
||||||
|
{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
|
||||||
|
{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
|
||||||
|
{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
|
||||||
|
{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
|
||||||
|
{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
|
||||||
|
{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
|
||||||
|
{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
|
||||||
|
{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
|
||||||
|
{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
|
||||||
|
{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
|
||||||
|
{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
|
||||||
|
{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
|
||||||
|
{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
|
||||||
|
{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
|
||||||
|
{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
|
||||||
|
{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
|
||||||
|
{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
|
||||||
|
{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
|
||||||
|
{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
|
||||||
|
{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
|
||||||
|
{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
|
||||||
|
{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
|
||||||
|
{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
|
||||||
|
{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
|
||||||
|
{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
|
||||||
|
{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
|
||||||
|
{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
|
||||||
|
{0x1.008040614b195p+0, -0x1.0040979240000p-9},
|
||||||
|
{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
|
||||||
|
{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
|
||||||
|
{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
|
||||||
|
{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
|
||||||
|
{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
|
||||||
|
{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
|
||||||
|
{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
|
||||||
|
{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
|
||||||
|
{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
|
||||||
|
{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
|
||||||
|
{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
|
||||||
|
{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
|
||||||
|
{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
|
||||||
|
{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
|
||||||
|
{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
|
||||||
|
{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
|
||||||
|
{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
|
||||||
|
{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
|
||||||
|
{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
|
||||||
|
{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
|
||||||
|
{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
|
||||||
|
{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
|
||||||
|
{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
|
||||||
|
{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
|
||||||
|
{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
|
||||||
|
{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
|
||||||
|
{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
|
||||||
|
{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
|
||||||
|
{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
|
||||||
|
{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
|
||||||
|
{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
|
||||||
|
{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
|
||||||
|
{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
|
||||||
|
{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
|
||||||
|
{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
|
||||||
|
{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
|
||||||
|
{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
|
||||||
|
{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
|
||||||
|
{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
|
||||||
|
{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
|
||||||
|
{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
|
||||||
|
{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
|
||||||
|
{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
|
||||||
|
{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
|
||||||
|
{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
|
||||||
|
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
|
||||||
|
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
|
||||||
|
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
|
||||||
|
},
|
||||||
|
#if !__FP_FAST_FMA
|
||||||
|
.tab2 = {
|
||||||
|
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
|
||||||
|
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
|
||||||
|
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
|
||||||
|
{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
|
||||||
|
{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
|
||||||
|
{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
|
||||||
|
{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
|
||||||
|
{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
|
||||||
|
{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
|
||||||
|
{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
|
||||||
|
{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
|
||||||
|
{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
|
||||||
|
{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
|
||||||
|
{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
|
||||||
|
{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
|
||||||
|
{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
|
||||||
|
{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
|
||||||
|
{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
|
||||||
|
{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
|
||||||
|
{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
|
||||||
|
{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
|
||||||
|
{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
|
||||||
|
{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
|
||||||
|
{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
|
||||||
|
{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
|
||||||
|
{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
|
||||||
|
{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
|
||||||
|
{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
|
||||||
|
{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
|
||||||
|
{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
|
||||||
|
{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
|
||||||
|
{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
|
||||||
|
{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
|
||||||
|
{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
|
||||||
|
{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
|
||||||
|
{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
|
||||||
|
{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
|
||||||
|
{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
|
||||||
|
{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
|
||||||
|
{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
|
||||||
|
{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
|
||||||
|
{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
|
||||||
|
{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
|
||||||
|
{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
|
||||||
|
{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
|
||||||
|
{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
|
||||||
|
{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
|
||||||
|
{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
|
||||||
|
{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
|
||||||
|
{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
|
||||||
|
{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
|
||||||
|
{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
|
||||||
|
{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
|
||||||
|
{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
|
||||||
|
{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
|
||||||
|
{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
|
||||||
|
{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
|
||||||
|
{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
|
||||||
|
{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
|
||||||
|
{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
|
||||||
|
{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
|
||||||
|
{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
|
||||||
|
{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
|
||||||
|
{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
|
||||||
|
{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
|
||||||
|
{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
|
||||||
|
{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
|
||||||
|
{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
|
||||||
|
{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
|
||||||
|
{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
|
||||||
|
{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
|
||||||
|
{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
|
||||||
|
{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
|
||||||
|
{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
|
||||||
|
{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
|
||||||
|
{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
|
||||||
|
{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
|
||||||
|
{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
|
||||||
|
{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
|
||||||
|
{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
|
||||||
|
{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
|
||||||
|
{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
|
||||||
|
{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
|
||||||
|
{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
|
||||||
|
{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
|
||||||
|
{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
|
||||||
|
{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
|
||||||
|
{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
|
||||||
|
{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
|
||||||
|
{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
|
||||||
|
{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
|
||||||
|
{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
|
||||||
|
{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
|
||||||
|
{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
|
||||||
|
{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
|
||||||
|
{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
|
||||||
|
{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
|
||||||
|
{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
|
||||||
|
{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
|
||||||
|
{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
|
||||||
|
{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
|
||||||
|
{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
|
||||||
|
{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
|
||||||
|
{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
|
||||||
|
{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
|
||||||
|
{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
|
||||||
|
{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
|
||||||
|
{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
|
||||||
|
{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
|
||||||
|
{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
|
||||||
|
{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
|
||||||
|
{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
|
||||||
|
{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
|
||||||
|
{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
|
||||||
|
{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
|
||||||
|
{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
|
||||||
|
{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
|
||||||
|
{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
|
||||||
|
{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
|
||||||
|
{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
|
||||||
|
{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
|
||||||
|
{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
|
||||||
|
{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
|
||||||
|
{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
|
||||||
|
{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
|
||||||
|
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
|
||||||
|
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
|
||||||
|
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
|
||||||
|
},
|
||||||
|
#endif
|
||||||
|
};
|
27
3rd/math/src/log_data.h
Normal file
27
3rd/math/src/log_data.h
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
#ifndef _LOG_DATA_H
|
||||||
|
#define _LOG_DATA_H
|
||||||
|
|
||||||
|
|
||||||
|
#define LOG_TABLE_BITS 7
|
||||||
|
#define LOG_POLY_ORDER 6
|
||||||
|
#define LOG_POLY1_ORDER 12
|
||||||
|
extern const struct log_data {
|
||||||
|
double ln2hi;
|
||||||
|
double ln2lo;
|
||||||
|
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
||||||
|
double poly1[LOG_POLY1_ORDER - 1];
|
||||||
|
struct {
|
||||||
|
double invc, logc;
|
||||||
|
} tab[1 << LOG_TABLE_BITS];
|
||||||
|
#if !__FP_FAST_FMA
|
||||||
|
struct {
|
||||||
|
double chi, clo;
|
||||||
|
} tab2[1 << LOG_TABLE_BITS];
|
||||||
|
#endif
|
||||||
|
} __log_data;
|
||||||
|
|
||||||
|
#endif
|
34
3rd/math/src/modf.c
Normal file
34
3rd/math/src/modf.c
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
double modf(double x, double *iptr)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} u = {x};
|
||||||
|
uint64_t mask;
|
||||||
|
int e = (int)(u.i>>52 & 0x7ff) - 0x3ff;
|
||||||
|
|
||||||
|
/* no fractional part */
|
||||||
|
if (e >= 52) {
|
||||||
|
*iptr = x;
|
||||||
|
if (e == 0x400 && u.i<<12 != 0) /* nan */
|
||||||
|
return x;
|
||||||
|
u.i &= 1ULL<<63;
|
||||||
|
return u.f;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* no integral part*/
|
||||||
|
if (e < 0) {
|
||||||
|
u.i &= 1ULL<<63;
|
||||||
|
*iptr = u.f;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
mask = ULLONG_NSHIFT>>12>>e;
|
||||||
|
if ((u.i & mask) == 0) {
|
||||||
|
*iptr = x;
|
||||||
|
u.i &= 1ULL<<63;
|
||||||
|
return u.f;
|
||||||
|
}
|
||||||
|
u.i &= ~mask;
|
||||||
|
*iptr = u.f;
|
||||||
|
return x - u.f;
|
||||||
|
}
|
341
3rd/math/src/pow.c
Normal file
341
3rd/math/src/pow.c
Normal file
@ -0,0 +1,341 @@
|
|||||||
|
/*
|
||||||
|
* Double-precision x^y function.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include "exp_data.h"
|
||||||
|
#include "pow_data.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
|
||||||
|
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
|
||||||
|
ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define T __pow_log_data.tab
|
||||||
|
#define A __pow_log_data.poly
|
||||||
|
#define Ln2hi __pow_log_data.ln2hi
|
||||||
|
#define Ln2lo __pow_log_data.ln2lo
|
||||||
|
#define N (1 << POW_LOG_TABLE_BITS)
|
||||||
|
#define OFF 0x3fe6955500000000
|
||||||
|
|
||||||
|
/* Top 12 bits of a double (sign and exponent bits). */
|
||||||
|
static inline uint32_t top12(double x)
|
||||||
|
{
|
||||||
|
return asuint64(x) >> 52;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
|
||||||
|
additional 15 bits precision. IX is the bit representation of x, but
|
||||||
|
normalized in the subnormal range using the sign bit for the exponent. */
|
||||||
|
static inline double_t log_inline(uint64_t ix, double_t *tail)
|
||||||
|
{
|
||||||
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
|
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
|
||||||
|
uint64_t iz, tmp;
|
||||||
|
int k, i;
|
||||||
|
|
||||||
|
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||||
|
The range is split into N subintervals.
|
||||||
|
The ith subinterval contains z and c is near its center. */
|
||||||
|
tmp = ix - OFF;
|
||||||
|
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
|
||||||
|
k = (int64_t)tmp >> 52; /* arithmetic shift */
|
||||||
|
iz = ix - (tmp & 0xfffULL << 52);
|
||||||
|
z = asdouble(iz);
|
||||||
|
kd = (double_t)k;
|
||||||
|
|
||||||
|
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
|
||||||
|
invc = T[i].invc;
|
||||||
|
logc = T[i].logc;
|
||||||
|
logctail = T[i].logctail;
|
||||||
|
|
||||||
|
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|
||||||
|
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
|
||||||
|
#if __FP_FAST_FMA
|
||||||
|
r = __builtin_fma(z, invc, -1.0);
|
||||||
|
#else
|
||||||
|
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
|
||||||
|
double_t zhi = asdouble((iz + (1ULL << 31)) & (ULLONG_NSHIFT << 32));
|
||||||
|
double_t zlo = z - zhi;
|
||||||
|
double_t rhi = zhi * invc - 1.0;
|
||||||
|
double_t rlo = zlo * invc;
|
||||||
|
r = rhi + rlo;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* k*Ln2 + log(c) + r. */
|
||||||
|
t1 = kd * Ln2hi + logc;
|
||||||
|
t2 = t1 + r;
|
||||||
|
lo1 = kd * Ln2lo + logctail;
|
||||||
|
lo2 = t1 - t2 + r;
|
||||||
|
|
||||||
|
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||||
|
double_t ar, ar2, ar3, lo3, lo4;
|
||||||
|
ar = A[0] * r; /* A[0] = -0.5. */
|
||||||
|
ar2 = r * ar;
|
||||||
|
ar3 = r * ar2;
|
||||||
|
/* k*Ln2 + log(c) + r + A[0]*r*r. */
|
||||||
|
#if __FP_FAST_FMA
|
||||||
|
hi = t2 + ar2;
|
||||||
|
lo3 = __builtin_fma(ar, r, -ar2);
|
||||||
|
lo4 = t2 - hi + ar2;
|
||||||
|
#else
|
||||||
|
double_t arhi = A[0] * rhi;
|
||||||
|
double_t arhi2 = rhi * arhi;
|
||||||
|
hi = t2 + arhi2;
|
||||||
|
lo3 = rlo * (ar + arhi);
|
||||||
|
lo4 = t2 - hi + arhi2;
|
||||||
|
#endif
|
||||||
|
/* p = log1p(r) - r - A[0]*r*r. */
|
||||||
|
p = (ar3 * (A[1] + r * A[2] +
|
||||||
|
ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
|
||||||
|
lo = lo1 + lo2 + lo3 + lo4 + p;
|
||||||
|
y = hi + lo;
|
||||||
|
*tail = hi - y + lo;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef N
|
||||||
|
#undef T
|
||||||
|
#define N (1 << EXP_TABLE_BITS)
|
||||||
|
#define InvLn2N __exp_data.invln2N
|
||||||
|
#define NegLn2hiN __exp_data.negln2hiN
|
||||||
|
#define NegLn2loN __exp_data.negln2loN
|
||||||
|
#define Shift __exp_data.shift
|
||||||
|
#define T __exp_data.tab
|
||||||
|
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
|
||||||
|
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
|
||||||
|
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
|
||||||
|
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
|
||||||
|
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
|
||||||
|
|
||||||
|
/* Handle cases that may overflow or underflow when computing the result that
|
||||||
|
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
||||||
|
scale is in SBITS, however it has a computed exponent that may have
|
||||||
|
overflown into the sign bit so that needs to be adjusted before using it as
|
||||||
|
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||||
|
adjustment of scale, positive k here means the result may overflow and
|
||||||
|
negative k means the result may underflow. */
|
||||||
|
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
|
||||||
|
{
|
||||||
|
double_t scale, y;
|
||||||
|
|
||||||
|
if ((ki & 0x80000000) == 0) {
|
||||||
|
/* k > 0, the exponent of scale might have overflowed by <= 460. */
|
||||||
|
sbits -= 1009ull << 52;
|
||||||
|
scale = asdouble(sbits);
|
||||||
|
y = 0x1p1009 * (scale + scale * tmp);
|
||||||
|
return eval_as_double(y);
|
||||||
|
}
|
||||||
|
/* k < 0, need special care in the subnormal range. */
|
||||||
|
sbits += 1022ull << 52;
|
||||||
|
/* Note: sbits is signed scale. */
|
||||||
|
scale = asdouble(sbits);
|
||||||
|
y = scale + scale * tmp;
|
||||||
|
if (fabs(y) < 1.0) {
|
||||||
|
/* Round y to the right precision before scaling it into the subnormal
|
||||||
|
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||||
|
E is the worst-case ulp error outside the subnormal range. So this
|
||||||
|
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||||
|
double_t hi, lo, one = 1.0;
|
||||||
|
if (y < 0.0)
|
||||||
|
one = -1.0;
|
||||||
|
lo = scale - y + scale * tmp;
|
||||||
|
hi = one + y;
|
||||||
|
lo = one - hi + y + lo;
|
||||||
|
y = eval_as_double(hi + lo) - one;
|
||||||
|
/* Fix the sign of 0. */
|
||||||
|
if (y == 0.0)
|
||||||
|
y = asdouble(sbits & 0x8000000000000000);
|
||||||
|
/* The underflow exception needs to be signaled explicitly. */
|
||||||
|
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
|
||||||
|
}
|
||||||
|
y = 0x1p-1022 * y;
|
||||||
|
return eval_as_double(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
|
||||||
|
|
||||||
|
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
|
||||||
|
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
|
||||||
|
static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
|
||||||
|
{
|
||||||
|
uint32_t abstop;
|
||||||
|
uint64_t ki, idx, top, sbits;
|
||||||
|
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||||
|
double_t kd, z, r, r2, scale, tail, tmp;
|
||||||
|
|
||||||
|
abstop = top12(x) & 0x7ff;
|
||||||
|
if (predict_false(abstop - top12(0x1p-54) >=
|
||||||
|
top12(512.0) - top12(0x1p-54))) {
|
||||||
|
if (abstop - top12(0x1p-54) >= 0x80000000) {
|
||||||
|
/* Avoid spurious underflow for tiny x. */
|
||||||
|
/* Note: 0 is common input. */
|
||||||
|
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||||
|
return sign_bias ? -one : one;
|
||||||
|
}
|
||||||
|
if (abstop >= top12(1024.0)) {
|
||||||
|
/* Note: inf and nan are already handled. */
|
||||||
|
if (asuint64(x) >> 63)
|
||||||
|
return __math_uflow(sign_bias);
|
||||||
|
else
|
||||||
|
return __math_oflow(sign_bias);
|
||||||
|
}
|
||||||
|
/* Large x is special cased below. */
|
||||||
|
abstop = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
||||||
|
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
||||||
|
z = InvLn2N * x;
|
||||||
|
#if TOINT_INTRINSICS
|
||||||
|
kd = roundtoint(z);
|
||||||
|
ki = converttoint(z);
|
||||||
|
#elif EXP_USE_TOINT_NARROW
|
||||||
|
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
||||||
|
kd = eval_as_double(z + Shift);
|
||||||
|
ki = asuint64(kd) >> 16;
|
||||||
|
kd = (double_t)(int32_t)ki;
|
||||||
|
#else
|
||||||
|
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
||||||
|
kd = eval_as_double(z + Shift);
|
||||||
|
ki = asuint64(kd);
|
||||||
|
kd -= Shift;
|
||||||
|
#endif
|
||||||
|
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
||||||
|
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
|
||||||
|
r += xtail;
|
||||||
|
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||||
|
idx = 2 * (ki % N);
|
||||||
|
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
|
||||||
|
tail = asdouble(T[idx]);
|
||||||
|
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||||
|
sbits = T[idx + 1] + top;
|
||||||
|
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
||||||
|
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||||
|
r2 = r * r;
|
||||||
|
/* Without fma the worst case error is 0.25/N ulp larger. */
|
||||||
|
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
||||||
|
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||||
|
if (predict_false(abstop == 0))
|
||||||
|
return specialcase(tmp, sbits, ki);
|
||||||
|
scale = asdouble(sbits);
|
||||||
|
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
||||||
|
is no spurious underflow here even without fma. */
|
||||||
|
return eval_as_double(scale + scale * tmp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
|
||||||
|
the bit representation of a non-zero finite floating-point value. */
|
||||||
|
static inline int checkint(uint64_t iy)
|
||||||
|
{
|
||||||
|
int e = iy >> 52 & 0x7ff;
|
||||||
|
if (e < 0x3ff)
|
||||||
|
return 0;
|
||||||
|
if (e > 0x3ff + 52)
|
||||||
|
return 2;
|
||||||
|
if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
|
||||||
|
return 0;
|
||||||
|
if (iy & (1ULL << (0x3ff + 52 - e)))
|
||||||
|
return 1;
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns 1 if input is the bit representation of 0, infinity or nan. */
|
||||||
|
static inline int zeroinfnan(uint64_t i)
|
||||||
|
{
|
||||||
|
return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
double pow(double x, double y)
|
||||||
|
{
|
||||||
|
uint32_t sign_bias = 0;
|
||||||
|
uint64_t ix, iy;
|
||||||
|
uint32_t topx, topy;
|
||||||
|
|
||||||
|
ix = asuint64(x);
|
||||||
|
iy = asuint64(y);
|
||||||
|
topx = top12(x);
|
||||||
|
topy = top12(y);
|
||||||
|
if (predict_false(topx - 0x001 >= 0x7ff - 0x001 ||
|
||||||
|
(topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) {
|
||||||
|
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
|
||||||
|
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
|
||||||
|
/* Special cases: (x < 0x1p-126 or inf or nan) or
|
||||||
|
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
|
||||||
|
if (predict_false(zeroinfnan(iy))) {
|
||||||
|
if (2 * iy == 0)
|
||||||
|
return issignaling_inline(x) ? x + y : 1.0;
|
||||||
|
if (ix == asuint64(1.0))
|
||||||
|
return issignaling_inline(y) ? x + y : 1.0;
|
||||||
|
if (2 * ix > 2 * asuint64(INFINITY) ||
|
||||||
|
2 * iy > 2 * asuint64(INFINITY))
|
||||||
|
return x + y;
|
||||||
|
if (2 * ix == 2 * asuint64(1.0))
|
||||||
|
return 1.0;
|
||||||
|
if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63))
|
||||||
|
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
|
||||||
|
return y * y;
|
||||||
|
}
|
||||||
|
if (predict_false(zeroinfnan(ix))) {
|
||||||
|
double_t x2 = x * x;
|
||||||
|
if (ix >> 63 && checkint(iy) == 1)
|
||||||
|
x2 = -x2;
|
||||||
|
/* Without the barrier some versions of clang hoist the 1/x2 and
|
||||||
|
thus division by zero exception can be signaled spuriously. */
|
||||||
|
return iy >> 63 ? fp_barrier(1 / x2) : x2;
|
||||||
|
}
|
||||||
|
/* Here x and y are non-zero finite. */
|
||||||
|
if (ix >> 63) {
|
||||||
|
/* Finite x < 0. */
|
||||||
|
int yint = checkint(iy);
|
||||||
|
if (yint == 0)
|
||||||
|
return __math_invalid(x);
|
||||||
|
if (yint == 1)
|
||||||
|
sign_bias = SIGN_BIAS;
|
||||||
|
ix &= 0x7fffffffffffffff;
|
||||||
|
topx &= 0x7ff;
|
||||||
|
}
|
||||||
|
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
|
||||||
|
/* Note: sign_bias == 0 here because y is not odd. */
|
||||||
|
if (ix == asuint64(1.0))
|
||||||
|
return 1.0;
|
||||||
|
if ((topy & 0x7ff) < 0x3be) {
|
||||||
|
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
|
||||||
|
if (WANT_ROUNDING)
|
||||||
|
return ix > asuint64(1.0) ? 1.0 + y :
|
||||||
|
1.0 - y;
|
||||||
|
else
|
||||||
|
return 1.0;
|
||||||
|
}
|
||||||
|
return (ix > asuint64(1.0)) == (topy < 0x800) ?
|
||||||
|
__math_oflow(0) :
|
||||||
|
__math_uflow(0);
|
||||||
|
}
|
||||||
|
if (topx == 0) {
|
||||||
|
/* Normalize subnormal x so exponent becomes negative. */
|
||||||
|
ix = asuint64(x * 0x1p52);
|
||||||
|
ix &= 0x7fffffffffffffff;
|
||||||
|
ix -= 52ULL << 52;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double_t lo;
|
||||||
|
double_t hi = log_inline(ix, &lo);
|
||||||
|
double_t ehi, elo;
|
||||||
|
#if __FP_FAST_FMA
|
||||||
|
ehi = y * hi;
|
||||||
|
elo = y * lo + __builtin_fma(y, hi, -ehi);
|
||||||
|
#else
|
||||||
|
double_t yhi = asdouble(iy & ULLONG_NSHIFT << 27);
|
||||||
|
double_t ylo = y - yhi;
|
||||||
|
double_t lhi = asdouble(asuint64(hi) & ULLONG_NSHIFT << 27);
|
||||||
|
double_t llo = hi - lhi + lo;
|
||||||
|
ehi = yhi * lhi;
|
||||||
|
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
|
||||||
|
#endif
|
||||||
|
return exp_inline(ehi, elo, sign_bias);
|
||||||
|
}
|
180
3rd/math/src/pow_data.c
Normal file
180
3rd/math/src/pow_data.c
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
/*
|
||||||
|
* Data for the log part of pow.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "pow_data.h"
|
||||||
|
|
||||||
|
#define N (1 << POW_LOG_TABLE_BITS)
|
||||||
|
|
||||||
|
const struct pow_log_data __pow_log_data = {
|
||||||
|
.ln2hi = 0x1.62e42fefa3800p-1,
|
||||||
|
.ln2lo = 0x1.ef35793c76730p-45,
|
||||||
|
.poly = {
|
||||||
|
// relative error: 0x1.11922ap-70
|
||||||
|
// in -0x1.6bp-8 0x1.6bp-8
|
||||||
|
// Coefficients are scaled to match the scaling during evaluation.
|
||||||
|
-0x1p-1,
|
||||||
|
0x1.555555555556p-2 * -2,
|
||||||
|
-0x1.0000000000006p-2 * -2,
|
||||||
|
0x1.999999959554ep-3 * 4,
|
||||||
|
-0x1.555555529a47ap-3 * 4,
|
||||||
|
0x1.2495b9b4845e9p-3 * -8,
|
||||||
|
-0x1.0002b8b263fc3p-3 * -8,
|
||||||
|
},
|
||||||
|
/* Algorithm:
|
||||||
|
|
||||||
|
x = 2^k z
|
||||||
|
log(x) = k ln2 + log(c) + log(z/c)
|
||||||
|
log(z/c) = poly(z/c - 1)
|
||||||
|
|
||||||
|
where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
|
||||||
|
and z falls into the ith one, then table entries are computed as
|
||||||
|
|
||||||
|
tab[i].invc = 1/c
|
||||||
|
tab[i].logc = round(0x1p43*log(c))/0x1p43
|
||||||
|
tab[i].logctail = (double)(log(c) - logc)
|
||||||
|
|
||||||
|
where c is chosen near the center of the subinterval such that 1/c has only a
|
||||||
|
few precision bits so z/c - 1 is exactly representible as double:
|
||||||
|
|
||||||
|
1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
|
||||||
|
|
||||||
|
Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
|
||||||
|
the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
|
||||||
|
error and the interval for z is selected such that near x == 1, where log(x)
|
||||||
|
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
|
||||||
|
.tab = {
|
||||||
|
#define A(a, b, c) {a, 0, b, c},
|
||||||
|
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
|
||||||
|
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
|
||||||
|
A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45)
|
||||||
|
A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49)
|
||||||
|
A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47)
|
||||||
|
A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46)
|
||||||
|
A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50)
|
||||||
|
A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45)
|
||||||
|
A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45)
|
||||||
|
A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45)
|
||||||
|
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
|
||||||
|
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
|
||||||
|
A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46)
|
||||||
|
A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46)
|
||||||
|
A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46)
|
||||||
|
A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45)
|
||||||
|
A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47)
|
||||||
|
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
|
||||||
|
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
|
||||||
|
A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47)
|
||||||
|
A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45)
|
||||||
|
A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46)
|
||||||
|
A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45)
|
||||||
|
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
|
||||||
|
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
|
||||||
|
A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46)
|
||||||
|
A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52)
|
||||||
|
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
|
||||||
|
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
|
||||||
|
A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45)
|
||||||
|
A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45)
|
||||||
|
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
|
||||||
|
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
|
||||||
|
A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46)
|
||||||
|
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
|
||||||
|
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
|
||||||
|
A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45)
|
||||||
|
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
|
||||||
|
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
|
||||||
|
A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48)
|
||||||
|
A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45)
|
||||||
|
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
|
||||||
|
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
|
||||||
|
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
|
||||||
|
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
|
||||||
|
A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45)
|
||||||
|
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
|
||||||
|
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
|
||||||
|
A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46)
|
||||||
|
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
|
||||||
|
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
|
||||||
|
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
|
||||||
|
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
|
||||||
|
A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45)
|
||||||
|
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
|
||||||
|
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
|
||||||
|
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
|
||||||
|
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
|
||||||
|
A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46)
|
||||||
|
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
|
||||||
|
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
|
||||||
|
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
|
||||||
|
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
|
||||||
|
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
|
||||||
|
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
|
||||||
|
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
|
||||||
|
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
|
||||||
|
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
|
||||||
|
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
|
||||||
|
A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45)
|
||||||
|
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
|
||||||
|
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
|
||||||
|
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
|
||||||
|
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
|
||||||
|
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
|
||||||
|
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
|
||||||
|
A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46)
|
||||||
|
A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45)
|
||||||
|
A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45)
|
||||||
|
A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47)
|
||||||
|
A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45)
|
||||||
|
A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46)
|
||||||
|
A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46)
|
||||||
|
A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47)
|
||||||
|
A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45)
|
||||||
|
A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45)
|
||||||
|
A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45)
|
||||||
|
A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49)
|
||||||
|
A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45)
|
||||||
|
A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46)
|
||||||
|
A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45)
|
||||||
|
A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45)
|
||||||
|
A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45)
|
||||||
|
A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45)
|
||||||
|
A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45)
|
||||||
|
A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47)
|
||||||
|
A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51)
|
||||||
|
A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45)
|
||||||
|
A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45)
|
||||||
|
A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46)
|
||||||
|
A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45)
|
||||||
|
A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46)
|
||||||
|
A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47)
|
||||||
|
A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47)
|
||||||
|
A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45)
|
||||||
|
A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47)
|
||||||
|
A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45)
|
||||||
|
A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48)
|
||||||
|
A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45)
|
||||||
|
A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51)
|
||||||
|
A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51)
|
||||||
|
A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46)
|
||||||
|
A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48)
|
||||||
|
A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45)
|
||||||
|
A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45)
|
||||||
|
A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45)
|
||||||
|
A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45)
|
||||||
|
A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47)
|
||||||
|
A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45)
|
||||||
|
A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45)
|
||||||
|
A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46)
|
||||||
|
A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46)
|
||||||
|
A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47)
|
||||||
|
A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45)
|
||||||
|
A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
|
||||||
|
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
|
||||||
|
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
|
||||||
|
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
|
||||||
|
},
|
||||||
|
};
|
21
3rd/math/src/pow_data.h
Normal file
21
3rd/math/src/pow_data.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, Arm Limited.
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
#ifndef _POW_DATA_H
|
||||||
|
#define _POW_DATA_H
|
||||||
|
|
||||||
|
|
||||||
|
#define POW_LOG_TABLE_BITS 7
|
||||||
|
#define POW_LOG_POLY_ORDER 8
|
||||||
|
extern const struct pow_log_data {
|
||||||
|
double ln2hi;
|
||||||
|
double ln2lo;
|
||||||
|
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
||||||
|
/* Note: the pad field is unused, but allows slightly faster indexing. */
|
||||||
|
struct {
|
||||||
|
double invc, pad, logc, logctail;
|
||||||
|
} tab[1 << POW_LOG_TABLE_BITS];
|
||||||
|
} __pow_log_data;
|
||||||
|
|
||||||
|
#endif
|
8
3rd/math/src/radians.c
Normal file
8
3rd/math/src/radians.c
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
double radians(double x)
|
||||||
|
{
|
||||||
|
if (isinf(x) || isnan(x)) return x;
|
||||||
|
double r = x * M_DEG2RAD;
|
||||||
|
return r;
|
||||||
|
}
|
32
3rd/math/src/scalbn.c
Normal file
32
3rd/math/src/scalbn.c
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
double scalbn(double x, int n)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} u;
|
||||||
|
double_t y = x;
|
||||||
|
|
||||||
|
if (n > 1023) {
|
||||||
|
y *= 0x1p1023;
|
||||||
|
n -= 1023;
|
||||||
|
if (n > 1023) {
|
||||||
|
y *= 0x1p1023;
|
||||||
|
n -= 1023;
|
||||||
|
if (n > 1023)
|
||||||
|
n = 1023;
|
||||||
|
}
|
||||||
|
} else if (n < -1022) {
|
||||||
|
/* make sure final n < -53 to avoid double
|
||||||
|
rounding in the subnormal range */
|
||||||
|
y *= 0x1p-1022 * 0x1p53;
|
||||||
|
n += 1022 - 53;
|
||||||
|
if (n < -1022) {
|
||||||
|
y *= 0x1p-1022 * 0x1p53;
|
||||||
|
n += 1022 - 53;
|
||||||
|
if (n < -1022)
|
||||||
|
n = -1022;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
u.i = (uint64_t)(0x3ff+n)<<52;
|
||||||
|
x = y * u.f;
|
||||||
|
return x;
|
||||||
|
}
|
78
3rd/math/src/sin.c
Normal file
78
3rd/math/src/sin.c
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/* sin(x)
|
||||||
|
* Return sine function of x.
|
||||||
|
*
|
||||||
|
* kernel function:
|
||||||
|
* __sin ... sine function on [-pi/4,pi/4]
|
||||||
|
* __cos ... cose function on [-pi/4,pi/4]
|
||||||
|
* __rem_pio2 ... argument reduction routine
|
||||||
|
*
|
||||||
|
* Method.
|
||||||
|
* Let S,C and T denote the sin, cos and tan respectively on
|
||||||
|
* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
|
||||||
|
* in [-pi/4 , +pi/4], and let n = k mod 4.
|
||||||
|
* We have
|
||||||
|
*
|
||||||
|
* n sin(x) cos(x) tan(x)
|
||||||
|
* ----------------------------------------------------------
|
||||||
|
* 0 S C T
|
||||||
|
* 1 C -S -1/T
|
||||||
|
* 2 -S -C T
|
||||||
|
* 3 -C S -1/T
|
||||||
|
* ----------------------------------------------------------
|
||||||
|
*
|
||||||
|
* Special cases:
|
||||||
|
* Let trig be any of sin, cos, or tan.
|
||||||
|
* trig(+-INF) is NaN, with signals;
|
||||||
|
* trig(NaN) is that NaN;
|
||||||
|
*
|
||||||
|
* Accuracy:
|
||||||
|
* TRIG(x) returns trig(x) nearly rounded
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
double sin(double x)
|
||||||
|
{
|
||||||
|
double y[2];
|
||||||
|
uint32_t ix;
|
||||||
|
unsigned n;
|
||||||
|
|
||||||
|
/* High word of x. */
|
||||||
|
GET_HIGH_WORD(ix, x);
|
||||||
|
ix &= 0x7fffffff;
|
||||||
|
|
||||||
|
/* |x| ~< pi/4 */
|
||||||
|
if (ix <= 0x3fe921fb) {
|
||||||
|
if (ix < 0x3e500000) { /* |x| < 2**-26 */
|
||||||
|
/* raise inexact if x != 0 and underflow if subnormal*/
|
||||||
|
FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
return __sin(x, 0.0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* sin(Inf or NaN) is NaN */
|
||||||
|
if (ix >= 0x7ff00000)
|
||||||
|
return x - x;
|
||||||
|
|
||||||
|
/* argument reduction needed */
|
||||||
|
n = __rem_pio2(x, y);
|
||||||
|
switch (n&3) {
|
||||||
|
case 0: return __sin(y[0], y[1], 1);
|
||||||
|
case 1: return __cos(y[0], y[1]);
|
||||||
|
case 2: return -__sin(y[0], y[1], 1);
|
||||||
|
default:
|
||||||
|
return -__cos(y[0], y[1]);
|
||||||
|
}
|
||||||
|
}
|
156
3rd/math/src/sqrt.c
Normal file
156
3rd/math/src/sqrt.c
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
#include <math.h>
|
||||||
|
#include "sqrt_data.h"
|
||||||
|
|
||||||
|
#define FENV_SUPPORT 1
|
||||||
|
|
||||||
|
/* returns a*b*2^-32 - e, with error 0 <= e < 1. */
|
||||||
|
static inline uint32_t mul32(uint32_t a, uint32_t b)
|
||||||
|
{
|
||||||
|
return (uint64_t)a*b >> 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* returns a*b*2^-64 - e, with error 0 <= e < 3. */
|
||||||
|
static inline uint64_t mul64(uint64_t a, uint64_t b)
|
||||||
|
{
|
||||||
|
uint64_t ahi = a>>32;
|
||||||
|
uint64_t alo = a&0xffffffff;
|
||||||
|
uint64_t bhi = b>>32;
|
||||||
|
uint64_t blo = b&0xffffffff;
|
||||||
|
return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
double sqrt(double x)
|
||||||
|
{
|
||||||
|
uint64_t ix, top, m;
|
||||||
|
|
||||||
|
/* special case handling. */
|
||||||
|
ix = asuint64(x);
|
||||||
|
top = ix >> 52;
|
||||||
|
if (predict_false(top - 0x001 >= 0x7ff - 0x001)) {
|
||||||
|
/* x < 0x1p-1022 or inf or nan. */
|
||||||
|
if (ix * 2 == 0)
|
||||||
|
return x;
|
||||||
|
if (ix == 0x7ff0000000000000)
|
||||||
|
return x;
|
||||||
|
if (ix > 0x7ff0000000000000)
|
||||||
|
return __math_invalid(x);
|
||||||
|
/* x is subnormal, normalize it. */
|
||||||
|
ix = asuint64(x * 0x1p52);
|
||||||
|
top = ix >> 52;
|
||||||
|
top -= 52;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* argument reduction:
|
||||||
|
x = 4^e m; with integer e, and m in [1, 4)
|
||||||
|
m: fixed point representation [2.62]
|
||||||
|
2^e is the exponent part of the result. */
|
||||||
|
int even = top & 1;
|
||||||
|
m = (ix << 11) | 0x8000000000000000;
|
||||||
|
if (even) m >>= 1;
|
||||||
|
top = (top + 0x3ff) >> 1;
|
||||||
|
|
||||||
|
/* approximate r ~ 1/sqrt(m) and s ~ sqrt(m) when m in [1,4)
|
||||||
|
|
||||||
|
initial estimate:
|
||||||
|
7bit table lookup (1bit exponent and 6bit significand).
|
||||||
|
|
||||||
|
iterative approximation:
|
||||||
|
using 2 goldschmidt iterations with 32bit int arithmetics
|
||||||
|
and a final iteration with 64bit int arithmetics.
|
||||||
|
|
||||||
|
details:
|
||||||
|
|
||||||
|
the relative error (e = r0 sqrt(m)-1) of a linear estimate
|
||||||
|
(r0 = a m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best,
|
||||||
|
a table lookup is faster and needs one less iteration
|
||||||
|
6 bit lookup table (128b) gives |e| < 0x1.f9p-8
|
||||||
|
7 bit lookup table (256b) gives |e| < 0x1.fdp-9
|
||||||
|
for single and double prec 6bit is enough but for quad
|
||||||
|
prec 7bit is needed (or modified iterations). to avoid
|
||||||
|
one more iteration >=13bit table would be needed (16k).
|
||||||
|
|
||||||
|
a newton-raphson iteration for r is
|
||||||
|
w = r*r
|
||||||
|
u = 3 - m*w
|
||||||
|
r = r*u/2
|
||||||
|
can use a goldschmidt iteration for s at the end or
|
||||||
|
s = m*r
|
||||||
|
|
||||||
|
first goldschmidt iteration is
|
||||||
|
s = m*r
|
||||||
|
u = 3 - s*r
|
||||||
|
r = r*u/2
|
||||||
|
s = s*u/2
|
||||||
|
next goldschmidt iteration is
|
||||||
|
u = 3 - s*r
|
||||||
|
r = r*u/2
|
||||||
|
s = s*u/2
|
||||||
|
and at the end r is not computed only s.
|
||||||
|
|
||||||
|
they use the same amount of operations and converge at the
|
||||||
|
same quadratic rate, i.e. if
|
||||||
|
r1 sqrt(m) - 1 = e, then
|
||||||
|
r2 sqrt(m) - 1 = -3/2 e^2 - 1/2 e^3
|
||||||
|
the advantage of goldschmidt is that the mul for s and r
|
||||||
|
are independent (computed in parallel), however it is not
|
||||||
|
"self synchronizing": it only uses the input m in the
|
||||||
|
first iteration so rounding errors accumulate. at the end
|
||||||
|
or when switching to larger precision arithmetics rounding
|
||||||
|
errors dominate so the first iteration should be used.
|
||||||
|
|
||||||
|
the fixed point representations are
|
||||||
|
m: 2.30 r: 0.32, s: 2.30, d: 2.30, u: 2.30, three: 2.30
|
||||||
|
and after switching to 64 bit
|
||||||
|
m: 2.62 r: 0.64, s: 2.62, d: 2.62, u: 2.62, three: 2.62 */
|
||||||
|
|
||||||
|
static const uint64_t three = 0xc0000000;
|
||||||
|
uint64_t r, s, d, u, i;
|
||||||
|
|
||||||
|
i = (ix >> 46) % 128;
|
||||||
|
r = (uint32_t)__rsqrt_tab[i] << 16;
|
||||||
|
/* |r sqrt(m) - 1| < 0x1.fdp-9 */
|
||||||
|
s = mul32(m>>32, r);
|
||||||
|
/* |s/sqrt(m) - 1| < 0x1.fdp-9 */
|
||||||
|
d = mul32(s, r);
|
||||||
|
u = three - d;
|
||||||
|
r = mul32(r, u) << 1;
|
||||||
|
/* |r sqrt(m) - 1| < 0x1.7bp-16 */
|
||||||
|
s = mul32(s, u) << 1;
|
||||||
|
/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
|
||||||
|
d = mul32(s, r);
|
||||||
|
u = three - d;
|
||||||
|
r = mul32(r, u) << 1;
|
||||||
|
/* |r sqrt(m) - 1| < 0x1.3704p-29 (measured worst-case) */
|
||||||
|
r = r << 32;
|
||||||
|
s = mul64(m, r);
|
||||||
|
d = mul64(s, r);
|
||||||
|
u = (three<<32) - d;
|
||||||
|
s = mul64(s, u); /* repr: 3.61 */
|
||||||
|
/* -0x1p-57 < s - sqrt(m) < 0x1.8001p-61 */
|
||||||
|
s = (s - 2) >> 9; /* repr: 12.52 */
|
||||||
|
/* -0x1.09p-52 < s - sqrt(m) < -0x1.fffcp-63 */
|
||||||
|
|
||||||
|
/* s < sqrt(m) < s + 0x1.09p-52,
|
||||||
|
compute nearest rounded result:
|
||||||
|
the nearest result to 52 bits is either s or s+0x1p-52,
|
||||||
|
we can decide by comparing (2^52 s + 0.5)^2 to 2^104 m. */
|
||||||
|
uint64_t d0, d1, d2;
|
||||||
|
double y, t;
|
||||||
|
d0 = (m << 42) - s*s;
|
||||||
|
d1 = s - d0;
|
||||||
|
d2 = d1 + s + 1;
|
||||||
|
s += d1 >> 63;
|
||||||
|
s &= 0x000fffffffffffff;
|
||||||
|
s |= top << 52;
|
||||||
|
y = asdouble(s);
|
||||||
|
if (FENV_SUPPORT) {
|
||||||
|
/* handle rounding modes and inexact exception:
|
||||||
|
only (s+1)^2 == 2^42 m case is exact otherwise
|
||||||
|
add a tiny value to cause the fenv effects. */
|
||||||
|
uint64_t tiny = predict_false(d2==0) ? 0 : 0x0010000000000000;
|
||||||
|
tiny |= (d1^d2) & 0x8000000000000000;
|
||||||
|
t = asdouble(tiny);
|
||||||
|
y = eval_as_double(y + t);
|
||||||
|
}
|
||||||
|
return y;
|
||||||
|
}
|
19
3rd/math/src/sqrt_data.c
Normal file
19
3rd/math/src/sqrt_data.c
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#include "sqrt_data.h"
|
||||||
|
const uint16_t __rsqrt_tab[128] = {
|
||||||
|
0xb451,0xb2f0,0xb196,0xb044,0xaef9,0xadb6,0xac79,0xab43,
|
||||||
|
0xaa14,0xa8eb,0xa7c8,0xa6aa,0xa592,0xa480,0xa373,0xa26b,
|
||||||
|
0xa168,0xa06a,0x9f70,0x9e7b,0x9d8a,0x9c9d,0x9bb5,0x9ad1,
|
||||||
|
0x99f0,0x9913,0x983a,0x9765,0x9693,0x95c4,0x94f8,0x9430,
|
||||||
|
0x936b,0x92a9,0x91ea,0x912e,0x9075,0x8fbe,0x8f0a,0x8e59,
|
||||||
|
0x8daa,0x8cfe,0x8c54,0x8bac,0x8b07,0x8a64,0x89c4,0x8925,
|
||||||
|
0x8889,0x87ee,0x8756,0x86c0,0x862b,0x8599,0x8508,0x8479,
|
||||||
|
0x83ec,0x8361,0x82d8,0x8250,0x81c9,0x8145,0x80c2,0x8040,
|
||||||
|
0xff02,0xfd0e,0xfb25,0xf947,0xf773,0xf5aa,0xf3ea,0xf234,
|
||||||
|
0xf087,0xeee3,0xed47,0xebb3,0xea27,0xe8a3,0xe727,0xe5b2,
|
||||||
|
0xe443,0xe2dc,0xe17a,0xe020,0xdecb,0xdd7d,0xdc34,0xdaf1,
|
||||||
|
0xd9b3,0xd87b,0xd748,0xd61a,0xd4f1,0xd3cd,0xd2ad,0xd192,
|
||||||
|
0xd07b,0xcf69,0xce5b,0xcd51,0xcc4a,0xcb48,0xca4a,0xc94f,
|
||||||
|
0xc858,0xc764,0xc674,0xc587,0xc49d,0xc3b7,0xc2d4,0xc1f4,
|
||||||
|
0xc116,0xc03c,0xbf65,0xbe90,0xbdbe,0xbcef,0xbc23,0xbb59,
|
||||||
|
0xba91,0xb9cc,0xb90a,0xb84a,0xb78c,0xb6d0,0xb617,0xb560,
|
||||||
|
};
|
12
3rd/math/src/sqrt_data.h
Normal file
12
3rd/math/src/sqrt_data.h
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#ifndef _SQRT_DATA_H
|
||||||
|
#define _SQRT_DATA_H
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
/* if x in [1,2): i = (int)(64*x);
|
||||||
|
if x in [2,4): i = (int)(32*x-64);
|
||||||
|
__rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error:
|
||||||
|
|__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */
|
||||||
|
extern const uint16_t __rsqrt_tab[128];
|
||||||
|
|
||||||
|
#endif
|
70
3rd/math/src/tan.c
Normal file
70
3rd/math/src/tan.c
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
/* origin: FreeBSD /usr/src/lib/msun/src/s_tan.c */
|
||||||
|
/*
|
||||||
|
* ====================================================
|
||||||
|
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||||
|
* Permission to use, copy, modify, and distribute this
|
||||||
|
* software is freely granted, provided that this notice
|
||||||
|
* is preserved.
|
||||||
|
* ====================================================
|
||||||
|
*/
|
||||||
|
/* tan(x)
|
||||||
|
* Return tangent function of x.
|
||||||
|
*
|
||||||
|
* kernel function:
|
||||||
|
* __tan ... tangent function on [-pi/4,pi/4]
|
||||||
|
* __rem_pio2 ... argument reduction routine
|
||||||
|
*
|
||||||
|
* Method.
|
||||||
|
* Let S,C and T denote the sin, cos and tan respectively on
|
||||||
|
* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
|
||||||
|
* in [-pi/4 , +pi/4], and let n = k mod 4.
|
||||||
|
* We have
|
||||||
|
*
|
||||||
|
* n sin(x) cos(x) tan(x)
|
||||||
|
* ----------------------------------------------------------
|
||||||
|
* 0 S C T
|
||||||
|
* 1 C -S -1/T
|
||||||
|
* 2 -S -C T
|
||||||
|
* 3 -C S -1/T
|
||||||
|
* ----------------------------------------------------------
|
||||||
|
*
|
||||||
|
* Special cases:
|
||||||
|
* Let trig be any of sin, cos, or tan.
|
||||||
|
* trig(+-INF) is NaN, with signals;
|
||||||
|
* trig(NaN) is that NaN;
|
||||||
|
*
|
||||||
|
* Accuracy:
|
||||||
|
* TRIG(x) returns trig(x) nearly rounded
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
double tan(double x)
|
||||||
|
{
|
||||||
|
double y[2];
|
||||||
|
uint32_t ix;
|
||||||
|
unsigned n;
|
||||||
|
|
||||||
|
GET_HIGH_WORD(ix, x);
|
||||||
|
ix &= 0x7fffffff;
|
||||||
|
|
||||||
|
/* |x| ~< pi/4 */
|
||||||
|
if (ix <= 0x3fe921fb) {
|
||||||
|
if (ix < 0x3e400000) { /* |x| < 2**-27 */
|
||||||
|
/* raise inexact if x!=0 and underflow if subnormal */
|
||||||
|
FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
return __tan(x, 0.0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* tan(Inf or NaN) is NaN */
|
||||||
|
if (ix >= 0x7ff00000)
|
||||||
|
return x - x;
|
||||||
|
|
||||||
|
/* argument reduction */
|
||||||
|
n = __rem_pio2(x, y);
|
||||||
|
return __tan(y[0], y[1], n&1);
|
||||||
|
}
|
19
3rd/math/src/trunc.c
Normal file
19
3rd/math/src/trunc.c
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
double trunc(double x)
|
||||||
|
{
|
||||||
|
union {double f; uint64_t i;} u = {x};
|
||||||
|
int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff + 12;
|
||||||
|
uint64_t m;
|
||||||
|
|
||||||
|
if (e >= 52 + 12)
|
||||||
|
return x;
|
||||||
|
if (e < 12)
|
||||||
|
e = 1;
|
||||||
|
m = ULLONG_NSHIFT >> e;
|
||||||
|
if ((u.i & m) == 0)
|
||||||
|
return x;
|
||||||
|
FORCE_EVAL(x + 0x1p120f);
|
||||||
|
u.i &= ~m;
|
||||||
|
return u.f;
|
||||||
|
}
|
@ -45,6 +45,13 @@ if(PK_ENABLE_OS)
|
|||||||
add_definitions(-DPK_ENABLE_OS=1)
|
add_definitions(-DPK_ENABLE_OS=1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
option(USE_MUSL_LIBC_MATH "" OFF)
|
||||||
|
if(USE_MUSL_LIBC_MATH)
|
||||||
|
add_subdirectory(3rd/math)
|
||||||
|
include_directories(3rd/math)
|
||||||
|
add_definitions(-DUSE_MUSL_LIBC_MATH)
|
||||||
|
endif()
|
||||||
|
|
||||||
option(PK_BUILD_MODULE_LZ4 "" OFF)
|
option(PK_BUILD_MODULE_LZ4 "" OFF)
|
||||||
if(PK_BUILD_MODULE_LZ4)
|
if(PK_BUILD_MODULE_LZ4)
|
||||||
add_subdirectory(3rd/lz4)
|
add_subdirectory(3rd/lz4)
|
||||||
@ -114,3 +121,7 @@ endif()
|
|||||||
if(PK_BUILD_MODULE_LIBHV)
|
if(PK_BUILD_MODULE_LIBHV)
|
||||||
target_link_libraries(${PROJECT_NAME} libhv_bindings)
|
target_link_libraries(${PROJECT_NAME} libhv_bindings)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(USE_MUSL_LIBC_MATH)
|
||||||
|
target_link_libraries(${PROJECT_NAME} musl_math)
|
||||||
|
endif()
|
@ -20,7 +20,7 @@ assert config in ['Debug', 'Release', 'RelWithDebInfo']
|
|||||||
|
|
||||||
os.chdir("build")
|
os.chdir("build")
|
||||||
|
|
||||||
code = os.system(f"cmake .. -DPK_ENABLE_OS=ON -DPK_BUILD_MODULE_LZ4=ON -DCMAKE_BUILD_TYPE={config} {extra_flags}")
|
code = os.system(f"cmake .. -DPK_ENABLE_OS=ON -DPK_BUILD_MODULE_LZ4=OFF -DUSE_MUSL_LIBC_MATH=ON -DCMAKE_BUILD_TYPE={config} {extra_flags}")
|
||||||
assert code == 0
|
assert code == 0
|
||||||
code = os.system(f"cmake --build . --config {config} -j 4")
|
code = os.system(f"cmake --build . --config {config} -j 4")
|
||||||
assert code == 0
|
assert code == 0
|
||||||
|
7
include/pocketpy/common/math.h
Normal file
7
include/pocketpy/common/math.h
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef USE_MUSL_LIBC_MATH
|
||||||
|
#include <math.h>
|
||||||
|
#else
|
||||||
|
#include "include/math.h"
|
||||||
|
#endif
|
@ -7,7 +7,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <math.h>
|
#include "pocketpy/common/math.h"
|
||||||
|
|
||||||
void c11_sbuf__ctor(c11_sbuf* self) {
|
void c11_sbuf__ctor(c11_sbuf* self) {
|
||||||
c11_vector__ctor(&self->data, sizeof(char));
|
c11_vector__ctor(&self->data, sizeof(char));
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
#include "pocketpy/objects/object.h"
|
#include "pocketpy/objects/object.h"
|
||||||
#include "pocketpy/common/sstream.h"
|
#include "pocketpy/common/sstream.h"
|
||||||
#include "pocketpy/interpreter/vm.h"
|
#include "pocketpy/interpreter/vm.h"
|
||||||
#include <math.h>
|
#include "pocketpy/common/math.h"
|
||||||
|
|
||||||
// https://bottosson.github.io/posts/gamutclipping/#oklab-to-linear-srgb-conversion
|
// https://bottosson.github.io/posts/gamutclipping/#oklab-to-linear-srgb-conversion
|
||||||
|
|
||||||
@ -15,9 +15,9 @@ static c11_vec3 linear_srgb_to_oklab(c11_vec3 c)
|
|||||||
float m = 0.2119034982f * c.x + 0.6806995451f * c.y + 0.1073969566f * c.z;
|
float m = 0.2119034982f * c.x + 0.6806995451f * c.y + 0.1073969566f * c.z;
|
||||||
float s = 0.0883024619f * c.x + 0.2817188376f * c.y + 0.6299787005f * c.z;
|
float s = 0.0883024619f * c.x + 0.2817188376f * c.y + 0.6299787005f * c.z;
|
||||||
|
|
||||||
float l_ = cbrtf(l);
|
float l_ = cbrt(l);
|
||||||
float m_ = cbrtf(m);
|
float m_ = cbrt(m);
|
||||||
float s_ = cbrtf(s);
|
float s_ = cbrt(s);
|
||||||
|
|
||||||
return (c11_vec3){{
|
return (c11_vec3){{
|
||||||
0.2104542553f * l_ + 0.7936177850f * m_ - 0.0040720468f * s_,
|
0.2104542553f * l_ + 0.7936177850f * m_ - 0.0040720468f * s_,
|
||||||
@ -46,11 +46,11 @@ static c11_vec3 oklab_to_linear_srgb(c11_vec3 c)
|
|||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
static float _gamma_correct_inv(float x) {
|
static float _gamma_correct_inv(float x) {
|
||||||
return (x <= 0.04045f) ? (x / 12.92f) : powf((x + 0.055f) / 1.055f, 2.4f);
|
return (x <= 0.04045f) ? (x / 12.92f) : pow((x + 0.055f) / 1.055f, 2.4f);
|
||||||
}
|
}
|
||||||
|
|
||||||
static float _gamma_correct(float x) {
|
static float _gamma_correct(float x) {
|
||||||
return (x <= 0.0031308f) ? (12.92f * x) : (1.055f * powf(x, 1.0f / 2.4f) - 0.055f);
|
return (x <= 0.0031308f) ? (12.92f * x) : (1.055f * pow(x, 1.0f / 2.4f) - 0.055f);
|
||||||
}
|
}
|
||||||
|
|
||||||
static c11_vec3 srgb_to_linear_srgb(c11_vec3 c) {
|
static c11_vec3 srgb_to_linear_srgb(c11_vec3 c) {
|
||||||
@ -70,8 +70,8 @@ static c11_vec3 linear_srgb_to_srgb(c11_vec3 c) {
|
|||||||
static c11_vec3 _oklab_to_oklch(c11_vec3 c) {
|
static c11_vec3 _oklab_to_oklch(c11_vec3 c) {
|
||||||
c11_vec3 res;
|
c11_vec3 res;
|
||||||
res.x = c.x;
|
res.x = c.x;
|
||||||
res.y = sqrtf(c.y * c.y + c.z * c.z);
|
res.y = sqrt(c.y * c.y + c.z * c.z);
|
||||||
res.z = fmodf(atan2f(c.z, c.y), 2 * (float)PK_M_PI);
|
res.z = fmod(atan2(c.z, c.y), 2 * (float)PK_M_PI);
|
||||||
res.z = res.z * PK_M_RAD2DEG;
|
res.z = res.z * PK_M_RAD2DEG;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -79,8 +79,8 @@ static c11_vec3 _oklab_to_oklch(c11_vec3 c) {
|
|||||||
static c11_vec3 _oklch_to_oklab(c11_vec3 c) {
|
static c11_vec3 _oklch_to_oklab(c11_vec3 c) {
|
||||||
c11_vec3 res;
|
c11_vec3 res;
|
||||||
res.x = c.x;
|
res.x = c.x;
|
||||||
res.y = c.y * cosf(c.z * PK_M_DEG2RAD);
|
res.y = c.y * cos(c.z * PK_M_DEG2RAD);
|
||||||
res.z = c.y * sinf(c.z * PK_M_DEG2RAD);
|
res.z = c.y * sin(c.z * PK_M_DEG2RAD);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -105,9 +105,9 @@ static c11_vec3 oklch_to_linear_srgb(c11_vec3 c) {
|
|||||||
// fall back to RGB clamping
|
// fall back to RGB clamping
|
||||||
candidate = oklab_to_linear_srgb(_oklch_to_oklab(clamped));
|
candidate = oklab_to_linear_srgb(_oklch_to_oklab(clamped));
|
||||||
if(!_is_valid_srgb(candidate)) {
|
if(!_is_valid_srgb(candidate)) {
|
||||||
candidate.x = fmaxf(0.0f, fminf(1.0f, candidate.x));
|
candidate.x = fmax(0.0f, fmin(1.0f, candidate.x));
|
||||||
candidate.y = fmaxf(0.0f, fminf(1.0f, candidate.y));
|
candidate.y = fmax(0.0f, fmin(1.0f, candidate.y));
|
||||||
candidate.z = fmaxf(0.0f, fminf(1.0f, candidate.z));
|
candidate.z = fmax(0.0f, fmin(1.0f, candidate.z));
|
||||||
return candidate;
|
return candidate;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -116,7 +116,7 @@ static c11_vec3 oklch_to_linear_srgb(c11_vec3 c) {
|
|||||||
float start = 0.0f;
|
float start = 0.0f;
|
||||||
float end = c.y;
|
float end = c.y;
|
||||||
float range[2] = {0.0f, 0.4f};
|
float range[2] = {0.0f, 0.4f};
|
||||||
float resolution = (range[1] - range[0]) / powf(2, 13);
|
float resolution = (range[1] - range[0]) / pow(2, 13);
|
||||||
float _last_good_c = clamped.y;
|
float _last_good_c = clamped.y;
|
||||||
|
|
||||||
while(end - start > resolution) {
|
while(end - start > resolution) {
|
||||||
@ -142,8 +142,8 @@ static c11_vec3 srgb_to_hsv(c11_vec3 c) {
|
|||||||
float g = c.y;
|
float g = c.y;
|
||||||
float b = c.z;
|
float b = c.z;
|
||||||
|
|
||||||
float maxc = fmaxf(r, fmaxf(g, b));
|
float maxc = fmax(r, fmax(g, b));
|
||||||
float minc = fminf(r, fminf(g, b));
|
float minc = fmin(r, fmin(g, b));
|
||||||
float v = maxc;
|
float v = maxc;
|
||||||
if(minc == maxc) {
|
if(minc == maxc) {
|
||||||
return (c11_vec3){
|
return (c11_vec3){
|
||||||
@ -163,7 +163,7 @@ static c11_vec3 srgb_to_hsv(c11_vec3 c) {
|
|||||||
} else {
|
} else {
|
||||||
h = 4.0f + gc - rc;
|
h = 4.0f + gc - rc;
|
||||||
}
|
}
|
||||||
h = fmodf(h / 6.0f, 1.0f);
|
h = fmod(h / 6.0f, 1.0f);
|
||||||
return (c11_vec3){
|
return (c11_vec3){
|
||||||
{h, s, v}
|
{h, s, v}
|
||||||
};
|
};
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#include "pocketpy/pocketpy.h"
|
#include "pocketpy/pocketpy.h"
|
||||||
#include "pocketpy/interpreter/vm.h"
|
#include "pocketpy/interpreter/vm.h"
|
||||||
|
|
||||||
#include <math.h>
|
#include "pocketpy/common/math.h"
|
||||||
|
|
||||||
// https://easings.net/
|
// https://easings.net/
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
#include "pocketpy/objects/object.h"
|
#include "pocketpy/objects/object.h"
|
||||||
#include "pocketpy/common/sstream.h"
|
#include "pocketpy/common/sstream.h"
|
||||||
#include "pocketpy/interpreter/vm.h"
|
#include "pocketpy/interpreter/vm.h"
|
||||||
#include <math.h>
|
#include "pocketpy/common/math.h"
|
||||||
|
|
||||||
static bool json_loads(int argc, py_Ref argv) {
|
static bool json_loads(int argc, py_Ref argv) {
|
||||||
PY_CHECK_ARGC(1);
|
PY_CHECK_ARGC(1);
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
#include "pocketpy/common/sstream.h"
|
#include "pocketpy/common/sstream.h"
|
||||||
#include "pocketpy/interpreter/vm.h"
|
#include "pocketpy/interpreter/vm.h"
|
||||||
|
|
||||||
#include <math.h>
|
#include "pocketpy/common/math.h"
|
||||||
|
|
||||||
#define ONE_ARG_FUNC(name, func) \
|
#define ONE_ARG_FUNC(name, func) \
|
||||||
static bool math_##name(int argc, py_Ref argv) { \
|
static bool math_##name(int argc, py_Ref argv) { \
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
#include "pocketpy/common/utils.h"
|
#include "pocketpy/common/utils.h"
|
||||||
#include "pocketpy/interpreter/vm.h"
|
#include "pocketpy/interpreter/vm.h"
|
||||||
#include "pocketpy/objects/object.h"
|
#include "pocketpy/objects/object.h"
|
||||||
#include <math.h>
|
#include "pocketpy/common/math.h"
|
||||||
|
|
||||||
static bool isclose(float a, float b) { return fabs(a - b) < 1e-4; }
|
static bool isclose(float a, float b) { return fabs(a - b) < 1e-4; }
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
#include "pocketpy/common/_generated.h"
|
#include "pocketpy/common/_generated.h"
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <math.h>
|
#include "pocketpy/common/math.h"
|
||||||
|
|
||||||
py_Ref py_getmodule(const char* path) {
|
py_Ref py_getmodule(const char* path) {
|
||||||
VM* vm = pk_current_vm;
|
VM* vm = pk_current_vm;
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
#include "pocketpy/common/sstream.h"
|
#include "pocketpy/common/sstream.h"
|
||||||
#include "pocketpy/pocketpy.h"
|
#include "pocketpy/pocketpy.h"
|
||||||
|
|
||||||
#include <math.h>
|
#include "pocketpy/common/math.h"
|
||||||
|
|
||||||
static bool try_castfloat(py_Ref self, double* out) {
|
static bool try_castfloat(py_Ref self, double* out) {
|
||||||
switch(self->type) {
|
switch(self->type) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user