Go to:
Gentoo Home
Documentation
Forums
Lists
Bugs
Planet
Store
Wiki
Get Gentoo!
Gentoo's Bugzilla – Attachment 81968 Details for
Bug 100289
Glibc patches to enhance performance on x86_64.
Home
|
New
–
[Ex]
|
Browse
|
Search
|
Privacy Policy
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
libm patch against glibc 2.4
1010_all_glibc-2.3.3-x86_64-new-libm-20060312.patch (text/plain), 434.38 KB, created by
Simon Strandman
on 2006-03-12 04:34:19 UTC
(
hide
)
Description:
libm patch against glibc 2.4
Filename:
MIME Type:
Creator:
Simon Strandman
Created:
2006-03-12 04:34:19 UTC
Size:
434.38 KB
patch
obsolete
>============================================================ >Index: sysdeps/x86_64/fpu/Makefile >--- sysdeps/x86_64/fpu/Makefile created >+++ sysdeps/x86_64/fpu/Makefile 2002-12-03 15:46:18.000000000 +0100 1.1 >@@ -0,0 +1,3 @@ >+ifeq ($(subdir),math) >+libm-sysdep_routines += w_remainder_piby2 w_remainder_piby2f >+endif >============================================================ >Index: sysdeps/x86_64/fpu/e_acos.c >--- sysdeps/x86_64/fpu/e_acos.c created >+++ sysdeps/x86_64/fpu/e_acos.c 2002-12-03 13:42:49.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_acosf.c >--- sysdeps/x86_64/fpu/e_acosf.c created >+++ sysdeps/x86_64/fpu/e_acosf.c 2002-12-03 13:42:49.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_asin.c >--- sysdeps/x86_64/fpu/e_asin.c created >+++ sysdeps/x86_64/fpu/e_asin.c 2002-12-03 13:42:49.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_asinf.c >--- sysdeps/x86_64/fpu/e_asinf.c created >+++ sysdeps/x86_64/fpu/e_asinf.c 2002-12-03 13:42:49.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_exp.c >--- sysdeps/x86_64/fpu/e_exp.c created >+++ sysdeps/x86_64/fpu/e_exp.c 2002-12-03 13:42:49.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_exp10.c >--- sysdeps/x86_64/fpu/e_exp10.c created >+++ sysdeps/x86_64/fpu/e_exp10.c 2002-12-03 13:42:50.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. 
*/ >============================================================ >Index: sysdeps/x86_64/fpu/e_exp10f.c >--- sysdeps/x86_64/fpu/e_exp10f.c created >+++ sysdeps/x86_64/fpu/e_exp10f.c 2002-12-03 13:42:50.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_exp2.c >--- sysdeps/x86_64/fpu/e_exp2.c created >+++ sysdeps/x86_64/fpu/e_exp2.c 2002-12-03 13:42:50.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_exp2f.c >--- sysdeps/x86_64/fpu/e_exp2f.c created >+++ sysdeps/x86_64/fpu/e_exp2f.c 2002-12-03 13:42:50.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_expf.c >--- sysdeps/x86_64/fpu/e_expf.c created >+++ sysdeps/x86_64/fpu/e_expf.c 2002-12-03 13:42:50.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_fmod.c >--- sysdeps/x86_64/fpu/e_fmod.c created >+++ sysdeps/x86_64/fpu/e_fmod.c 2002-12-03 13:42:51.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_fmodf.c >--- sysdeps/x86_64/fpu/e_fmodf.c created >+++ sysdeps/x86_64/fpu/e_fmodf.c 2002-12-03 13:42:51.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_hypot.c >--- sysdeps/x86_64/fpu/e_hypot.c created >+++ sysdeps/x86_64/fpu/e_hypot.c 2002-12-03 13:42:51.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_hypotf.c >--- sysdeps/x86_64/fpu/e_hypotf.c created >+++ sysdeps/x86_64/fpu/e_hypotf.c 2002-12-03 13:42:51.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. 
*/ >============================================================ >Index: sysdeps/x86_64/fpu/e_log.c >--- sysdeps/x86_64/fpu/e_log.c created >+++ sysdeps/x86_64/fpu/e_log.c 2002-12-03 13:42:51.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_log10.c >--- sysdeps/x86_64/fpu/e_log10.c created >+++ sysdeps/x86_64/fpu/e_log10.c 2002-12-03 13:42:52.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_log10f.c >--- sysdeps/x86_64/fpu/e_log10f.c created >+++ sysdeps/x86_64/fpu/e_log10f.c 2002-12-03 13:42:52.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_log2.c >--- sysdeps/x86_64/fpu/e_log2.c created >+++ sysdeps/x86_64/fpu/e_log2.c 2002-12-03 13:42:52.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_log2f.c >--- sysdeps/x86_64/fpu/e_log2f.c created >+++ sysdeps/x86_64/fpu/e_log2f.c 2002-12-03 13:42:52.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_logf.c >--- sysdeps/x86_64/fpu/e_logf.c created >+++ sysdeps/x86_64/fpu/e_logf.c 2002-12-03 13:42:52.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_pow.c >--- sysdeps/x86_64/fpu/e_pow.c created >+++ sysdeps/x86_64/fpu/e_pow.c 2002-12-03 13:42:52.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_powf.c >--- sysdeps/x86_64/fpu/e_powf.c created >+++ sysdeps/x86_64/fpu/e_powf.c 2002-12-03 13:42:53.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. 
*/ >============================================================ >Index: sysdeps/x86_64/fpu/e_remainder.c >--- sysdeps/x86_64/fpu/e_remainder.c created >+++ sysdeps/x86_64/fpu/e_remainder.c 2002-12-03 13:42:53.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_remainderf.c >--- sysdeps/x86_64/fpu/e_remainderf.c created >+++ sysdeps/x86_64/fpu/e_remainderf.c 2002-12-03 13:42:53.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_sinh.c >--- sysdeps/x86_64/fpu/e_sinh.c created >+++ sysdeps/x86_64/fpu/e_sinh.c 2002-12-03 13:42:53.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/e_sinhf.c >--- sysdeps/x86_64/fpu/e_sinhf.c created >+++ sysdeps/x86_64/fpu/e_sinhf.c 2002-12-03 13:42:53.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/libm_amd.h >--- sysdeps/x86_64/fpu/libm_amd.h created >+++ sysdeps/x86_64/fpu/libm_amd.h 2002-12-03 13:42:54.000000000 +0100 1.1 >@@ -0,0 +1,32 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#ifndef LIBM_AMD_H_INCLUDED >+#define LIBM_AMD_H_INCLUDED 1 >+ >+/* The following definition of weak_alias is extracted from >+ libc-symbols.h */ >+ >+/* Define ALIASNAME as a weak alias for NAME. >+ If weak aliases are not available, this defines a strong alias. 
*/ >+# define weak_alias(name, aliasname) _weak_alias (name, aliasname) >+# define _weak_alias(name, aliasname) \ >+ extern __typeof (name) aliasname __attribute__ ((weak, alias (#name))); >+ >+#include <math.h> >+ >+extern double chgsign(double x); >+extern float chgsignf(float x); >+ >+extern double fma(double x, double y, double z); >+extern float fmaf(float x, float y, float z); >+ >+extern void __remainder_piby2(double x, double *r, double *rr, int *region); >+extern void __remainder_piby2f(float x, double *r, int *region); >+ >+#endif /* LIBM_AMD_H_INCLUDED */ >============================================================ >Index: sysdeps/x86_64/fpu/libm_errno_amd.h >--- sysdeps/x86_64/fpu/libm_errno_amd.h created >+++ sysdeps/x86_64/fpu/libm_errno_amd.h 2002-12-03 13:42:54.000000000 +0100 1.1 >@@ -0,0 +1,18 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#ifndef LIBM_ERRNO_AMD_H_INCLUDED >+#define LIBM_ERRNO_AMD_H_INCLUDED 1 >+ >+#include <stdio.h> >+#include <errno.h> >+#ifndef __set_errno >+#define __set_errno(x) errno = (x) >+#endif >+ >+#endif /* LIBM_ERRNO_AMD_H_INCLUDED */ >============================================================ >Index: sysdeps/x86_64/fpu/libm_inlines_amd.h >--- sysdeps/x86_64/fpu/libm_inlines_amd.h created >+++ sysdeps/x86_64/fpu/libm_inlines_amd.h 2002-12-03 13:43:00.000000000 +0100 1.1 >@@ -0,0 +1,2260 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#ifndef LIBM_INLINES_AMD_H_INCLUDED >+#define LIBM_INLINES_AMD_H_INCLUDED 1 >+ >+#include "libm_util_amd.h" >+ >+#ifdef WIN32 >+#define inline __inline >+#endif >+ >+/* Set defines for inline functions calling other inlines */ >+#if defined(USE_VAL_WITH_FLAGS) || defined(USE_VALF_WITH_FLAGS) || \ >+ defined(USE_ZERO_WITH_FLAGS) || defined(USE_ZEROF_WITH_FLAGS) || \ >+ defined(USE_NAN_WITH_FLAGS) || defined(USE_NANF_WITH_FLAGS) || \ >+ defined(USE_INFINITY_WITH_FLAGS) || defined(USE_INFINITYF_WITH_FLAGS) || \ >+ defined(USE_SQRT_AMD_INLINE) || defined(USE_SQRTF_AMD_INLINE) >+#undef USE_RAISE_FPSW_FLAGS >+#define USE_RAISE_FPSW_FLAGS 1 >+#endif >+ >+#if defined(USE_SPLITDOUBLE) >+/* Splits double x into exponent e and mantissa m, where 0.5 <= abs(m) < 1.0. >+ Assumes that x is not zero, denormal, infinity or NaN, but these conditions >+ are not checked */ >+static inline void splitDouble(double x, int *e, double *m) >+{ >+ unsigned long ux, uy; >+ GET_BITS_DP64(x, ux); >+ uy = ux; >+ ux &= EXPBITS_DP64; >+ ux >>= EXPSHIFTBITS_DP64; >+ *e = (int)ux - EXPBIAS_DP64 + 1; >+ uy = (uy & (SIGNBIT_DP64 | MANTBITS_DP64)) | HALFEXPBITS_DP64; >+ PUT_BITS_DP64(uy, x); >+ *m = x; >+} >+#endif /* USE_SPLITDOUBLE */ >+ >+ >+#if defined(USE_SPLITDOUBLE_2) >+/* Splits double x into exponent e and mantissa m, where 1.0 <= abs(m) < 4.0. >+ Assumes that x is not zero, denormal, infinity or NaN, but these conditions >+ are not checked. Also assumes EXPBIAS_DP64 is odd. With this >+ assumption, e will be even on exit. 
*/ >+static inline void splitDouble_2(double x, int *e, double *m) >+{ >+ unsigned long ux, vx; >+ GET_BITS_DP64(x, ux); >+ vx = ux; >+ ux &= EXPBITS_DP64; >+ ux >>= EXPSHIFTBITS_DP64; >+ if (ux & 1) >+ { >+ /* The exponent is odd */ >+ vx = (vx & (SIGNBIT_DP64 | MANTBITS_DP64)) | ONEEXPBITS_DP64; >+ PUT_BITS_DP64(vx, x); >+ *m = x; >+ *e = ux - EXPBIAS_DP64; >+ } >+ else >+ { >+ /* The exponent is even */ >+ vx = (vx & (SIGNBIT_DP64 | MANTBITS_DP64)) | TWOEXPBITS_DP64; >+ PUT_BITS_DP64(vx, x); >+ *m = x; >+ *e = ux - EXPBIAS_DP64 - 1; >+ } >+} >+#endif /* USE_SPLITDOUBLE_2 */ >+ >+ >+#if defined(USE_SPLITFLOAT) >+/* Splits float x into exponent e and mantissa m, where 0.5 <= abs(m) < 1.0. >+ Assumes that x is not zero, denormal, infinity or NaN, but these conditions >+ are not checked */ >+static inline void splitFloat(float x, int *e, float *m) >+{ >+ unsigned int ux, uy; >+ GET_BITS_SP32(x, ux); >+ uy = ux; >+ ux &= EXPBITS_SP32; >+ ux >>= EXPSHIFTBITS_SP32; >+ *e = (int)ux - EXPBIAS_SP32 + 1; >+ uy = (uy & (SIGNBIT_SP32 | MANTBITS_SP32)) | HALFEXPBITS_SP32; >+ PUT_BITS_SP32(uy, x); >+ *m = x; >+} >+#endif /* USE_SPLITFLOAT */ >+ >+ >+#if defined(USE_SCALEDOUBLE_1) >+/* Scales the double x by 2.0**n. >+ Assumes EMIN <= n <= EMAX, though this condition is not checked. */ >+static inline double scaleDouble_1(double x, int n) >+{ >+ double t; >+ /* Construct the number t = 2.0**n */ >+ PUT_BITS_DP64(((long)n + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t); >+ return x*t; >+} >+#endif /* USE_SCALEDOUBLE_1 */ >+ >+ >+#if defined(USE_SCALEDOUBLE_2) >+/* Scales the double x by 2.0**n. >+ Assumes 2*EMIN <= n <= 2*EMAX, though this condition is not checked. 
*/ >+static inline double scaleDouble_2(double x, int n) >+{ >+ double t1, t2; >+ int n1, n2; >+ n1 = n / 2; >+ n2 = n - n1; >+ /* Construct the numbers t1 = 2.0**n1 and t2 = 2.0**n2 */ >+ PUT_BITS_DP64(((long)n1 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t1); >+ PUT_BITS_DP64(((long)n2 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t2); >+ return (x*t1)*t2; >+} >+#endif /* USE_SCALEDOUBLE_2 */ >+ >+ >+#if defined(USE_SCALEDOUBLE_3) >+/* Scales the double x by 2.0**n. >+ Assumes 3*EMIN <= n <= 3*EMAX, though this condition is not checked. */ >+static inline double scaleDouble_3(double x, int n) >+{ >+ double t1, t2, t3; >+ int n1, n2, n3; >+ n1 = n / 3; >+ n2 = (n - n1) / 2; >+ n3 = n - n1 - n2; >+ /* Construct the numbers t1 = 2.0**n1, t2 = 2.0**n2 and t3 = 2.0**n3 */ >+ PUT_BITS_DP64(((long)n1 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t1); >+ PUT_BITS_DP64(((long)n2 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t2); >+ PUT_BITS_DP64(((long)n3 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t3); >+ return ((x*t1)*t2)*t3; >+} >+#endif /* USE_SCALEDOUBLE_3 */ >+ >+ >+#if defined(USE_SCALEFLOAT_1) >+/* Scales the float x by 2.0**n. >+ Assumes EMIN <= n <= EMAX, though this condition is not checked. */ >+static inline double scaleFloat_1(float x, int n) >+{ >+ float t; >+ /* Construct the number t = 2.0**n */ >+ PUT_BITS_SP32((n + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t); >+ return x*t; >+} >+#endif /* USE_SCALEFLOAT_1 */ >+ >+ >+#if defined(USE_SCALEFLOAT_2) >+/* Scales the float x by 2.0**n. >+ Assumes 2*EMIN <= n <= 2*EMAX, though this condition is not checked. */ >+static inline float scaleFloat_2(float x, int n) >+{ >+ float t1, t2; >+ int n1, n2; >+ n1 = n / 2; >+ n2 = n - n1; >+ /* Construct the numbers t1 = 2.0**n1 and t2 = 2.0**n2 */ >+ PUT_BITS_SP32((n1 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t1); >+ PUT_BITS_SP32((n2 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t2); >+ return (x*t1)*t2; >+} >+#endif /* USE_SCALEFLOAT_2 */ >+ >+ >+#if defined(USE_SCALEFLOAT_3) >+/* Scales the float x by 2.0**n. 
>+ Assumes 3*EMIN <= n <= 3*EMAX, though this condition is not checked. */ >+static inline double scaleFloat_3(float x, int n) >+{ >+ float t1, t2, t3; >+ int n1, n2, n3; >+ n1 = n / 3; >+ n2 = (n - n1) / 2; >+ n3 = n - n1 - n2; >+ /* Construct the numbers t1 = 2.0**n1, t2 = 2.0**n2 and t3 = 2.0**n3 */ >+ PUT_BITS_SP32((n1 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t1); >+ PUT_BITS_SP32((n2 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t2); >+ PUT_BITS_SP32((n3 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t3); >+ return ((x*t1)*t2)*t3; >+} >+#endif /* USE_SCALEFLOAT_3 */ >+ >+#if defined(USE_SETPRECISIONDOUBLE) >+unsigned int setPrecisionDouble(void) >+{ >+ unsigned int cw, cwold = 0; >+#if defined(WIN32) >+ __asm fstcw cwold; >+ cw = cwold & (~0x00000300); /* These two bits control rounding precision */ >+ cw |= AMD_F_DOUBLE; >+ __asm fldcw cw; >+#elif defined(linux) >+ /* There is no precision control on Hammer */ >+#else >+ /* Do nowt */ >+#endif >+ return cwold; >+} >+#endif /* USE_SETPRECISIONDOUBLE */ >+ >+#if defined(USE_RESTOREPRECISION) >+void restorePrecision(unsigned int cwold) >+{ >+#if defined(WIN32) >+ __asm fldcw cwold; >+#elif defined(linux) >+ /* There is no precision control on Hammer */ >+#else >+ /* Do nowt */ >+#endif >+ return; >+} >+#endif /* USE_RESTOREPRECISION */ >+ >+ >+#if defined(USE_CLEAR_FPSW_FLAGS) >+/* Clears floating-point status flags. The argument should be >+ the bitwise or of the flags to be cleared, from the >+ list above, e.g. 
>+ clear_fpsw_flags(AMD_F_INEXACT | AMD_F_INVALID); >+ */ >+static inline void clear_fpsw_flags(int flags) >+{ >+#if defined(WIN32) >+ fpenv_type fenv; >+ /* Get the current floating-point environment */ >+ __asm fnstenv fenv; >+ fenv.status_word &= (~flags); >+ /* Put the floating-point environment back */ >+ __asm fldenv fenv; >+#elif defined(linux) >+ unsigned int cw; >+ /* Get the current floating-point control/status word */ >+ asm volatile ("STMXCSR %0" : "=m" (cw)); >+ cw &= (~flags); >+ asm volatile ("LDMXCSR %0" : : "m" (cw)); >+#else >+#error Unknown machine >+#endif >+} >+#endif /* USE_CLEAR_FPSW_FLAGS */ >+ >+ >+#if defined(USE_RAISE_FPSW_FLAGS) >+/* Raises floating-point status flags. The argument should be >+ the bitwise or of the flags to be raised, from the >+ list above, e.g. >+ raise_fpsw_flags(AMD_F_INEXACT | AMD_F_INVALID); >+ */ >+static inline void raise_fpsw_flags(int flags) >+{ >+#if defined(WIN32) >+ fpenv_type fenv; >+ /* Get the current floating-point environment */ >+ __asm fnstenv fenv; >+ fenv.status_word |= flags; >+ /* Put the floating-point environment back */ >+ __asm fldenv fenv; >+#elif defined(linux) >+ unsigned int cw; >+ /* Get the current floating-point control/status word */ >+ asm volatile ("STMXCSR %0" : "=m" (cw)); >+ cw |= flags; >+ asm volatile ("LDMXCSR %0" : : "m" (cw)); >+#else >+#error Unknown machine >+#endif >+} >+#endif /* USE_RAISE_FPSW_FLAGS */ >+ >+ >+#if defined(USE_GET_FPSW_INLINE) >+/* Return the current floating-point status word */ >+static inline unsigned int get_fpsw_inline(void) >+{ >+#if defined(WIN32) >+ unsigned short sw; >+ __asm fstsw sw; >+ return (unsigned int)sw; >+#elif defined(linux) >+ unsigned int sw; >+ asm volatile ("STMXCSR %0" : "=m" (sw)); >+ return sw; >+#else >+#error Unknown machine >+#endif >+} >+#endif /* USE_GET_FPSW_INLINE */ >+ >+#if defined(USE_SET_FPSW_INLINE) >+/* Set the floating-point status word */ >+static inline void set_fpsw_inline(unsigned int sw) >+{ >+#if 
defined(WIN32) >+ fpenv_type fenv; >+ /* Get the current floating-point environment */ >+ __asm fnstenv fenv; >+ /* Set the status word to sw */ >+ fenv.status_word = (unsigned short)sw; >+ /* Put the floating-point environment back */ >+ __asm fldenv fenv; >+#elif defined(linux) >+ /* Set the current floating-point control/status word */ >+ asm volatile ("LDMXCSR %0" : : "m" (sw)); >+#else >+#error Unknown machine >+#endif >+} >+#endif /* USE_SET_FPSW_INLINE */ >+ >+#if defined(USE_CLEAR_FPSW_INLINE) >+/* Clear all exceptions from the floating-point status word */ >+static inline void clear_fpsw_inline(void) >+{ >+#if defined(WIN32) >+ fpenv_type fenv; >+ /* Get the current floating-point environment */ >+ __asm fnstenv fenv; >+ /* Set the status word to 0 */ >+ fenv.status_word = 0; >+ /* Put the floating-point environment back */ >+ __asm fldenv fenv; >+#elif defined(linux) >+ unsigned int cw; >+ /* Get the current floating-point control/status word */ >+ asm volatile ("STMXCSR %0" : "=m" (cw)); >+ cw &= ~(AMD_F_INEXACT | AMD_F_UNDERFLOW | AMD_F_OVERFLOW | >+ AMD_F_DIVBYZERO | AMD_F_INVALID); >+ asm volatile ("LDMXCSR %0" : : "m" (cw)); >+#else >+#error Unknown machine >+#endif >+} >+#endif /* USE_CLEAR_FPSW_INLINE */ >+ >+ >+#if defined(USE_VAL_WITH_FLAGS) >+/* Returns a double value after raising the given flags, >+ e.g. val_with_flags(x, AMD_F_INEXACT); >+ */ >+static inline double val_with_flags(double val, int flags) >+{ >+ raise_fpsw_flags(flags); >+ return val; >+} >+#endif /* USE_VAL_WITH_FLAGS */ >+ >+#if defined(USE_VALF_WITH_FLAGS) >+/* Returns a float value after raising the given flags, >+ e.g. valf_with_flags(x, AMD_F_INEXACT); >+ */ >+static inline float valf_with_flags(float val, int flags) >+{ >+ raise_fpsw_flags(flags); >+ return val; >+} >+#endif /* USE_VALF_WITH_FLAGS */ >+ >+ >+#if defined(USE_ZERO_WITH_FLAGS) >+/* Returns a double +zero after raising the given flags, >+ e.g. 
zero_with_flags(AMD_F_INEXACT | AMD_F_INVALID); >+ */ >+static inline double zero_with_flags(int flags) >+{ >+ raise_fpsw_flags(flags); >+ return 0.0; >+} >+#endif /* USE_ZERO_WITH_FLAGS */ >+ >+ >+#if defined(USE_ZEROF_WITH_FLAGS) >+/* Returns a float +zero after raising the given flags, >+ e.g. zerof_with_flags(AMD_F_INEXACT | AMD_F_INVALID); >+ */ >+static inline float zerof_with_flags(int flags) >+{ >+ raise_fpsw_flags(flags); >+ return 0.0F; >+} >+#endif /* USE_ZEROF_WITH_FLAGS */ >+ >+ >+#if defined(USE_NAN_WITH_FLAGS) >+/* Returns a double quiet +nan after raising the given flags, >+ e.g. nan_with_flags(AMD_F_INVALID); >+*/ >+static inline double nan_with_flags(int flags) >+{ >+ double z; >+ raise_fpsw_flags(flags); >+ PUT_BITS_DP64(0x7ff8000000000000, z); >+ return z; >+} >+#endif /* USE_NAN_WITH_FLAGS */ >+ >+#if defined(USE_NANF_WITH_FLAGS) >+/* Returns a float quiet +nan after raising the given flags, >+ e.g. nanf_with_flags(AMD_F_INVALID); >+*/ >+static inline float nanf_with_flags(int flags) >+{ >+ float z; >+ raise_fpsw_flags(flags); >+ PUT_BITS_SP32(0x7fc00000, z); >+ return z; >+} >+#endif /* USE_NANF_WITH_FLAGS */ >+ >+ >+#ifdef USE_INFINITY_WITH_FLAGS >+/* Returns a positive double infinity after raising the given flags, >+ e.g. infinity_with_flags(AMD_F_OVERFLOW); >+*/ >+static inline double infinity_with_flags(int flags) >+{ >+ double z; >+ raise_fpsw_flags(flags); >+ PUT_BITS_DP64((unsigned long)(BIASEDEMAX_DP64 + 1) << EXPSHIFTBITS_DP64, z); >+ return z; >+} >+#endif /* USE_INFINITY_WITH_FLAGS */ >+ >+#ifdef USE_INFINITYF_WITH_FLAGS >+/* Returns a positive float infinity after raising the given flags, >+ e.g. 
infinityf_with_flags(AMD_F_OVERFLOW); >+*/ >+static inline float infinityf_with_flags(int flags) >+{ >+ float z; >+ raise_fpsw_flags(flags); >+ PUT_BITS_SP32((BIASEDEMAX_SP32 + 1) << EXPSHIFTBITS_SP32, z); >+ return z; >+} >+#endif /* USE_INFINITYF_WITH_FLAGS */ >+ >+ >+#if defined(USE_SPLITEXP) >+/* Compute the values m, z1, and z2 such that base**x = 2**m * (z1 + z2). >+ Small arguments abs(x) < 1/(16*ln(base)) and extreme arguments >+ abs(x) > large/(ln(base)) (where large is the largest representable >+ floating point number) should be handled separately instead of calling >+ this function. This function is called by exp_amd, exp2_amd, exp10_amd, >+ cosh_amd and sinh_amd. */ >+static inline void splitexp(double x, double logbase, >+ double thirtytwo_by_logbaseof2, >+ double logbaseof2_by_32_lead, >+ double logbaseof2_by_32_trail, >+ int *m, double *z1, double *z2) >+{ >+ double q, r, r1, r2, f1, f2; >+ int n, j; >+ >+/* Arrays two_to_jby32_lead_table and two_to_jby32_trail_table contain >+ leading and trailing parts respectively of precomputed >+ values of pow(2.0,j/32.0), for j = 0, 1, ..., 31. >+ two_to_jby32_lead_table contains the first 25 bits of precision, >+ and two_to_jby32_trail_table contains a further 53 bits precision. 
*/ >+ >+ static const double two_to_jby32_lead_table[32] = { >+ 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ >+ 1.02189713716506958008e+00, /* 0x3ff059b0d0000000 */ >+ 1.04427373409271240234e+00, /* 0x3ff0b55860000000 */ >+ 1.06714040040969848633e+00, /* 0x3ff11301d0000000 */ >+ 1.09050768613815307617e+00, /* 0x3ff172b830000000 */ >+ 1.11438673734664916992e+00, /* 0x3ff1d48730000000 */ >+ 1.13878858089447021484e+00, /* 0x3ff2387a60000000 */ >+ 1.16372483968734741211e+00, /* 0x3ff29e9df0000000 */ >+ 1.18920707702636718750e+00, /* 0x3ff306fe00000000 */ >+ 1.21524733304977416992e+00, /* 0x3ff371a730000000 */ >+ 1.24185776710510253906e+00, /* 0x3ff3dea640000000 */ >+ 1.26905095577239990234e+00, /* 0x3ff44e0860000000 */ >+ 1.29683953523635864258e+00, /* 0x3ff4bfdad0000000 */ >+ 1.32523661851882934570e+00, /* 0x3ff5342b50000000 */ >+ 1.35425549745559692383e+00, /* 0x3ff5ab07d0000000 */ >+ 1.38390988111495971680e+00, /* 0x3ff6247eb0000000 */ >+ 1.41421353816986083984e+00, /* 0x3ff6a09e60000000 */ >+ 1.44518077373504638672e+00, /* 0x3ff71f75e0000000 */ >+ 1.47682613134384155273e+00, /* 0x3ff7a11470000000 */ >+ 1.50916439294815063477e+00, /* 0x3ff8258990000000 */ >+ 1.54221081733703613281e+00, /* 0x3ff8ace540000000 */ >+ 1.57598084211349487305e+00, /* 0x3ff93737b0000000 */ >+ 1.61049032211303710938e+00, /* 0x3ff9c49180000000 */ >+ 1.64575546979904174805e+00, /* 0x3ffa5503b0000000 */ >+ 1.68179279565811157227e+00, /* 0x3ffae89f90000000 */ >+ 1.71861928701400756836e+00, /* 0x3ffb7f76f0000000 */ >+ 1.75625211000442504883e+00, /* 0x3ffc199bd0000000 */ >+ 1.79470902681350708008e+00, /* 0x3ffcb720d0000000 */ >+ 1.83400803804397583008e+00, /* 0x3ffd5818d0000000 */ >+ 1.87416762113571166992e+00, /* 0x3ffdfc9730000000 */ >+ 1.91520655155181884766e+00, /* 0x3ffea4afa0000000 */ >+ 1.95714408159255981445e+00}; /* 0x3fff507650000000 */ >+ >+ static const double two_to_jby32_trail_table[32] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 1.14890470981563546737e-08, 
/* 0x3e48ac2ba1d73e2a */ >+ 4.83347014379782142328e-08, /* 0x3e69f3121ec53172 */ >+ 2.67125131841396124714e-10, /* 0x3df25b50a4ebbf1b */ >+ 4.65271045830351350190e-08, /* 0x3e68faa2f5b9bef9 */ >+ 5.24924336638693782574e-09, /* 0x3e368b9aa7805b80 */ >+ 5.38622214388600821910e-08, /* 0x3e6ceac470cd83f6 */ >+ 1.90902301017041969782e-08, /* 0x3e547f7b84b09745 */ >+ 3.79763538792174980894e-08, /* 0x3e64636e2a5bd1ab */ >+ 2.69306947081946450986e-08, /* 0x3e5ceaa72a9c5154 */ >+ 4.49683815095311756138e-08, /* 0x3e682468446b6824 */ >+ 1.41933332021066904914e-09, /* 0x3e18624b40c4dbd0 */ >+ 1.94146510233556266402e-08, /* 0x3e54d8a89c750e5e */ >+ 2.46409119489264118569e-08, /* 0x3e5a753e077c2a0f */ >+ 4.94812958044698886494e-08, /* 0x3e6a90a852b19260 */ >+ 8.48872238075784476136e-10, /* 0x3e0d2ac258f87d03 */ >+ 2.42032342089579394887e-08, /* 0x3e59fcef32422cbf */ >+ 3.32420002333182569170e-08, /* 0x3e61d8bee7ba46e2 */ >+ 1.45956577586525322754e-08, /* 0x3e4f580c36bea881 */ >+ 3.46452721050003920866e-08, /* 0x3e62999c25159f11 */ >+ 8.07090469079979051284e-09, /* 0x3e415506dadd3e2a */ >+ 2.99439161340839520436e-09, /* 0x3e29b8bc9e8a0388 */ >+ 9.83621719880452147153e-09, /* 0x3e451f8480e3e236 */ >+ 8.35492309647188080486e-09, /* 0x3e41f12ae45a1224 */ >+ 3.48493175137966283582e-08, /* 0x3e62b5a75abd0e6a */ >+ 1.11084703472699692902e-08, /* 0x3e47daf237553d84 */ >+ 5.03688744342840346564e-08, /* 0x3e6b0aa538444196 */ >+ 4.81896001063495806249e-08, /* 0x3e69df20d22a0798 */ >+ 4.83653666334089557746e-08, /* 0x3e69f7490e4bb40b */ >+ 1.29745882314081237628e-08, /* 0x3e4bdcdaf5cb4656 */ >+ 9.84532844621636118964e-09, /* 0x3e452486cc2c7b9d */ >+ 4.25828404545651943883e-08}; /* 0x3e66dc8a80ce9f09 */ >+ >+ /* >+ Step 1. Reduce the argument. >+ >+ To perform argument reduction, we find the integer n such that >+ x = n * logbaseof2/32 + remainder, |remainder| <= logbaseof2/64. >+ n is defined by round-to-nearest-integer( x*32/logbaseof2 ) and >+ remainder by x - n*logbaseof2/32. 
The calculation of n is >+ straightforward whereas the computation of x - n*logbaseof2/32 >+ must be carried out carefully. >+ logbaseof2/32 is so represented in two pieces that >+ (1) logbaseof2/32 is known to extra precision, (2) the product >+ of n and the leading piece is a model number and is hence >+ calculated without error, and (3) the subtraction of the value >+ obtained in (2) from x is a model number and is hence again >+ obtained without error. >+ */ >+ >+ r = x * thirtytwo_by_logbaseof2; >+ /* Set n = nearest integer to r */ >+ /* This is faster on Hammer */ >+ if (r > 0) >+ n = (int)(r + 0.5); >+ else >+ n = (int)(r - 0.5); >+ >+ r1 = x - n * logbaseof2_by_32_lead; >+ r2 = - n * logbaseof2_by_32_trail; >+ >+ /* Set j = n mod 32: 5 mod 32 = 5, -5 mod 32 = 27, etc. */ >+ /* j = n % 32; >+ if (j < 0) j += 32; */ >+ j = n & 0x0000001f; >+ >+ f1 = two_to_jby32_lead_table[j]; >+ f2 = two_to_jby32_trail_table[j]; >+ >+ *m = (n - j) / 32; >+ >+ /* Step 2. The following is the core approximation. We approximate >+ exp(r1+r2)-1 by a polynomial. */ >+ >+ r1 *= logbase; r2 *= logbase; >+ >+ r = r1 + r2; >+ q = r1 + (r2 + >+ r*r*( 5.00000000000000008883e-01 + >+ r*( 1.66666666665260878863e-01 + >+ r*( 4.16666666662260795726e-02 + >+ r*( 8.33336798434219616221e-03 + >+ r*( 1.38889490863777199667e-03 )))))); >+ >+ /* Step 3. Function value reconstruction. >+ We now reconstruct the exponential of the input argument >+ so that exp(x) = 2**m * (z1 + z2). >+ The order of the computation below must be strictly observed. */ >+ >+ *z1 = f1; >+ *z2 = f2 + ((f1 + f2) * q); >+} >+#endif /* USE_SPLITEXP */ >+ >+ >+#if defined(USE_SPLITEXPF) >+/* Compute the values m, z1, and z2 such that base**x = 2**m * (z1 + z2). >+ Small arguments abs(x) < 1/(16*ln(base)) and extreme arguments >+ abs(x) > large/(ln(base)) (where large is the largest representable >+ floating point number) should be handled separately instead of calling >+ this function. 
This function is called by exp_amd, exp2_amd, exp10_amd, >+ cosh_amd and sinh_amd. */ >+static inline void splitexpf(float x, float logbase, >+ float thirtytwo_by_logbaseof2, >+ float logbaseof2_by_32_lead, >+ float logbaseof2_by_32_trail, >+ int *m, float *z1, float *z2) >+{ >+ float q, r, r1, r2, f1, f2; >+ int n, j; >+ >+/* Arrays two_to_jby32_lead_table and two_to_jby32_trail_table contain >+ leading and trailing parts respectively of precomputed >+ values of pow(2.0,j/32.0), for j = 0, 1, ..., 31. >+ two_to_jby32_lead_table contains the first 10 bits of precision, >+ and two_to_jby32_trail_table contains a further 24 bits precision. */ >+ >+ static const float two_to_jby32_lead_table[32] = { >+ 1.0000000000E+00F, /* 0x3F800000 */ >+ 1.0214843750E+00F, /* 0x3F82C000 */ >+ 1.0429687500E+00F, /* 0x3F858000 */ >+ 1.0664062500E+00F, /* 0x3F888000 */ >+ 1.0898437500E+00F, /* 0x3F8B8000 */ >+ 1.1132812500E+00F, /* 0x3F8E8000 */ >+ 1.1386718750E+00F, /* 0x3F91C000 */ >+ 1.1621093750E+00F, /* 0x3F94C000 */ >+ 1.1875000000E+00F, /* 0x3F980000 */ >+ 1.2148437500E+00F, /* 0x3F9B8000 */ >+ 1.2402343750E+00F, /* 0x3F9EC000 */ >+ 1.2675781250E+00F, /* 0x3FA24000 */ >+ 1.2949218750E+00F, /* 0x3FA5C000 */ >+ 1.3242187500E+00F, /* 0x3FA98000 */ >+ 1.3535156250E+00F, /* 0x3FAD4000 */ >+ 1.3828125000E+00F, /* 0x3FB10000 */ >+ 1.4140625000E+00F, /* 0x3FB50000 */ >+ 1.4433593750E+00F, /* 0x3FB8C000 */ >+ 1.4765625000E+00F, /* 0x3FBD0000 */ >+ 1.5078125000E+00F, /* 0x3FC10000 */ >+ 1.5410156250E+00F, /* 0x3FC54000 */ >+ 1.5742187500E+00F, /* 0x3FC98000 */ >+ 1.6093750000E+00F, /* 0x3FCE0000 */ >+ 1.6445312500E+00F, /* 0x3FD28000 */ >+ 1.6816406250E+00F, /* 0x3FD74000 */ >+ 1.7167968750E+00F, /* 0x3FDBC000 */ >+ 1.7558593750E+00F, /* 0x3FE0C000 */ >+ 1.7929687500E+00F, /* 0x3FE58000 */ >+ 1.8339843750E+00F, /* 0x3FEAC000 */ >+ 1.8730468750E+00F, /* 0x3FEFC000 */ >+ 1.9140625000E+00F, /* 0x3FF50000 */ >+ 1.9570312500E+00F}; /* 0x3FFA8000 */ >+ >+ static const float 
two_to_jby32_trail_table[32] = { >+ 0.0000000000E+00F, /* 0x00000000 */ >+ 4.1277357377E-04F, /* 0x39D86988 */ >+ 1.3050324051E-03F, /* 0x3AAB0D9F */ >+ 7.3415064253E-04F, /* 0x3A407404 */ >+ 6.6398258787E-04F, /* 0x3A2E0F1E */ >+ 1.1054925853E-03F, /* 0x3A90E62D */ >+ 1.1675967835E-04F, /* 0x38F4DCE0 */ >+ 1.6154836630E-03F, /* 0x3AD3BEA3 */ >+ 1.7071149778E-03F, /* 0x3ADFC146 */ >+ 4.0360994171E-04F, /* 0x39D39B9C */ >+ 1.6234370414E-03F, /* 0x3AD4C982 */ >+ 1.4728321694E-03F, /* 0x3AC10C0C */ >+ 1.9176795613E-03F, /* 0x3AFB5AA6 */ >+ 1.0178930825E-03F, /* 0x3A856AD3 */ >+ 7.3992193211E-04F, /* 0x3A41F752 */ >+ 1.0973819299E-03F, /* 0x3A8FD607 */ >+ 1.5106226783E-04F, /* 0x391E6678 */ >+ 1.8214319134E-03F, /* 0x3AEEBD1D */ >+ 2.6364589576E-04F, /* 0x398A39F4 */ >+ 1.3519275235E-03F, /* 0x3AB13329 */ >+ 1.1952003697E-03F, /* 0x3A9CA845 */ >+ 1.7620950239E-03F, /* 0x3AE6F619 */ >+ 1.1153318919E-03F, /* 0x3A923054 */ >+ 1.2242280645E-03F, /* 0x3AA07647 */ >+ 1.5220546629E-04F, /* 0x391F9958 */ >+ 1.8224230735E-03F, /* 0x3AEEDE5F */ >+ 3.9278529584E-04F, /* 0x39CDEEC0 */ >+ 1.7403248930E-03F, /* 0x3AE41B9D */ >+ 2.3711356334E-05F, /* 0x37C6E7C0 */ >+ 1.1207590578E-03F, /* 0x3A92E66F */ >+ 1.1440613307E-03F, /* 0x3A95F454 */ >+ 1.1287408415E-04F}; /* 0x38ECB6D0 */ >+ >+ /* >+ Step 1. Reduce the argument. >+ >+ To perform argument reduction, we find the integer n such that >+ x = n * logbaseof2/32 + remainder, |remainder| <= logbaseof2/64. >+ n is defined by round-to-nearest-integer( x*32/logbaseof2 ) and >+ remainder by x - n*logbaseof2/32. The calculation of n is >+ straightforward whereas the computation of x - n*logbaseof2/32 >+ must be carried out carefully. 
>+ logbaseof2/32 is so represented in two pieces that >+ (1) logbaseof2/32 is known to extra precision, (2) the product >+ of n and the leading piece is a model number and is hence >+ calculated without error, and (3) the subtraction of the value >+ obtained in (2) from x is a model number and is hence again >+ obtained without error. >+ */ >+ >+ r = x * thirtytwo_by_logbaseof2; >+ /* Set n = nearest integer to r */ >+ /* This is faster on Hammer */ >+ if (r > 0) >+ n = (int)(r + 0.5F); >+ else >+ n = (int)(r - 0.5F); >+ >+ r1 = x - n * logbaseof2_by_32_lead; >+ r2 = - n * logbaseof2_by_32_trail; >+ >+ /* Set j = n mod 32: 5 mod 32 = 5, -5 mod 32 = 27, etc. */ >+ /* j = n % 32; >+ if (j < 0) j += 32; */ >+ j = n & 0x0000001f; >+ >+ f1 = two_to_jby32_lead_table[j]; >+ f2 = two_to_jby32_trail_table[j]; >+ >+ *m = (n - j) / 32; >+ >+ /* Step 2. The following is the core approximation. We approximate >+ exp(r1+r2)-1 by a polynomial. */ >+ >+ r1 *= logbase; r2 *= logbase; >+ >+ r = r1 + r2; >+ q = r1 + (r2 + >+ r*r*( 5.00000000000000008883e-01F + >+ r*( 1.66666666665260878863e-01F ))); >+ >+ /* Step 3. Function value reconstruction. >+ We now reconstruct the exponential of the input argument >+ so that exp(x) = 2**m * (z1 + z2). >+ The order of the computation below must be strictly observed. */ >+ >+ *z1 = f1; >+ *z2 = f2 + ((f1 + f2) * q); >+} >+#endif /* SPLITEXPF */ >+ >+ >+#if defined(USE_SCALEUPDOUBLE1024) >+/* Scales up a double (normal or denormal) whose bit pattern is given >+ as ux by 2**1024. There are no checks that the input number is >+ scalable by that amount. 
*/ >+static inline void scaleUpDouble1024(unsigned long ux, unsigned long *ur) >+{ /* ux: input bit pattern; the bit pattern of the value scaled by 2**1024 is stored in *ur */ >+ unsigned long uy; >+ double y; >+ >+ if ((ux & EXPBITS_DP64) == 0) >+ { >+ /* ux is denormalised (exponent field is zero). OR in the exponent bits of 4.0 so that, for an IEEE binary64 layout, y = +/-(4 + m*2**-50), where m is the mantissa field of ux */ >+ PUT_BITS_DP64(ux | 0x4010000000000000, y); /* Compensate below for the implicit bit just added; what remains is m*2**-50, the denormal scaled by 2**1024 */ >+ if (ux & SIGNBIT_DP64) >+ y += 4.0; >+ else >+ y -= 4.0; >+ GET_BITS_DP64(y, uy); >+ } >+ else >+ /* ux is normal: adding 0x400 (= 1024) to the exponent field multiplies by 2**1024 */ >+ uy = ux + 0x4000000000000000; >+ >+ *ur = uy; >+ return; >+} >+ >+#endif /* USE_SCALEUPDOUBLE1024 */ >+ >+ >+#if defined(USE_SCALEDOWNDOUBLE) >+/* Scales down a double whose bit pattern is given as ux by 2**k. >+ There are no checks that the input number is scalable by that amount. The resulting bit pattern, with the sign of ux preserved, is stored in *ur; when the scaled exponent field would be less than 1 the result is built as a denormal (or zero). */ >+static inline void scaleDownDouble(unsigned long ux, int k, >+ unsigned long *ur) >+{ >+ unsigned long uy, uk, ax, xsign; >+ int n, shift; >+ xsign = ux & SIGNBIT_DP64; >+ ax = ux & ~SIGNBIT_DP64; >+ n = ((ax & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - k; /* exponent field the scaled result would have */ >+ if (n > 0) >+ { >+ uk = (unsigned long)n << EXPSHIFTBITS_DP64; >+ uy = (ax & ~EXPBITS_DP64) | uk; >+ } >+ else >+ { >+ uy = (ax & ~EXPBITS_DP64) | 0x0010000000000000; /* result is not normal: make the implicit leading bit explicit before shifting */ >+ shift = (1 - n); >+ if (shift > MANTLENGTH_DP64 + 1) >+ /* Sigh. Shifting works mod 64 so be careful not to shift too much */ >+ uy = 0; >+ else >+ { >+ /* Make sure we round the result: shift out all but the last discarded bit, then add that bit back in (halves round up in magnitude) */ >+ uy >>= shift - 1; >+ uy = (uy >> 1) + (uy & 1); >+ } >+ } >+ *ur = uy | xsign; >+} >+ >+#endif /* USE_SCALEDOWNDOUBLE */ >+ >+ >+#if defined(USE_SCALEUPFLOAT128) >+/* Scales up a float (normal or denormal) whose bit pattern is given >+ as ux by 2**128. There are no checks that the input number is >+ scalable by that amount.
*/ >+static inline void scaleUpFloat128(unsigned int ux, unsigned int *ur) >+{ /* ux: input bit pattern; the bit pattern of the value scaled by 2**128 is stored in *ur */ >+ unsigned int uy; >+ float y; >+ >+ if ((ux & EXPBITS_SP32) == 0) >+ { >+ /* ux is denormalised (exponent field is zero). OR in the exponent bits of 4.0F so that, for an IEEE binary32 layout, y = +/-(4 + m*2**-21), where m is the mantissa field of ux */ >+ PUT_BITS_SP32(ux | 0x40800000, y); >+ /* Compensate for the implicit bit just added; what remains is m*2**-21, the denormal scaled by 2**128 */ >+ if (ux & SIGNBIT_SP32) >+ y += 4.0F; >+ else >+ y -= 4.0F; >+ GET_BITS_SP32(y, uy); >+ } >+ else >+ /* ux is normal: adding 0x80 (= 128) to the exponent field multiplies by 2**128 */ >+ uy = ux + 0x40000000; >+ *ur = uy; >+} >+#endif /* USE_SCALEUPFLOAT128 */ >+ >+ >+#if defined(USE_SCALEDOWNFLOAT) >+/* Scales down a float whose bit pattern is given as ux by 2**k. >+ There are no checks that the input number is scalable by that amount. The resulting bit pattern, with the sign of ux preserved, is stored in *ur; when the scaled exponent field would be less than 1 the result is built as a denormal (or zero). */ >+static inline void scaleDownFloat(unsigned int ux, int k, >+ unsigned int *ur) >+{ >+ unsigned int uy, uk, ax, xsign; >+ int n, shift; >+ >+ xsign = ux & SIGNBIT_SP32; >+ ax = ux & ~SIGNBIT_SP32; >+ n = ((ax & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - k; /* exponent field the scaled result would have */ >+ if (n > 0) >+ { >+ uk = (unsigned int)n << EXPSHIFTBITS_SP32; >+ uy = (ax & ~EXPBITS_SP32) | uk; >+ } >+ else >+ { >+ uy = (ax & ~EXPBITS_SP32) | 0x00800000; /* result is not normal: make the implicit leading bit explicit before shifting */ >+ shift = (1 - n); >+ if (shift > MANTLENGTH_SP32 + 1) >+ /* Sigh. Shifting works mod 32 so be careful not to shift too much */ >+ uy = 0; >+ else >+ { >+ /* Make sure we round the result: shift out all but the last discarded bit, then add that bit back in (halves round up in magnitude) */ >+ uy >>= shift - 1; >+ uy = (uy >> 1) + (uy & 1); >+ } >+ } >+ *ur = uy | xsign; >+} >+#endif /* USE_SCALEDOWNFLOAT */ >+ >+ >+#if defined(USE_SQRT_AMD_INLINE) >+static inline double sqrt_amd_inline(double x) >+{ >+ /* >+ Computes the square root of x. >+ >+ The calculation is carried out in three steps. >+ >+ Step 1. Reduction. >+ The input argument is scaled to the interval [1, 4) by >+ computing >+ x = 2^e * y, where y in [1,4). >+ Furthermore y is decomposed as y = c + t where >+ c = 1 + j/32, j = 0,1,..,96; and |t| <= 1/64. >+ >+ Step 2. Approximation. >+ An approximation q = sqrt(1 + (t/c)) - 1 is obtained >+ from a basic series expansion using precomputed values >+ stored in rt_jby32_lead_table_dbl and rt_jby32_trail_table_dbl.
>+ >+ Step 3. Reconstruction. >+ The value of sqrt(x) is reconstructed via >+ sqrt(x) = 2^(e/2) * sqrt(y) >+ = 2^(e/2) * sqrt(c) * sqrt(y/c) >+ = 2^(e/2) * sqrt(c) * sqrt(1 + t/c) >+ = 2^(e/2) * [ sqrt(c) + sqrt(c)*q ] >+ */ >+ >+ unsigned long ux, ax, u; >+ double r1, r2, c, y, p, q, r, twop, z, rtc, rtc_lead, rtc_trail; >+ int e, denorm = 0, index; >+ >+/* Arrays rt_jby32_lead_table_dbl and rt_jby32_trail_table_dbl contain >+ leading and trailing parts respectively of precomputed >+ values of sqrt(j/32), for j = 32, 33, ..., 128. >+ rt_jby32_lead_table_dbl contains the first 21 bits of precision, >+ and rt_jby32_trail_table_dbl contains a further 53 bits precision. */ >+ >+ static const double rt_jby32_lead_table_dbl[97] = { >+ 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ >+ 1.01550388336181640625e+00, /* 0x3ff03f8100000000 */ >+ 1.03077602386474609375e+00, /* 0x3ff07e0f00000000 */ >+ 1.04582500457763671875e+00, /* 0x3ff0bbb300000000 */ >+ 1.06065940856933593750e+00, /* 0x3ff0f87600000000 */ >+ 1.07528972625732421875e+00, /* 0x3ff1346300000000 */ >+ 1.08972454071044921875e+00, /* 0x3ff16f8300000000 */ >+ 1.10396957397460937500e+00, /* 0x3ff1a9dc00000000 */ >+ 1.11803340911865234375e+00, /* 0x3ff1e37700000000 */ >+ 1.13192272186279296875e+00, /* 0x3ff21c5b00000000 */ >+ 1.14564323425292968750e+00, /* 0x3ff2548e00000000 */ >+ 1.15920162200927734375e+00, /* 0x3ff28c1700000000 */ >+ 1.17260360717773437500e+00, /* 0x3ff2c2fc00000000 */ >+ 1.18585395812988281250e+00, /* 0x3ff2f94200000000 */ >+ 1.19895744323730468750e+00, /* 0x3ff32eee00000000 */ >+ 1.21191978454589843750e+00, /* 0x3ff3640600000000 */ >+ 1.22474479675292968750e+00, /* 0x3ff3988e00000000 */ >+ 1.23743629455566406250e+00, /* 0x3ff3cc8a00000000 */ >+ 1.25000000000000000000e+00, /* 0x3ff4000000000000 */ >+ 1.26243782043457031250e+00, /* 0x3ff432f200000000 */ >+ 1.27475452423095703125e+00, /* 0x3ff4656500000000 */ >+ 1.28695297241210937500e+00, /* 0x3ff4975c00000000 */ >+ 1.29903793334960937500e+00, 
/* 0x3ff4c8dc00000000 */ >+ 1.31101036071777343750e+00, /* 0x3ff4f9e600000000 */ >+ 1.32287502288818359375e+00, /* 0x3ff52a7f00000000 */ >+ 1.33463478088378906250e+00, /* 0x3ff55aaa00000000 */ >+ 1.34629058837890625000e+00, /* 0x3ff58a6800000000 */ >+ 1.35784721374511718750e+00, /* 0x3ff5b9be00000000 */ >+ 1.36930561065673828125e+00, /* 0x3ff5e8ad00000000 */ >+ 1.38066959381103515625e+00, /* 0x3ff6173900000000 */ >+ 1.39194107055664062500e+00, /* 0x3ff6456400000000 */ >+ 1.40312099456787109375e+00, /* 0x3ff6732f00000000 */ >+ 1.41421318054199218750e+00, /* 0x3ff6a09e00000000 */ >+ 1.42521858215332031250e+00, /* 0x3ff6cdb200000000 */ >+ 1.43614006042480468750e+00, /* 0x3ff6fa6e00000000 */ >+ 1.44697952270507812500e+00, /* 0x3ff726d400000000 */ >+ 1.45773792266845703125e+00, /* 0x3ff752e500000000 */ >+ 1.46841716766357421875e+00, /* 0x3ff77ea300000000 */ >+ 1.47901916503906250000e+00, /* 0x3ff7aa1000000000 */ >+ 1.48954677581787109375e+00, /* 0x3ff7d52f00000000 */ >+ 1.50000000000000000000e+00, /* 0x3ff8000000000000 */ >+ 1.51038074493408203125e+00, /* 0x3ff82a8500000000 */ >+ 1.52068996429443359375e+00, /* 0x3ff854bf00000000 */ >+ 1.53093051910400390625e+00, /* 0x3ff87eb100000000 */ >+ 1.54110336303710937500e+00, /* 0x3ff8a85c00000000 */ >+ 1.55120849609375000000e+00, /* 0x3ff8d1c000000000 */ >+ 1.56124877929687500000e+00, /* 0x3ff8fae000000000 */ >+ 1.57122516632080078125e+00, /* 0x3ff923bd00000000 */ >+ 1.58113861083984375000e+00, /* 0x3ff94c5800000000 */ >+ 1.59099006652832031250e+00, /* 0x3ff974b200000000 */ >+ 1.60078048706054687500e+00, /* 0x3ff99ccc00000000 */ >+ 1.61051177978515625000e+00, /* 0x3ff9c4a800000000 */ >+ 1.62018489837646484375e+00, /* 0x3ff9ec4700000000 */ >+ 1.62979984283447265625e+00, /* 0x3ffa13a900000000 */ >+ 1.63935947418212890625e+00, /* 0x3ffa3ad100000000 */ >+ 1.64886283874511718750e+00, /* 0x3ffa61be00000000 */ >+ 1.65831184387207031250e+00, /* 0x3ffa887200000000 */ >+ 1.66770744323730468750e+00, /* 0x3ffaaeee00000000 */ >+ 
1.67705059051513671875e+00, /* 0x3ffad53300000000 */ >+ 1.68634128570556640625e+00, /* 0x3ffafb4100000000 */ >+ 1.69558238983154296875e+00, /* 0x3ffb211b00000000 */ >+ 1.70477199554443359375e+00, /* 0x3ffb46bf00000000 */ >+ 1.71391296386718750000e+00, /* 0x3ffb6c3000000000 */ >+ 1.72300529479980468750e+00, /* 0x3ffb916e00000000 */ >+ 1.73204994201660156250e+00, /* 0x3ffbb67a00000000 */ >+ 1.74104785919189453125e+00, /* 0x3ffbdb5500000000 */ >+ 1.75000000000000000000e+00, /* 0x3ffc000000000000 */ >+ 1.75890541076660156250e+00, /* 0x3ffc247a00000000 */ >+ 1.76776695251464843750e+00, /* 0x3ffc48c600000000 */ >+ 1.77658367156982421875e+00, /* 0x3ffc6ce300000000 */ >+ 1.78535652160644531250e+00, /* 0x3ffc90d200000000 */ >+ 1.79408740997314453125e+00, /* 0x3ffcb49500000000 */ >+ 1.80277538299560546875e+00, /* 0x3ffcd82b00000000 */ >+ 1.81142139434814453125e+00, /* 0x3ffcfb9500000000 */ >+ 1.82002735137939453125e+00, /* 0x3ffd1ed500000000 */ >+ 1.82859230041503906250e+00, /* 0x3ffd41ea00000000 */ >+ 1.83711719512939453125e+00, /* 0x3ffd64d500000000 */ >+ 1.84560203552246093750e+00, /* 0x3ffd879600000000 */ >+ 1.85404872894287109375e+00, /* 0x3ffdaa2f00000000 */ >+ 1.86245727539062500000e+00, /* 0x3ffdcca000000000 */ >+ 1.87082862854003906250e+00, /* 0x3ffdeeea00000000 */ >+ 1.87916183471679687500e+00, /* 0x3ffe110c00000000 */ >+ 1.88745784759521484375e+00, /* 0x3ffe330700000000 */ >+ 1.89571857452392578125e+00, /* 0x3ffe54dd00000000 */ >+ 1.90394306182861328125e+00, /* 0x3ffe768d00000000 */ >+ 1.91213226318359375000e+00, /* 0x3ffe981800000000 */ >+ 1.92028617858886718750e+00, /* 0x3ffeb97e00000000 */ >+ 1.92840576171875000000e+00, /* 0x3ffedac000000000 */ >+ 1.93649101257324218750e+00, /* 0x3ffefbde00000000 */ >+ 1.94454288482666015625e+00, /* 0x3fff1cd900000000 */ >+ 1.95256233215332031250e+00, /* 0x3fff3db200000000 */ >+ 1.96054744720458984375e+00, /* 0x3fff5e6700000000 */ >+ 1.96850109100341796875e+00, /* 0x3fff7efb00000000 */ >+ 1.97642326354980468750e+00, /* 
0x3fff9f6e00000000 */ >+ 1.98431301116943359375e+00, /* 0x3fffbfbf00000000 */ >+ 1.99217128753662109375e+00, /* 0x3fffdfef00000000 */ >+ 2.00000000000000000000e+00}; /* 0x4000000000000000 */ >+ >+ static const double rt_jby32_trail_table_dbl[97] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 9.17217678638807524014e-07, /* 0x3eaec6d70177881c */ >+ 3.82539669043705364790e-07, /* 0x3e99abfb41bd6b24 */ >+ 2.85899577162227138140e-08, /* 0x3e5eb2bf6bab55a2 */ >+ 7.63210485349101216659e-07, /* 0x3ea99bed9b2d8d0c */ >+ 9.32123004127716212874e-07, /* 0x3eaf46e029c1b296 */ >+ 1.95174719169309219157e-07, /* 0x3e8a3226fc42f30c */ >+ 5.34316371481845492427e-07, /* 0x3ea1edbe20701d73 */ >+ 5.79631242504454563052e-07, /* 0x3ea372fe94f82be7 */ >+ 4.20404384109571705948e-07, /* 0x3e9c367e08e7bb06 */ >+ 6.89486030314147010716e-07, /* 0x3ea722a3d0a66608 */ >+ 6.89927685625314560328e-07, /* 0x3ea7266f067ca1d6 */ >+ 3.32778123013641425828e-07, /* 0x3e965515a9b34850 */ >+ 1.64433259436999584387e-07, /* 0x3e8611e23ef6c1bd */ >+ 4.37590875197899335723e-07, /* 0x3e9d5dc1059ed8e7 */ >+ 1.79808183816018617413e-07, /* 0x3e88222982d0e4f4 */ >+ 7.46386593615986477624e-08, /* 0x3e7409212e7d0322 */ >+ 5.72520794105201454728e-07, /* 0x3ea335ea8a5fcf39 */ >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 2.96860689431670420344e-07, /* 0x3e93ec071e938bfe */ >+ 3.54167239176257065345e-07, /* 0x3e97c48bfd9862c6 */ >+ 7.95211265664474710063e-07, /* 0x3eaaaed010f74671 */ >+ 1.72327048595145565621e-07, /* 0x3e87211cbfeb62e0 */ >+ 6.99494915996239297020e-07, /* 0x3ea7789d9660e72d */ >+ 6.32644111701500844315e-07, /* 0x3ea53a5f1d36f1cf */ >+ 6.20124838851440463844e-10, /* 0x3e054eacff2057dc */ >+ 6.13404719757812629969e-07, /* 0x3ea4951b3e6a83cc */ >+ 3.47654909777986407387e-07, /* 0x3e9754aa76884c66 */ >+ 7.83106177002392475763e-07, /* 0x3eaa46d4b1de1074 */ >+ 5.33337372440526357008e-07, /* 0x3ea1e55548f92635 */ >+ 2.01508648555298681765e-08, /* 0x3e55a3070dd17788 */ >+ 
5.25472356925843939587e-07, /* 0x3ea1a1c5eedb0801 */ >+ 3.81831102861301692797e-07, /* 0x3e999fcef32422cc */ >+ 6.99220602161420018738e-07, /* 0x3ea776425d6b0199 */ >+ 6.01209702477462624811e-07, /* 0x3ea42c5a1e0191a2 */ >+ 9.01437000591944740554e-08, /* 0x3e7832a0bdff1327 */ >+ 5.10428680864685379950e-08, /* 0x3e6b674743636676 */ >+ 3.47895267104621031421e-07, /* 0x3e9758cb90d2f714 */ >+ 7.80735841510641848628e-07, /* 0x3eaa3278459cde25 */ >+ 1.35158752025506517690e-07, /* 0x3e822404f4a103ee */ >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 1.76523947728535489812e-09, /* 0x3e1e539af6892ac5 */ >+ 6.68280121328499932183e-07, /* 0x3ea66c7b872c9cd0 */ >+ 5.70135482405123276616e-07, /* 0x3ea3216d2f43887d */ >+ 1.37705134737562525897e-07, /* 0x3e827b832cbedc0e */ >+ 7.09655107074516613672e-07, /* 0x3ea7cfe41579091d */ >+ 7.20302724551461693011e-07, /* 0x3ea82b5a713c490a */ >+ 4.69926266058212796694e-07, /* 0x3e9f8945932d872e */ >+ 2.19244345915999437026e-07, /* 0x3e8d6d2da9490251 */ >+ 1.91141411617401877927e-07, /* 0x3e89a791a3114e4a */ >+ 5.72297665296622053774e-07, /* 0x3ea333ffe005988d */ >+ 5.61055484436830560103e-07, /* 0x3ea2d36e0ed49ab1 */ >+ 2.76225500213991506100e-07, /* 0x3e92898498f55f9e */ >+ 7.58466189522395692908e-07, /* 0x3ea9732cca1032a3 */ >+ 1.56893371256836029827e-07, /* 0x3e850ed0b02a22d2 */ >+ 4.06038997708867066507e-07, /* 0x3e9b3fb265b1e40a */ >+ 5.51305629612057435809e-07, /* 0x3ea27fade682d1de */ >+ 5.64778487026561123207e-07, /* 0x3ea2f36906f707ba */ >+ 3.92609705553556897517e-07, /* 0x3e9a58fbbee883b6 */ >+ 9.09698438776943827802e-07, /* 0x3eae864005bca6d7 */ >+ 1.05949774066016139743e-07, /* 0x3e7c70d02300f263 */ >+ 7.16578798392844784244e-07, /* 0x3ea80b5d712d8e3e */ >+ 6.86233073531233972561e-07, /* 0x3ea706b27cc7d390 */ >+ 7.99211473033494452908e-07, /* 0x3eaad12c9d849a97 */ >+ 8.65552275731027456121e-07, /* 0x3ead0b09954e764b */ >+ 6.75456120386058448618e-07, /* 0x3ea6aa1fb7826cbd */ >+ 0.00000000000000000000e+00, /* 
0x0000000000000000 */ >+ 4.99167184520462138743e-07, /* 0x3ea0bfd03f46763c */ >+ 4.51720373502110930296e-10, /* 0x3dff0abfb4adfb9e */ >+ 1.28874162718371367439e-07, /* 0x3e814c151f991b2e */ >+ 5.85529267186999798656e-07, /* 0x3ea3a5a879b09292 */ >+ 1.01827770937125531924e-07, /* 0x3e7b558d173f9796 */ >+ 2.54736389177809626508e-07, /* 0x3e9118567cd83fb8 */ >+ 6.98925535290464831294e-07, /* 0x3ea773b981896751 */ >+ 1.20940735036524314513e-07, /* 0x3e803b7df49f48a8 */ >+ 5.43759351196479689657e-08, /* 0x3e6d315f22491900 */ >+ 1.11957989042397958409e-07, /* 0x3e7e0db1c5bb84b2 */ >+ 8.47006714134442661218e-07, /* 0x3eac6bbb7644ff76 */ >+ 8.92831044643427836228e-07, /* 0x3eadf55c3afec01f */ >+ 7.77828292464916501663e-07, /* 0x3eaa197e81034da3 */ >+ 6.48469316302918797451e-08, /* 0x3e71683f4920555d */ >+ 2.12579816658859849140e-07, /* 0x3e8c882fd78bb0b0 */ >+ 7.61222472580559138435e-07, /* 0x3ea98ad9eb7b83ec */ >+ 2.86488961857314189607e-07, /* 0x3e9339d7c7777273 */ >+ 2.14637363790165363515e-07, /* 0x3e8ccee237cae6fe */ >+ 5.44137005612605847831e-08, /* 0x3e6d368fe324a146 */ >+ 2.58378284856442408413e-07, /* 0x3e9156e7b6d99b45 */ >+ 3.15848939061134843091e-07, /* 0x3e95323e5310b5c1 */ >+ 6.60530466255089632309e-07, /* 0x3ea629e9db362f5d */ >+ 7.63436345535852301127e-07, /* 0x3ea99dde4728d7ec */ >+ 8.68233432860324345268e-08, /* 0x3e774e746878544d */ >+ 9.45465175398023087082e-07, /* 0x3eafb97be873a87d */ >+ 8.77499534786171267246e-07, /* 0x3ead71a9e23c2f63 */ >+ 2.74055432394999316135e-07, /* 0x3e92643c89cda173 */ >+ 4.72129009349126213532e-07, /* 0x3e9faf1d57a4d56c */ >+ 8.93777032327078947306e-07, /* 0x3eadfd7c7ab7b282 */ >+ 0.00000000000000000000e+00}; /* 0x0000000000000000 */ >+ >+ >+ /* Handle special arguments first */ >+ >+ GET_BITS_DP64(x, ux); >+ ax = ux & (~SIGNBIT_DP64); >+ >+ if(ax >= 0x7ff0000000000000) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else 
if (ux & SIGNBIT_DP64) >+ /* x is negative infinity */ >+ return nan_with_flags(AMD_F_INVALID); >+ else >+ /* x is positive infinity */ >+ return x; >+ } >+ else if (ux & SIGNBIT_DP64) >+ { >+ /* x is negative. */ >+ if (ux == SIGNBIT_DP64) >+ /* Handle negative zero first: it is returned unchanged */ >+ return x; >+ else >+ return nan_with_flags(AMD_F_INVALID); >+ } >+ else if (ux <= 0x000fffffffffffff) >+ { >+ /* x is denormalised or zero */ >+ if (ux == 0) >+ /* x is zero */ >+ return x; >+ else >+ { >+ /* x is denormalised; scale it up */ >+ /* Normalize x by increasing the exponent by 60 >+ and subtracting a correction to account for the implicit >+ bit. This replaces a slow denormalized >+ multiplication by a fast normal subtraction. The exponent field written is 0x03d = 61, but a denormal's effective exponent field is 1, hence the net factor of 2**60; it is undone after the square root by the 2**(e-30) scaling. */ >+ static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000, i.e. 2**-962 */ >+ denorm = 1; >+ GET_BITS_DP64(x, ux); >+ PUT_BITS_DP64(ux | 0x03d0000000000000, x); >+ x -= corr; >+ GET_BITS_DP64(x, ux); >+ } >+ } >+ >+ /* Main algorithm */ >+ >+ /* >+ Find y and e such that x = 2^e * y, where y in [1,4). >+ This is done using an in-lined variant of splitDouble, >+ which also ensures that e is even. >+ */ >+ y = x; >+ ux &= EXPBITS_DP64; >+ ux >>= EXPSHIFTBITS_DP64; >+ if (ux & 1) >+ { >+ GET_BITS_DP64(y, u); >+ u &= (SIGNBIT_DP64 | MANTBITS_DP64); >+ u |= ONEEXPBITS_DP64; >+ PUT_BITS_DP64(u, y); >+ e = ux - EXPBIAS_DP64; >+ } >+ else >+ { >+ GET_BITS_DP64(y, u); >+ u &= (SIGNBIT_DP64 | MANTBITS_DP64); >+ u |= TWOEXPBITS_DP64; >+ PUT_BITS_DP64(u, y); >+ e = ux - EXPBIAS_DP64 - 1; >+ } >+ >+ >+ /* Find the index of the sub-interval of [1,4) in which y lies: 32*y rounded to nearest, so for y in [1,4) index is in 32..128 and index-32 subscripts the 97-entry tables. */ >+ >+ index = (int)(32.0*y+0.5); >+ >+ /* Look up the table values and compute c and r = t/c, where t = y - c */ >+ >+ rtc_lead = rt_jby32_lead_table_dbl[index-32]; >+ rtc_trail = rt_jby32_trail_table_dbl[index-32]; >+ c = 0.03125*index; >+ r = (y - c)/c; >+ >+ /* >+ Find q = sqrt(1+r) - 1.
>+ p is a cubic polynomial first guess; q refines it with one step of Newton on (q+1)^2 = 1+r >+ */ >+ >+ p = r*0.5 - r*r*(0.1250079870 - r*(0.6250522999E-01)); >+ twop = p + p; >+ q = p - (p*p + (twop - r))/(twop + 2.0); >+ >+ /* Reconstruction: z = sqrt(c) + sqrt(c)*q, with sqrt(c) split as rtc_lead + rtc_trail */ >+ >+ rtc = rtc_lead + rtc_trail; >+ e >>= 1; /* e = e/2, exact because e is even. NOTE(review): e can be negative, and right-shifting a negative int is implementation-defined in ISO C */ >+ z = rtc_lead + (rtc*q+rtc_trail); >+ >+ if (denorm) >+ { >+ /* Scale by 2**(e-30): the input was pre-scaled by 2**60, so its square root carries an extra 2**30. r is reused to hold the power of two. */ >+ PUT_BITS_DP64(((long)(e - 30) + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, r); >+ z *= r; >+ } >+ else >+ { >+ /* Scale by 2**e (r is reused to hold the power of two) */ >+ PUT_BITS_DP64(((long)e + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, r); >+ z *= r; >+ } >+ >+ return z; >+ >+} >+#endif /* USE_SQRT_AMD_INLINE */ >+ >+#if defined(USE_SQRTF_AMD_INLINE) >+ >+static inline float sqrtf_amd_inline(float x) >+{ >+ /* >+ Computes the square root of x. >+ >+ The calculation is carried out in three steps. >+ >+ Step 1. Reduction. >+ The input argument is scaled to the interval [1, 4) by >+ computing >+ x = 2^e * y, where y in [1,4). >+ Furthermore y is decomposed as y = c + t where >+ c = 1 + j/32, j = 0,1,..,96; and |t| <= 1/64. >+ >+ Step 2. Approximation. >+ An approximation q = sqrt(1 + (t/c)) - 1 is obtained >+ from a basic series expansion using precomputed values >+ stored in rt_jby32_lead_table_float and rt_jby32_trail_table_float. >+ >+ Step 3. Reconstruction. >+ The value of sqrt(x) is reconstructed via >+ sqrt(x) = 2^(e/2) * sqrt(y) >+ = 2^(e/2) * sqrt(c) * sqrt(y/c) >+ = 2^(e/2) * sqrt(c) * sqrt(1 + t/c) >+ = 2^(e/2) * [ sqrt(c) + sqrt(c)*q ] >+ */ >+ >+ unsigned int ux, ax, u; >+ float r1, r2, c, y, p, q, r, twop, z, rtc, rtc_lead, rtc_trail; /* NOTE(review): r1 and r2 are never referenced in this function */ >+ int e, denorm = 0, index; >+ >+/* Arrays rt_jby32_lead_table_float and rt_jby32_trail_table_float contain >+ leading and trailing parts respectively of precomputed >+ values of sqrt(j/32), for j = 32, 33, ..., 128. >+ rt_jby32_lead_table_float contains the first 13 bits of precision, >+ and rt_jby32_trail_table_float contains a further 24 bits precision.
*/ >+ >+static const float rt_jby32_lead_table_float[97] = { >+ 1.00000000000000000000e+00F, /* 0x3f800000 */ >+ 1.01538085937500000000e+00F, /* 0x3f81f800 */ >+ 1.03076171875000000000e+00F, /* 0x3f83f000 */ >+ 1.04565429687500000000e+00F, /* 0x3f85d800 */ >+ 1.06054687500000000000e+00F, /* 0x3f87c000 */ >+ 1.07519531250000000000e+00F, /* 0x3f89a000 */ >+ 1.08959960937500000000e+00F, /* 0x3f8b7800 */ >+ 1.10375976562500000000e+00F, /* 0x3f8d4800 */ >+ 1.11791992187500000000e+00F, /* 0x3f8f1800 */ >+ 1.13183593750000000000e+00F, /* 0x3f90e000 */ >+ 1.14550781250000000000e+00F, /* 0x3f92a000 */ >+ 1.15917968750000000000e+00F, /* 0x3f946000 */ >+ 1.17236328125000000000e+00F, /* 0x3f961000 */ >+ 1.18579101562500000000e+00F, /* 0x3f97c800 */ >+ 1.19873046875000000000e+00F, /* 0x3f997000 */ >+ 1.21191406250000000000e+00F, /* 0x3f9b2000 */ >+ 1.22460937500000000000e+00F, /* 0x3f9cc000 */ >+ 1.23730468750000000000e+00F, /* 0x3f9e6000 */ >+ 1.25000000000000000000e+00F, /* 0x3fa00000 */ >+ 1.26220703125000000000e+00F, /* 0x3fa19000 */ >+ 1.27465820312500000000e+00F, /* 0x3fa32800 */ >+ 1.28686523437500000000e+00F, /* 0x3fa4b800 */ >+ 1.29882812500000000000e+00F, /* 0x3fa64000 */ >+ 1.31079101562500000000e+00F, /* 0x3fa7c800 */ >+ 1.32275390625000000000e+00F, /* 0x3fa95000 */ >+ 1.33447265625000000000e+00F, /* 0x3faad000 */ >+ 1.34619140625000000000e+00F, /* 0x3fac5000 */ >+ 1.35766601562500000000e+00F, /* 0x3fadc800 */ >+ 1.36914062500000000000e+00F, /* 0x3faf4000 */ >+ 1.38061523437500000000e+00F, /* 0x3fb0b800 */ >+ 1.39184570312500000000e+00F, /* 0x3fb22800 */ >+ 1.40307617187500000000e+00F, /* 0x3fb39800 */ >+ 1.41406250000000000000e+00F, /* 0x3fb50000 */ >+ 1.42504882812500000000e+00F, /* 0x3fb66800 */ >+ 1.43603515625000000000e+00F, /* 0x3fb7d000 */ >+ 1.44677734375000000000e+00F, /* 0x3fb93000 */ >+ 1.45751953125000000000e+00F, /* 0x3fba9000 */ >+ 1.46826171875000000000e+00F, /* 0x3fbbf000 */ >+ 1.47900390625000000000e+00F, /* 0x3fbd5000 */ >+ 
1.48950195312500000000e+00F, /* 0x3fbea800 */ >+ 1.50000000000000000000e+00F, /* 0x3fc00000 */ >+ 1.51025390625000000000e+00F, /* 0x3fc15000 */ >+ 1.52050781250000000000e+00F, /* 0x3fc2a000 */ >+ 1.53076171875000000000e+00F, /* 0x3fc3f000 */ >+ 1.54101562500000000000e+00F, /* 0x3fc54000 */ >+ 1.55102539062500000000e+00F, /* 0x3fc68800 */ >+ 1.56103515625000000000e+00F, /* 0x3fc7d000 */ >+ 1.57104492187500000000e+00F, /* 0x3fc91800 */ >+ 1.58105468750000000000e+00F, /* 0x3fca6000 */ >+ 1.59082031250000000000e+00F, /* 0x3fcba000 */ >+ 1.60058593750000000000e+00F, /* 0x3fcce000 */ >+ 1.61035156250000000000e+00F, /* 0x3fce2000 */ >+ 1.62011718750000000000e+00F, /* 0x3fcf6000 */ >+ 1.62963867187500000000e+00F, /* 0x3fd09800 */ >+ 1.63916015625000000000e+00F, /* 0x3fd1d000 */ >+ 1.64868164062500000000e+00F, /* 0x3fd30800 */ >+ 1.65820312500000000000e+00F, /* 0x3fd44000 */ >+ 1.66748046875000000000e+00F, /* 0x3fd57000 */ >+ 1.67700195312500000000e+00F, /* 0x3fd6a800 */ >+ 1.68627929687500000000e+00F, /* 0x3fd7d800 */ >+ 1.69555664062500000000e+00F, /* 0x3fd90800 */ >+ 1.70458984375000000000e+00F, /* 0x3fda3000 */ >+ 1.71386718750000000000e+00F, /* 0x3fdb6000 */ >+ 1.72290039062500000000e+00F, /* 0x3fdc8800 */ >+ 1.73193359375000000000e+00F, /* 0x3fddb000 */ >+ 1.74096679687500000000e+00F, /* 0x3fded800 */ >+ 1.75000000000000000000e+00F, /* 0x3fe00000 */ >+ 1.75878906250000000000e+00F, /* 0x3fe12000 */ >+ 1.76757812500000000000e+00F, /* 0x3fe24000 */ >+ 1.77636718750000000000e+00F, /* 0x3fe36000 */ >+ 1.78515625000000000000e+00F, /* 0x3fe48000 */ >+ 1.79394531250000000000e+00F, /* 0x3fe5a000 */ >+ 1.80273437500000000000e+00F, /* 0x3fe6c000 */ >+ 1.81127929687500000000e+00F, /* 0x3fe7d800 */ >+ 1.81982421875000000000e+00F, /* 0x3fe8f000 */ >+ 1.82836914062500000000e+00F, /* 0x3fea0800 */ >+ 1.83691406250000000000e+00F, /* 0x3feb2000 */ >+ 1.84545898437500000000e+00F, /* 0x3fec3800 */ >+ 1.85400390625000000000e+00F, /* 0x3fed5000 */ >+ 1.86230468750000000000e+00F, /* 
0x3fee6000 */ >+ 1.87060546875000000000e+00F, /* 0x3fef7000 */ >+ 1.87915039062500000000e+00F, /* 0x3ff08800 */ >+ 1.88745117187500000000e+00F, /* 0x3ff19800 */ >+ 1.89550781250000000000e+00F, /* 0x3ff2a000 */ >+ 1.90380859375000000000e+00F, /* 0x3ff3b000 */ >+ 1.91210937500000000000e+00F, /* 0x3ff4c000 */ >+ 1.92016601562500000000e+00F, /* 0x3ff5c800 */ >+ 1.92822265625000000000e+00F, /* 0x3ff6d000 */ >+ 1.93627929687500000000e+00F, /* 0x3ff7d800 */ >+ 1.94433593750000000000e+00F, /* 0x3ff8e000 */ >+ 1.95239257812500000000e+00F, /* 0x3ff9e800 */ >+ 1.96044921875000000000e+00F, /* 0x3ffaf000 */ >+ 1.96826171875000000000e+00F, /* 0x3ffbf000 */ >+ 1.97631835937500000000e+00F, /* 0x3ffcf800 */ >+ 1.98413085937500000000e+00F, /* 0x3ffdf800 */ >+ 1.99194335937500000000e+00F, /* 0x3ffef800 */ >+ 2.00000000000000000000e+00F}; /* 0x40000000 */ >+ >+static const float rt_jby32_trail_table_float[97] = { >+ 0.00000000000000000000e+00F, /* 0x00000000 */ >+ 1.23941208585165441036e-04F, /* 0x3901f637 */ >+ 1.46876545841223560274e-05F, /* 0x37766aff */ >+ 1.70736297150142490864e-04F, /* 0x393307ad */ >+ 1.13296780909877270460e-04F, /* 0x38ed99bf */ >+ 9.53458802541717886925e-05F, /* 0x38c7f46e */ >+ 1.25126505736261606216e-04F, /* 0x39033464 */ >+ 2.10342666832730174065e-04F, /* 0x395c8f6e */ >+ 1.14066875539720058441e-04F, /* 0x38ef3730 */ >+ 8.72047676239162683487e-05F, /* 0x38b6e1b4 */ >+ 1.36111237225122749805e-04F, /* 0x390eb915 */ >+ 2.26244374061934649944e-05F, /* 0x37bdc99c */ >+ 2.40658700931817293167e-04F, /* 0x397c5954 */ >+ 6.31069415248930454254e-05F, /* 0x38845848 */ >+ 2.27412077947519719601e-04F, /* 0x396e7577 */ >+ 5.90185391047270968556e-06F, /* 0x36c6088a */ >+ 1.35496389702893793583e-04F, /* 0x390e1409 */ >+ 1.32179571664892137051e-04F, /* 0x390a99af */ >+ 0.00000000000000000000e+00F, /* 0x00000000 */ >+ 2.31086043640971183777e-04F, /* 0x39724fb0 */ >+ 9.66752704698592424393e-05F, /* 0x38cabe24 */ >+ 8.85332483449019491673e-05F, /* 0x38b9aaed */ >+ 
2.09980673389509320259e-04F, /* 0x395c2e42 */ >+ 2.20044588786549866199e-04F, /* 0x3966bbc5 */ >+ 1.21749282698146998882e-04F, /* 0x38ff53a6 */ >+ 1.62125259521417319775e-04F, /* 0x392a002b */ >+ 9.97955357888713479042e-05F, /* 0x38d14952 */ >+ 1.81545779923908412457e-04F, /* 0x393e5d53 */ >+ 1.65768768056295812130e-04F, /* 0x392dd237 */ >+ 5.48927710042335093021e-05F, /* 0x38663caa */ >+ 9.53875860432162880898e-05F, /* 0x38c80ad2 */ >+ 4.53481625299900770187e-05F, /* 0x383e3438 */ >+ 1.51062369695864617825e-04F, /* 0x391e667f */ >+ 1.70453247847035527229e-04F, /* 0x3932bbb2 */ >+ 1.05505387182347476482e-04F, /* 0x38dd42c6 */ >+ 2.02269104192964732647e-04F, /* 0x39541833 */ >+ 2.18442466575652360916e-04F, /* 0x39650db4 */ >+ 1.55796806211583316326e-04F, /* 0x39235d63 */ >+ 1.60395247803535312414e-05F, /* 0x37868c9e */ >+ 4.49578510597348213196e-05F, /* 0x383c9120 */ >+ 0.00000000000000000000e+00F, /* 0x00000000 */ >+ 1.26840444863773882389e-04F, /* 0x39050079 */ >+ 1.82820076588541269302e-04F, /* 0x393fb364 */ >+ 1.69370483490638434887e-04F, /* 0x3931990b */ >+ 8.78757418831810355186e-05F, /* 0x38b849ee */ >+ 1.83815121999941766262e-04F, /* 0x3940be7f */ >+ 2.14343352126888930798e-04F, /* 0x3960c15b */ >+ 1.80714370799250900745e-04F, /* 0x393d7e25 */ >+ 8.41425862745381891727e-05F, /* 0x38b075b5 */ >+ 1.69945167726837098598e-04F, /* 0x3932334f */ >+ 1.95121858268976211548e-04F, /* 0x394c99a0 */ >+ 1.60778334247879683971e-04F, /* 0x3928969b */ >+ 6.79871009197086095810e-05F, /* 0x388e944c */ >+ 1.61929419846273958683e-04F, /* 0x3929cb99 */ >+ 1.99474830878898501396e-04F, /* 0x39512a1e */ >+ 1.81604162207804620266e-04F, /* 0x393e6cff */ >+ 1.09270178654696792364e-04F, /* 0x38e527fb */ >+ 2.27539261686615645885e-04F, /* 0x396e979b */ >+ 4.90300008095800876617e-05F, /* 0x384da590 */ >+ 6.28985289949923753738e-05F, /* 0x3883e864 */ >+ 2.58551553997676819563e-05F, /* 0x37d8e386 */ >+ 1.82868374395184218884e-04F, /* 0x393fc05b */ >+ 4.64625991298817098141e-05F, /* 
0x3842e0d6 */ >+ 1.05703387816902250051e-04F, /* 0x38ddad13 */ >+ 1.17213814519345760345e-04F, /* 0x38f5d0b0 */ >+ 8.17377731436863541603e-05F, /* 0x38ab6aa2 */ >+ 0.00000000000000000000e+00F, /* 0x00000000 */ >+ 1.16847433673683553934e-04F, /* 0x38f50bfd */ >+ 1.88827965757809579372e-04F, /* 0x3946001f */ >+ 2.16612941585481166840e-04F, /* 0x39632298 */ >+ 2.00857131858356297016e-04F, /* 0x39529d2d */ >+ 1.42199307447299361229e-04F, /* 0x39151b56 */ >+ 4.12627305195201188326e-05F, /* 0x382d1185 */ >+ 1.42796401632949709892e-04F, /* 0x3915bb9e */ >+ 2.03253570361994206905e-04F, /* 0x39552077 */ >+ 2.23214170546270906925e-04F, /* 0x396a0e99 */ >+ 2.03244591830298304558e-04F, /* 0x39551e0e */ >+ 1.43898156238719820976e-04F, /* 0x3916e35e */ >+ 4.57155256299301981926e-05F, /* 0x383fbeac */ >+ 1.53365719597786664963e-04F, /* 0x3920d0cc */ >+ 2.23224633373320102692e-04F, /* 0x396a1168 */ >+ 1.16566716314991936088e-05F, /* 0x37439106 */ >+ 7.43694272387074306607e-06F, /* 0x36f98ada */ >+ 2.11048507480882108212e-04F, /* 0x395d4ce7 */ >+ 1.34682719362899661064e-04F, /* 0x390d399e */ >+ 2.29425968427676707506e-05F, /* 0x37c074da */ >+ 1.20421340398024767637e-04F, /* 0x38fc8ab7 */ >+ 1.83421318070031702518e-04F, /* 0x394054c9 */ >+ 2.12376224226318299770e-04F, /* 0x395eb14f */ >+ 2.07710763788782060146e-04F, /* 0x3959ccef */ >+ 1.69840845046564936638e-04F, /* 0x3932174e */ >+ 9.91739216260612010956e-05F, /* 0x38cffb98 */ >+ 2.40249748458154499531e-04F, /* 0x397beb8d */ >+ 1.05178231024183332920e-04F, /* 0x38dc9322 */ >+ 1.82623916771262884140e-04F, /* 0x393f7ebc */ >+ 2.28821940254420042038e-04F, /* 0x396fefec */ >+ 0.00000000000000000000e+00F}; /* 0x00000000 */ >+ >+ >+/* Handle special arguments first */ >+ >+ GET_BITS_SP32(x, ux); >+ ax = ux & (~SIGNBIT_SP32); >+ >+ if(ax >= 0x7f800000) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_SP32) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else if (ux & SIGNBIT_SP32) >+ 
return nanf_with_flags(AMD_F_INVALID); /* x is negative infinity */ >+ else >+ /* x is positive infinity */ >+ return x; >+ } >+ else if (ux & SIGNBIT_SP32) >+ { >+ /* x is negative. */ >+ if (x == 0.0F) >+ /* Handle negative zero first: the sign bit is set here, so a zero is -0.0 and is returned unchanged */ >+ return x; >+ else >+ return nanf_with_flags(AMD_F_INVALID); >+ } >+ else if (ux <= 0x007fffff) >+ { >+ /* x is denormalised or zero */ >+ if (ux == 0) >+ /* x is zero */ >+ return x; >+ else >+ { >+ /* x is denormalised; scale it up */ >+ /* Normalize x by increasing the exponent by 26 >+ and subtracting a correction to account for the implicit >+ bit. This replaces a slow denormalized >+ multiplication by a fast normal subtraction. The exponent field written is 0x1b = 27, but a denormal's effective exponent field is 1, hence the net factor of 2**26; it is undone after the square root by the 2**(e-13) scaling. */ >+ static const float corr = 7.888609052210118054e-31F; /* 0x0d800000, i.e. 2**-100 */ >+ denorm = 1; >+ GET_BITS_SP32(x, ux); >+ PUT_BITS_SP32(ux | 0x0d800000, x); >+ x -= corr; >+ GET_BITS_SP32(x, ux); >+ } >+ } >+ >+ /* Main algorithm */ >+ >+ /* >+ Find y and e such that x = 2^e * y, where y in [1,4). >+ This is done using an in-lined variant of splitFloat, >+ which also ensures that e is even. >+ */ >+ y = x; >+ ux &= EXPBITS_SP32; >+ ux >>= EXPSHIFTBITS_SP32; >+ if (ux & 1) >+ { >+ GET_BITS_SP32(y, u); >+ u &= (SIGNBIT_SP32 | MANTBITS_SP32); >+ u |= ONEEXPBITS_SP32; >+ PUT_BITS_SP32(u, y); >+ e = ux - EXPBIAS_SP32; >+ } >+ else >+ { >+ GET_BITS_SP32(y, u); >+ u &= (SIGNBIT_SP32 | MANTBITS_SP32); >+ u |= TWOEXPBITS_SP32; >+ PUT_BITS_SP32(u, y); >+ e = ux - EXPBIAS_SP32 - 1; >+ } >+ >+ /* Find the index of the sub-interval of [1,4) in which y lies: 32*y rounded to nearest, so for y in [1,4) index is in 32..128 and index-32 subscripts the 97-entry tables. The unsuffixed 0.5 is a double constant, so the sum is formed in double. */ >+ >+ index = (int)(32.0F*y+0.5); >+ >+ /* Look up the table values and compute c and r = t/c, where t = y - c */ >+ >+ rtc_lead = rt_jby32_lead_table_float[index-32]; >+ rtc_trail = rt_jby32_trail_table_float[index-32]; >+ c = 0.03125F*index; >+ r = (y - c)/c; >+ >+ /* >+ Find q = sqrt(1+r) - 1.
>+ p is a cubic polynomial first guess; q refines it with one step of Newton on (q+1)^2 = 1+r. The unsuffixed 2.0 is a double constant, so the quotient is formed in double before q is stored as float. >+ */ >+ >+ p = r*0.5F - r*r*(0.1250079870F - r*(0.6250522999e-01F)); >+ twop = p + p; >+ q = p - (p*p + (twop - r))/(twop + 2.0); >+ >+ /* Reconstruction: z = sqrt(c) + sqrt(c)*q, with sqrt(c) split as rtc_lead + rtc_trail */ >+ >+ rtc = rtc_lead + rtc_trail; >+ e >>= 1; /* e = e/2, exact because e is even. NOTE(review): e can be negative, and right-shifting a negative int is implementation-defined in ISO C */ >+ z = rtc_lead + (rtc*q+rtc_trail); >+ >+ if (denorm) >+ { >+ /* Scale by 2**(e-13): the input was pre-scaled by 2**26, so its square root carries an extra 2**13. r is reused to hold the power of two. */ >+ PUT_BITS_SP32(((e - 13) + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, r); >+ z *= r; >+ } >+ else >+ { >+ /* Scale by 2**e (r is reused to hold the power of two) */ >+ PUT_BITS_SP32((e + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, r); >+ z *= r; >+ } >+ >+ return z; >+ >+} >+#endif /* USE_SQRTF_AMD_INLINE */ >+ >+#ifdef USE_LOG_KERNEL_AMD >+static inline void log_kernel_amd64(double x, unsigned long ux, int *xexp, double *r1, double *r2) >+{ >+ >+ int expadjust; >+ double r, z1, z2, correction, f, f1, f2, q, u, v, poly; >+ int index; >+ >+ /* >+ Computes natural log(x). Algorithm based on: >+ Ping-Tak Peter Tang >+ "Table-driven implementation of the logarithm function in IEEE >+ floating-point arithmetic" >+ ACM Transactions on Mathematical Software (TOMS) >+ Volume 16, Issue 4 (December 1990) >+ */ >+ >+/* Arrays ln_lead_table and ln_tail_table contain >+ leading and trailing parts respectively of precomputed >+ values of natural log(1+i/64), for i = 0, 1, ..., 64. >+ ln_lead_table contains the first 24 bits of precision, >+ and ln_tail_table contains a further 53 bits precision.
*/ >+ >+ static const double ln_lead_table[65] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 1.55041813850402832031e-02, /* 0x3f8fc0a800000000 */ >+ 3.07716131210327148438e-02, /* 0x3f9f829800000000 */ >+ 4.58095073699951171875e-02, /* 0x3fa7745800000000 */ >+ 6.06245994567871093750e-02, /* 0x3faf0a3000000000 */ >+ 7.52233862876892089844e-02, /* 0x3fb341d700000000 */ >+ 8.96121263504028320312e-02, /* 0x3fb6f0d200000000 */ >+ 1.03796780109405517578e-01, /* 0x3fba926d00000000 */ >+ 1.17783010005950927734e-01, /* 0x3fbe270700000000 */ >+ 1.31576299667358398438e-01, /* 0x3fc0d77e00000000 */ >+ 1.45181953907012939453e-01, /* 0x3fc2955280000000 */ >+ 1.58604979515075683594e-01, /* 0x3fc44d2b00000000 */ >+ 1.71850204467773437500e-01, /* 0x3fc5ff3000000000 */ >+ 1.84922337532043457031e-01, /* 0x3fc7ab8900000000 */ >+ 1.97825729846954345703e-01, /* 0x3fc9525a80000000 */ >+ 2.10564732551574707031e-01, /* 0x3fcaf3c900000000 */ >+ 2.23143517971038818359e-01, /* 0x3fcc8ff780000000 */ >+ 2.35566020011901855469e-01, /* 0x3fce270700000000 */ >+ 2.47836112976074218750e-01, /* 0x3fcfb91800000000 */ >+ 2.59957492351531982422e-01, /* 0x3fd0a324c0000000 */ >+ 2.71933674812316894531e-01, /* 0x3fd1675c80000000 */ >+ 2.83768117427825927734e-01, /* 0x3fd22941c0000000 */ >+ 2.95464158058166503906e-01, /* 0x3fd2e8e280000000 */ >+ 3.07025015354156494141e-01, /* 0x3fd3a64c40000000 */ >+ 3.18453729152679443359e-01, /* 0x3fd4618bc0000000 */ >+ 3.29753279685974121094e-01, /* 0x3fd51aad80000000 */ >+ 3.40926527976989746094e-01, /* 0x3fd5d1bd80000000 */ >+ 3.51976394653320312500e-01, /* 0x3fd686c800000000 */ >+ 3.62905442714691162109e-01, /* 0x3fd739d7c0000000 */ >+ 3.73716354370117187500e-01, /* 0x3fd7eaf800000000 */ >+ 3.84411692619323730469e-01, /* 0x3fd89a3380000000 */ >+ 3.94993782043457031250e-01, /* 0x3fd9479400000000 */ >+ 4.05465066432952880859e-01, /* 0x3fd9f323c0000000 */ >+ 4.15827870368957519531e-01, /* 0x3fda9cec80000000 */ >+ 4.26084339618682861328e-01, /* 
0x3fdb44f740000000 */ >+ 4.36236739158630371094e-01, /* 0x3fdbeb4d80000000 */ >+ 4.46287095546722412109e-01, /* 0x3fdc8ff7c0000000 */ >+ 4.56237375736236572266e-01, /* 0x3fdd32fe40000000 */ >+ 4.66089725494384765625e-01, /* 0x3fddd46a00000000 */ >+ 4.75845873355865478516e-01, /* 0x3fde744240000000 */ >+ 4.85507786273956298828e-01, /* 0x3fdf128f40000000 */ >+ 4.95077252388000488281e-01, /* 0x3fdfaf5880000000 */ >+ 5.04556000232696533203e-01, /* 0x3fe02552a0000000 */ >+ 5.13945698738098144531e-01, /* 0x3fe0723e40000000 */ >+ 5.23248136043548583984e-01, /* 0x3fe0be72e0000000 */ >+ 5.32464742660522460938e-01, /* 0x3fe109f380000000 */ >+ 5.41597247123718261719e-01, /* 0x3fe154c3c0000000 */ >+ 5.50647079944610595703e-01, /* 0x3fe19ee6a0000000 */ >+ 5.59615731239318847656e-01, /* 0x3fe1e85f40000000 */ >+ 5.68504691123962402344e-01, /* 0x3fe23130c0000000 */ >+ 5.77315330505371093750e-01, /* 0x3fe2795e00000000 */ >+ 5.86049020290374755859e-01, /* 0x3fe2c0e9e0000000 */ >+ 5.94707071781158447266e-01, /* 0x3fe307d720000000 */ >+ 6.03290796279907226562e-01, /* 0x3fe34e2880000000 */ >+ 6.11801505088806152344e-01, /* 0x3fe393e0c0000000 */ >+ 6.20240390300750732422e-01, /* 0x3fe3d90260000000 */ >+ 6.28608644008636474609e-01, /* 0x3fe41d8fe0000000 */ >+ 6.36907458305358886719e-01, /* 0x3fe4618bc0000000 */ >+ 6.45137906074523925781e-01, /* 0x3fe4a4f840000000 */ >+ 6.53301239013671875000e-01, /* 0x3fe4e7d800000000 */ >+ 6.61398470401763916016e-01, /* 0x3fe52a2d20000000 */ >+ 6.69430613517761230469e-01, /* 0x3fe56bf9c0000000 */ >+ 6.77398800849914550781e-01, /* 0x3fe5ad4040000000 */ >+ 6.85303986072540283203e-01, /* 0x3fe5ee02a0000000 */ >+ 6.93147122859954833984e-01}; /* 0x3fe62e42e0000000 */ >+ >+ static const double ln_tail_table[65] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 5.15092497094772879206e-09, /* 0x3e361f807c79f3db */ >+ 4.55457209735272790188e-08, /* 0x3e6873c1980267c8 */ >+ 2.86612990859791781788e-08, /* 0x3e5ec65b9f88c69e */ >+ 
2.23596477332056055352e-08, /* 0x3e58022c54cc2f99 */ >+ 3.49498983167142274770e-08, /* 0x3e62c37a3a125330 */ >+ 3.23392843005887000414e-08, /* 0x3e615cad69737c93 */ >+ 1.35722380472479366661e-08, /* 0x3e4d256ab1b285e9 */ >+ 2.56504325268044191098e-08, /* 0x3e5b8abcb97a7aa2 */ >+ 5.81213608741512136843e-08, /* 0x3e6f34239659a5dc */ >+ 5.59374849578288093334e-08, /* 0x3e6e07fd48d30177 */ >+ 5.06615629004996189970e-08, /* 0x3e6b32df4799f4f6 */ >+ 5.24588857848400955725e-08, /* 0x3e6c29e4f4f21cf8 */ >+ 9.61968535632653505972e-10, /* 0x3e1086c848df1b59 */ >+ 1.34829655346594463137e-08, /* 0x3e4cf456b4764130 */ >+ 3.65557749306383026498e-08, /* 0x3e63a02ffcb63398 */ >+ 3.33431709374069198903e-08, /* 0x3e61e6a6886b0976 */ >+ 5.13008650536088382197e-08, /* 0x3e6b8abcb97a7aa2 */ >+ 5.09285070380306053751e-08, /* 0x3e6b578f8aa35552 */ >+ 3.20853940845502057341e-08, /* 0x3e6139c871afb9fc */ >+ 4.06713248643004200446e-08, /* 0x3e65d5d30701ce64 */ >+ 5.57028186706125221168e-08, /* 0x3e6de7bcb2d12142 */ >+ 5.48356693724804282546e-08, /* 0x3e6d708e984e1664 */ >+ 1.99407553679345001938e-08, /* 0x3e556945e9c72f36 */ >+ 1.96585517245087232086e-09, /* 0x3e20e2f613e85bda */ >+ 6.68649386072067321503e-09, /* 0x3e3cb7e0b42724f6 */ >+ 5.89936034642113390002e-08, /* 0x3e6fac04e52846c7 */ >+ 2.85038578721554472484e-08, /* 0x3e5e9b14aec442be */ >+ 5.09746772910284482606e-08, /* 0x3e6b5de8034e7126 */ >+ 5.54234668933210171467e-08, /* 0x3e6dc157e1b259d3 */ >+ 6.29100830926604004874e-09, /* 0x3e3b05096ad69c62 */ >+ 2.61974119468563937716e-08, /* 0x3e5c2116faba4cdd */ >+ 4.16752115011186398935e-08, /* 0x3e665fcc25f95b47 */ >+ 2.47747534460820790327e-08, /* 0x3e5a9a08498d4850 */ >+ 5.56922172017964209793e-08, /* 0x3e6de647b1465f77 */ >+ 2.76162876992552906035e-08, /* 0x3e5da71b7bf7861d */ >+ 7.08169709942321478061e-09, /* 0x3e3e6a6886b09760 */ >+ 5.77453510221151779025e-08, /* 0x3e6f0075eab0ef64 */ >+ 4.43021445893361960146e-09, /* 0x3e33071282fb989b */ >+ 3.15140984357495864573e-08, /* 
0x3e60eb43c3f1bed2 */ >+ 2.95077445089736670973e-08, /* 0x3e5faf06ecb35c84 */ >+ 1.44098510263167149349e-08, /* 0x3e4ef1e63db35f68 */ >+ 1.05196987538551827693e-08, /* 0x3e469743fb1a71a5 */ >+ 5.23641361722697546261e-08, /* 0x3e6c1cdf404e5796 */ >+ 7.72099925253243069458e-09, /* 0x3e4094aa0ada625e */ >+ 5.62089493829364197156e-08, /* 0x3e6e2d4c96fde3ec */ >+ 3.53090261098577946927e-08, /* 0x3e62f4d5e9a98f34 */ >+ 3.80080516835568242269e-08, /* 0x3e6467c96ecc5cbe */ >+ 5.66961038386146408282e-08, /* 0x3e6e7040d03dec5a */ >+ 4.42287063097349852717e-08, /* 0x3e67bebf4282de36 */ >+ 3.45294525105681104660e-08, /* 0x3e6289b11aeb783f */ >+ 2.47132034530447431509e-08, /* 0x3e5a891d1772f538 */ >+ 3.59655343422487209774e-08, /* 0x3e634f10be1fb591 */ >+ 5.51581770357780862071e-08, /* 0x3e6d9ce1d316eb93 */ >+ 3.60171867511861372793e-08, /* 0x3e63562a19a9c442 */ >+ 1.94511067964296180547e-08, /* 0x3e54e2adf548084c */ >+ 1.54137376631349347838e-08, /* 0x3e508ce55cc8c97a */ >+ 3.93171034490174464173e-09, /* 0x3e30e2f613e85bda */ >+ 5.52990607758839766440e-08, /* 0x3e6db03ebb0227bf */ >+ 3.29990737637586136511e-08, /* 0x3e61b75bb09cb098 */ >+ 1.18436010922446096216e-08, /* 0x3e496f16abb9df22 */ >+ 4.04248680368301346709e-08, /* 0x3e65b3f399411c62 */ >+ 2.27418915900284316293e-08, /* 0x3e586b3e59f65355 */ >+ 1.70263791333409206020e-08, /* 0x3e52482ceae1ac12 */ >+ 5.76999904754328540596e-08}; /* 0x3e6efa39ef35793c */ >+ >+ /* Approximating polynomial coefficients for x near 1.0 */ >+ static const double >+ ca_1 = 8.33333333333317923934e-02, /* 0x3fb55555555554e6 */ >+ ca_2 = 1.25000000037717509602e-02, /* 0x3f89999999bac6d4 */ >+ ca_3 = 2.23213998791944806202e-03, /* 0x3f62492307f1519f */ >+ ca_4 = 4.34887777707614552256e-04; /* 0x3f3c8034c85dfff0 */ >+ >+ /* Approximating polynomial coefficients for other x */ >+ static const double >+ cb_1 = 8.33333333333333593622e-02, /* 0x3fb5555555555557 */ >+ cb_2 = 1.24999999978138668903e-02, /* 0x3f89999999865ede */ >+ cb_3 = 
2.23219810758559851206e-03; /* 0x3f6249423bd94741 */ >+ >+ static const unsigned long >+ log_thresh1 = 0x3fee0faa00000000, >+ log_thresh2 = 0x3ff1082c00000000; >+ >+ /* log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000 >+ log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000 */ >+ if (ux >= log_thresh1 && ux <= log_thresh2) >+ { >+ /* Arguments close to 1.0 are handled separately to maintain >+ accuracy. >+ >+ The approximation in this region exploits the identity >+ log( 1 + r ) = log( 1 + u/2 ) - log( 1 - u/2 ), where >+ u = 2r / (2+r). >+ Note that the right hand side has an odd Taylor series expansion >+ which converges much faster than the Taylor series expansion of >+ log( 1 + r ) in r. Thus, we approximate log( 1 + r ) by >+ u + A1 * u^3 + A2 * u^5 + ... + An * u^(2n+1). >+ >+ One subtlety is that since u cannot be calculated from >+ r exactly, the rounding error in the first u should be >+ avoided if possible. To accomplish this, we observe that >+ u = r - r*r/(2+r). >+ Since x (=1+r) is the input argument, and thus presumed exact, >+ the formula above approximates u accurately because >+ u = r - correction, >+ and the magnitude of "correction" (of the order of r*r) >+ is small. >+ With these observations, we will approximate log( 1 + r ) by >+ r + ( (A1*u^3 + ... + An*u^(2n+1)) - correction ). >+ >+ We approximate log(1+r) by an odd polynomial in u, where >+ u = 2r/(2+r) = r - r*r/(2+r). >+ */ >+ r = x - 1.0; >+ u = r / (2.0 + r); >+ correction = r * u; >+ u = u + u; >+ v = u * u; >+ z1 = r; >+ z2 = (u * v * (ca_1 + v * (ca_2 + v * (ca_3 + v * ca_4))) - correction); >+ *r1 = z1; >+ *r2 = z2; >+ *xexp = 0; >+ } >+ else >+ { >+ /* >+ First, we decompose the argument x to the form >+ x = 2**M * (F1 + F2), >+ where 1 <= F1+F2 < 2, M has the value of an integer, >+ F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128. >+ >+ Second, we approximate log( 1 + F2/F1 ) by an odd polynomial >+ in U, where U = 2 F2 / (2 F1 + F2). 
>+ Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ). >+ The core approximation calculates >+ Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U - 1. >+ Note that log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ), >+ thus, Poly = 2 arctanh( U/2 ) / U - 1. >+ >+ It is not hard to see that >+ log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ). >+ Hence, we return Z1 = log(F1), and Z2 = log( 1 + F2/F1). >+ The values of log(F1) are calculated beforehand and stored >+ in the program. >+ */ >+ >+ f = x; >+ if (ux < IMPBIT_DP64) >+ { >+ /* The input argument x is denormalized */ >+ /* Normalize f by increasing the exponent by 60 >+ and subtracting a correction to account for the implicit >+ bit. This replaces a slow denormalized >+ multiplication by a fast normal subtraction. */ >+ static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */ >+ GET_BITS_DP64(f, ux); >+ ux |= 0x03d0000000000000; >+ PUT_BITS_DP64(ux, f); >+ f -= corr; >+ GET_BITS_DP64(f, ux); >+ expadjust = 60; >+ } >+ else >+ expadjust = 0; >+ >+ /* Store the exponent of x in xexp and put >+ f into the range [0.5,1) */ >+ *xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust; >+ PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f); >+ >+ /* Now x = 2**(xexp+1) * f, 1/2 <= f < 1. */ >+ >+ /* Set index to be the nearest integer to 128*f */ >+ r = 128.0 * f; >+ index = (int)(r + 0.5); >+ >+ z1 = ln_lead_table[index-64]; >+ q = ln_tail_table[index-64]; >+ f1 = index * 0.0078125; /* 0.0078125 = 1/128 */ >+ f2 = f - f1; >+ /* At this point, x = 2**(xexp+1) * ( f1 + f2 ) where >+ f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */ >+ >+ /* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5*f2 ) */ >+ /* u = f2 / (f1 + 0.5 * f2); */ >+ u = f2 / (f1 + 0.5 * f2); >+ >+ /* Here, |u| <= 2(exp(1/16)-1) / (exp(1/16)+1). 
>+ The core approximation calculates >+ poly = [log(1 + u/2) - log(1 - u/2)]/u - 1 */ >+ v = u * u; >+ poly = (v * (cb_1 + v * (cb_2 + v * cb_3))); >+ z2 = q + (u + u * poly); >+ *r1 = z1; >+ *r2 = z2; >+ } >+ return; >+} >+#endif /* USE_LOG_KERNEL_AMD */ >+ >+#if defined(USE_REMAINDER_PIBY2F_INLINE) >+/* Define this to get debugging print statements activated */ >+#define DEBUGGING_PRINT >+#undef DEBUGGING_PRINT >+ >+ >+#ifdef DEBUGGING_PRINT >+#include <stdio.h> >+char *d2b(long d, int bitsper, int point) >+{ >+ static char buff[200]; >+ int i, j; >+ j = bitsper; >+ if (point >= 0 && point <= bitsper) >+ j++; >+ buff[j] = '\0'; >+ for (i = bitsper - 1; i >= 0; i--) >+ { >+ j--; >+ if (d % 2 == 1) >+ buff[j] = '1'; >+ else >+ buff[j] = '0'; >+ if (i == point) >+ { >+ j--; >+ buff[j] = '.'; >+ } >+ d /= 2; >+ } >+ return buff; >+} >+#endif >+ >+/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using >+ extra precision, and return the result in r. >+ Return value "region" tells how many lots of pi/2 were subtracted >+ from x to put it in the range [-pi/4,pi/4], mod 4. */ >+static inline void __remainder_piby2f_inline(double x, unsigned long ux, double *r, int *region) >+{ >+ >+ /* eleven_piby4 is the closest machine number BELOW 11*pi/4 */ >+ static const double >+ eleven_piby4 = 8.6393797973719301808159e+00; /* 0x4021475cc9eedf00 */ >+ >+ static const double >+ piby2 = 1.57079632679489655800e+00, /* 0x3ff921fb54442d18 */ >+ twobypi = 6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */ >+ pi = 3.14159265358979311600e+00, /* 0x400921fb54442d18 */ >+ three_piby2 = 4.71238898038468967400e+00, /* 0x4012d97c7f3321d2 */ >+ two_pi = 6.28318530717958623200e+00, /* 0x401921fb54442d18 */ >+ five_piby2 = 7.85398163397448278999e+00; /* 0x401f6a7a2955385e */ >+ >+ /* Each of these threshold values is the closest machine >+ number BELOW a multiple of pi/4, i.e. they are not >+ rounded to nearest. thresh1 is 1*pi/4, thresh3 is 3*pi/4, etc. 
>+ This ensures that we end up in precisely the correct region. */ >+ static const double >+ thresh1 = 7.8539816339744827899949e-01, /* 0x3fe921fb54442d18 */ >+ thresh3 = 2.3561944901923448369984e+00, /* 0x4002d97c7f3321d2 */ >+ thresh5 = 3.9269908169872413949974e+00, /* 0x400f6a7a2955385e */ >+ thresh7 = 5.4977871437821379529964e+00, /* 0x4015fdbbe9bba775 */ >+ thresh9 = 7.0685834705770345109954e+00; /* 0x401c463abeccb2bb */ >+ >+ static const double cancellationThresh = 1.0e-5; >+ int done = 0; >+ >+ /* For small values of x, up to 11*pi/4, we do double precision >+ subtraction of the relevant multiple of pi/2 */ >+ if (x <= eleven_piby4) /* x <= 11*pi/4 */ >+ { >+ double t, ctest; >+ >+ if (x <= thresh5) /* x < 5*pi/4 */ >+ { >+ if (x <= thresh1) /* x < pi/4 */ >+ { >+ /* Quick return if x is already less than pi/4 */ >+ *r = x; >+ *region = 0; >+ return; >+ } >+ else if (x <= thresh3) /* x < 3*pi/4 */ >+ { >+ t = x - piby2; >+ *region = 1; >+ } >+ else /* x < 5*pi/4 */ >+ { >+ t = x - pi; >+ *region = 2; >+ } >+ } >+ else >+ { >+ if (x <= thresh7) /* x < 7*pi/4 */ >+ { >+ t = x - three_piby2; >+ *region = 3; >+ } >+ else if (x <= thresh9) /* x < 9*pi/4 */ >+ { >+ t = x - two_pi; >+ *region = 0; >+ } >+ else /* x < 11*pi/4 */ >+ { >+ t = x - five_piby2; >+ *region = 1; >+ } >+ } >+ >+ /* Check for massive cancellation which may happen very close >+ to multiples of pi/2 */ >+ if (t < 0.0) >+ ctest = -t; >+ else >+ ctest = t; >+#ifdef DEBUGGING_PRINT >+ printf("Cancellation threshold test = (%g > %g)\n", >+ ctest, cancellationThresh); >+#endif >+ >+ /* Check if cancellation error was not too large */ >+ if (ctest > cancellationThresh) >+ { >+ *r = t; >+ done = 1; >+ } >+ /* Otherwise fall through to the expensive method */ >+ } >+ else if (x <= 1.0e6) >+ { >+ /* This range reduction is accurate enough for x up to >+ approximately 2**(20) except near multiples of pi/2 */ >+ >+ /* We perform double precision arithmetic to find the >+ nearest multiple of pi/2 to x */ 
>+ int reg; >+ double z, w, c, ctest; >+ >+ /* Multiply x by 2/pi in double precision, result in z */ >+ z = x * twobypi; >+ >+#ifdef DEBUGGING_PRINT >+ printf("z = %30.20e = %s\n", z, double2hex(&z)); >+#endif >+ >+ /* Find reg, the nearest integer to z */ >+ reg = (int)(z + 0.5); >+ >+#ifdef DEBUGGING_PRINT >+ printf("reg = %d\n", reg); >+#endif >+ >+ /* Subtract reg from z, result in w */ >+ w = z - reg; >+ >+#ifdef DEBUGGING_PRINT >+ printf("w = %30.20e = %s\n", w, double2hex(&w)); >+#endif >+ >+ /* Check for massive cancellation which may happen very close >+ to multiples of pi/2 */ >+ if (w < 0.0) >+ ctest = -w; >+ else >+ ctest = w; >+ >+ /* If cancellation is not too severe, continue with this method. >+ Otherwise we fall through to the expensive, accurate method */ >+ if (ctest > cancellationThresh) >+ { >+ /* Multiply w by pi/2 */ >+ c = w * piby2; >+ *r = c; >+ *region = reg & 3; >+ >+#ifdef DEBUGGING_PRINT >+ printf("r = %30.20e = %s\n", *r, double2hex(r)); >+#endif >+ done = 1; >+ } >+ } >+ >+ if (!done) >+ { >+ /* This method simulates multi-precision floating-point >+ arithmetic and is accurate for all 1 <= x < infinity */ >+#if 0 >+ const int bitsper = 36; >+#else >+#define bitsper 36 >+#endif >+ unsigned long res[10]; >+ unsigned long u, carry, mask, mant, nextbits; >+ int first, last, i, rexp, xexp, resexp, ltb, determ, bc; >+ double dx; >+ static const double >+ piby2 = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */ >+ static unsigned long pibits[] = >+ { >+ 0L, >+ 5215L, 13000023176L, 11362338026L, 67174558139L, >+ 34819822259L, 10612056195L, 67816420731L, 57840157550L, >+ 19558516809L, 50025467026L, 25186875954L, 18152700886L >+ }; >+ >+#ifdef DEBUGGING_PRINT >+ printf("On entry, x = %25.20e = %s\n", x, double2hex(&x)); >+#endif >+ >+ xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64); >+ ux = ((ux & MANTBITS_DP64) | IMPBIT_DP64) >> 29; >+ >+#ifdef DEBUGGING_PRINT >+ printf("ux = %s\n", d2b(ux, 64, -1)); >+#endif >+ 
>+ /* Now ux is the mantissa bit pattern of x as a long integer */ >+ mask = (1L << bitsper) - 1; >+ >+ /* Set first and last to the positions of the first >+ and last chunks of 2/pi that we need */ >+ first = xexp / bitsper; >+ resexp = xexp - first * bitsper; >+ /* 120 is the theoretical maximum number of bits (actually >+ 115 for IEEE single precision) that we need to extract >+ from the middle of 2/pi to compute the reduced argument >+ accurately enough for our purposes */ >+ last = first + 120 / bitsper; >+ >+#ifdef DEBUGGING_PRINT >+ printf("first = %d, last = %d\n", first, last); >+#endif >+ >+ /* Do a long multiplication of the bits of 2/pi by the >+ integer mantissa */ >+#if 0 >+ for (i = last; i >= first; i--) >+ { >+ u = pibits[i] * ux + carry; >+ res[i - first] = u & mask; >+ carry = u >> bitsper; >+ } >+ res[last - first + 1] = 0; >+#else >+ /* Unroll the loop. This is only correct because we know >+ that bitsper is fixed as 36. */ >+ res[4] = 0; >+ u = pibits[last] * ux; >+ res[3] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last - 1] * ux + carry; >+ res[2] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last - 2] * ux + carry; >+ res[1] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[first] * ux + carry; >+ res[0] = u & mask; >+#endif >+ >+#ifdef DEBUGGING_PRINT >+ printf("resexp = %d\n", resexp); >+ printf("Significant part of x * 2/pi with binary" >+ " point in correct place:\n"); >+ for (i = 0; i <= last - first; i++) >+ { >+ if (i > 0 && i % 5 == 0) >+ printf("\n "); >+ if (i == 1) >+ printf("%s ", d2b(res[i], bitsper, resexp)); >+ else >+ printf("%s ", d2b(res[i], bitsper, -1)); >+ } >+ printf("\n"); >+#endif >+ >+ /* Reconstruct the result */ >+ ltb = (int)((((res[0] << bitsper) | res[1]) >+ >> (bitsper - 1 - resexp)) & 7); >+ >+ /* determ says whether the fractional part is >= 0.5 */ >+ determ = ltb & 1; >+ >+#ifdef DEBUGGING_PRINT >+ printf("ltb = %d (last two bits before binary point" >+ " and first bit after)\n", ltb); >+ 
printf("determ = %d (1 means need to negate because the fractional\n" >+ " part of x * 2/pi is greater than 0.5)\n", determ); >+#endif >+ >+ i = 1; >+ if (determ) >+ { >+ /* The mantissa is >= 0.5. We want to subtract it >+ from 1.0 by negating all the bits */ >+ *region = ((ltb >> 1) + 1) & 3; >+ mant = ~(res[1]) & ((1L << (bitsper - resexp)) - 1); >+ while (mant < 0x0000000000010000) >+ { >+ i++; >+ mant = (mant << bitsper) | (~(res[i]) & mask); >+ } >+ nextbits = (~(res[i+1]) & mask); >+ } >+ else >+ { >+ *region = (ltb >> 1); >+ mant = res[1] & ((1L << (bitsper - resexp)) - 1); >+ while (mant < 0x0000000000010000) >+ { >+ i++; >+ mant = (mant << bitsper) | res[i]; >+ } >+ nextbits = res[i+1]; >+ } >+ >+#ifdef DEBUGGING_PRINT >+ printf("First bits of mant = %s\n", d2b(mant, bitsper, -1)); >+#endif >+ >+ /* Normalize the mantissa. The shift value 6 here, determined by >+ trial and error, seems to give optimal speed. */ >+ bc = 0; >+ while (mant < 0x0000400000000000) >+ { >+ bc += 6; >+ mant <<= 6; >+ } >+ while (mant < 0x0010000000000000) >+ { >+ bc++; >+ mant <<= 1; >+ } >+ mant |= nextbits >> (bitsper - bc); >+ >+ rexp = 52 + resexp - bc - i * bitsper; >+ >+#ifdef DEBUGGING_PRINT >+ printf("Normalised mantissa = 0x%016lx\n", mant); >+ printf("Exponent to be inserted on mantissa = rexp = %d\n", rexp); >+#endif >+ >+ /* Put the result exponent rexp onto the mantissa pattern */ >+ u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64; >+ ux = (mant & MANTBITS_DP64) | u; >+ if (determ) >+ /* If we negated the mantissa we negate x too */ >+ ux |= SIGNBIT_DP64; >+ PUT_BITS_DP64(ux, dx); >+ >+#ifdef DEBUGGING_PRINT >+ printf("(x*2/pi) = %25.20e = %s\n", dx, double2hex(&dx)); >+#endif >+ >+ /* x is a double precision version of the fractional part of >+ x * 2 / pi. Multiply x by pi/2 in double precision >+ to get the reduced argument r. 
*/ >+ *r = dx * piby2; >+ >+#ifdef DEBUGGING_PRINT >+ printf(" r = frac(x*2/pi) * pi/2:\n"); >+ printf(" r = %25.20e = %s\n", *r, double2hex(r)); >+ printf("region = (number of pi/2 subtracted from x) mod 4 = %d\n", >+ *region); >+#endif >+ } >+} >+#endif /* USE_REMAINDER_PIBY2F_INLINE */ >+ >+#endif /* LIBM_INLINES_AMD_H_INCLUDED */ >============================================================ >Index: sysdeps/x86_64/fpu/libm_util_amd.h >--- sysdeps/x86_64/fpu/libm_util_amd.h created >+++ sysdeps/x86_64/fpu/libm_util_amd.h 2002-12-03 13:43:01.000000000 +0100 1.1 >@@ -0,0 +1,101 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#ifndef LIBM_UTIL_AMD_H_INCLUDED >+#define LIBM_UTIL_AMD_H_INCLUDED 1 >+ >+/* Compile-time verification that type long is the same size >+ as type double (i.e. we are really on a 64-bit machine) */ >+void check_long_against_double_size(int machine_is_64_bit[(sizeof(long) == sizeof(double))?1:-1]); >+ >+ >+/* Definitions for double functions on 64 bit machines */ >+#define SIGNBIT_DP64 0x8000000000000000 >+#define EXPBITS_DP64 0x7ff0000000000000 >+#define MANTBITS_DP64 0x000fffffffffffff >+#define ONEEXPBITS_DP64 0x3ff0000000000000 >+#define TWOEXPBITS_DP64 0x4000000000000000 >+#define HALFEXPBITS_DP64 0x3fe0000000000000 >+#define IMPBIT_DP64 0x0010000000000000 >+#define QNANBITPATT_DP64 0x7ff8000000000000 >+#define PINFBITPATT_DP64 0x7ff0000000000000 >+#define NINFBITPATT_DP64 0xfff0000000000000 >+#define EXPBIAS_DP64 1023 >+#define EXPSHIFTBITS_DP64 52 >+#define BIASEDEMIN_DP64 1 >+#define EMIN_DP64 -1022 >+#define BIASEDEMAX_DP64 2046 >+#define EMAX_DP64 1023 >+#define LAMBDA_DP64 1.0e300 >+#define MANTLENGTH_DP64 53 >+#define BASEDIGITS_DP64 15 >+ >+ >+/* These definitions, used by float functions, >+ are for both 32 and 64 bit machines */ >+#define 
SIGNBIT_SP32 0x80000000 >+#define EXPBITS_SP32 0x7f800000 >+#define MANTBITS_SP32 0x007fffff >+#define ONEEXPBITS_SP32 0x3f800000 >+#define TWOEXPBITS_SP32 0x40000000 >+#define HALFEXPBITS_SP32 0x3f000000 >+#define IMPBIT_SP32 0x00800000 >+#define QNANBITPATT_SP32 0x7fc00000 >+#define PINFBITPATT_SP32 0x7f800000 >+#define NINFBITPATT_SP32 0xff800000 >+#define EXPBIAS_SP32 127 >+#define EXPSHIFTBITS_SP32 23 >+#define BIASEDEMIN_SP32 1 >+#define EMIN_SP32 -126 >+#define BIASEDEMAX_SP32 254 >+#define EMAX_SP32 127 >+#define LAMBDA_SP32 1.0e30 >+#define MANTLENGTH_SP32 24 >+#define BASEDIGITS_SP32 7 >+ >+#define CLASS_SIGNALLING_NAN 1 >+#define CLASS_QUIET_NAN 2 >+#define CLASS_NEGATIVE_INFINITY 3 >+#define CLASS_NEGATIVE_NORMAL_NONZERO 4 >+#define CLASS_NEGATIVE_DENORMAL 5 >+#define CLASS_NEGATIVE_ZERO 6 >+#define CLASS_POSITIVE_ZERO 7 >+#define CLASS_POSITIVE_DENORMAL 8 >+#define CLASS_POSITIVE_NORMAL_NONZERO 9 >+#define CLASS_POSITIVE_INFINITY 10 >+ >+#define OLD_BITS_SP32(x) (*((unsigned int *)&x)) >+#define OLD_BITS_DP64(x) (*((unsigned long *)&x)) >+ >+/* Alternatives to the above functions which don't have >+ problems when using high optimization levels on gcc */ >+#define GET_BITS_SP32(x, ux) {union {float f; unsigned int i;} _bitsy; _bitsy.f = (x); ux = _bitsy.i;} >+#define PUT_BITS_SP32(ux, x) {union {float f; unsigned int i;} _bitsy; _bitsy.i = (ux); x = _bitsy.f;} >+#define GET_BITS_DP64(x, ux) {union {double d; unsigned long i;} _bitsy; _bitsy.d = (x); ux = _bitsy.i;} >+#define PUT_BITS_DP64(ux, x) {union {double d; unsigned long i;} _bitsy; _bitsy.i = (ux); x = _bitsy.d;} >+ >+ >+/* Processor-dependent floating-point status flags */ >+#define AMD_F_INEXACT 0x00000020 >+#define AMD_F_UNDERFLOW 0x00000010 >+#define AMD_F_OVERFLOW 0x00000008 >+#define AMD_F_DIVBYZERO 0x00000004 >+#define AMD_F_INVALID 0x00000001 >+ >+/* Processor-dependent floating-point precision-control flags */ >+#define AMD_F_EXTENDED 0x00000300 >+#define AMD_F_DOUBLE 0x00000200 
>+#define AMD_F_SINGLE 0x00000000 >+ >+/* Processor-dependent floating-point rounding-control flags */ >+#define AMD_F_RC_NEAREST 0x00000000 >+#define AMD_F_RC_DOWN 0x00002000 >+#define AMD_F_RC_UP 0x00004000 >+#define AMD_F_RC_ZERO 0x00006000 >+ >+#endif /* LIBM_UTIL_AMD_H_INCLUDED */ >============================================================ >Index: sysdeps/x86_64/fpu/s_atan2.c >--- sysdeps/x86_64/fpu/s_atan2.c created >+++ sysdeps/x86_64/fpu/s_atan2.c 2002-12-03 13:43:03.000000000 +0100 1.1 >@@ -0,0 +1,746 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_VAL_WITH_FLAGS >+#define USE_SCALEDOUBLE_1 >+#define USE_SCALEDOUBLE_2 >+#define USE_SCALEUPDOUBLE1024 >+#define USE_SCALEDOWNDOUBLE >+#include "libm_inlines_amd.h" >+#undef USE_SCALEDOWNDOUBLE >+#undef USE_SCALEUPDOUBLE1024 >+#undef USE_SCALEDOUBLE_1 >+#undef USE_SCALEDOUBLE_2 >+#undef USE_VAL_WITH_FLAGS >+ >+/* Deal with errno for out-of-range arguments >+ (only used when _LIB_VERSION is _SVID_) */ >+#include "libm_errno_amd.h" >+static inline double retval_errno_edom(double x, double y) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = y; >+ exc.type = DOMAIN; >+ exc.name = (char *)"atan2"; >+ exc.retval = HUGE; >+ if (!matherr(&exc)) >+ { >+ (void)fputs("atan2: DOMAIN error\n", stderr); >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+ >+double __atan2(double y, double x) >+{ >+ /* Arrays atan_jby256_lead and atan_jby256_tail contain >+ leading and trailing parts respectively of precomputed >+ values of atan(j/256), for j = 16, 17, ..., 256. >+ atan_jby256_lead contains the first 21 bits of precision, >+ and atan_jby256_tail contains a further 53 bits precision. 
*/ >+ >+ static const double atan_jby256_lead[ 241] = { >+ 6.24187886714935302734e-02, /* 0x3faff55b00000000 */ >+ 6.63088560104370117188e-02, /* 0x3fb0f99e00000000 */ >+ 7.01969265937805175781e-02, /* 0x3fb1f86d00000000 */ >+ 7.40829110145568847656e-02, /* 0x3fb2f71900000000 */ >+ 7.79666304588317871094e-02, /* 0x3fb3f59f00000000 */ >+ 8.18479657173156738281e-02, /* 0x3fb4f3fd00000000 */ >+ 8.57268571853637695312e-02, /* 0x3fb5f23200000000 */ >+ 8.96031260490417480469e-02, /* 0x3fb6f03b00000000 */ >+ 9.34767723083496093750e-02, /* 0x3fb7ee1800000000 */ >+ 9.73475575447082519531e-02, /* 0x3fb8ebc500000000 */ >+ 1.01215422153472900391e-01, /* 0x3fb9e94100000000 */ >+ 1.05080246925354003906e-01, /* 0x3fbae68a00000000 */ >+ 1.08941912651062011719e-01, /* 0x3fbbe39e00000000 */ >+ 1.12800359725952148438e-01, /* 0x3fbce07c00000000 */ >+ 1.16655409336090087891e-01, /* 0x3fbddd2100000000 */ >+ 1.20507001876831054688e-01, /* 0x3fbed98c00000000 */ >+ 1.24354958534240722656e-01, /* 0x3fbfd5ba00000000 */ >+ 1.28199219703674316406e-01, /* 0x3fc068d500000000 */ >+ 1.32039666175842285156e-01, /* 0x3fc0e6ad00000000 */ >+ 1.35876297950744628906e-01, /* 0x3fc1646500000000 */ >+ 1.39708757400512695312e-01, /* 0x3fc1e1fa00000000 */ >+ 1.43537282943725585938e-01, /* 0x3fc25f6e00000000 */ >+ 1.47361397743225097656e-01, /* 0x3fc2dcbd00000000 */ >+ 1.51181221008300781250e-01, /* 0x3fc359e800000000 */ >+ 1.54996633529663085938e-01, /* 0x3fc3d6ee00000000 */ >+ 1.58807516098022460938e-01, /* 0x3fc453ce00000000 */ >+ 1.62613749504089355469e-01, /* 0x3fc4d08700000000 */ >+ 1.66415214538574218750e-01, /* 0x3fc54d1800000000 */ >+ 1.70211911201477050781e-01, /* 0x3fc5c98100000000 */ >+ 1.74003481864929199219e-01, /* 0x3fc645bf00000000 */ >+ 1.77790164947509765625e-01, /* 0x3fc6c1d400000000 */ >+ 1.81571602821350097656e-01, /* 0x3fc73dbd00000000 */ >+ 1.85347914695739746094e-01, /* 0x3fc7b97b00000000 */ >+ 1.89118742942810058594e-01, /* 0x3fc8350b00000000 */ >+ 1.92884206771850585938e-01, /* 
0x3fc8b06e00000000 */ >+ 1.96644186973571777344e-01, /* 0x3fc92ba300000000 */ >+ 2.00398445129394531250e-01, /* 0x3fc9a6a800000000 */ >+ 2.04147100448608398438e-01, /* 0x3fca217e00000000 */ >+ 2.07889914512634277344e-01, /* 0x3fca9c2300000000 */ >+ 2.11626768112182617188e-01, /* 0x3fcb169600000000 */ >+ 2.15357661247253417969e-01, /* 0x3fcb90d700000000 */ >+ 2.19082474708557128906e-01, /* 0x3fcc0ae500000000 */ >+ 2.22801089286804199219e-01, /* 0x3fcc84bf00000000 */ >+ 2.26513504981994628906e-01, /* 0x3fccfe6500000000 */ >+ 2.30219483375549316406e-01, /* 0x3fcd77d500000000 */ >+ 2.33919143676757812500e-01, /* 0x3fcdf11000000000 */ >+ 2.37612247467041015625e-01, /* 0x3fce6a1400000000 */ >+ 2.41298794746398925781e-01, /* 0x3fcee2e100000000 */ >+ 2.44978547096252441406e-01, /* 0x3fcf5b7500000000 */ >+ 2.48651623725891113281e-01, /* 0x3fcfd3d100000000 */ >+ 2.52317905426025390625e-01, /* 0x3fd025fa00000000 */ >+ 2.55977153778076171875e-01, /* 0x3fd061ee00000000 */ >+ 2.59629487991333007812e-01, /* 0x3fd09dc500000000 */ >+ 2.63274669647216796875e-01, /* 0x3fd0d97e00000000 */ >+ 2.66912937164306640625e-01, /* 0x3fd1151a00000000 */ >+ 2.70543813705444335938e-01, /* 0x3fd1509700000000 */ >+ 2.74167299270629882812e-01, /* 0x3fd18bf500000000 */ >+ 2.77783632278442382812e-01, /* 0x3fd1c73500000000 */ >+ 2.81392335891723632812e-01, /* 0x3fd2025500000000 */ >+ 2.84993648529052734375e-01, /* 0x3fd23d5600000000 */ >+ 2.88587331771850585938e-01, /* 0x3fd2783700000000 */ >+ 2.92173147201538085938e-01, /* 0x3fd2b2f700000000 */ >+ 2.95751571655273437500e-01, /* 0x3fd2ed9800000000 */ >+ 2.99322128295898437500e-01, /* 0x3fd3281800000000 */ >+ 3.02884817123413085938e-01, /* 0x3fd3627700000000 */ >+ 3.06439399719238281250e-01, /* 0x3fd39cb400000000 */ >+ 3.09986352920532226562e-01, /* 0x3fd3d6d100000000 */ >+ 3.13524961471557617188e-01, /* 0x3fd410cb00000000 */ >+ 3.17055702209472656250e-01, /* 0x3fd44aa400000000 */ >+ 3.20578098297119140625e-01, /* 0x3fd4845a00000000 */ >+ 
3.24092388153076171875e-01, /* 0x3fd4bdee00000000 */ >+ 3.27598333358764648438e-01, /* 0x3fd4f75f00000000 */ >+ 3.31095933914184570312e-01, /* 0x3fd530ad00000000 */ >+ 3.34585189819335937500e-01, /* 0x3fd569d800000000 */ >+ 3.38066101074218750000e-01, /* 0x3fd5a2e000000000 */ >+ 3.41538190841674804688e-01, /* 0x3fd5dbc300000000 */ >+ 3.45002174377441406250e-01, /* 0x3fd6148400000000 */ >+ 3.48457098007202148438e-01, /* 0x3fd64d1f00000000 */ >+ 3.51903676986694335938e-01, /* 0x3fd6859700000000 */ >+ 3.55341434478759765625e-01, /* 0x3fd6bdea00000000 */ >+ 3.58770608901977539062e-01, /* 0x3fd6f61900000000 */ >+ 3.62190723419189453125e-01, /* 0x3fd72e2200000000 */ >+ 3.65602254867553710938e-01, /* 0x3fd7660700000000 */ >+ 3.69004726409912109375e-01, /* 0x3fd79dc600000000 */ >+ 3.72398376464843750000e-01, /* 0x3fd7d56000000000 */ >+ 3.75782966613769531250e-01, /* 0x3fd80cd400000000 */ >+ 3.79158496856689453125e-01, /* 0x3fd8442200000000 */ >+ 3.82525205612182617188e-01, /* 0x3fd87b4b00000000 */ >+ 3.85882616043090820312e-01, /* 0x3fd8b24d00000000 */ >+ 3.89230966567993164062e-01, /* 0x3fd8e92900000000 */ >+ 3.92570018768310546875e-01, /* 0x3fd91fde00000000 */ >+ 3.95900011062622070312e-01, /* 0x3fd9566d00000000 */ >+ 3.99220705032348632812e-01, /* 0x3fd98cd500000000 */ >+ 4.02532100677490234375e-01, /* 0x3fd9c31600000000 */ >+ 4.05834197998046875000e-01, /* 0x3fd9f93000000000 */ >+ 4.09126996994018554688e-01, /* 0x3fda2f2300000000 */ >+ 4.12410259246826171875e-01, /* 0x3fda64ee00000000 */ >+ 4.15684223175048828125e-01, /* 0x3fda9a9200000000 */ >+ 4.18948888778686523438e-01, /* 0x3fdad00f00000000 */ >+ 4.22204017639160156250e-01, /* 0x3fdb056400000000 */ >+ 4.25449609756469726562e-01, /* 0x3fdb3a9100000000 */ >+ 4.28685665130615234375e-01, /* 0x3fdb6f9600000000 */ >+ 4.31912183761596679688e-01, /* 0x3fdba47300000000 */ >+ 4.35129165649414062500e-01, /* 0x3fdbd92800000000 */ >+ 4.38336372375488281250e-01, /* 0x3fdc0db400000000 */ >+ 4.41534280776977539062e-01, /* 
0x3fdc421900000000 */ >+ 4.44722414016723632812e-01, /* 0x3fdc765500000000 */ >+ 4.47900772094726562500e-01, /* 0x3fdcaa6800000000 */ >+ 4.51069593429565429688e-01, /* 0x3fdcde5300000000 */ >+ 4.54228639602661132812e-01, /* 0x3fdd121500000000 */ >+ 4.57377910614013671875e-01, /* 0x3fdd45ae00000000 */ >+ 4.60517644882202148438e-01, /* 0x3fdd791f00000000 */ >+ 4.63647603988647460938e-01, /* 0x3fddac6700000000 */ >+ 4.66767549514770507812e-01, /* 0x3fdddf8500000000 */ >+ 4.69877958297729492188e-01, /* 0x3fde127b00000000 */ >+ 4.72978591918945312500e-01, /* 0x3fde454800000000 */ >+ 4.76069211959838867188e-01, /* 0x3fde77eb00000000 */ >+ 4.79150056838989257812e-01, /* 0x3fdeaa6500000000 */ >+ 4.82221126556396484375e-01, /* 0x3fdedcb600000000 */ >+ 4.85282421112060546875e-01, /* 0x3fdf0ede00000000 */ >+ 4.88333940505981445312e-01, /* 0x3fdf40dd00000000 */ >+ 4.91375446319580078125e-01, /* 0x3fdf72b200000000 */ >+ 4.94406938552856445312e-01, /* 0x3fdfa45d00000000 */ >+ 4.97428894042968750000e-01, /* 0x3fdfd5e000000000 */ >+ 5.00440597534179687500e-01, /* 0x3fe0039c00000000 */ >+ 5.03442764282226562500e-01, /* 0x3fe01c3400000000 */ >+ 5.06434917449951171875e-01, /* 0x3fe034b700000000 */ >+ 5.09417057037353515625e-01, /* 0x3fe04d2500000000 */ >+ 5.12389183044433593750e-01, /* 0x3fe0657e00000000 */ >+ 5.15351772308349609375e-01, /* 0x3fe07dc300000000 */ >+ 5.18304347991943359375e-01, /* 0x3fe095f300000000 */ >+ 5.21246910095214843750e-01, /* 0x3fe0ae0e00000000 */ >+ 5.24179458618164062500e-01, /* 0x3fe0c61400000000 */ >+ 5.27101993560791015625e-01, /* 0x3fe0de0500000000 */ >+ 5.30014991760253906250e-01, /* 0x3fe0f5e200000000 */ >+ 5.32917976379394531250e-01, /* 0x3fe10daa00000000 */ >+ 5.35810947418212890625e-01, /* 0x3fe1255d00000000 */ >+ 5.38693904876708984375e-01, /* 0x3fe13cfb00000000 */ >+ 5.41567325592041015625e-01, /* 0x3fe1548500000000 */ >+ 5.44430732727050781250e-01, /* 0x3fe16bfa00000000 */ >+ 5.47284126281738281250e-01, /* 0x3fe1835a00000000 */ >+ 
5.50127506256103515625e-01, /* 0x3fe19aa500000000 */ >+ 5.52961349487304687500e-01, /* 0x3fe1b1dc00000000 */ >+ 5.55785179138183593750e-01, /* 0x3fe1c8fe00000000 */ >+ 5.58598995208740234375e-01, /* 0x3fe1e00b00000000 */ >+ 5.61403274536132812500e-01, /* 0x3fe1f70400000000 */ >+ 5.64197540283203125000e-01, /* 0x3fe20de800000000 */ >+ 5.66981792449951171875e-01, /* 0x3fe224b700000000 */ >+ 5.69756031036376953125e-01, /* 0x3fe23b7100000000 */ >+ 5.72520732879638671875e-01, /* 0x3fe2521700000000 */ >+ 5.75275897979736328125e-01, /* 0x3fe268a900000000 */ >+ 5.78021049499511718750e-01, /* 0x3fe27f2600000000 */ >+ 5.80756187438964843750e-01, /* 0x3fe2958e00000000 */ >+ 5.83481788635253906250e-01, /* 0x3fe2abe200000000 */ >+ 5.86197376251220703125e-01, /* 0x3fe2c22100000000 */ >+ 5.88903427124023437500e-01, /* 0x3fe2d84c00000000 */ >+ 5.91599464416503906250e-01, /* 0x3fe2ee6200000000 */ >+ 5.94285964965820312500e-01, /* 0x3fe3046400000000 */ >+ 5.96962928771972656250e-01, /* 0x3fe31a5200000000 */ >+ 5.99629878997802734375e-01, /* 0x3fe3302b00000000 */ >+ 6.02287292480468750000e-01, /* 0x3fe345f000000000 */ >+ 6.04934692382812500000e-01, /* 0x3fe35ba000000000 */ >+ 6.07573032379150390625e-01, /* 0x3fe3713d00000000 */ >+ 6.10201358795166015625e-01, /* 0x3fe386c500000000 */ >+ 6.12820148468017578125e-01, /* 0x3fe39c3900000000 */ >+ 6.15428924560546875000e-01, /* 0x3fe3b19800000000 */ >+ 6.18028640747070312500e-01, /* 0x3fe3c6e400000000 */ >+ 6.20618820190429687500e-01, /* 0x3fe3dc1c00000000 */ >+ 6.23198986053466796875e-01, /* 0x3fe3f13f00000000 */ >+ 6.25770092010498046875e-01, /* 0x3fe4064f00000000 */ >+ 6.28331184387207031250e-01, /* 0x3fe41b4a00000000 */ >+ 6.30883216857910156250e-01, /* 0x3fe4303200000000 */ >+ 6.33425712585449218750e-01, /* 0x3fe4450600000000 */ >+ 6.35958671569824218750e-01, /* 0x3fe459c600000000 */ >+ 6.38482093811035156250e-01, /* 0x3fe46e7200000000 */ >+ 6.40995979309082031250e-01, /* 0x3fe4830a00000000 */ >+ 6.43500804901123046875e-01, /* 
0x3fe4978f00000000 */ >+ 6.45996093750000000000e-01, /* 0x3fe4ac0000000000 */ >+ 6.48482322692871093750e-01, /* 0x3fe4c05e00000000 */ >+ 6.50959014892578125000e-01, /* 0x3fe4d4a800000000 */ >+ 6.53426170349121093750e-01, /* 0x3fe4e8de00000000 */ >+ 6.55884265899658203125e-01, /* 0x3fe4fd0100000000 */ >+ 6.58332824707031250000e-01, /* 0x3fe5111000000000 */ >+ 6.60772323608398437500e-01, /* 0x3fe5250c00000000 */ >+ 6.63202762603759765625e-01, /* 0x3fe538f500000000 */ >+ 6.65623664855957031250e-01, /* 0x3fe54cca00000000 */ >+ 6.68035984039306640625e-01, /* 0x3fe5608d00000000 */ >+ 6.70438766479492187500e-01, /* 0x3fe5743c00000000 */ >+ 6.72832489013671875000e-01, /* 0x3fe587d800000000 */ >+ 6.75216674804687500000e-01, /* 0x3fe59b6000000000 */ >+ 6.77592277526855468750e-01, /* 0x3fe5aed600000000 */ >+ 6.79958820343017578125e-01, /* 0x3fe5c23900000000 */ >+ 6.82316303253173828125e-01, /* 0x3fe5d58900000000 */ >+ 6.84664726257324218750e-01, /* 0x3fe5e8c600000000 */ >+ 6.87004089355468750000e-01, /* 0x3fe5fbf000000000 */ >+ 6.89334869384765625000e-01, /* 0x3fe60f0800000000 */ >+ 6.91656589508056640625e-01, /* 0x3fe6220d00000000 */ >+ 6.93969249725341796875e-01, /* 0x3fe634ff00000000 */ >+ 6.96272850036621093750e-01, /* 0x3fe647de00000000 */ >+ 6.98567867279052734375e-01, /* 0x3fe65aab00000000 */ >+ 7.00854301452636718750e-01, /* 0x3fe66d6600000000 */ >+ 7.03131675720214843750e-01, /* 0x3fe6800e00000000 */ >+ 7.05400466918945312500e-01, /* 0x3fe692a400000000 */ >+ 7.07660198211669921875e-01, /* 0x3fe6a52700000000 */ >+ 7.09911346435546875000e-01, /* 0x3fe6b79800000000 */ >+ 7.12153911590576171875e-01, /* 0x3fe6c9f700000000 */ >+ 7.14387893676757812500e-01, /* 0x3fe6dc4400000000 */ >+ 7.16613292694091796875e-01, /* 0x3fe6ee7f00000000 */ >+ 7.18829631805419921875e-01, /* 0x3fe700a700000000 */ >+ 7.21037864685058593750e-01, /* 0x3fe712be00000000 */ >+ 7.23237514495849609375e-01, /* 0x3fe724c300000000 */ >+ 7.25428581237792968750e-01, /* 0x3fe736b600000000 */ >+ 
7.27611064910888671875e-01, /* 0x3fe7489700000000 */ >+ 7.29785442352294921875e-01, /* 0x3fe75a6700000000 */ >+ 7.31950759887695312500e-01, /* 0x3fe76c2400000000 */ >+ 7.34108448028564453125e-01, /* 0x3fe77dd100000000 */ >+ 7.36257076263427734375e-01, /* 0x3fe78f6b00000000 */ >+ 7.38397598266601562500e-01, /* 0x3fe7a0f400000000 */ >+ 7.40530014038085937500e-01, /* 0x3fe7b26c00000000 */ >+ 7.42654323577880859375e-01, /* 0x3fe7c3d300000000 */ >+ 7.44770050048828125000e-01, /* 0x3fe7d52800000000 */ >+ 7.46877670288085937500e-01, /* 0x3fe7e66c00000000 */ >+ 7.48976707458496093750e-01, /* 0x3fe7f79e00000000 */ >+ 7.51068115234375000000e-01, /* 0x3fe808c000000000 */ >+ 7.53150939941406250000e-01, /* 0x3fe819d000000000 */ >+ 7.55226135253906250000e-01, /* 0x3fe82ad000000000 */ >+ 7.57292747497558593750e-01, /* 0x3fe83bbe00000000 */ >+ 7.59351730346679687500e-01, /* 0x3fe84c9c00000000 */ >+ 7.61402606964111328125e-01, /* 0x3fe85d6900000000 */ >+ 7.63445377349853515625e-01, /* 0x3fe86e2500000000 */ >+ 7.65480041503906250000e-01, /* 0x3fe87ed000000000 */ >+ 7.67507076263427734375e-01, /* 0x3fe88f6b00000000 */ >+ 7.69526004791259765625e-01, /* 0x3fe89ff500000000 */ >+ 7.71537303924560546875e-01, /* 0x3fe8b06f00000000 */ >+ 7.73540973663330078125e-01, /* 0x3fe8c0d900000000 */ >+ 7.75536537170410156250e-01, /* 0x3fe8d13200000000 */ >+ 7.77523994445800781250e-01, /* 0x3fe8e17a00000000 */ >+ 7.79504299163818359375e-01, /* 0x3fe8f1b300000000 */ >+ 7.81476497650146484375e-01, /* 0x3fe901db00000000 */ >+ 7.83441066741943359375e-01, /* 0x3fe911f300000000 */ >+ 7.85398006439208984375e-01}; /* 0x3fe921fb00000000 */ >+ >+ static const double atan_jby256_tail[ 241] = { >+ 2.13244638182005395671e-08, /* 0x3e56e59fbd38db2c */ >+ 3.89093864761712760656e-08, /* 0x3e64e3aa54dedf96 */ >+ 4.44780900009437454576e-08, /* 0x3e67e105ab1bda88 */ >+ 1.15344768460112754160e-08, /* 0x3e48c5254d013fd0 */ >+ 3.37271051945395312705e-09, /* 0x3e2cf8ab3ad62670 */ >+ 2.40857608736109859459e-08, /* 
0x3e59dca4bec80468 */ >+ 1.85853810450623807768e-08, /* 0x3e53f4b5ec98a8da */ >+ 5.14358299969225078306e-08, /* 0x3e6b9d49619d81fe */ >+ 8.85023985412952486748e-09, /* 0x3e43017887460934 */ >+ 1.59425154214358432060e-08, /* 0x3e511e3eca0b9944 */ >+ 1.95139937737755753164e-08, /* 0x3e54f3f73c5a332e */ >+ 2.64909755273544319715e-08, /* 0x3e5c71c8ae0e00a6 */ >+ 4.43388037881231070144e-08, /* 0x3e67cde0f86fbdc7 */ >+ 2.14757072421821274557e-08, /* 0x3e570f328c889c72 */ >+ 2.61049792670754218852e-08, /* 0x3e5c07ae9b994efe */ >+ 7.81439350674466302231e-09, /* 0x3e40c8021d7b1698 */ >+ 3.60125207123751024094e-08, /* 0x3e635585edb8cb22 */ >+ 6.15276238179343767917e-08, /* 0x3e70842567b30e96 */ >+ 9.54387964641184285058e-08, /* 0x3e799e811031472e */ >+ 3.02789566851502754129e-08, /* 0x3e6041821416bcee */ >+ 1.16888650949870856331e-07, /* 0x3e7f6086e4dc96f4 */ >+ 1.07580956468653338863e-08, /* 0x3e471a535c5f1b58 */ >+ 8.33454265379535427653e-08, /* 0x3e765f743fe63ca1 */ >+ 1.10790279272629526068e-07, /* 0x3e7dbd733472d014 */ >+ 1.08394277896366207424e-07, /* 0x3e7d18cc4d8b0d1d */ >+ 9.22176086126841098800e-08, /* 0x3e78c12553c8fb29 */ >+ 7.90938592199048786990e-08, /* 0x3e753b49e2e8f991 */ >+ 8.66445407164293125637e-08, /* 0x3e77422ae148c141 */ >+ 1.40839973537092438671e-08, /* 0x3e4e3ec269df56a8 */ >+ 1.19070438507307600689e-07, /* 0x3e7ff6754e7e0ac9 */ >+ 6.40451663051716197071e-08, /* 0x3e7131267b1b5aad */ >+ 1.08338682076343674522e-07, /* 0x3e7d14fa403a94bc */ >+ 3.52999550187922736222e-08, /* 0x3e62f396c089a3d8 */ >+ 1.05983273930043077202e-07, /* 0x3e7c731d78fa95bb */ >+ 1.05486124078259553339e-07, /* 0x3e7c50f385177399 */ >+ 5.82167732281776477773e-08, /* 0x3e6f41409c6f2c20 */ >+ 1.08696483983403942633e-07, /* 0x3e7d2d90c4c39ec0 */ >+ 4.47335086122377542835e-08, /* 0x3e680420696f2106 */ >+ 1.26896287162615723528e-08, /* 0x3e4b40327943a2e8 */ >+ 4.06534471589151404531e-08, /* 0x3e65d35e02f3d2a2 */ >+ 3.84504846300557026690e-08, /* 0x3e64a498288117b0 */ >+ 
3.60715006404807269080e-08, /* 0x3e635da119afb324 */ >+ 6.44725903165522722801e-08, /* 0x3e714e85cdb9a908 */ >+ 3.63749249976409461305e-08, /* 0x3e638754e5547b9a */ >+ 1.03901294413833913794e-07, /* 0x3e7be40ae6ce3246 */ >+ 6.25379756302167880580e-08, /* 0x3e70c993b3bea7e7 */ >+ 6.63984302368488828029e-08, /* 0x3e71d2dd89ac3359 */ >+ 3.21844598971548278059e-08, /* 0x3e61476603332c46 */ >+ 1.16030611712765830905e-07, /* 0x3e7f25901bac55b7 */ >+ 1.17464622142347730134e-07, /* 0x3e7f881b7c826e28 */ >+ 7.54604017965808996596e-08, /* 0x3e7441996d698d20 */ >+ 1.49234929356206556899e-07, /* 0x3e8407ac521ea089 */ >+ 1.41416924523217430259e-07, /* 0x3e82fb0c6c4b1723 */ >+ 2.13308065617483489011e-07, /* 0x3e8ca135966a3e18 */ >+ 5.04230937933302320146e-08, /* 0x3e6b1218e4d646e4 */ >+ 5.45874922281655519035e-08, /* 0x3e6d4e72a350d288 */ >+ 1.51849028914786868886e-07, /* 0x3e84617e2f04c329 */ >+ 3.09004308703769273010e-08, /* 0x3e6096ec41e82650 */ >+ 9.67574548184738317664e-08, /* 0x3e79f91f25773e6e */ >+ 4.02508285529322212824e-08, /* 0x3e659c0820f1d674 */ >+ 3.01222268096861091157e-08, /* 0x3e602bf7a2df1064 */ >+ 2.36189860670079288680e-07, /* 0x3e8fb36bfc40508f */ >+ 1.14095158111080887695e-07, /* 0x3e7ea08f3f8dc892 */ >+ 7.42349089746573467487e-08, /* 0x3e73ed6254656a0e */ >+ 5.12515583196230380184e-08, /* 0x3e6b83f5e5e69c58 */ >+ 2.19290391828763918102e-07, /* 0x3e8d6ec2af768592 */ >+ 3.83263512187553886471e-08, /* 0x3e6493889a226f94 */ >+ 1.61513486284090523855e-07, /* 0x3e85ad8fa65279ba */ >+ 5.09996743535589922261e-08, /* 0x3e6b615784d45434 */ >+ 1.23694037861246766534e-07, /* 0x3e809a184368f145 */ >+ 8.23367955351123783984e-08, /* 0x3e761a2439b0d91c */ >+ 1.07591766213053694014e-07, /* 0x3e7ce1a65e39a978 */ >+ 1.42789947524631815640e-07, /* 0x3e832a39a93b6a66 */ >+ 1.32347123024711878538e-07, /* 0x3e81c3699af804e7 */ >+ 2.17626067316598149229e-08, /* 0x3e575e0f4e44ede8 */ >+ 2.34454866923044288656e-07, /* 0x3e8f77ced1a7a83b */ >+ 2.82966370261766916053e-09, /* 
0x3e284e7f0cb1b500 */ >+ 2.29300919890907632975e-07, /* 0x3e8ec6b838b02dfe */ >+ 1.48428270450261284915e-07, /* 0x3e83ebf4dfbeda87 */ >+ 1.87937408574313982512e-07, /* 0x3e89397aed9cb475 */ >+ 6.13685946813334055347e-08, /* 0x3e707937bc239c54 */ >+ 1.98585022733583817493e-07, /* 0x3e8aa754553131b6 */ >+ 7.68394131623752961662e-08, /* 0x3e74a05d407c45dc */ >+ 1.28119052312436745644e-07, /* 0x3e8132231a206dd0 */ >+ 7.02119104719236502733e-08, /* 0x3e72d8ecfdd69c88 */ >+ 9.87954793820636301943e-08, /* 0x3e7a852c74218606 */ >+ 1.72176752381034986217e-07, /* 0x3e871bf2baeebb50 */ >+ 1.12877225146169704119e-08, /* 0x3e483d7db7491820 */ >+ 5.33549829555851737993e-08, /* 0x3e6ca50d92b6da14 */ >+ 2.13833275710816521345e-08, /* 0x3e56f5cde8530298 */ >+ 1.16243518048290556393e-07, /* 0x3e7f343198910740 */ >+ 6.29926408369055877943e-08, /* 0x3e70e8d241ccd80a */ >+ 6.45429039328021963791e-08, /* 0x3e71535ac619e6c8 */ >+ 8.64001922814281933403e-08, /* 0x3e77316041c36cd2 */ >+ 9.50767572202325800240e-08, /* 0x3e7985a000637d8e */ >+ 5.80851497508121135975e-08, /* 0x3e6f2f29858c0a68 */ >+ 1.82350561135024766232e-07, /* 0x3e8879847f96d909 */ >+ 1.98948680587390608655e-07, /* 0x3e8ab3d319e12e42 */ >+ 7.83548663450197659846e-08, /* 0x3e75088162dfc4c2 */ >+ 3.04374234486798594427e-08, /* 0x3e605749a1cd9d8c */ >+ 2.76135725629797411787e-08, /* 0x3e5da65c6c6b8618 */ >+ 4.32610105454203065470e-08, /* 0x3e6739bf7df1ad64 */ >+ 5.17107515324127256994e-08, /* 0x3e6bc31252aa3340 */ >+ 2.82398327875841444660e-08, /* 0x3e5e528191ad3aa8 */ >+ 1.87482469524195595399e-07, /* 0x3e8929d93df19f18 */ >+ 2.97481891662714096139e-08, /* 0x3e5ff11eb693a080 */ >+ 9.94421570843584316402e-09, /* 0x3e455ae3f145a3a0 */ >+ 1.07056210730391848428e-07, /* 0x3e7cbcd8c6c0ca82 */ >+ 6.25589580466881163081e-08, /* 0x3e70cb04d425d304 */ >+ 9.56641013869464593803e-08, /* 0x3e79adfcab5be678 */ >+ 1.88056307148355440276e-07, /* 0x3e893d90c5662508 */ >+ 8.38850689379557880950e-08, /* 0x3e768489bd35ff40 */ >+ 
5.01215865527674122924e-09, /* 0x3e3586ed3da2b7e0 */ >+ 1.74166095998522089762e-07, /* 0x3e87604d2e850eee */ >+ 9.96779574395363585849e-08, /* 0x3e7ac1d12bfb53d8 */ >+ 5.98432026368321460686e-09, /* 0x3e39b3d468274740 */ >+ 1.18362922366887577169e-07, /* 0x3e7fc5d68d10e53c */ >+ 1.86086833284154215946e-07, /* 0x3e88f9e51884becb */ >+ 1.97671457251348941011e-07, /* 0x3e8a87f0869c06d1 */ >+ 1.42447160717199237159e-07, /* 0x3e831e7279f685fa */ >+ 1.05504240785546574184e-08, /* 0x3e46a8282f9719b0 */ >+ 3.13335218371639189324e-08, /* 0x3e60d2724a8a44e0 */ >+ 1.96518418901914535399e-07, /* 0x3e8a60524b11ad4e */ >+ 2.17692035039173536059e-08, /* 0x3e575fdf832750f0 */ >+ 2.15613114426529981675e-07, /* 0x3e8cf06902e4cd36 */ >+ 5.68271098300441214948e-08, /* 0x3e6e82422d4f6d10 */ >+ 1.70331455823369124256e-08, /* 0x3e524a091063e6c0 */ >+ 9.17590028095709583247e-08, /* 0x3e78a1a172dc6f38 */ >+ 2.77266304112916566247e-07, /* 0x3e929b6619f8a92d */ >+ 9.37041937614656939690e-08, /* 0x3e79274d9c1b70c8 */ >+ 1.56116346368316796511e-08, /* 0x3e50c34b1fbb7930 */ >+ 4.13967433808382727413e-08, /* 0x3e6639866c20eb50 */ >+ 1.70164749185821616276e-07, /* 0x3e86d6d0f6832e9e */ >+ 4.01708788545600086008e-07, /* 0x3e9af54def99f25e */ >+ 2.59663539226050551563e-07, /* 0x3e916cfc52a00262 */ >+ 2.22007487655027469542e-07, /* 0x3e8dcc1e83569c32 */ >+ 2.90542250809644081369e-07, /* 0x3e937f7a551ed425 */ >+ 4.67720537666628903341e-07, /* 0x3e9f6360adc98887 */ >+ 2.79799803956772554802e-07, /* 0x3e92c6ec8d35a2c1 */ >+ 2.07344552327432547723e-07, /* 0x3e8bd44df84cb036 */ >+ 2.54705698692735196368e-07, /* 0x3e9117cf826e310e */ >+ 4.26848589539548450728e-07, /* 0x3e9ca533f332cfc9 */ >+ 2.52506723633552216197e-07, /* 0x3e90f208509dbc2e */ >+ 2.14684129933849704964e-07, /* 0x3e8cd07d93c945de */ >+ 3.20134822201596505431e-07, /* 0x3e957bdfd67e6d72 */ >+ 9.93537565749855712134e-08, /* 0x3e7aab89c516c658 */ >+ 3.70792944827917252327e-08, /* 0x3e63e823b1a1b8a0 */ >+ 1.41772749369083698972e-07, /* 
0x3e8307464a9d6d3c */ >+ 4.22446601490198804306e-07, /* 0x3e9c5993cd438843 */ >+ 4.11818433724801511540e-07, /* 0x3e9ba2fca02ab554 */ >+ 1.19976381502605310519e-07, /* 0x3e801a5b6983a268 */ >+ 3.43703078571520905265e-08, /* 0x3e6273d1b350efc8 */ >+ 1.66128705555453270379e-07, /* 0x3e864c238c37b0c6 */ >+ 5.00499610023283006540e-08, /* 0x3e6aded07370a300 */ >+ 1.75105139941208062123e-07, /* 0x3e878091197eb47e */ >+ 7.70807146729030327334e-08, /* 0x3e74b0f245e0dabc */ >+ 2.45918607526895836121e-07, /* 0x3e9080d9794e2eaf */ >+ 2.18359020958626199345e-07, /* 0x3e8d4ec242b60c76 */ >+ 8.44342887976445333569e-09, /* 0x3e4221d2f940caa0 */ >+ 1.07506148687888629299e-07, /* 0x3e7cdbc42b2bba5c */ >+ 5.36544954316820904572e-08, /* 0x3e6cce37bb440840 */ >+ 3.39109101518396596341e-07, /* 0x3e96c1d999cf1dd0 */ >+ 2.60098720293920613340e-08, /* 0x3e5bed8a07eb0870 */ >+ 8.42678991664621455827e-08, /* 0x3e769ed88f490e3c */ >+ 5.36972237470183633197e-08, /* 0x3e6cd41719b73ef0 */ >+ 4.28192558171921681288e-07, /* 0x3e9cbc4ac95b41b7 */ >+ 2.71535491483955143294e-07, /* 0x3e9238f1b890f5d7 */ >+ 7.84094998145075780203e-08, /* 0x3e750c4282259cc4 */ >+ 3.43880599134117431863e-07, /* 0x3e9713d2de87b3e2 */ >+ 1.32878065060366481043e-07, /* 0x3e81d5a7d2255276 */ >+ 4.18046802627967629428e-07, /* 0x3e9c0dfd48227ac1 */ >+ 2.65042411765766019424e-07, /* 0x3e91c964dab76753 */ >+ 1.70383695347518643694e-07, /* 0x3e86de56d5704496 */ >+ 1.54096497259613515678e-07, /* 0x3e84aeb71fd19968 */ >+ 2.36543402412459813461e-07, /* 0x3e8fbf91c57b1918 */ >+ 4.38416350106876736790e-07, /* 0x3e9d6bef7fbe5d9a */ >+ 3.03892161339927775731e-07, /* 0x3e9464d3dc249066 */ >+ 3.31136771605664899240e-07, /* 0x3e9638e2ec4d9073 */ >+ 6.49494294526590682218e-08, /* 0x3e716f4a7247ea7c */ >+ 4.10423429887181345747e-09, /* 0x3e31a0a740f1d440 */ >+ 1.70831640869113847224e-07, /* 0x3e86edbb0114a33c */ >+ 1.10811512657909180966e-07, /* 0x3e7dbee8bf1d513c */ >+ 3.23677724749783611964e-07, /* 0x3e95b8bdb0248f73 */ >+ 
3.55662734259192678528e-07, /* 0x3e97de3d3f5eac64 */ >+ 2.30102333489738219140e-07, /* 0x3e8ee24187ae448a */ >+ 4.47429004000738629714e-07, /* 0x3e9e06c591ec5192 */ >+ 7.78167135617329598659e-08, /* 0x3e74e3861a332738 */ >+ 9.90345291908535415737e-08, /* 0x3e7a9599dcc2bfe4 */ >+ 5.85800913143113728314e-08, /* 0x3e6f732fbad43468 */ >+ 4.57859062410871843857e-07, /* 0x3e9eb9f573b727d9 */ >+ 3.67993069723390929794e-07, /* 0x3e98b212a2eb9897 */ >+ 2.90836464322977276043e-07, /* 0x3e9384884c167215 */ >+ 2.51621574250131388318e-07, /* 0x3e90e2d363020051 */ >+ 2.75789824740652815545e-07, /* 0x3e92820879fbd022 */ >+ 3.88985776250314403593e-07, /* 0x3e9a1ab9893e4b30 */ >+ 1.40214080183768019611e-07, /* 0x3e82d1b817a24478 */ >+ 3.23451432223550478373e-08, /* 0x3e615d7b8ded4878 */ >+ 9.15979180730608444470e-08, /* 0x3e78968f9db3a5e4 */ >+ 3.44371402498640470421e-07, /* 0x3e971c4171fe135f */ >+ 3.40401897215059498077e-07, /* 0x3e96d80f605d0d8c */ >+ 1.06431813453707950243e-07, /* 0x3e7c91f043691590 */ >+ 1.46204238932338846248e-07, /* 0x3e839f8a15fce2b2 */ >+ 9.94610376972039046878e-09, /* 0x3e455beda9d94b80 */ >+ 2.01711528092681771039e-07, /* 0x3e8b12c15d60949a */ >+ 2.72027977986191568296e-07, /* 0x3e924167b312bfe3 */ >+ 2.48402602511693757964e-07, /* 0x3e90ab8633070277 */ >+ 1.58480011219249621715e-07, /* 0x3e854554ebbc80ee */ >+ 3.00372828113368713281e-08, /* 0x3e60204aef5a4bb8 */ >+ 3.67816204583541976394e-07, /* 0x3e98af08c679cf2c */ >+ 2.46169793032343824291e-07, /* 0x3e90852a330ae6c8 */ >+ 1.70080468270204253247e-07, /* 0x3e86d3eb9ec32916 */ >+ 1.67806717763872914315e-07, /* 0x3e8685cb7fcbbafe */ >+ 2.67715622006907942620e-07, /* 0x3e91f751c1e0bd95 */ >+ 2.14411342550299170574e-08, /* 0x3e5705b1b0f72560 */ >+ 4.11228221283669073277e-07, /* 0x3e9b98d8d808ca92 */ >+ 3.52311752396749662260e-08, /* 0x3e62ea22c75cc980 */ >+ 3.52718000397367821054e-07, /* 0x3e97aba62bca0350 */ >+ 4.38857387992911129814e-07, /* 0x3e9d73833442278c */ >+ 3.22574606753482540743e-07, /* 
0x3e95a5ca1fb18bf9 */ >+ 3.28730371182804296828e-08, /* 0x3e61a6092b6ecf28 */ >+ 7.56672470607639279700e-08, /* 0x3e744fd049aac104 */ >+ 3.26750155316369681821e-09, /* 0x3e2c114fd8df5180 */ >+ 3.21724445362095284743e-07, /* 0x3e95972f130feae5 */ >+ 1.06639427371776571151e-07, /* 0x3e7ca034a55fe198 */ >+ 3.41020788139524715063e-07, /* 0x3e96e2b149990227 */ >+ 1.00582838631232552824e-07, /* 0x3e7b00000294592c */ >+ 3.68439433859276640065e-07, /* 0x3e98b9bdc442620e */ >+ 2.20403078342388012027e-07, /* 0x3e8d94fdfabf3e4e */ >+ 1.62841467098298142534e-07, /* 0x3e85db30b145ad9a */ >+ 2.25325348296680733838e-07, /* 0x3e8e3e1eb95022b0 */ >+ 4.37462238226421614339e-07, /* 0x3e9d5b8b45442bd6 */ >+ 3.52055880555040706500e-07, /* 0x3e97a046231ecd2e */ >+ 4.75614398494781776825e-07, /* 0x3e9feafe3ef55232 */ >+ 3.60998399033215317516e-07, /* 0x3e9839e7bfd78267 */ >+ 3.79292434611513945954e-08, /* 0x3e645cf49d6fa900 */ >+ 1.29859015528549300061e-08, /* 0x3e4be3132b27f380 */ >+ 3.15927546985474913188e-07, /* 0x3e9533980bb84f9f */ >+ 2.28533679887379668031e-08, /* 0x3e5889e2ce3ba390 */ >+ 1.17222541823553133877e-07, /* 0x3e7f7778c3ad0cc8 */ >+ 1.51991208405464415857e-07, /* 0x3e846660cec4eba2 */ >+ 1.56958239325240655564e-07}; /* 0x3e85110b4611a626 */ >+ >+ /* Some constants and split constants. 
*/ >+ >+ static double pi = 3.1415926535897932e+00, /* 0x400921fb54442d18 */ >+ piby2 = 1.5707963267948966e+00, /* 0x3ff921fb54442d18 */ >+ piby4 = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */ >+ three_piby4 = 2.3561944901923449e+00, /* 0x4002d97c7f3321d2 */ >+ pi_head = 3.1415926218032836e+00, /* 0x400921fb50000000 */ >+ pi_tail = 3.1786509547056392e-08, /* 0x3e6110b4611a6263 */ >+ piby2_head = 1.5707963267948965e+00, /* 0x3ff921fb54442d18 */ >+ piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */ >+ >+ double u, v, vbyu, q1, q2, s, u1, vu1, u2, vu2, uu, c, r; >+ unsigned int swap_vu, index, xzero, yzero, xnan, ynan, xinf, yinf; >+ int m, xexp, yexp, diffexp; >+ >+ /* Find properties of arguments x and y. */ >+ >+ unsigned long ux, ui, aux, xneg, uy, auy, yneg; >+ >+ GET_BITS_DP64(x, ux); >+ GET_BITS_DP64(y, uy); >+ aux = ux & ~SIGNBIT_DP64; >+ auy = uy & ~SIGNBIT_DP64; >+ xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ xneg = ux & SIGNBIT_DP64; >+ yneg = uy & SIGNBIT_DP64; >+ xzero = (aux == 0); >+ yzero = (auy == 0); >+ xnan = (aux > PINFBITPATT_DP64); >+ ynan = (auy > PINFBITPATT_DP64); >+ xinf = (aux == PINFBITPATT_DP64); >+ yinf = (auy == PINFBITPATT_DP64); >+ >+ diffexp = yexp - xexp; >+ >+ /* Special cases */ >+ >+ if (xnan) >+ return x + x; >+ else if (ynan) >+ return y + y; >+ else if (yzero) >+ { /* Zero y gives +-0 for positive x >+ and +-pi for negative x */ >+ if ((_LIB_VERSION == _SVID_) && xzero) >+ /* Sigh - _SVID_ defines atan2(0,0) as a domain error */ >+ return retval_errno_edom(x, y); >+ else if (xneg) >+ { >+ if (yneg) return val_with_flags(-pi,AMD_F_INEXACT); >+ else return val_with_flags(pi,AMD_F_INEXACT); >+ } >+ else return y; >+ } >+ else if (xzero) >+ { /* Zero x gives +- pi/2 >+ depending on sign of y */ >+ if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT); >+ else val_with_flags(piby2,AMD_F_INEXACT); >+ } >+ >+ /* Scale up both x and y if they are 
both below 1/4. >+ This avoids any possible later denormalised arithmetic. */ >+ >+ if ((xexp < 1021 && yexp < 1021)) >+ { >+ scaleUpDouble1024(ux, &ux); >+ scaleUpDouble1024(uy, &uy); >+ PUT_BITS_DP64(ux, x); >+ PUT_BITS_DP64(uy, y); >+ xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ diffexp = yexp - xexp; >+ } >+ >+ if (diffexp > 56) >+ { /* abs(y)/abs(x) > 2^56 => arctan(x/y) >+ is insignificant compared to piby2 */ >+ if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT); >+ else return val_with_flags(piby2,AMD_F_INEXACT); >+ } >+ else if (diffexp < -28 && (!xneg)) >+ { /* x positive and dominant over y by a factor of 2^28. >+ In this case atan(y/x) is y/x to machine accuracy. */ >+ >+ if (diffexp < -1074) /* Result underflows */ >+ { >+ if (yneg) >+ return val_with_flags(-0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW); >+ else >+ return val_with_flags(0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW); >+ } >+ else >+ { >+ if (diffexp < -1022) >+ { >+ /* Result will likely be denormalized */ >+ y = scaleDouble_1(y, 100); >+ y /= x; >+ /* Now y is 2^100 times the true result. Scale it back down. */ >+ GET_BITS_DP64(y, uy); >+ scaleDownDouble(uy, 100, &uy); >+ PUT_BITS_DP64(uy, y); >+ if ((uy & EXPBITS_DP64) == 0) >+ return val_with_flags(y, AMD_F_INEXACT | AMD_F_UNDERFLOW); >+ else >+ return y; >+ } >+ else >+ return y / x; >+ } >+ } >+ else if (diffexp < -56 && xneg) >+ { /* abs(x)/abs(y) > 2^56 and x < 0 => arctan(y/x) >+ is insignificant compared to pi */ >+ if (yneg) return val_with_flags(-pi,AMD_F_INEXACT); >+ else return val_with_flags(pi,AMD_F_INEXACT); >+ } >+ else if (yinf && xinf) >+ { /* If abs(x) and abs(y) are both infinity >+ return +-pi/4 or +- 3pi/4 according to >+ signs. 
*/ >+ if (xneg) >+ { >+ if (yneg) return val_with_flags(-three_piby4,AMD_F_INEXACT); >+ else return val_with_flags(three_piby4,AMD_F_INEXACT); >+ } >+ else >+ { >+ if (yneg) return val_with_flags(-piby4,AMD_F_INEXACT); >+ else return val_with_flags(piby4,AMD_F_INEXACT); >+ } >+ } >+ >+ /* General case: take absolute values of arguments */ >+ >+ u = x; v = y; >+ if (xneg) u = -x; >+ if (yneg) v = -y; >+ >+ /* Swap u and v if necessary to obtain 0 < v < u. Compute v/u. */ >+ >+ swap_vu = (u < v); >+ if (swap_vu) { uu = u; u = v; v = uu; } >+ vbyu = v/u; >+ >+ if (vbyu > 0.0625) >+ { /* General values of v/u. Use a look-up >+ table and series expansion. */ >+ >+ index = (int)(256*vbyu + 0.5); >+ q1 = atan_jby256_lead[index-16]; >+ q2 = atan_jby256_tail[index-16]; >+ c = index*1./256; >+ GET_BITS_DP64(u, ui); >+ m = (int)((ui & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ u = scaleDouble_2(u,-m); >+ v = scaleDouble_2(v,-m); >+ GET_BITS_DP64(u, ui); >+ PUT_BITS_DP64(0xfffffffff8000000 & ui, u1); /* 26 leading bits of u */ >+ u2 = u - u1; >+ >+ r = ((v-c*u1)-c*u2)/(u+c*v); >+ >+ /* Polynomial approximation to atan(r) */ >+ >+ s = r*r; >+ q2 = q2 + r - r*(s * (0.33333333333224095522 - s*(0.19999918038989143496))); >+ } >+ else if (vbyu < 1.e-8) >+ { /* v/u is small enough that atan(v/u) = v/u */ >+ q1 = 0.0; >+ q2 = vbyu; >+ } >+ else /* vbyu <= 0.0625 */ >+ { >+ /* Small values of v/u. 
Use a series expansion >+ computed carefully to minimise cancellation */ >+ >+ GET_BITS_DP64(u, ui); >+ PUT_BITS_DP64(0xffffffff00000000 & ui, u1); >+ GET_BITS_DP64(vbyu, ui); >+ PUT_BITS_DP64(0xffffffff00000000 & ui, vu1); >+ u2 = u - u1; >+ vu2 = vbyu - vu1; >+ >+ q1 = 0.0; >+ s = vbyu*vbyu; >+ q2 = vbyu + >+ ((((v - u1*vu1) - u2*vu1) - u*vu2)/u - >+ (vbyu*s*(0.33333333333333170500 - >+ s*(0.19999999999393223405 - >+ s*(0.14285713561807169030 - >+ s*(0.11110736283514525407 - >+ s*(0.90029810285449784439E-01))))))); >+ } >+ >+ /* Tidy-up according to which quadrant the arguments lie in */ >+ >+ if (swap_vu) {q1 = piby2_head - q1; q2 = piby2_tail - q2;} >+ if (xneg) {q1 = pi_head - q1; q2 = pi_tail - q2;} >+ q1 = q1 + q2; >+ >+ if (yneg) q1 = - q1; >+ >+ return q1; >+} >+ >+weak_alias (__atan2, atan2) >============================================================ >Index: sysdeps/x86_64/fpu/s_atan2f.c >--- sysdeps/x86_64/fpu/s_atan2f.c created >+++ sysdeps/x86_64/fpu/s_atan2f.c 2002-12-03 13:43:05.000000000 +0100 1.1 >@@ -0,0 +1,459 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_VAL_WITH_FLAGS >+#define USE_SCALEDOUBLE_1 >+#define USE_SCALEDOUBLE_2 >+#define USE_SCALEUPDOUBLE1024 >+#define USE_SCALEDOWNDOUBLE >+#include "libm_inlines_amd.h" >+#undef USE_SCALEDOWNDOUBLE >+#undef USE_SCALEUPDOUBLE1024 >+#undef USE_SCALEDOUBLE_1 >+#undef USE_SCALEDOUBLE_2 >+#undef USE_VAL_WITH_FLAGS >+ >+/* Deal with errno for out-of-range arguments >+ (only used when _LIB_VERSION is _SVID_) */ >+#include "libm_errno_amd.h" >+static inline float retval_errno_edom(float x, float y) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)y; >+ exc.type = DOMAIN; >+ exc.name = (char *)"atan2f"; >+ exc.retval = HUGE; >+ if (!matherr(&exc)) >+ { >+ (void)fputs("atan2f: DOMAIN error\n", stderr); >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+ >+float __atan2f(float fy, float fx) >+{ >+ /* Array atan_jby256 contains precomputed values of atan(j/256), >+ for j = 16, 17, ..., 256. 
*/ >+ >+ static const double atan_jby256[ 241] = { >+ 6.24188099959573430842e-02, /* 0x3faff55bb72cfde9 */ >+ 6.63088949198234745008e-02, /* 0x3fb0f99ea71d52a6 */ >+ 7.01969710718705064423e-02, /* 0x3fb1f86dbf082d58 */ >+ 7.40829225490337306415e-02, /* 0x3fb2f719318a4a9a */ >+ 7.79666338315423007588e-02, /* 0x3fb3f59f0e7c559d */ >+ 8.18479898030765457007e-02, /* 0x3fb4f3fd677292fb */ >+ 8.57268757707448092464e-02, /* 0x3fb5f2324fd2d7b2 */ >+ 8.96031774848717321724e-02, /* 0x3fb6f03bdcea4b0c */ >+ 9.34767811585894559112e-02, /* 0x3fb7ee182602f10e */ >+ 9.73475734872236708739e-02, /* 0x3fb8ebc54478fb28 */ >+ 1.01215441667466668485e-01, /* 0x3fb9e94153cfdcf1 */ >+ 1.05080273416329528224e-01, /* 0x3fbae68a71c722b8 */ >+ 1.08941956989865793015e-01, /* 0x3fbbe39ebe6f07c3 */ >+ 1.12800381201659388752e-01, /* 0x3fbce07c5c3cca32 */ >+ 1.16655435441069349478e-01, /* 0x3fbddd21701eba6e */ >+ 1.20507009691224548087e-01, /* 0x3fbed98c2190043a */ >+ 1.24354994546761424279e-01, /* 0x3fbfd5ba9aac2f6d */ >+ 1.28199281231298117811e-01, /* 0x3fc068d584212b3d */ >+ 1.32039761614638734288e-01, /* 0x3fc0e6adccf40881 */ >+ 1.35876328229701304195e-01, /* 0x3fc1646541060850 */ >+ 1.39708874289163620386e-01, /* 0x3fc1e1fafb043726 */ >+ 1.43537293701821222491e-01, /* 0x3fc25f6e171a535c */ >+ 1.47361481088651630200e-01, /* 0x3fc2dcbdb2fba1ff */ >+ 1.51181331798580037562e-01, /* 0x3fc359e8edeb99a3 */ >+ 1.54996741923940972718e-01, /* 0x3fc3d6eee8c6626c */ >+ 1.58807608315631065832e-01, /* 0x3fc453cec6092a9e */ >+ 1.62613828597948567589e-01, /* 0x3fc4d087a9da4f17 */ >+ 1.66415301183114927586e-01, /* 0x3fc54d18ba11570a */ >+ 1.70211925285474380276e-01, /* 0x3fc5c9811e3ec269 */ >+ 1.74003600935367680469e-01, /* 0x3fc645bfffb3aa73 */ >+ 1.77790228992676047071e-01, /* 0x3fc6c1d4898933d8 */ >+ 1.81571711160032150945e-01, /* 0x3fc73dbde8a7d201 */ >+ 1.85347949995694760705e-01, /* 0x3fc7b97b4bce5b02 */ >+ 1.89118848926083965578e-01, /* 0x3fc8350be398ebc7 */ >+ 1.92884312257974643856e-01, /* 
0x3fc8b06ee2879c28 */ >+ 1.96644245190344985064e-01, /* 0x3fc92ba37d050271 */ >+ 2.00398553825878511514e-01, /* 0x3fc9a6a8e96c8626 */ >+ 2.04147145182116990236e-01, /* 0x3fca217e601081a5 */ >+ 2.07889927202262986272e-01, /* 0x3fca9c231b403279 */ >+ 2.11626808765629753628e-01, /* 0x3fcb1696574d780b */ >+ 2.15357699697738047551e-01, /* 0x3fcb90d7529260a2 */ >+ 2.19082510780057748701e-01, /* 0x3fcc0ae54d768466 */ >+ 2.22801153759394493514e-01, /* 0x3fcc84bf8a742e6d */ >+ 2.26513541356919617664e-01, /* 0x3fccfe654e1d5395 */ >+ 2.30219587276843717927e-01, /* 0x3fcd77d5df205736 */ >+ 2.33919206214733416127e-01, /* 0x3fcdf110864c9d9d */ >+ 2.37612313865471241892e-01, /* 0x3fce6a148e96ec4d */ >+ 2.41298826930858800743e-01, /* 0x3fcee2e1451d980c */ >+ 2.44978663126864143473e-01, /* 0x3fcf5b75f92c80dd */ >+ 2.48651741190513253521e-01, /* 0x3fcfd3d1fc40dbe4 */ >+ 2.52317980886427151166e-01, /* 0x3fd025fa510665b5 */ >+ 2.55977303013005474952e-01, /* 0x3fd061eea03d6290 */ >+ 2.59629629408257511791e-01, /* 0x3fd09dc597d86362 */ >+ 2.63274882955282396590e-01, /* 0x3fd0d97ee509acb3 */ >+ 2.66912987587400396539e-01, /* 0x3fd1151a362431c9 */ >+ 2.70543868292936529052e-01, /* 0x3fd150973a9ce546 */ >+ 2.74167451119658789338e-01, /* 0x3fd18bf5a30bf178 */ >+ 2.77783663178873208022e-01, /* 0x3fd1c735212dd883 */ >+ 2.81392432649178403370e-01, /* 0x3fd2025567e47c95 */ >+ 2.84993688779881237938e-01, /* 0x3fd23d562b381041 */ >+ 2.88587361894077354396e-01, /* 0x3fd278372057ef45 */ >+ 2.92173383391398755471e-01, /* 0x3fd2b2f7fd9b5fe2 */ >+ 2.95751685750431536626e-01, /* 0x3fd2ed987a823cfe */ >+ 2.99322202530807379706e-01, /* 0x3fd328184fb58951 */ >+ 3.02884868374971361060e-01, /* 0x3fd362773707ebcb */ >+ 3.06439619009630070945e-01, /* 0x3fd39cb4eb76157b */ >+ 3.09986391246883430384e-01, /* 0x3fd3d6d129271134 */ >+ 3.13525122985043869228e-01, /* 0x3fd410cbad6c7d32 */ >+ 3.17055753209146973237e-01, /* 0x3fd44aa436c2af09 */ >+ 3.20578221991156986359e-01, /* 0x3fd4845a84d0c21b */ >+ 
3.24092470489871664618e-01, /* 0x3fd4bdee586890e6 */ >+ 3.27598440950530811477e-01, /* 0x3fd4f75f73869978 */ >+ 3.31096076704132047386e-01, /* 0x3fd530ad9951cd49 */ >+ 3.34585322166458920545e-01, /* 0x3fd569d88e1b4cd7 */ >+ 3.38066122836825466713e-01, /* 0x3fd5a2e0175e0f4e */ >+ 3.41538425296541714449e-01, /* 0x3fd5dbc3fbbe768d */ >+ 3.45002177207105076295e-01, /* 0x3fd614840309cfe1 */ >+ 3.48457327308122011278e-01, /* 0x3fd64d1ff635c1c5 */ >+ 3.51903825414964732676e-01, /* 0x3fd685979f5fa6fd */ >+ 3.55341622416168290144e-01, /* 0x3fd6bdeac9cbd76c */ >+ 3.58770670270572189509e-01, /* 0x3fd6f61941e4def0 */ >+ 3.62190922004212156882e-01, /* 0x3fd72e22d53aa2a9 */ >+ 3.65602331706966821034e-01, /* 0x3fd7660752817501 */ >+ 3.69004854528964421068e-01, /* 0x3fd79dc6899118d1 */ >+ 3.72398446676754202311e-01, /* 0x3fd7d5604b63b3f7 */ >+ 3.75783065409248884237e-01, /* 0x3fd80cd46a14b1d0 */ >+ 3.79158669033441808605e-01, /* 0x3fd84422b8df95d7 */ >+ 3.82525216899905096124e-01, /* 0x3fd87b4b0c1ebedb */ >+ 3.85882669398073752109e-01, /* 0x3fd8b24d394a1b25 */ >+ 3.89230987951320717144e-01, /* 0x3fd8e92916f5cde8 */ >+ 3.92570135011828580396e-01, /* 0x3fd91fde7cd0c662 */ >+ 3.95900074055262896078e-01, /* 0x3fd9566d43a34907 */ >+ 3.99220769575252543149e-01, /* 0x3fd98cd5454d6b18 */ >+ 4.02532187077682512832e-01, /* 0x3fd9c3165cc58107 */ >+ 4.05834293074804064450e-01, /* 0x3fd9f93066168001 */ >+ 4.09127055079168300278e-01, /* 0x3fda2f233e5e530b */ >+ 4.12410441597387267265e-01, /* 0x3fda64eec3cc23fc */ >+ 4.15684422123729413467e-01, /* 0x3fda9a92d59e98cf */ >+ 4.18948967133552840902e-01, /* 0x3fdad00f5422058b */ >+ 4.22204048076583571270e-01, /* 0x3fdb056420ae9343 */ >+ 4.25449637370042266227e-01, /* 0x3fdb3a911da65c6c */ >+ 4.28685708391625730496e-01, /* 0x3fdb6f962e737efb */ >+ 4.31912235472348193799e-01, /* 0x3fdba473378624a5 */ >+ 4.35129193889246812521e-01, /* 0x3fdbd9281e528191 */ >+ 4.38336559857957774877e-01, /* 0x3fdc0db4c94ec9ef */ >+ 4.41534310525166673322e-01, /* 
0x3fdc42191ff11eb6 */ >+ 4.44722423960939305942e-01, /* 0x3fdc76550aad71f8 */ >+ 4.47900879150937292206e-01, /* 0x3fdcaa6872f3631b */ >+ 4.51069655988523443568e-01, /* 0x3fdcde53432c1350 */ >+ 4.54228735266762495559e-01, /* 0x3fdd121566b7f2ad */ >+ 4.57378098670320809571e-01, /* 0x3fdd45aec9ec862b */ >+ 4.60517728767271039558e-01, /* 0x3fdd791f5a1226f4 */ >+ 4.63647609000806093515e-01, /* 0x3fddac670561bb4f */ >+ 4.66767723680866497560e-01, /* 0x3fdddf85bb026974 */ >+ 4.69878057975686880265e-01, /* 0x3fde127b6b0744af */ >+ 4.72978597903265574054e-01, /* 0x3fde4548066cf51a */ >+ 4.76069330322761219421e-01, /* 0x3fde77eb7f175a34 */ >+ 4.79150242925822533735e-01, /* 0x3fdeaa65c7cf28c4 */ >+ 4.82221324227853687105e-01, /* 0x3fdedcb6d43f8434 */ >+ 4.85282563559221225002e-01, /* 0x3fdf0ede98f393cf */ >+ 4.88333951056405479729e-01, /* 0x3fdf40dd0b541417 */ >+ 4.91375477653101910835e-01, /* 0x3fdf72b221a4e495 */ >+ 4.94407135071275316562e-01, /* 0x3fdfa45dd3029258 */ >+ 4.97428915812172245392e-01, /* 0x3fdfd5e0175fdf83 */ >+ 5.00440813147294050189e-01, /* 0x3fe0039c73c1a40b */ >+ 5.03442821109336358099e-01, /* 0x3fe01c341e82422d */ >+ 5.06434934483096732549e-01, /* 0x3fe034b709250488 */ >+ 5.09417148796356245022e-01, /* 0x3fe04d25314342e5 */ >+ 5.12389460310737621107e-01, /* 0x3fe0657e94db30cf */ >+ 5.15351866012543347040e-01, /* 0x3fe07dc3324e9b38 */ >+ 5.18304363603577900044e-01, /* 0x3fe095f30861a58f */ >+ 5.21246951491958210312e-01, /* 0x3fe0ae0e1639866c */ >+ 5.24179628782913242802e-01, /* 0x3fe0c6145b5b43da */ >+ 5.27102395269579471204e-01, /* 0x3fe0de05d7aa6f7c */ >+ 5.30015251423793132268e-01, /* 0x3fe0f5e28b67e295 */ >+ 5.32918198386882147055e-01, /* 0x3fe10daa77307a0d */ >+ 5.35811237960463593311e-01, /* 0x3fe1255d9bfbd2a8 */ >+ 5.38694372597246617929e-01, /* 0x3fe13cfbfb1b056e */ >+ 5.41567605391844897333e-01, /* 0x3fe1548596376469 */ >+ 5.44430940071603086672e-01, /* 0x3fe16bfa6f5137e1 */ >+ 5.47284380987436924748e-01, /* 0x3fe1835a88be7c13 */ >+ 
5.50127933104692989907e-01, /* 0x3fe19aa5e5299f99 */ >+ 5.52961601994028217888e-01, /* 0x3fe1b1dc87904284 */ >+ 5.55785393822313511514e-01, /* 0x3fe1c8fe7341f64f */ >+ 5.58599315343562330405e-01, /* 0x3fe1e00babdefeb3 */ >+ 5.61403373889889367732e-01, /* 0x3fe1f7043557138a */ >+ 5.64197577362497537656e-01, /* 0x3fe20de813e823b1 */ >+ 5.66981934222700489912e-01, /* 0x3fe224b74c1d192a */ >+ 5.69756453482978431069e-01, /* 0x3fe23b71e2cc9e6a */ >+ 5.72521144698072359525e-01, /* 0x3fe25217dd17e501 */ >+ 5.75276017956117824426e-01, /* 0x3fe268a940696da6 */ >+ 5.78021083869819540801e-01, /* 0x3fe27f261273d1b3 */ >+ 5.80756353567670302596e-01, /* 0x3fe2958e59308e30 */ >+ 5.83481838685214859730e-01, /* 0x3fe2abe21aded073 */ >+ 5.86197551356360535557e-01, /* 0x3fe2c2215e024465 */ >+ 5.88903504204738026395e-01, /* 0x3fe2d84c2961e48b */ >+ 5.91599710335111383941e-01, /* 0x3fe2ee628406cbca */ >+ 5.94286183324841177367e-01, /* 0x3fe30464753b090a */ >+ 5.96962937215401501234e-01, /* 0x3fe31a52048874be */ >+ 5.99629986503951384336e-01, /* 0x3fe3302b39b78856 */ >+ 6.02287346134964152178e-01, /* 0x3fe345f01cce37bb */ >+ 6.04935031491913965951e-01, /* 0x3fe35ba0b60eccce */ >+ 6.07573058389022313541e-01, /* 0x3fe3713d0df6c503 */ >+ 6.10201443063065118722e-01, /* 0x3fe386c52d3db11e */ >+ 6.12820202165241245673e-01, /* 0x3fe39c391cd41719 */ >+ 6.15429352753104952356e-01, /* 0x3fe3b198e5e2564a */ >+ 6.18028912282561737612e-01, /* 0x3fe3c6e491c78dc4 */ >+ 6.20618898599929469384e-01, /* 0x3fe3dc1c2a188504 */ >+ 6.23199329934065904268e-01, /* 0x3fe3f13fb89e96f4 */ >+ 6.25770224888563042498e-01, /* 0x3fe4064f47569f48 */ >+ 6.28331602434009650615e-01, /* 0x3fe41b4ae06fea41 */ >+ 6.30883481900321840818e-01, /* 0x3fe430328e4b26d5 */ >+ 6.33425882969144482537e-01, /* 0x3fe445065b795b55 */ >+ 6.35958825666321447834e-01, /* 0x3fe459c652badc7f */ >+ 6.38482330354437466191e-01, /* 0x3fe46e727efe4715 */ >+ 6.40996417725432032775e-01, /* 0x3fe4830aeb5f7bfd */ >+ 6.43501108793284370968e-01, /* 
0x3fe4978fa3269ee1 */ >+ 6.45996424886771558604e-01, /* 0x3fe4ac00b1c71762 */ >+ 6.48482387642300484032e-01, /* 0x3fe4c05e22de94e4 */ >+ 6.50959018996812410762e-01, /* 0x3fe4d4a8023414e8 */ >+ 6.53426341180761927063e-01, /* 0x3fe4e8de5bb6ec04 */ >+ 6.55884376711170835605e-01, /* 0x3fe4fd013b7dd17e */ >+ 6.58333148384755983962e-01, /* 0x3fe51110adc5ed81 */ >+ 6.60772679271132590273e-01, /* 0x3fe5250cbef1e9fa */ >+ 6.63202992706093175102e-01, /* 0x3fe538f57b89061e */ >+ 6.65624112284960989250e-01, /* 0x3fe54ccaf0362c8f */ >+ 6.68036061856020157990e-01, /* 0x3fe5608d29c70c34 */ >+ 6.70438865514021320458e-01, /* 0x3fe5743c352b33b9 */ >+ 6.72832547593763097282e-01, /* 0x3fe587d81f732fba */ >+ 6.75217132663749830535e-01, /* 0x3fe59b60f5cfab9d */ >+ 6.77592645519925151909e-01, /* 0x3fe5aed6c5909517 */ >+ 6.79959111179481823228e-01, /* 0x3fe5c2399c244260 */ >+ 6.82316554874748071313e-01, /* 0x3fe5d58987169b18 */ >+ 6.84665002047148862907e-01, /* 0x3fe5e8c6941043cf */ >+ 6.87004478341244895212e-01, /* 0x3fe5fbf0d0d5cc49 */ >+ 6.89335009598845749323e-01, /* 0x3fe60f084b46e05e */ >+ 6.91656621853199760075e-01, /* 0x3fe6220d115d7b8d */ >+ 6.93969341323259825138e-01, /* 0x3fe634ff312d1f3b */ >+ 6.96273194408023488045e-01, /* 0x3fe647deb8e20b8f */ >+ 6.98568207680949848637e-01, /* 0x3fe65aabb6c07b02 */ >+ 7.00854407884450081312e-01, /* 0x3fe66d663923e086 */ >+ 7.03131821924453670469e-01, /* 0x3fe6800e4e7e2857 */ >+ 7.05400476865049030906e-01, /* 0x3fe692a40556fb6a */ >+ 7.07660399923197958039e-01, /* 0x3fe6a5276c4b0575 */ >+ 7.09911618463524796141e-01, /* 0x3fe6b798920b3d98 */ >+ 7.12154159993178659249e-01, /* 0x3fe6c9f7855c3198 */ >+ 7.14388052156768926793e-01, /* 0x3fe6dc44551553ae */ >+ 7.16613322731374569052e-01, /* 0x3fe6ee7f10204aef */ >+ 7.18829999621624415873e-01, /* 0x3fe700a7c5784633 */ >+ 7.21038110854851588272e-01, /* 0x3fe712be84295198 */ >+ 7.23237684576317874097e-01, /* 0x3fe724c35b4fae7b */ >+ 7.25428749044510712274e-01, /* 0x3fe736b65a172dff */ >+ 
7.27611332626510676214e-01, /* 0x3fe748978fba8e0f */ >+ 7.29785463793429123314e-01, /* 0x3fe75a670b82d8d8 */ >+ 7.31951171115916565668e-01, /* 0x3fe76c24dcc6c6c0 */ >+ 7.34108483259739652560e-01, /* 0x3fe77dd112ea22c7 */ >+ 7.36257428981428097003e-01, /* 0x3fe78f6bbd5d315e */ >+ 7.38398037123989547936e-01, /* 0x3fe7a0f4eb9c19a2 */ >+ 7.40530336612692630105e-01, /* 0x3fe7b26cad2e50fd */ >+ 7.42654356450917929600e-01, /* 0x3fe7c3d311a6092b */ >+ 7.44770125716075148681e-01, /* 0x3fe7d528289fa093 */ >+ 7.46877673555587429099e-01, /* 0x3fe7e66c01c114fd */ >+ 7.48977029182941400620e-01, /* 0x3fe7f79eacb97898 */ >+ 7.51068221873802288613e-01, /* 0x3fe808c03940694a */ >+ 7.53151280962194302759e-01, /* 0x3fe819d0b7158a4c */ >+ 7.55226235836744863583e-01, /* 0x3fe82ad036000005 */ >+ 7.57293115936992444759e-01, /* 0x3fe83bbec5cdee22 */ >+ 7.59351950749757920178e-01, /* 0x3fe84c9c7653f7ea */ >+ 7.61402769805578416573e-01, /* 0x3fe85d69576cc2c5 */ >+ 7.63445602675201784315e-01, /* 0x3fe86e2578f87ae5 */ >+ 7.65480478966144461950e-01, /* 0x3fe87ed0eadc5a2a */ >+ 7.67507428319308182552e-01, /* 0x3fe88f6bbd023118 */ >+ 7.69526480405658186434e-01, /* 0x3fe89ff5ff57f1f7 */ >+ 7.71537664922959498526e-01, /* 0x3fe8b06fc1cf3dfe */ >+ 7.73541011592573490852e-01, /* 0x3fe8c0d9145cf49d */ >+ 7.75536550156311621507e-01, /* 0x3fe8d13206f8c4ca */ >+ 7.77524310373347682379e-01, /* 0x3fe8e17aa99cc05d */ >+ 7.79504322017186335181e-01, /* 0x3fe8f1b30c44f167 */ >+ 7.81476614872688268854e-01, /* 0x3fe901db3eeef187 */ >+ 7.83441218733151756304e-01, /* 0x3fe911f35199833b */ >+ 7.85398163397448278999e-01}; /* 0x3fe921fb54442d18 */ >+ >+ /* Some constants. 
*/ >+ >+ static double pi = 3.1415926535897932e+00, /* 0x400921fb54442d18 */ >+ piby2 = 1.5707963267948966e+00, /* 0x3ff921fb54442d18 */ >+ piby4 = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */ >+ three_piby4 = 2.3561944901923449e+00; /* 0x4002d97c7f3321d2 */ >+ >+ double u, v, vbyu, q, s, uu, r; >+ unsigned int swap_vu, index, xzero, yzero, xnan, ynan, xinf, yinf; >+ int xexp, yexp, diffexp; >+ >+ double x = fx; >+ double y = fy; >+ >+ /* Find properties of arguments x and y. */ >+ >+ unsigned long ux, aux, xneg, uy, auy, yneg; >+ >+ GET_BITS_DP64(x, ux); >+ GET_BITS_DP64(y, uy); >+ aux = ux & ~SIGNBIT_DP64; >+ auy = uy & ~SIGNBIT_DP64; >+ xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ xneg = ux & SIGNBIT_DP64; >+ yneg = uy & SIGNBIT_DP64; >+ xzero = (aux == 0); >+ yzero = (auy == 0); >+ xnan = (aux > PINFBITPATT_DP64); >+ ynan = (auy > PINFBITPATT_DP64); >+ xinf = (aux == PINFBITPATT_DP64); >+ yinf = (auy == PINFBITPATT_DP64); >+ >+ diffexp = yexp - xexp; >+ >+ /* Special cases */ >+ >+ if (xnan) >+ return x + x; >+ else if (ynan) >+ return y + y; >+ else if (yzero) >+ { /* Zero y gives +-0 for positive x >+ and +-pi for negative x */ >+ if ((_LIB_VERSION == _SVID_) && xzero) >+ /* Sigh - _SVID_ defines atan2(0,0) as a domain error */ >+ return retval_errno_edom(x, y); >+ else if (xneg) >+ { >+ if (yneg) return val_with_flags(-pi,AMD_F_INEXACT); >+ else return val_with_flags(pi,AMD_F_INEXACT); >+ } >+ else return y; >+ } >+ else if (xzero) >+ { /* Zero x gives +- pi/2 >+ depending on sign of y */ >+ if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT); >+ else val_with_flags(piby2,AMD_F_INEXACT); >+ } >+ >+ if (diffexp > 26) >+ { /* abs(y)/abs(x) > 2^26 => arctan(x/y) >+ is insignificant compared to piby2 */ >+ if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT); >+ else return val_with_flags(piby2,AMD_F_INEXACT); >+ } >+ else if (diffexp < -13 && (!xneg)) >+ { /* x positive and dominant 
over y by a factor of 2^13. >+ In this case atan(y/x) is y/x to machine accuracy. */ >+ >+ if (diffexp < -150) /* Result underflows */ >+ { >+ if (yneg) >+ return val_with_flags(-0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW); >+ else >+ return val_with_flags(0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW); >+ } >+ else >+ { >+ if (diffexp < -126) >+ { >+ /* Result will likely be denormalized */ >+ y = scaleDouble_1(y, 100); >+ y /= x; >+ /* Now y is 2^100 times the true result. Scale it back down. */ >+ GET_BITS_DP64(y, uy); >+ scaleDownDouble(uy, 100, &uy); >+ PUT_BITS_DP64(uy, y); >+ if ((uy & EXPBITS_DP64) == 0) >+ return val_with_flags(y, AMD_F_INEXACT | AMD_F_UNDERFLOW); >+ else >+ return y; >+ } >+ else >+ return y / x; >+ } >+ } >+ else if (diffexp < -26 && xneg) >+ { /* abs(x)/abs(y) > 2^56 and x < 0 => arctan(y/x) >+ is insignificant compared to pi */ >+ if (yneg) return val_with_flags(-pi,AMD_F_INEXACT); >+ else return val_with_flags(pi,AMD_F_INEXACT); >+ } >+ else if (yinf && xinf) >+ { /* If abs(x) and abs(y) are both infinity >+ return +-pi/4 or +- 3pi/4 according to >+ signs. */ >+ if (xneg) >+ { >+ if (yneg) return val_with_flags(-three_piby4,AMD_F_INEXACT); >+ else return val_with_flags(three_piby4,AMD_F_INEXACT); >+ } >+ else >+ { >+ if (yneg) return val_with_flags(-piby4,AMD_F_INEXACT); >+ else return val_with_flags(piby4,AMD_F_INEXACT); >+ } >+ } >+ >+ /* General case: take absolute values of arguments */ >+ >+ u = x; v = y; >+ if (xneg) u = -x; >+ if (yneg) v = -y; >+ >+ /* Swap u and v if necessary to obtain 0 < v < u. Compute v/u. */ >+ >+ swap_vu = (u < v); >+ if (swap_vu) { uu = u; u = v; v = uu; } >+ vbyu = v/u; >+ >+ if (vbyu > 0.0625) >+ { /* General values of v/u. Use a look-up >+ table and series expansion. 
*/ >+ >+ index = (int)(256*vbyu + 0.5); >+ r = (256*v-index*u)/(256*u+index*v); >+ >+ /* Polynomial approximation to atan(vbyu) */ >+ >+ s = r*r; >+ q = atan_jby256[index-16] + r - r*s*0.33333333333224095522; >+ } >+ else if (vbyu < 1.e-4) >+ { /* v/u is small enough that atan(v/u) = v/u */ >+ q = vbyu; >+ } >+ else /* vbyu <= 0.0625 */ >+ { >+ /* Small values of v/u. Use a series expansion */ >+ >+ s = vbyu*vbyu; >+ q = vbyu - >+ vbyu*s*(0.33333333333333170500 - >+ s*(0.19999999999393223405 - >+ s*0.14285713561807169030)); >+ } >+ >+ /* Tidy-up according to which quadrant the arguments lie in */ >+ >+ if (swap_vu) {q = piby2 - q;} >+ if (xneg) {q = pi - q;} >+ if (yneg) q = - q; >+ return q; >+} >+ >+weak_alias (__atan2f, atan2f) >============================================================ >Index: sysdeps/x86_64/fpu/s_ceil.c >--- sysdeps/x86_64/fpu/s_ceil.c created >+++ sysdeps/x86_64/fpu/s_ceil.c 2002-12-03 13:43:05.000000000 +0100 1.1 >@@ -0,0 +1,57 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+double __ceil(double x) >+{ >+ double r; >+ long rexp, xneg; >+ unsigned long ux, ax, ur, mask; >+ >+ GET_BITS_DP64(x, ux); >+ ax = ux & (~SIGNBIT_DP64); >+ xneg = (ux != ax); >+ >+ if (ax >= 0x4340000000000000) >+ { >+ /* abs(x) is either NaN, infinity, or >= 2^53 */ >+ if (ax > 0x7ff0000000000000) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ return x; >+ } >+ else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */ >+ { >+ if (ax == 0x0000000000000000) >+ /* x is +zero or -zero; return the same zero */ >+ return x; >+ else if (xneg) /* x < 0.0 */ >+ return 0.0; >+ else >+ return 1.0; >+ } >+ else >+ { >+ rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ /* Mask out the bits of r that we don't want */ >+ mask = (1L << (EXPSHIFTBITS_DP64 - rexp)) - 1; >+ ur = (ux & ~mask); >+ PUT_BITS_DP64(ur, r); >+ if (xneg || (ur == ux)) >+ return r; >+ else >+ /* We threw some bits away and x was positive */ >+ return r + 1.0; >+ } >+ >+} >+ >+weak_alias (__ceil, ceil) >============================================================ >Index: sysdeps/x86_64/fpu/s_ceilf.c >--- sysdeps/x86_64/fpu/s_ceilf.c created >+++ sysdeps/x86_64/fpu/s_ceilf.c 2002-12-03 13:43:06.000000000 +0100 1.1 >@@ -0,0 +1,56 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+float __ceilf(float x) >+{ >+ float r; >+ int rexp, xneg; >+ unsigned int ux, ax, ur, mask; >+ >+ GET_BITS_SP32(x, ux); >+ ax = ux & (~SIGNBIT_SP32); >+ xneg = (ux != ax); >+ >+ if (ax >= 0x4b800000) >+ { >+ /* abs(x) is either NaN, infinity, or >= 2^24 */ >+ if (ax > 0x7f800000) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ return x; >+ } >+ else if (ax < 0x3f800000) /* abs(x) < 1.0 */ >+ { >+ if (ax == 0x00000000) >+ /* x is +zero or -zero; return the same zero */ >+ return x; >+ else if (xneg) /* x < 0.0 */ >+ return 0.0F; >+ else >+ return 1.0F; >+ } >+ else >+ { >+ rexp = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; >+ /* Mask out the bits of r that we don't want */ >+ mask = (1 << (EXPSHIFTBITS_SP32 - rexp)) - 1; >+ ur = (ux & ~mask); >+ PUT_BITS_SP32(ur, r); >+ >+ if (xneg || (ux == ur)) return r; >+ else >+ /* We threw some bits away and x was positive */ >+ return r + 1.0F; >+ } >+} >+ >+weak_alias (__ceilf, ceilf) >============================================================ >Index: sysdeps/x86_64/fpu/s_copysign.c >--- sysdeps/x86_64/fpu/s_copysign.c created >+++ sysdeps/x86_64/fpu/s_copysign.c 2002-12-03 13:43:06.000000000 +0100 1.1 >@@ -0,0 +1,29 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+/* Returns the absolute value of x with the sign of y. >+ NaNs are not considered special; their sign bits are handled >+ the same as for any other number. 
*/ >+ >+double __copysign(double x, double y) >+{ >+ /* This works on Hammer */ >+ double temp = -0.0; /* 0x8000000000000000 */ >+ /* AND the bit pattern with y, result in y */ >+ asm volatile ("andpd %0, %1" : : "x" (temp), "x" (y)); >+ /* AND the ones-complement of the bit pattern with x, result in temp */ >+ asm volatile ("andnpd %0, %1" : : "x" (x), "x" (temp)); >+ asm volatile ("orpd %0, %1" : : "x" (temp), "x" (y)); >+ return y; >+} >+ >+ >+weak_alias (__copysign, copysign) >============================================================ >Index: sysdeps/x86_64/fpu/s_copysignf.c >--- sysdeps/x86_64/fpu/s_copysignf.c created >+++ sysdeps/x86_64/fpu/s_copysignf.c 2002-12-03 13:43:06.000000000 +0100 1.1 >@@ -0,0 +1,29 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+ /* Returns the absolute value of x with the sign of y. >+ NaNs are not considered special; their sign bits are handled >+ the same as for any other number. */ >+ >+float __copysignf(float x, float y) >+{ >+ /* This works on Hammer */ >+ float temp = -0.0; /* 0x80000000 */ >+ /* AND the bit pattern with y, result in y */ >+ asm volatile ("andps %0, %1" : : "x" (temp), "x" (y)); >+ /* AND the ones-complement of the bit pattern with x, result in temp */ >+ asm volatile ("andnps %0, %1" : : "x" (x), "x" (temp)); >+ asm volatile ("orps %0, %1" : : "x" (temp), "x" (y)); >+ return y; >+} >+ >+ >+weak_alias (__copysignf, copysignf) >============================================================ >Index: sysdeps/x86_64/fpu/s_cos.c >--- sysdeps/x86_64/fpu/s_cos.c created >+++ sysdeps/x86_64/fpu/s_cos.c 2002-12-03 13:43:06.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. 
*/ >============================================================ >Index: sysdeps/x86_64/fpu/s_cosf.c >--- sysdeps/x86_64/fpu/s_cosf.c created >+++ sysdeps/x86_64/fpu/s_cosf.c 2002-12-03 13:43:07.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/s_finite.c >--- sysdeps/x86_64/fpu/s_finite.c created >+++ sysdeps/x86_64/fpu/s_finite.c 2002-12-03 15:16:20.000000000 +0100 1.1 >@@ -0,0 +1,28 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/

#include "libm_amd.h"
#include "libm_util_amd.h"

/* Returns 0 if x is infinite or NaN, otherwise returns 1.

   x is infinite or NaN exactly when every exponent bit is set, so the
   test is a single mask-and-compare on the bit pattern.  The previous
   inline asm modified input-only operands, relied on EFLAGS surviving
   between separate asm statements, and applied setnz to an int
   operand.  */

int __finite(double x)
{
  unsigned long ux;

  GET_BITS_DP64(x, ux);
  return (ux & EXPBITS_DP64) != EXPBITS_DP64;
}

hidden_def (__finite)
weak_alias (__finite, finite)

>============================================================ >Index: sysdeps/x86_64/fpu/s_finitef.c >--- sysdeps/x86_64/fpu/s_finitef.c created >+++ sysdeps/x86_64/fpu/s_finitef.c 2002-12-03 15:16:32.000000000 +0100 1.1 >@@ -0,0 +1,27 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+/* Returns 0 if x is infinite or NaN, otherwise returns 1 */ >+ >+int __finitef(float x) >+{ >+ /* This works on Hammer */ >+ float temp = 1.0e444; /* = infinity = 0x7f800000 */ >+ volatile int retval; >+ retval = 0; >+ asm volatile ("andps %0, %1;" : : "x" (temp), "x" (x)); >+ asm volatile ("comiss %0, %1" : : "x" (temp), "x" (x)); >+ asm volatile ("setnz %0" : "=g" (retval)); >+ return retval; >+} >+ >+hidden_def (__finitef) >+weak_alias (__finitef, finitef) >============================================================ >Index: sysdeps/x86_64/fpu/s_floor.c >--- sysdeps/x86_64/fpu/s_floor.c created >+++ sysdeps/x86_64/fpu/s_floor.c 2002-12-03 13:43:08.000000000 +0100 1.1 >@@ -0,0 +1,60 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+double __floor(double x) >+{ >+ double r; >+ long rexp, xneg; >+ >+ >+ unsigned long ux, ax, ur, mask; >+ >+ GET_BITS_DP64(x, ux); >+ ax = ux & (~SIGNBIT_DP64); >+ xneg = (ux != ax); >+ >+ if (ax >= 0x4340000000000000) >+ { >+ /* abs(x) is either NaN, infinity, or >= 2^53 */ >+ if (ax > 0x7ff0000000000000) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ return x; >+ } >+ else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */ >+ { >+ if (ax == 0x0000000000000000) >+ /* x is +zero or -zero; return the same zero */ >+ return x; >+ else if (xneg) /* x < 0.0 */ >+ return -1.0; >+ else >+ return 0.0; >+ } >+ else >+ { >+ r = x; >+ rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ /* Mask out 
the bits of r that we don't want */ >+ mask = (1L << (EXPSHIFTBITS_DP64 - rexp)) - 1; >+ ur = (ux & ~mask); >+ PUT_BITS_DP64(ur, r); >+ if (xneg && (ur != ux)) >+ /* We threw some bits away and x was negative */ >+ return r - 1.0; >+ else >+ return r; >+ } >+ >+} >+ >+weak_alias (__floor, floor) >============================================================ >Index: sysdeps/x86_64/fpu/s_floorf.c >--- sysdeps/x86_64/fpu/s_floorf.c created >+++ sysdeps/x86_64/fpu/s_floorf.c 2002-12-03 13:43:08.000000000 +0100 1.1 >@@ -0,0 +1,56 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+float __floorf(float x) >+{ >+ float r; >+ int rexp, xneg; >+ unsigned int ux, ax, ur, mask; >+ >+ GET_BITS_SP32(x, ux); >+ ax = ux & (~SIGNBIT_SP32); >+ xneg = (ux != ax); >+ >+ if (ax >= 0x4b800000) >+ { >+ /* abs(x) is either NaN, infinity, or >= 2^24 */ >+ if (ax > 0x7f800000) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ return x; >+ } >+ else if (ax < 0x3f800000) /* abs(x) < 1.0 */ >+ { >+ if (ax == 0x00000000) >+ /* x is +zero or -zero; return the same zero */ >+ return x; >+ else if (xneg) /* x < 0.0 */ >+ return -1.0F; >+ else >+ return 0.0F; >+ } >+ else >+ { >+ rexp = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; >+ /* Mask out the bits of r that we don't want */ >+ mask = (1 << (EXPSHIFTBITS_SP32 - rexp)) - 1; >+ ur = (ux & ~mask); >+ PUT_BITS_SP32(ur, r); >+ if (xneg && (ux != ur)) >+ /* We threw some bits away and x was negative */ >+ return r - 1.0F; >+ else >+ return r; >+ } >+} >+ >+weak_alias (__floorf, floorf) >============================================================ >Index: sysdeps/x86_64/fpu/s_fma.c >--- sysdeps/x86_64/fpu/s_fma.c created >+++ sysdeps/x86_64/fpu/s_fma.c 
2002-12-03 13:43:08.000000000 +0100 1.1 >@@ -0,0 +1,117 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SCALEDOUBLE_1 >+#define USE_SCALEDOUBLE_2 >+#include "libm_inlines_amd.h" >+#undef USE_SCALEDOUBLE_1 >+#undef USE_SCALEDOUBLE_2 >+ >+double __fma(double a, double b, double sum) >+{ >+ /* Returns a * b + sum with no intermediate loss of precision */ >+ >+ double ha, ta, hb, tb, z, zz, r, s, az, asum; >+ int ua, ub, usum; >+ int scaled, expover, expunder, scaleexp; >+ unsigned long u; >+ >+ GET_BITS_DP64(a, u); >+ ua = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ GET_BITS_DP64(b, u); >+ ub = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ GET_BITS_DP64(sum, u); >+ usum = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ >+ if (ua == EMAX_DP64 + 1 || ub == EMAX_DP64 + 1 || usum == EMAX_DP64 + 1) >+ { >+ /* One or more of the arguments is NaN or infinity. The >+ result will also be NaN or infinity. */ >+ return a * b + sum; >+ } >+ else if (ua + ub > usum + 2 * MANTLENGTH_DP64) >+ { >+ /* sum is negligible compared with the extra-length product a*b */ >+ return a*b; >+ } >+ else if (usum > ua + ub + MANTLENGTH_DP64) >+ { >+ /* The product a*b is negligible compared with sum */ >+ return sum; >+ } >+ >+ expover = EMAX_DP64 - 2; >+ expunder = EMIN_DP64 + MANTLENGTH_DP64; >+ scaleexp = 0; >+ >+ >+ if (ua + ub > expover || usum > expover) >+ { >+ /* The result is likely to overflow. Scale down in an attempt >+ to avoid unnecessary overflow. The true result may still overflow. 
*/ >+ scaled = 1; >+ scaleexp = expover / 2; >+ a = scaleDouble_1(a, -scaleexp); >+ b = scaleDouble_1(b, -scaleexp); >+ sum = scaleDouble_2(sum, -2*scaleexp); >+ } >+ else if (ua + ub < expunder) >+ { >+ /* The product a*b is near underflow; scale up */ >+ scaled = 1; >+ scaleexp = expunder / 2; >+ a = scaleDouble_1(a, -scaleexp); >+ b = scaleDouble_1(b, -scaleexp); >+ sum = scaleDouble_2(sum, -2*scaleexp); >+ } >+ else >+ scaled = 0; >+ >+ /* Split a into ha (head) and ta (tail). Do the same for b. */ >+ ha = a; >+ GET_BITS_DP64(ha, u); >+ u &= 0xfffffffff8000000; >+ PUT_BITS_DP64(u, ha); >+ ta = a - ha; >+ hb = b; >+ GET_BITS_DP64(hb, u); >+ u &= 0xfffffffff8000000; >+ PUT_BITS_DP64(u, hb); >+ tb = b - hb; >+ >+ /* Carefully multiply the parts together. z is the most significant >+ part of the result, and zz the least significant part */ >+ z = a * b; >+ zz = (((ha * hb - z) + ha * tb) + ta * hb) + ta * tb; >+ >+ /* Set az = abs(z), asum = abs(sum) */ >+ GET_BITS_DP64(z, u); >+ u &= ~SIGNBIT_DP64; >+ PUT_BITS_DP64(u, az); >+ GET_BITS_DP64(sum, u); >+ u &= ~SIGNBIT_DP64; >+ PUT_BITS_DP64(u, asum); >+ >+ /* Carefully add (z,zz) to sum */ >+ r = z + sum; >+ >+ if (az > asum) >+ s = ((z - r) + sum) + zz; >+ else >+ s = ((sum - r) + z) + zz; >+ >+ if (scaled) >+ return scaleDouble_1(r + s, 2*scaleexp); >+ else >+ return r + s; >+} >+ >+weak_alias (__fma, fma) >============================================================ >Index: sysdeps/x86_64/fpu/s_fmaf.c >--- sysdeps/x86_64/fpu/s_fmaf.c created >+++ sysdeps/x86_64/fpu/s_fmaf.c 2002-12-03 13:43:09.000000000 +0100 1.1 >@@ -0,0 +1,116 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SCALEFLOAT_1 >+#define USE_SCALEFLOAT_2 >+#include "libm_inlines_amd.h" >+#undef USE_SCALEFLOAT_1 >+#undef USE_SCALEFLOAT_2 >+ >+float __fmaf(float a, float b, float sum) >+{ >+ /* Returns a * b + sum with no intermediate loss of precision */ >+ >+ float ha, ta, hb, tb, z, zz, r, s, az, asum; >+ int ua, ub, usum; >+ int scaled, expover, expunder, scaleexp; >+ unsigned int u; >+ >+ GET_BITS_SP32(a, u); >+ ua = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; >+ GET_BITS_SP32(b, u); >+ ub = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; >+ GET_BITS_SP32(sum, u); >+ usum = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; >+ >+ if (ua == EMAX_SP32 + 1 || ub == EMAX_SP32 + 1 || usum == EMAX_SP32 + 1) >+ { >+ /* One or more of the arguments is NaN or infinity. The >+ result will also be NaN or infinity. */ >+ return a * b + sum; >+ } >+ else if (ua + ub > usum + 2 * MANTLENGTH_SP32) >+ { >+ /* sum is negligible compared with the extra-length product a*b */ >+ return a*b; >+ } >+ else if (usum > ua + ub + MANTLENGTH_SP32) >+ { >+ /* The product a*b is negligible compared with sum */ >+ return sum; >+ } >+ >+ expover = EMAX_SP32 - 2; >+ expunder = EMIN_SP32 + MANTLENGTH_SP32; >+ scaleexp = 0; >+ >+ if (ua + ub > expover || usum > expover) >+ { >+ /* The result is likely to overflow. Scale down in an attempt >+ to avoid unnecessary overflow. The true result may still overflow. 
*/ >+ scaled = 1; >+ scaleexp = expover / 2; >+ a = scaleFloat_1(a, -scaleexp); >+ b = scaleFloat_1(b, -scaleexp); >+ sum = scaleFloat_2(sum, -2*scaleexp); >+ } >+ else if (ua + ub < expunder) >+ { >+ /* The product a*b is near underflow; scale up */ >+ scaled = 1; >+ scaleexp = expunder / 2; >+ a = scaleFloat_1(a, -scaleexp); >+ b = scaleFloat_1(b, -scaleexp); >+ sum = scaleFloat_2(sum, -2*scaleexp); >+ } >+ else >+ scaled = 0; >+ >+ /* Split a into ha (head) and ta (tail). Do the same for b. */ >+ ha = a; >+ GET_BITS_SP32(ha, u); >+ u &= 0xfffff000; >+ PUT_BITS_SP32(u, ha); >+ ta = a - ha; >+ hb = b; >+ GET_BITS_SP32(hb, u); >+ u &= 0xfffff000; >+ PUT_BITS_SP32(u, hb); >+ tb = b - hb; >+ >+ /* Carefully multiply the parts together. z is the most significant >+ part of the result, and zz the least significant part */ >+ z = a * b; >+ zz = (((ha * hb - z) + ha * tb) + ta * hb) + ta * tb; >+ >+ /* Set az = abs(z), asum = abs(sum) */ >+ GET_BITS_SP32(z, u); >+ u &= ~SIGNBIT_SP32; >+ PUT_BITS_SP32(u, az); >+ GET_BITS_SP32(sum, u); >+ u &= ~SIGNBIT_SP32; >+ PUT_BITS_SP32(u, asum); >+ >+ /* Carefully add (z,zz) to sum */ >+ r = z + sum; >+ >+ if (az > asum) >+ s = ((z - r) + sum) + zz; >+ else >+ s = ((sum - r) + z) + zz; >+ >+ if (scaled) >+ return scaleFloat_1(r + s, 2*scaleexp); >+ else >+ return r + s; >+} >+ >+weak_alias (__fmaf, fmaf) >============================================================ >Index: sysdeps/x86_64/fpu/s_logb.c >--- sysdeps/x86_64/fpu/s_logb.c created >+++ sysdeps/x86_64/fpu/s_logb.c 2002-12-03 13:43:09.000000000 +0100 1.1 >@@ -0,0 +1,62 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_INFINITY_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_INFINITY_WITH_FLAGS >+ >+double __logb(double x) >+{ >+ >+ unsigned long ux; >+ long u; >+ GET_BITS_DP64(x, ux); >+ u = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ if ((ux & ~SIGNBIT_DP64) == 0) >+ /* x is +/-zero. Return -infinity with div-by-zero flag. */ >+ return -infinity_with_flags(AMD_F_DIVBYZERO); >+ else if (EMIN_DP64 <= u && u <= EMAX_DP64) >+ /* x is a normal number */ >+ return u; >+ else if (u > EMAX_DP64) >+ { >+ /* x is infinity or NaN */ >+ if ((ux & MANTBITS_DP64) == 0) >+ /* x is +/-infinity. Return +infinity with no flags. */ >+ return infinity_with_flags(0); >+ else >+ /* x is NaN, result is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ } >+ else >+ { >+ /* x is denormalized. */ >+#ifdef FOLLOW_IEEE754_LOGB >+ /* Return the value of the minimum exponent to ensure that >+ the relationship between logb and scalb, defined in >+ IEEE 754, holds. */ >+ return EMIN_DP64; >+#else >+ /* Follow the rule set by IEEE 854 for logb */ >+ ux &= MANTBITS_DP64; >+ u = EMIN_DP64; >+ while (ux < IMPBIT_DP64) >+ { >+ ux <<= 1; >+ u--; >+ } >+ return u; >+#endif >+ } >+ >+} >+ >+weak_alias (__logb, logb) >============================================================ >Index: sysdeps/x86_64/fpu/s_logbf.c >--- sysdeps/x86_64/fpu/s_logbf.c created >+++ sysdeps/x86_64/fpu/s_logbf.c 2002-12-03 13:43:09.000000000 +0100 1.1 >@@ -0,0 +1,60 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_INFINITYF_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_INFINITYF_WITH_FLAGS >+ >+float __logbf(float x) >+{ >+ unsigned int ux; >+ int u; >+ GET_BITS_SP32(x, ux); >+ u = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; >+ if ((ux & ~SIGNBIT_SP32) == 0) >+ /* x is +/-zero. Return -infinity with div-by-zero flag. */ >+ return -infinityf_with_flags(AMD_F_DIVBYZERO); >+ else if (EMIN_SP32 <= u && u <= EMAX_SP32) >+ /* x is a normal number */ >+ return u; >+ else if (u > EMAX_SP32) >+ { >+ /* x is infinity or NaN */ >+ if ((ux & MANTBITS_SP32) == 0) >+ /* x is +/-infinity. Return +infinity with no flags. */ >+ return infinityf_with_flags(0); >+ else >+ /* x is NaN, result is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ } >+ else >+ { >+ /* x is denormalized. */ >+#ifdef FOLLOW_IEEE754_LOGB >+ /* Return the value of the minimum exponent to ensure that >+ the relationship between logb and scalb, defined in >+ IEEE 754, holds. */ >+ return EMIN_SP32; >+#else >+ /* Follow the rule set by IEEE 854 for logb */ >+ ux &= MANTBITS_SP32; >+ u = EMIN_SP32; >+ while (ux < IMPBIT_SP32) >+ { >+ ux <<= 1; >+ u--; >+ } >+ return u; >+#endif >+ } >+} >+ >+weak_alias (__logbf, logbf) >============================================================ >Index: sysdeps/x86_64/fpu/s_modf.c >--- sysdeps/x86_64/fpu/s_modf.c created >+++ sysdeps/x86_64/fpu/s_modf.c 2002-12-03 13:43:10.000000000 +0100 1.1 >@@ -0,0 +1,59 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+double __modf(double x, double *iptr) >+{ >+ /* modf splits the argument x into integer and fraction parts, >+ each with the same sign as x. */ >+ >+ >+ long xexp; >+ unsigned long ux, ax, mask; >+ >+ GET_BITS_DP64(x, ux); >+ ax = ux & (~SIGNBIT_DP64); >+ >+ if (ax >= 0x4340000000000000) >+ { >+ /* abs(x) is either NaN, infinity, or >= 2^53 */ >+ if (ax > 0x7ff0000000000000) >+ { >+ /* x is NaN */ >+ *iptr = x; >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ } >+ else >+ { >+ /* x is infinity or large. Return zero with the sign of x */ >+ *iptr = x; >+ PUT_BITS_DP64(ux & SIGNBIT_DP64, x); >+ return x; >+ } >+ } >+ else if (ax < 0x3ff0000000000000) >+ { >+ /* abs(x) < 1.0. Set iptr to zero with the sign of x >+ and return x. */ >+ PUT_BITS_DP64(ux & SIGNBIT_DP64, *iptr); >+ return x; >+ } >+ else >+ { >+ xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ /* Mask out the bits of x that we don't want */ >+ mask = (1L << (EXPSHIFTBITS_DP64 - xexp)) - 1; >+ PUT_BITS_DP64(ux & ~mask, *iptr); >+ return x - *iptr; >+ } >+ >+} >+ >+weak_alias (__modf, modf) >============================================================ >Index: sysdeps/x86_64/fpu/s_modff.c >--- sysdeps/x86_64/fpu/s_modff.c created >+++ sysdeps/x86_64/fpu/s_modff.c 2002-12-03 13:43:10.000000000 +0100 1.1 >@@ -0,0 +1,54 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+float __modff(float x, float *iptr) >+{ >+ /* modff splits the argument x into integer and fraction parts, >+ each with the same sign as x. */ >+ >+ unsigned int ux, mask; >+ int xexp; >+ >+ GET_BITS_SP32(x, ux); >+ xexp = ((ux & (~SIGNBIT_SP32)) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; >+ >+ if (xexp < 0) >+ { >+ /* abs(x) < 1.0. Set iptr to zero with the sign of x >+ and return x. */ >+ PUT_BITS_SP32(ux & SIGNBIT_SP32, *iptr); >+ return x; >+ } >+ else if (xexp < EXPSHIFTBITS_SP32) >+ { >+ /* x lies between 1.0 and 2**(24) */ >+ /* Mask out the bits of x that we don't want */ >+ mask = (1 << (EXPSHIFTBITS_SP32 - xexp)) - 1; >+ PUT_BITS_SP32(ux & ~mask, *iptr); >+ return x - *iptr; >+ } >+ else if ((ux & (~SIGNBIT_SP32)) > 0x7f800000) >+ { >+ /* x is NaN */ >+ *iptr = x; >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ } >+ else >+ { >+ /* x is infinity or large. Set iptr to x and return zero >+ with the sign of x. */ >+ *iptr = x; >+ PUT_BITS_SP32(ux & SIGNBIT_SP32, x); >+ return x; >+ } >+} >+ >+weak_alias (__modff, modff) >============================================================ >Index: sysdeps/x86_64/fpu/s_sin.c >--- sysdeps/x86_64/fpu/s_sin.c created >+++ sysdeps/x86_64/fpu/s_sin.c 2002-12-03 13:43:10.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >Index: sysdeps/x86_64/fpu/s_sincos.c >--- sysdeps/x86_64/fpu/s_sincos.c created >+++ sysdeps/x86_64/fpu/s_sincos.c 2002-12-03 13:43:11.000000000 +0100 1.1 >@@ -0,0 +1,311 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_NAN_WITH_FLAGS >+#define USE_VAL_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_NAN_WITH_FLAGS >+#undef USE_VAL_WITH_FLAGS >+ >+/* sin(x) approximation valid on the interval [-pi/4,pi/4]. */ >+static inline double sin_piby4(double x, double xx) >+{ >+ /* Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... >+ = x * (1 - x^2/3! + x^4/5! - x^6/7! ... >+ = x * f(w) >+ where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... >+ We use a minimax approximation of (f(w) - 1) / w >+ because this produces an expansion in even powers of x. >+ If xx (the tail of x) is non-zero, we add a correction >+ term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx) >+ is an approximation to cos(x)*sin(xx) valid because >+ xx is tiny relative to x. >+ */ >+ static const double >+ c1 = -0.166666666666666646259241729, >+ c2 = 0.833333333333095043065222816e-2, >+ c3 = -0.19841269836761125688538679e-3, >+ c4 = 0.275573161037288022676895908448e-5, >+ c5 = -0.25051132068021699772257377197e-7, >+ c6 = 0.159181443044859136852668200e-9; >+ double x2, x3, r; >+ x2 = x * x; >+ x3 = x2 * x; >+ r = (c2 + x2 * (c3 + x2 * (c4 + x2 * (c5 + x2 * c6)))); >+ if (xx == 0.0) >+ return x + x3 * (c1 + x2 * r); >+ else >+ return x - ((x2 * (0.5 * xx - x3 * r) - xx) - x3 * c1); >+} >+ >+/* cos(x) approximation valid on the interval [-pi/4,pi/4]. */ >+static inline double cos_piby4(double x, double xx) >+{ >+ /* Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... >+ = f(w) >+ where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... >+ We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) >+ because this produces an expansion in even powers of x. 
>+ If xx (the tail of x) is non-zero, we subtract a correction >+ term g(x,xx) = x*xx to the result, where g(x,xx) >+ is an approximation to sin(x)*sin(xx) valid because >+ xx is tiny relative to x. >+ */ >+ double r, x2, t; >+ static const double >+ c1 = 0.41666666666666665390037e-1, >+ c2 = -0.13888888888887398280412e-2, >+ c3 = 0.248015872987670414957399e-4, >+ c4 = -0.275573172723441909470836e-6, >+ c5 = 0.208761463822329611076335e-8, >+ c6 = -0.113826398067944859590880e-10; >+ >+ x2 = x * x; >+ r = 0.5 * x2; >+ t = 1.0 - r; >+ return t + ((((1.0 - t) - r) - x * xx) + x2 * x2 * >+ (c1 + x2 * (c2 + x2 * (c3 + x2 * (c4 + x2 * (c5 + x2 * c6)))))); >+} >+ >+void __sincos(double x, double *s, double *c) >+{ >+ double r, rr; >+ int region, xneg; >+ >+ unsigned long ux, ax; >+ GET_BITS_DP64(x, ux); >+ ax = (ux & ~SIGNBIT_DP64); >+ if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ >+ { >+ if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ >+ { >+ if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */ >+ { >+ if (ax == 0x0000000000000000) >+ { >+ *s = x; >+ *c = 1.0; >+ } >+ else >+ { >+ *s = x; >+ *c = val_with_flags(1.0, AMD_F_INEXACT); >+ } >+ } >+ else >+ { >+ *s = x - x*x*x*0.166666666666666666; >+ *c = 1.0 - x*x*0.5; >+ } >+ } >+ else >+ { >+ *s = sin_piby4(x, 0.0); >+ *c = cos_piby4(x, 0.0); >+ } >+ return; >+ } >+ else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ *s = *c = x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* x is infinity. 
Return a NaN */ >+ *s = *c = nan_with_flags(AMD_F_INVALID); >+ return; >+ } >+ >+ xneg = (ax != ux); >+ >+ >+ if (xneg) >+ x = -x; >+ >+ /* Reduce x into range [-pi/4,pi/4] */ >+ __remainder_piby2(x, &r, &rr, &region); >+ >+ if (xneg) >+ { >+ switch (region) >+ { >+ default: >+ case 0: >+ *s = -sin_piby4(r, rr); >+ *c = cos_piby4(r, rr); >+ break; >+ case 1: >+ *s = -cos_piby4(r, rr); >+ *c = -sin_piby4(r, rr); >+ break; >+ case 2: >+ *s = sin_piby4(r, rr); >+ *c = -cos_piby4(r, rr); >+ break; >+ case 3: >+ *s = cos_piby4(r, rr); >+ *c = sin_piby4(r, rr); >+ break; >+ } >+ } >+ else >+ { >+ switch (region) >+ { >+ default: >+ case 0: >+ *s = sin_piby4(r, rr); >+ *c = cos_piby4(r, rr); >+ break; >+ case 1: >+ *s = cos_piby4(r, rr); >+ *c = -sin_piby4(r, rr); >+ break; >+ case 2: >+ *s = -sin_piby4(r, rr); >+ *c = -cos_piby4(r, rr); >+ break; >+ case 3: >+ *s = -cos_piby4(r, rr); >+ *c = sin_piby4(r, rr); >+ break; >+ } >+ } >+ return; >+} >+ >+double __sin(double x) >+{ >+ double r, rr; >+ int region, xneg; >+ >+ unsigned long ux, ax; >+ GET_BITS_DP64(x, ux); >+ ax = (ux & ~SIGNBIT_DP64); >+ if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ >+ { >+ if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ >+ { >+ if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */ >+ { >+ if (ax == 0x0000000000000000) >+ return x; >+ else >+ return val_with_flags(x, AMD_F_INEXACT); >+ } >+ else >+ return x - x*x*x*0.166666666666666666; >+ } >+ else >+ return sin_piby4(x, 0.0); >+ } >+ else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* x is infinity. 
Return a NaN */ >+ return nan_with_flags(AMD_F_INVALID); >+ } >+ xneg = (ax != ux); >+ >+ >+ if (xneg) >+ x = -x; >+ >+ /* Reduce x into range [-pi/4,pi/4] */ >+ __remainder_piby2(x, &r, &rr, &region); >+ >+ if (xneg) >+ { >+ switch (region) >+ { >+ default: >+ case 0: return -sin_piby4(r, rr); >+ case 1: return -cos_piby4(r, rr); >+ case 2: return sin_piby4(r, rr); >+ case 3: return cos_piby4(r, rr); >+ } >+ } >+ else >+ { >+ switch (region) >+ { >+ default: >+ case 0: return sin_piby4(r, rr); >+ case 1: return cos_piby4(r, rr); >+ case 2: return -sin_piby4(r, rr); >+ case 3: return -cos_piby4(r, rr); >+ } >+ } >+} >+ >+double __cos(double x) >+{ >+ double r, rr; >+ int region, xneg; >+ >+ unsigned long ux, ax; >+ GET_BITS_DP64(x, ux); >+ ax = (ux & ~SIGNBIT_DP64); >+ if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ >+ { >+ if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ >+ { >+ if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */ >+ { >+ if (ax == 0x0000000000000000) /* abs(x) = 0.0 */ >+ return 1.0; >+ else >+ return val_with_flags(1.0, AMD_F_INEXACT); >+ } >+ else >+ return 1.0 - x*x*0.5; >+ } >+ else >+ return cos_piby4(x, 0.0); >+ } >+ else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* x is infinity. 
Return a NaN */ >+ return nan_with_flags(AMD_F_INVALID); >+ } >+ xneg = (ax != ux); >+ >+ >+ if (xneg) >+ x = -x; >+ >+ /* Reduce x into range [-pi/4,pi/4] */ >+ __remainder_piby2(x, &r, &rr, &region); >+ >+ switch (region) >+ { >+ default: >+ case 0: return cos_piby4(r, rr); >+ case 1: return -sin_piby4(r, rr); >+ case 2: return -cos_piby4(r, rr); >+ case 3: return sin_piby4(r, rr); >+ } >+} >+ >+weak_alias (__sin, sin) >+weak_alias (__cos, cos) >+weak_alias (__sincos, sincos) >============================================================ >Index: sysdeps/x86_64/fpu/s_sincosf.c >--- sysdeps/x86_64/fpu/s_sincosf.c created >+++ sysdeps/x86_64/fpu/s_sincosf.c 2002-12-03 13:43:11.000000000 +0100 1.1 >@@ -0,0 +1,321 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_REMAINDER_PIBY2F_INLINE >+#define USE_VAL_WITH_FLAGS >+#define USE_NAN_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_VAL_WITH_FLAGS >+#undef USE_NAN_WITH_FLAGS >+#undef USE_REMAINDER_PIBY2F_INLINE >+ >+/* sin(x) approximation valid on the interval [-pi/4,pi/4]. */ >+static inline double sinf_piby4(double x) >+{ >+ /* Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ... >+ = x * (1 - x^2/3! + x^4/5! - x^6/7! ... >+ = x * f(w) >+ where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ... >+ We use a minimax approximation of (f(w) - 1) / w >+ because this produces an expansion in even powers of x. 
>+ */ >+ double x2; >+ static const double >+ c1 = -0.166666666638608441788607926e0, >+ c2 = 0.833333187633086262120839299e-2, >+ c3 = -0.198400874359527693921333720e-3, >+ c4 = 0.272500015145584081596826911e-5; >+ >+ x2 = x * x; >+ return (x + x * x2 * (c1 + x2 * (c2 + x2 * (c3 + x2 * c4)))); >+} >+ >+/* cos(x) approximation valid on the interval [-pi/4,pi/4]. */ >+static inline double cosf_piby4(double x) >+{ >+ /* Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ... >+ = f(w) >+ where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ... >+ We use a minimax approximation of (f(w) - 1 + w/2) / (w*w) >+ because this produces an expansion in even powers of x. >+ */ >+ double x2; >+ static const double >+ c1 = 0.41666666664325175238031e-1, >+ c2 = -0.13888887673175665567647e-2, >+ c3 = 0.24800600878112441958053e-4, >+ c4 = -0.27301013343179832472841e-6; >+ >+ x2 = x * x; >+ return (1.0 - 0.5 * x2 + (x2 * x2 * >+ (c1 + x2 * (c2 + x2 * (c3 + x2 * c4))))); >+} >+ >+ >+void __sincosf(float x, float *s, float *c) >+{ >+ double r, dx; >+ int region, xneg; >+ >+ unsigned long ux, ax; >+ >+ dx = x; >+ >+ GET_BITS_DP64(dx, ux); >+ ax = (ux & ~SIGNBIT_DP64); >+ >+ if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ >+ { >+ if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */ >+ { >+ if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ >+ { >+ if (ax == 0x0000000000000000) >+ { >+ *s = dx; >+ *c = 1.0; >+ } >+ else >+ { >+ *s = val_with_flags(dx, AMD_F_INEXACT); >+ *c = val_with_flags(1.0, AMD_F_INEXACT); >+ } >+ } >+ else >+ { >+ *s = dx - dx*dx*dx*0.166666666666666666; >+ *c = 1.0 - dx*dx*0.5; >+ } >+ } >+ else >+ { >+ *s = sinf_piby4(x); >+ *c = cosf_piby4(x); >+ } >+ return; >+ } >+ else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ *s = *c = dx + dx; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* x is infinity. 
Return a NaN */ >+ *s = *c = nan_with_flags(AMD_F_INVALID); >+ return; >+ } >+ >+ xneg = (ux >> 63); >+ >+ if (xneg) >+ dx = -dx; >+ >+ /* Reduce abs(x) into range [-pi/4,pi/4] */ >+ __remainder_piby2f_inline(dx, ax, &r, &region); >+ >+ if (xneg) >+ { >+ switch (region) >+ { >+ default: >+ case 0: >+ *s = -sinf_piby4(r); >+ *c = cosf_piby4(r); >+ break; >+ case 1: >+ *s = -cosf_piby4(r); >+ *c = -sinf_piby4(r); >+ break; >+ case 2: >+ *s = sinf_piby4(r); >+ *c = -cosf_piby4(r); >+ break; >+ case 3: >+ *s = cosf_piby4(r); >+ *c = sinf_piby4(r); >+ break; >+ } >+ } >+ else >+ { >+ switch (region) >+ { >+ default: >+ case 0: >+ *s = sinf_piby4(r); >+ *c = cosf_piby4(r); >+ break; >+ case 1: >+ *s = cosf_piby4(r); >+ *c = -sinf_piby4(r); >+ break; >+ case 2: >+ *s = -sinf_piby4(r); >+ *c = -cosf_piby4(r); >+ break; >+ case 3: >+ *s = -cosf_piby4(r); >+ *c = sinf_piby4(r); >+ break; >+ } >+ } >+} >+ >+float __sinf(float x) >+{ >+ double r, dx; >+ int region, xneg; >+ >+ unsigned long ux, ax; >+ >+ dx = x; >+ >+ GET_BITS_DP64(dx, ux); >+ ax = (ux & ~SIGNBIT_DP64); >+ >+ if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ >+ { >+ if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */ >+ { >+ if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ >+ { >+ if (ax == 0x0000000000000000) >+ return x; >+ else >+ return val_with_flags(dx, AMD_F_INEXACT); >+ } >+ else >+ return x - x*x*x*0.166666666666666666; >+ } >+ else >+ return sinf_piby4(dx); >+ } >+ else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* x is infinity. 
Return a NaN */ >+ return nan_with_flags(AMD_F_INVALID); >+ } >+ >+ xneg = (ux >> 63); >+ >+ if (xneg) >+ dx = -dx; >+ >+ /* Reduce abs(x) into range [-pi/4,pi/4] */ >+ __remainder_piby2f_inline(dx, ax, &r, &region); >+ >+ if (xneg) >+ { >+ switch (region) >+ { >+ default: >+ case 0: return -sinf_piby4(r); >+ case 1: return -cosf_piby4(r); >+ case 2: return sinf_piby4(r); >+ case 3: return cosf_piby4(r); >+ } >+ } >+ else >+ { >+ switch (region) >+ { >+ default: >+ case 0: return sinf_piby4(r); >+ case 1: return cosf_piby4(r); >+ case 2: return -sinf_piby4(r); >+ case 3: return -cosf_piby4(r); >+ } >+ } >+} >+ >+#if 1 >+/* Stupidly, computing cosf via sincosf is much faster, >+ even though sincosf does the same work and more. */ >+float __cosf(float x) >+{ >+ float s, c; >+ __sincosf(x, &s, &c); >+ return c; >+} >+ >+#else >+/* This is the way cosf should be done, but it runs half >+ as fast as it ought to */ >+ >+float __cosf(float x) >+{ >+ double r, dx; >+ int region, xneg; >+ >+ unsigned long ux, ax; >+ >+ dx = x; >+ >+ GET_BITS_DP64(dx, ux); >+ ax = (ux & ~SIGNBIT_DP64); >+ >+ if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ >+ { >+ if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */ >+ { >+ if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ >+ { >+ if (ax == 0x0000000000000000) >+ return 1.0F; >+ else >+ return val_with_flags(1.0, AMD_F_INEXACT); >+ } >+ else >+ return 1.0F - x*x*0.5F; >+ } >+ else >+ return cosf_piby4(dx); >+ } >+ else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* x is infinity. 
Return a NaN */ >+ return nan_with_flags(AMD_F_INVALID); >+ } >+ >+ xneg = (ux >> 63); >+ >+ if (xneg) >+ dx = -dx; >+ >+ /* Reduce abs(x) into range [-pi/4,pi/4] */ >+ __remainder_piby2f_inline(dx, ax, &r, &region); >+ >+ switch (region) >+ { >+ default: >+ case 0: return cosf_piby4(r); >+ case 1: return -sinf_piby4(r); >+ case 2: return -cosf_piby4(r); >+ case 3: return sinf_piby4(r); >+ } >+} >+#endif >+ >+weak_alias (__sinf, sinf) >+weak_alias (__cosf, cosf) >+weak_alias (__sincosf, sincosf) >============================================================ >Index: sysdeps/x86_64/fpu/s_sinf.c >--- sysdeps/x86_64/fpu/s_sinf.c created >+++ sysdeps/x86_64/fpu/s_sinf.c 2002-12-03 13:43:12.000000000 +0100 1.1 >@@ -0,0 +1 @@ >+/* Not needed. */ >============================================================ >============================================================ >Index: sysdeps/x86_64/fpu/s_tanf.c >--- sysdeps/x86_64/fpu/s_tanf.c created >+++ sysdeps/x86_64/fpu/s_tanf.c 2002-12-03 13:43:12.000000000 +0100 1.1 >@@ -0,0 +1,97 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_REMAINDER_PIBY2F_INLINE >+#define USE_VAL_WITH_FLAGS >+#define USE_NAN_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_VAL_WITH_FLAGS >+#undef USE_NAN_WITH_FLAGS >+#undef USE_REMAINDER_PIBY2F_INLINE >+ >+/* tan(x) approximation valid on the interval [-pi/4,pi/4]. >+ If recip is true return -1/tan(x) instead. */ >+static inline double tanf_piby4(double x, int recip) >+{ >+ double r, t; >+ >+ /* Core Remez [1,2] approximation to tan(x) on the >+ interval [0,pi/4]. 
*/ >+ r = x*x; >+ t = x + x*r* >+ (0.385296071263995406715129e0 - >+ 0.172032480471481694693109e-1 * r) / >+ (0.115588821434688393452299e+1 + >+ (-0.51396505478854532132342e0 + >+ 0.1844239256901656082986661e-1 * r) * r); >+ >+ if (recip) >+ return -1.0 / t; >+ else >+ return t; >+} >+ >+float __tanf(float x) >+{ >+ double r, dx; >+ int region, xneg; >+ >+ unsigned long ux, ax; >+ >+ dx = x; >+ >+ GET_BITS_DP64(dx, ux); >+ ax = (ux & ~SIGNBIT_DP64); >+ >+ if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ >+ { >+ if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */ >+ { >+ if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ >+ { >+ if (ax == 0x0000000000000000) >+ return dx; >+ else >+ return val_with_flags(dx, AMD_F_INEXACT); >+ } >+ else >+ return dx + dx*dx*dx*0.333333333333333333; >+ } >+ else >+ return tanf_piby4(dx, 0); >+ } >+ else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return dx + dx; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* x is infinity. Return a NaN */ >+ return nan_with_flags(AMD_F_INVALID); >+ } >+ >+ xneg = (ux >> 63); >+ >+ if (xneg) >+ x = -x; >+ >+ /* Reduce x into range [-pi/4,pi/4] */ >+ __remainder_piby2f_inline(x, ax, &r, &region); >+ >+ if (xneg) >+ return -tanf_piby4(r, region & 1); >+ else >+ return tanf_piby4(r, region & 1); >+} >+ >+weak_alias (__tanf, tanf) >============================================================ >Index: sysdeps/x86_64/fpu/s_trunc.c >--- sysdeps/x86_64/fpu/s_trunc.c created >+++ sysdeps/x86_64/fpu/s_trunc.c 2002-12-03 13:43:13.000000000 +0100 1.1 >@@ -0,0 +1,48 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+double __trunc(double x) >+{ >+ double r; >+ long rexp; >+ unsigned long ux, ax, mask; >+ >+ GET_BITS_DP64(x, ux); >+ ax = ux & (~SIGNBIT_DP64); >+ >+ if (ax >= 0x4340000000000000) >+ { >+ /* abs(x) is either NaN, infinity, or >= 2^53 */ >+ if (ax > 0x7ff0000000000000) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ return x; >+ } >+ else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */ >+ { >+ /* Return zero with the sign of x */ >+ PUT_BITS_DP64(ux & SIGNBIT_DP64, x); >+ return x; >+ } >+ else >+ { >+ r = x; >+ rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ /* Mask out the bits of r that we don't want */ >+ mask = (1L << (EXPSHIFTBITS_DP64 - rexp)) - 1; >+ PUT_BITS_DP64(ux & ~mask, r); >+ return r; >+ } >+ >+} >+ >+weak_alias (__trunc, trunc) >============================================================ >Index: sysdeps/x86_64/fpu/s_truncf.c >--- sysdeps/x86_64/fpu/s_truncf.c created >+++ sysdeps/x86_64/fpu/s_truncf.c 2002-12-03 13:43:13.000000000 +0100 1.1 >@@ -0,0 +1,47 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+float __truncf(float x) >+{ >+ float r; >+ int rexp; >+ unsigned int ux, ax, mask; >+ >+ GET_BITS_SP32(x, ux); >+ ax = ux & (~SIGNBIT_SP32); >+ >+ if (ax >= 0x4b800000) >+ { >+ /* abs(x) is either NaN, infinity, or >= 2^24 */ >+ if (ax > 0x7f800000) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ return x; >+ } >+ else if (ax < 0x3f800000) /* abs(x) < 1.0 */ >+ { >+ /* Return zero with the sign of x */ >+ PUT_BITS_SP32(ux & SIGNBIT_SP32, x); >+ return x; >+ } >+ else >+ { >+ r = x; >+ rexp = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; >+ /* Mask out the bits of r that we don't want */ >+ mask = (1 << (EXPSHIFTBITS_SP32 - rexp)) - 1; >+ PUT_BITS_SP32(ux & ~mask, r); >+ return r; >+ } >+} >+ >+weak_alias (__truncf, truncf) >============================================================ >Index: sysdeps/x86_64/fpu/w_acos.c >--- sysdeps/x86_64/fpu/w_acos.c created >+++ sysdeps/x86_64/fpu/w_acos.c 2002-12-03 13:43:13.000000000 +0100 1.1 >@@ -0,0 +1,139 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_VAL_WITH_FLAGS >+#define USE_NAN_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_NAN_WITH_FLAGS >+#undef USE_VAL_WITH_FLAGS >+ >+/* Deal with errno for out-of-range argument */ >+#include "libm_errno_amd.h" >+static inline double retval_errno_edom(double x) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = x; >+ exc.type = DOMAIN; >+ exc.name = (char *)"acos"; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = nan_with_flags(AMD_F_INVALID); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(EDOM); >+ else if (!matherr(&exc)) >+ { >+ if(_LIB_VERSION == _SVID_) >+ (void)fputs("acos: DOMAIN error\n", stderr); >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+ >+double __acos(double x) >+{ >+ /* Computes arccos(x). >+ The argument is first reduced by noting that arccos(x) >+ is invalid for abs(x) > 1. For denormal and small >+ arguments arccos(x) = pi/2 to machine accuracy. >+ Remaining argument ranges are handled as follows. >+ For abs(x) <= 0.5 use >+ arccos(x) = pi/2 - arcsin(x) >+ = pi/2 - (x + x^3*R(x^2)) >+ where R(x^2) is a rational minimax approximation to >+ (arcsin(x) - x)/x^3. >+ For abs(x) > 0.5 exploit the identity: >+ arccos(y) = 2*arcsin(sqrt((1-y)/2)), arccos(-y) = pi - arccos(y), with y = abs(x) >+ together with the above rational approximation, and >+ reconstruct the terms carefully. >+ */ >+ >+ /* Some constants and split constants. 
*/ >+ >+ static const double >+ pi = 3.1415926535897933e+00, /* 0x400921fb54442d18 */ >+ piby2 = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */ >+ piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */ >+ piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */ >+ >+ double u, y, s=0.0, r; >+ int xexp, xnan, transform=0; >+ >+ unsigned long ux, aux, xneg; >+ GET_BITS_DP64(x, ux); >+ aux = ux & ~SIGNBIT_DP64; >+ xneg = (ux & SIGNBIT_DP64); >+ xnan = (aux > PINFBITPATT_DP64); >+ xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ >+ /* Special cases */ >+ >+ if (xexp < -56) >+ { /* y small enough that arccos(x) = pi/2 */ >+ return val_with_flags(piby2, AMD_F_INEXACT); >+ } >+ else if (xnan) return x + x; >+ else if (xexp >= 0) >+ { /* abs(x) >= 1.0 */ >+ if (x == 1.0) return 0.0; >+ else if (x == -1.0) return val_with_flags(pi, AMD_F_INEXACT); >+ else return retval_errno_edom(x); >+ } >+ >+ if (xneg) y = -x; >+ else y = x; >+ >+ transform = (xexp >= -1); /* abs(x) >= 0.5 */ >+ >+ if (transform) >+ { /* Transform y into the range [0,0.5) */ >+ transform = 1; >+ r = 0.5*(1-y); >+ /* Hammer sqrt instruction */ >+ asm volatile ("sqrtsd %1, %0" : "=x" (s) : "x" (r)); >+ y = s; >+ } >+ else >+ r = y*y; >+ >+ /* Use a rational approximation for [0.0, 0.5] */ >+ >+ u = r*(0.227485835556935010735943483075 + >+ (-0.445017216867635649900123110649 + >+ (0.275558175256937652532686256258 + >+ (-0.0549989809235685841612020091328 + >+ (0.00109242697235074662306043804220 + >+ 0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/ >+ (1.36491501334161032038194214209 + >+ (-3.28431505720958658909889444194 + >+ (2.76568859157270989520376345954 + >+ (-0.943639137032492685763471240072 + >+ 0.105869422087204370341222318533*r)*r)*r)*r); >+ >+ if (transform) >+ { /* Reconstruct acos carefully in transformed region */ >+ if (xneg) return pi - 2*(s+(y*u - piby2_tail)); >+ else >+ { >+ double c, s1; >+ unsigned long us; >+ GET_BITS_DP64(s, us); >+ 
PUT_BITS_DP64(0xffffffff00000000 & us, s1); >+ c = (r-s1*s1)/(s+s1); >+ return 2*s1 + (2*c+2*y*u); >+ } >+ } >+ else >+ return piby2_head - (x - (piby2_tail - x*u)); >+} >+ >+weak_alias (__acos, acos) >============================================================ >Index: sysdeps/x86_64/fpu/w_acosf.c >--- sysdeps/x86_64/fpu/w_acosf.c created >+++ sysdeps/x86_64/fpu/w_acosf.c 2002-12-03 13:43:14.000000000 +0100 1.1 >@@ -0,0 +1,141 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_VALF_WITH_FLAGS >+#define USE_NANF_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_NANF_WITH_FLAGS >+#undef USE_VALF_WITH_FLAGS >+ >+/* Deal with errno for out-of-range argument */ >+#include "libm_errno_amd.h" >+static inline float retval_errno_edom(float x) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)x; >+ exc.type = DOMAIN; >+ exc.name = (char *)"acosf"; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = nanf_with_flags(AMD_F_INVALID); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(EDOM); >+ else if (!matherr(&exc)) >+ { >+ if(_LIB_VERSION == _SVID_) >+ (void)fputs("acosf: DOMAIN error\n", stderr); >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+ >+float __acosf(float x) >+{ >+ /* Computes arccos(x). >+ The argument is first reduced by noting that arccos(x) >+ is invalid for abs(x) > 1. For denormal and small >+ arguments arccos(x) = pi/2 to machine accuracy. >+ Remaining argument ranges are handled as follows. >+ For abs(x) <= 0.5 use >+ arccos(x) = pi/2 - arcsin(x) >+ = pi/2 - (x + x^3*R(x^2)) >+ where R(x^2) is a rational minimax approximation to >+ (arcsin(x) - x)/x^3. 
>+ For abs(x) > 0.5 exploit the identity: >+ arccos(x) = pi - 2*arcsin(sqrt(1-x)/2) >+ together with the above rational approximation, and >+ reconstruct the terms carefully. >+ */ >+ >+ /* Some constants and split constants. */ >+ >+ static const float >+ piby2 = 1.5707963705e+00F; /* 0x3fc90fdb */ >+ static const double >+ pi = 3.1415926535897933e+00, /* 0x400921fb54442d18 */ >+ piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */ >+ piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */ >+ >+ float u, y, s = 0.0F, r; >+ int xexp, xnan, transform = 0; >+ >+ unsigned int ux, aux, xneg; >+ >+ >+ GET_BITS_SP32(x, ux); >+ /* >+ ux = (*((unsigned int *)&x)); >+ */ >+ aux = ux & ~SIGNBIT_SP32; >+ xneg = (ux & SIGNBIT_SP32); >+ xnan = (aux > PINFBITPATT_SP32); >+ xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; >+ >+ /* Special cases */ >+ >+ if (xexp < -26) >+ /* y small enough that arccos(x) = pi/2 */ >+ return valf_with_flags(piby2, AMD_F_INEXACT); >+ else if (xnan) return x + x; >+ else if (xexp >= 0) >+ { /* abs(x) >= 1.0 */ >+ if (x == 1.0F) return 0.0F; >+ else if (x == -1.0F) return valf_with_flags(pi, AMD_F_INEXACT); >+ else return retval_errno_edom(x); >+ } >+ >+ if (xneg) y = -x; >+ else y = x; >+ >+ transform = (xexp >= -1); /* abs(x) >= 0.5 */ >+ >+ if (transform) >+ { /* Transform y into the range [0,0.5) */ >+ transform = 1; >+ r = 0.5F*(1-y); >+ /* Hammer sqrt instruction */ >+ asm volatile ("sqrtss %1, %0" : "=x" (s) : "x" (r)); >+ y = s; >+ } >+ else >+ r = y*y; >+ >+ /* Use a rational approximation for [0.0, 0.5] */ >+ >+ u=r*(0.184161606965100694821398249421F + >+ (-0.0565298683201845211985026327361F + >+ (-0.0133819288943925804214011424456F - >+ 0.00396137437848476485201154797087F*r)*r)*r)/ >+ (1.10496961524520294485512696706F - >+ 0.836411276854206731913362287293F*r); >+ >+ if (transform) >+ { >+ /* Reconstruct acos carefully in transformed region */ >+ if (xneg) >+ return pi - 2.0F*(s+(y*u - 
piby2_tail)); >+ else >+ { >+ float c, s1; >+ unsigned int us; >+ GET_BITS_SP32(s, us); >+ PUT_BITS_SP32(0xffff0000 & us, s1); >+ c = (r-s1*s1)/(s+s1); >+ return 2.0F*s1 + (2.0F*c+2.0F*y*u); >+ } >+ } >+ else >+ return piby2_head - (x - (piby2_tail - x*u)); >+} >+ >+weak_alias (__acosf, acosf) >============================================================ >Index: sysdeps/x86_64/fpu/w_asin.c >--- sysdeps/x86_64/fpu/w_asin.c created >+++ sysdeps/x86_64/fpu/w_asin.c 2002-12-03 13:43:14.000000000 +0100 1.1 >@@ -0,0 +1,144 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_VAL_WITH_FLAGS >+#define USE_NAN_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_NAN_WITH_FLAGS >+#undef USE_VAL_WITH_FLAGS >+ >+/* Deal with errno for out-of-range argument */ >+#include "libm_errno_amd.h" >+static inline double retval_errno_edom(double x) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = x; >+ exc.type = DOMAIN; >+ exc.name = (char *)"asin"; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = nan_with_flags(AMD_F_INVALID); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(EDOM); >+ else if (!matherr(&exc)) >+ { >+ if(_LIB_VERSION == _SVID_) >+ (void)fputs("asin: DOMAIN error\n", stderr); >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+ >+double __asin(double x) >+{ >+ /* Computes arcsin(x). >+ The argument is first reduced by noting that arcsin(x) >+ is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x). >+ For denormal and small arguments arcsin(x) = x to machine >+ accuracy. Remaining argument ranges are handled as follows. >+ For abs(x) <= 0.5 use >+ arcsin(x) = x + x^3*R(x^2) >+ where R(x^2) is a rational minimax approximation to >+ (arcsin(x) - x)/x^3. 
>+ For abs(x) > 0.5 exploit the identity: >+ arcsin(x) = pi/2 - 2*arcsin(sqrt(1-x)/2) >+ together with the above rational approximation, and >+ reconstruct the terms carefully. >+ */ >+ >+ /* Some constants and split constants. */ >+ >+ static const double >+ piby2_tail = 6.1232339957367660e-17, /* 0x3c91a62633145c07 */ >+ hpiby2_head = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */ >+ piby2 = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */ >+ double u, v, y, s=0.0, r; >+ int xexp, xnan, transform=0; >+ >+ unsigned long ux, aux, xneg; >+ GET_BITS_DP64(x, ux); >+ aux = ux & ~SIGNBIT_DP64; >+ xneg = (ux & SIGNBIT_DP64); >+ xnan = (aux > PINFBITPATT_DP64); >+ xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ >+ /* Special cases */ >+ >+ if (xexp < -28) >+ { /* y small enough that arcsin(x) = x */ >+ return val_with_flags(x, AMD_F_INEXACT); >+ } >+ else if (xnan) return x + x; >+ else if (xexp >= 0) >+ { /* abs(x) >= 1.0 */ >+ if (x == 1.0) return val_with_flags(piby2, AMD_F_INEXACT); >+ else if (x == -1.0) return val_with_flags(-piby2, AMD_F_INEXACT); >+ else return retval_errno_edom(x); >+ >+ } >+ >+ if (xneg) y = -x; >+ else y = x; >+ >+ transform = (xexp >= -1); /* abs(x) >= 0.5 */ >+ >+ if (transform) >+ { /* Transform y into the range [0,0.5) */ >+ transform = 1; >+ r = 0.5*(1-y); >+ /* Hammer sqrt instruction */ >+ asm volatile ("sqrtsd %1, %0" : "=x" (s) : "x" (r)); >+ y = s; >+ } >+ else >+ { >+ r = y*y; >+ } >+ >+ /* Use a rational approximation for [0.0, 0.5] */ >+ >+ u = r*(0.227485835556935010735943483075 + >+ (-0.445017216867635649900123110649 + >+ (0.275558175256937652532686256258 + >+ (-0.0549989809235685841612020091328 + >+ (0.00109242697235074662306043804220 + >+ 0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/ >+ (1.36491501334161032038194214209 + >+ (-3.28431505720958658909889444194 + >+ (2.76568859157270989520376345954 + >+ (-0.943639137032492685763471240072 + >+ 0.105869422087204370341222318533*r)*r)*r)*r); >+ >+ if 
(transform) >+ { /* Reconstruct asin carefully in transformed region */ >+ { >+ double c, s1, p, q; >+ unsigned long us; >+ GET_BITS_DP64(s, us); >+ PUT_BITS_DP64(0xffffffff00000000 & us, s1); >+ c = (r-s1*s1)/(s+s1); >+ p = 2*s*u-(piby2_tail-2*c); >+ q = hpiby2_head-2*s1; >+ v = hpiby2_head-(p-q); >+ } >+ } >+ else >+ { >+ v = y + y*u; >+ } >+ >+ if (xneg) return -v; >+ else return v; >+} >+ >+weak_alias (__asin, asin) >============================================================ >Index: sysdeps/x86_64/fpu/w_asinf.c >--- sysdeps/x86_64/fpu/w_asinf.c created >+++ sysdeps/x86_64/fpu/w_asinf.c 2002-12-03 13:43:15.000000000 +0100 1.1 >@@ -0,0 +1,133 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_VALF_WITH_FLAGS >+#define USE_NANF_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_NANF_WITH_FLAGS >+#undef USE_VALF_WITH_FLAGS >+ >+/* Deal with errno for out-of-range argument */ >+#include "libm_errno_amd.h" >+static inline float retval_errno_edom(float x) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)x; >+ exc.type = DOMAIN; >+ exc.name = (char *)"asinf"; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = nanf_with_flags(AMD_F_INVALID); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(EDOM); >+ else if (!matherr(&exc)) >+ { >+ if(_LIB_VERSION == _SVID_) >+ (void)fputs("asinf: DOMAIN error\n", stderr); >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+ >+float __asinf(float x) >+{ >+ /* Computes arcsin(x). >+ The argument is first reduced by noting that arcsin(x) >+ is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x). >+ For denormal and small arguments arcsin(x) = x to machine >+ accuracy. Remaining argument ranges are handled as follows. 
>+ For abs(x) <= 0.5 use >+ arcsin(x) = x + x^3*R(x^2) >+ where R(x^2) is a rational minimax approximation to >+ (arcsin(x) - x)/x^3. >+ For abs(x) > 0.5 exploit the identity: >+ arcsin(x) = pi/2 - 2*arcsin(sqrt(1-x)/2) >+ together with the above rational approximation, and >+ reconstruct the terms carefully. >+ */ >+ >+ /* Some constants and split constants. */ >+ >+ static const float >+ piby2_tail = 7.5497894159e-08F, /* 0x33a22168 */ >+ hpiby2_head = 7.8539812565e-01F, /* 0x3f490fda */ >+ piby2 = 1.5707963705e+00F; /* 0x3fc90fdb */ >+ float u, v, y, s = 0.0F, r; >+ int xexp, xnan, transform = 0; >+ >+ unsigned int ux, aux, xneg; >+ GET_BITS_SP32(x, ux); >+ aux = ux & ~SIGNBIT_SP32; >+ xneg = (ux & SIGNBIT_SP32); >+ xnan = (aux > PINFBITPATT_SP32); >+ xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; >+ >+ /* Special cases */ >+ >+ if (xexp < -14) >+ /* y small enough that arcsin(x) = x */ >+ return valf_with_flags(x, AMD_F_INEXACT); >+ else if (xnan) return x + x; >+ else if (xexp >= 0) >+ { >+ /* abs(x) >= 1.0 */ >+ if (x == 1.0F) return valf_with_flags(piby2, AMD_F_INEXACT); >+ else if (x == -1.0F) return valf_with_flags(-piby2, AMD_F_INEXACT); >+ else return retval_errno_edom(x); >+ } >+ >+ if (xneg) y = -x; >+ else y = x; >+ >+ transform = (xexp >= -1); /* abs(x) >= 0.5 */ >+ >+ if (transform) >+ { /* Transform y into the range [0,0.5) */ >+ transform = 1; >+ r = 0.5F*(1-y); >+ /* Hammer sqrt instruction */ >+ asm volatile ("sqrtss %1, %0" : "=x" (s) : "x" (r)); >+ y = s; >+ } >+ else >+ r = y*y; >+ >+ /* Use a rational approximation for [0.0, 0.5] */ >+ >+ u=r*(0.184161606965100694821398249421F + >+ (-0.0565298683201845211985026327361F + >+ (-0.0133819288943925804214011424456F - >+ 0.00396137437848476485201154797087F*r)*r)*r)/ >+ (1.10496961524520294485512696706F - >+ 0.836411276854206731913362287293F*r); >+ >+ if (transform) >+ { >+ /* Reconstruct asin carefully in transformed region */ >+ float c, s1, p, q; >+ unsigned int us; >+ 
GET_BITS_SP32(s, us); >+ PUT_BITS_SP32(0xffff0000 & us, s1); >+ c = (r-s1*s1)/(s+s1); >+ p = 2.0F*s*u-(piby2_tail-2.0F*c); >+ q = hpiby2_head-2*s1; >+ v = hpiby2_head-(p-q); >+ } >+ else >+ v = y + y*u; >+ >+ if (xneg) return -v; >+ else return v; >+} >+ >+weak_alias (__asinf, asinf) >============================================================ >Index: sysdeps/x86_64/fpu/w_exp.c >--- sysdeps/x86_64/fpu/w_exp.c created >+++ sysdeps/x86_64/fpu/w_exp.c 2002-12-03 13:43:16.000000000 +0100 1.1 >@@ -0,0 +1,159 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SPLITEXP >+#define USE_SCALEDOUBLE_1 >+#define USE_SCALEDOUBLE_2 >+#define USE_ZERO_WITH_FLAGS >+#define USE_INFINITY_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_ZERO_WITH_FLAGS >+#undef USE_SPLITEXP >+#undef USE_SCALEDOUBLE_1 >+#undef USE_SCALEDOUBLE_2 >+#undef USE_INFINITY_WITH_FLAGS >+ >+/* Deal with errno for out-of-range result */ >+#include "libm_errno_amd.h" >+static inline double retval_errno_erange_overflow(double x) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = x; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"exp"; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+static inline double retval_errno_erange_underflow(double x) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = x; >+ exc.type = UNDERFLOW; >+ exc.name = (char *)"exp"; >+ exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); 
>+ return exc.retval; >+} >+ >+double __exp(double x) >+{ >+ static const double >+ max_exp_arg = 7.09782712893383973096e+02, /* 0x40862e42fefa39ef */ >+ min_exp_arg = -7.45133219101941108420e+02, /* 0xc0874910d52d3051 */ >+ thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ >+ log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ >+ log2_by_32_trail = 5.68948749532545630390e-11; /* 0x3dcf473de6af278e */ >+ >+ double z1, z2, z; >+ int m; >+ unsigned long ux, ax; >+ >+ /* >+ Computation of exp(x). >+ >+ We compute the values m, z1, and z2 such that >+ exp(x) = 2**m * (z1 + z2), where >+ exp(x) is the natural exponential of x. >+ >+ Computations needed in order to obtain m, z1, and z2 >+ involve three steps. >+ >+ First, we reduce the argument x to the form >+ x = n * log2/32 + remainder, >+ where n has the value of an integer and |remainder| <= log2/64. >+ The value of n = x * 32/log2 rounded to the nearest integer and >+ the remainder = x - n*log2/32. >+ >+ Second, we approximate exp(r1 + r2) - 1 where r1 is the leading >+ part of the remainder and r2 is the trailing part of the remainder. >+ >+ Third, we reconstruct the exponential of x so that >+ exp(x) = 2**m * (z1 + z2). >+ */ >+ >+ >+ GET_BITS_DP64(x, ux); >+ ax = ux & (~SIGNBIT_DP64); >+ >+ if (ax >= 0x40862e42fefa39ef) /* abs(x) >= 709.78... */ >+ { >+ if(ax >= 0x7ff0000000000000) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else if (ux & SIGNBIT_DP64) >+ /* x is negative infinity; return 0.0 with no flags. */ >+ return 0.0; >+ else >+ /* x is positive infinity */ >+ return x; >+ } >+ if (x > max_exp_arg) >+ /* Return +infinity with overflow flag */ >+ return retval_errno_erange_overflow(x); >+ else if (x < min_exp_arg) >+ /* x is negative. 
Return +zero with underflow and inexact flags */ >+ return retval_errno_erange_underflow(x); >+ } >+ >+ /* Handle small arguments separately */ >+ if (ax < 0x3fb0000000000000) /* abs(x) < 1/16 */ >+ { >+ if (ax < 0x3c00000000000000) /* abs(x) < 2^(-63) */ >+ z = 1.0 + x; /* Raises inexact if x is non-zero */ >+ else >+ z = (((((((((( >+ 1.0/3628800)*x+ >+ 1.0/362880)*x+ >+ 1.0/40320)*x+ >+ 1.0/5040)*x+ >+ 1.0/720)*x+ >+ 1.0/120)*x+ >+ 1.0/24)*x+ >+ 1.0/6)*x+ >+ 1.0/2)*x+ >+ 1.0)*x + 1.0; >+ } >+ else >+ { >+ /* Find m, z1 and z2 such that exp(x) = 2**m * (z1 + z2) */ >+ >+ splitexp(x, 1.0, thirtytwo_by_log2, log2_by_32_lead, log2_by_32_trail, >+ &m, &z1, &z2); >+ >+ /* Scale (z1 + z2) by 2.0**m */ >+ >+ if (m >= EMIN_DP64 && m <= EMAX_DP64) >+ z = scaleDouble_1((z1+z2),m); >+ else >+ z = scaleDouble_2((z1+z2),m); >+ } >+ return z; >+} >+ >+ >+weak_alias (__exp, __ieee754_exp) >+weak_alias (__exp, exp) >============================================================ >Index: sysdeps/x86_64/fpu/w_exp2.c >--- sysdeps/x86_64/fpu/w_exp2.c created >+++ sysdeps/x86_64/fpu/w_exp2.c 2002-12-03 13:43:18.000000000 +0100 1.1 >@@ -0,0 +1,172 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
>+   AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
>+   LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
>+   THIS LIBRARY**
>+*/
>+
>+#include "libm_amd.h"
>+#include "libm_util_amd.h"
>+
>+#define USE_SPLITEXP
>+#define USE_SCALEDOUBLE_1
>+#define USE_SCALEDOUBLE_2
>+#define USE_ZERO_WITH_FLAGS
>+#define USE_INFINITY_WITH_FLAGS
>+#include "libm_inlines_amd.h"
>+#undef USE_ZERO_WITH_FLAGS
>+#undef USE_SPLITEXP
>+#undef USE_SCALEDOUBLE_1
>+#undef USE_SCALEDOUBLE_2
>+#undef USE_INFINITY_WITH_FLAGS
>+
>+/* Range-error helpers: fill in a struct exception, pick the return value, set ERANGE (POSIX mode, or when matherr returns 0) */
>+#include "libm_errno_amd.h"
>+static inline double retval_errno_erange_overflow(double x)
>+{
>+  struct exception exc;
>+  exc.arg1 = x;
>+  exc.arg2 = x;
>+  exc.type = OVERFLOW;
>+  exc.name = (char *)"exp2";
>+  if (_LIB_VERSION == _SVID_)
>+    exc.retval = HUGE;
>+  else
>+    exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
>+  if (_LIB_VERSION == _POSIX_)
>+    __set_errno(ERANGE);
>+  else if (!matherr(&exc))
>+    __set_errno(ERANGE);
>+  return exc.retval;
>+}
>+
>+static inline double retval_errno_erange_underflow(double x)
>+{
>+  struct exception exc;
>+  exc.arg1 = x;
>+  exc.arg2 = x;
>+  exc.type = UNDERFLOW;
>+  exc.name = (char *)"exp2";
>+  exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
>+  if (_LIB_VERSION == _POSIX_)
>+    __set_errno(ERANGE);
>+  else if (!matherr(&exc))
>+    __set_errno(ERANGE);
>+  return exc.retval;
>+}
>+
>+double __exp2(double x)
>+{
>+  static const double
>+    max_exp2_arg = 1024.0, /* 0x4090000000000000 */
>+    min_exp2_arg = -1074.0, /* 0xc090c80000000000 */
>+    log2 = 6.931471805599453094178e-01, /* 0x3fe62e42fefa39ef */
>+    log2_lead = 6.93147167563438415527E-01, /* 0x3fe62e42f8000000 */
>+    log2_tail = 1.29965068938898869640E-08, /* 0x3e4be8e7bcd5e4f1 */
>+    one_by_32_lead = 0.03125;
>+
>+  double y, z1, z2, z, hx, tx, y1, y2;
>+  int m;
>+  unsigned long ux, ax;
>+
>+  /*
>+    Computation of exp2(x).
>+
>+    We compute the values m, z1, and z2 such that
>+    exp2(x) = 2**m * (z1 + z2), where exp2(x) is 2**x.
>+
>+    Computations needed in order to obtain m, z1, and z2
>+    involve three steps.
>+
>+    First, we reduce the argument x to the form
>+    x = n/32 + remainder,
>+    where n has the value of an integer and |remainder| <= 1/64.
>+    The value of n = x * 32 rounded to the nearest integer and
>+    the remainder = x - n/32.
>+
>+    Second, we approximate exp2(r1 + r2) - 1 where r1 is the leading
>+    part of the remainder and r2 is the trailing part of the remainder.
>+
>+    Third, we reconstruct exp2(x) so that
>+    exp2(x) = 2**m * (z1 + z2).
>+  */
>+
>+
>+  GET_BITS_DP64(x, ux);
>+  ax = ux & (~SIGNBIT_DP64);
>+
>+  if (ax >= 0x4090000000000000) /* abs(x) >= 1024.0 */
>+    {
>+      if(ax >= 0x7ff0000000000000)
>+        {
>+          /* x is either NaN or infinity */
>+          if (ux & MANTBITS_DP64)
>+            /* x is NaN */
>+            return x + x; /* Raise invalid if it is a signalling NaN */
>+          else if (ux & SIGNBIT_DP64)
>+            /* x is negative infinity; return 0.0 with no flags. */
>+            return 0.0;
>+          else
>+            /* x is positive infinity */
>+            return x;
>+        }
>+      if (x >= max_exp2_arg)
>+        /* 2**1024 is already unrepresentable, so x == 1024.0 overflows too: +infinity with overflow flag */
>+        return retval_errno_erange_overflow(x);
>+      else if (x < min_exp2_arg)
>+        /* x is negative. Return +zero with underflow and inexact flags */
>+        return retval_errno_erange_underflow(x);
>+    }
>+
>+
>+  /* Handle small arguments separately */
>+  if (ax < 0x3fb7154764ee6c2f) /* abs(x) < 1/(16*log2) */
>+    {
>+      if (ax < 0x3c00000000000000) /* abs(x) < 2^(-63) */
>+        return 1.0 + x; /* Raises inexact if x is non-zero */
>+      else
>+        {
>+          /* Split x into hx (head) and tx (tail). */
>+          unsigned long u;
>+          hx = x;
>+          GET_BITS_DP64(hx, u);
>+          u &= 0xfffffffff8000000;
>+          PUT_BITS_DP64(u, hx);
>+          tx = x - hx;
>+          /* Carefully multiply x by log2. y1 is the most significant
>+             part of the result, and y2 the least significant part */
>+          y1 = x * log2_lead;
>+          y2 = (((hx * log2_lead - y1) + hx * log2_tail) +
>+                tx * log2_lead) + tx * log2_tail;
>+
>+          y = y1 + y2;
>+          z = (9.99564649780173690e-1 +
>+               (1.61251249355268050e-5 +
>+                (2.37986978239838493e-2 +
>+                 2.68724774856111190e-7*y)*y)*y)/
>+              (9.99564649780173692e-1 +
>+               (-4.99766199765151309e-1 +
>+                (1.070876894098586184e-1 +
>+                 (-1.189773642681502232e-2 +
>+                  5.9480622371960190616e-4*y)*y)*y)*y);
>+          z = ((z * y1) + (z * y2)) + 1.0;
>+        }
>+    }
>+  else
>+    {
>+      /* Find m, z1 and z2 such that exp2(x) = 2**m * (z1 + z2) */
>+
>+      splitexp(x, log2, 32.0, one_by_32_lead, 0.0, &m, &z1, &z2);
>+
>+      /* Scale (z1 + z2) by 2.0**m */
>+      if (m > EMIN_DP64 && m < EMAX_DP64)
>+        z = scaleDouble_1((z1+z2),m);
>+      else
>+        z = scaleDouble_2((z1+z2),m);
>+    }
>+  return z;
>+}
>+
>+weak_alias (__exp2, exp2)
>+weak_alias (__exp2, __libm_ieee754_exp2)
>============================================================
>Index: sysdeps/x86_64/fpu/w_exp2f.c
>--- sysdeps/x86_64/fpu/w_exp2f.c created
>+++ sysdeps/x86_64/fpu/w_exp2f.c 2002-12-03 13:43:18.000000000 +0100 1.1
>@@ -0,0 +1,155 @@
>+/*
>+(C) 2002 Advanced Micro Devices, Inc.
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
>+   AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
>+   LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
>+   THIS LIBRARY**
>+*/
>+
>+#include "libm_amd.h"
>+#include "libm_util_amd.h"
>+
>+#define USE_SPLITEXPF
>+#define USE_SCALEFLOAT_1
>+#define USE_SCALEFLOAT_2
>+#define USE_INFINITYF_WITH_FLAGS
>+#define USE_ZEROF_WITH_FLAGS
>+#include "libm_inlines_amd.h"
>+#undef USE_SPLITEXPF
>+#undef USE_SCALEFLOAT_1
>+#undef USE_SCALEFLOAT_2
>+#undef USE_INFINITYF_WITH_FLAGS
>+#undef USE_ZEROF_WITH_FLAGS
>+
>+/* Range-error helpers: fill in a struct exception, pick the return value, set ERANGE (POSIX mode, or when matherr returns 0) */
>+#include "libm_errno_amd.h"
>+static inline float retval_errno_erange_overflow(float x)
>+{
>+  struct exception exc;
>+  exc.arg1 = (double)x;
>+  exc.arg2 = (double)x;
>+  exc.type = OVERFLOW;
>+  exc.name = (char *)"exp2f";
>+  if (_LIB_VERSION == _SVID_)
>+    exc.retval = HUGE;
>+  else
>+    exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
>+  if (_LIB_VERSION == _POSIX_)
>+    __set_errno(ERANGE);
>+  else if (!matherr(&exc))
>+    __set_errno(ERANGE);
>+  return exc.retval;
>+}
>+
>+static inline float retval_errno_erange_underflow(float x)
>+{
>+  struct exception exc;
>+  exc.arg1 = (double)x;
>+  exc.arg2 = (double)x;
>+  exc.type = UNDERFLOW;
>+  exc.name = (char *)"exp2f";
>+  exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
>+  if (_LIB_VERSION == _POSIX_)
>+    __set_errno(ERANGE);
>+  else if (!matherr(&exc))
>+    __set_errno(ERANGE);
>+  return exc.retval;
>+}
>+
>+float __exp2f(float x)
>+{
>+  static const float
>+    max_exp2_arg = 128.0F, /* 0x43000000 */
>+    min_exp2_arg = -149.0F, /* 0xc3150000 */
>+    log2 = 6.931471824645996e-01F, /* 0x3f317218 */
>+    one_by_32_lead = 0.03125F;
>+
>+  float y, z1, z2, z;
>+  int m;
>+  unsigned int ux, ax;
>+
>+  /*
>+    Computation of exp2f(x).
>+
>+    We compute the values m, z1, and z2 such that
>+    exp2f(x) = 2**m * (z1 + z2), where exp2f(x) is 2**x.
>+
>+    Computations needed in order to obtain m, z1, and z2
>+    involve three steps.
>+
>+    First, we reduce the argument x to the form
>+    x = n/32 + remainder,
>+    where n has the value of an integer and |remainder| <= 1/64.
>+    The value of n = x * 32 rounded to the nearest integer and
>+    the remainder = x - n/32.
>+
>+    Second, we approximate exp2f(r1 + r2) - 1 where r1 is the leading
>+    part of the remainder and r2 is the trailing part of the remainder.
>+
>+    Third, we reconstruct exp2f(x) so that
>+    exp2f(x) = 2**m * (z1 + z2).
>+  */
>+
>+  GET_BITS_SP32(x, ux);
>+  ax = ux & (~SIGNBIT_SP32);
>+
>+  if (ax >= 0x43000000) /* abs(x) >= 128.0 */
>+    {
>+      if(ax >= 0x7f800000)
>+        {
>+          /* x is either NaN or infinity */
>+          if (ux & MANTBITS_SP32)
>+            /* x is NaN */
>+            return x + x; /* Raise invalid if it is a signalling NaN */
>+          else if (ux & SIGNBIT_SP32)
>+            /* x is negative infinity; return 0.0 with no flags. */
>+            return 0.0F;
>+          else
>+            /* x is positive infinity */
>+            return x;
>+        }
>+      if (x >= max_exp2_arg)
>+        /* 2**128 is already unrepresentable in float, so x == 128.0 overflows too: +infinity with overflow flag */
>+        return retval_errno_erange_overflow(x);
>+      else if (x < min_exp2_arg)
>+        /* x is negative. Return +zero with underflow and inexact flags */
>+        return retval_errno_erange_underflow(x);
>+    }
>+
>+  /* Handle small arguments separately */
>+  if (ax < 0x3cb8aa3b) /* abs(x) < 1/(64*log2) */
>+    {
>+      if (ax < 0x32800000) /* abs(x) < 2^(-26) */
>+        return 1.0F + x; /* Raises inexact if x is non-zero */
>+      else
>+        {
>+          y = log2*x;
>+          z = ((((((((
>+                      1.0F/40320)*y+
>+                     1.0F/5040)*y+
>+                    1.0F/720)*y+
>+                   1.0F/120)*y+
>+                  1.0F/24)*y+
>+                 1.0F/6)*y+
>+                1.0F/2)*y+
>+               1.0F)*y + 1.0;
>+        }
>+    }
>+  else
>+    {
>+      /* Find m, z1 and z2 such that exp2f(x) = 2**m * (z1 + z2) */
>+
>+      splitexpf(x, log2, 32.0F, one_by_32_lead, 0.0F, &m, &z1, &z2);
>+
>+      /* Scale (z1 + z2) by 2.0**m */
>+
>+      if (m >= EMIN_SP32 && m <= EMAX_SP32)
>+        z = scaleFloat_1((z1+z2),m);
>+      else
>+        z = scaleFloat_2((z1+z2),m);
>+    }
>+  return z;
>+}
>+
>+weak_alias (__exp2f, exp2f)
>+weak_alias (__exp2f, __libm_ieee754_exp2f)
>============================================================
>Index: sysdeps/x86_64/fpu/w_expf.c
>--- sysdeps/x86_64/fpu/w_expf.c created
>+++ sysdeps/x86_64/fpu/w_expf.c 2002-12-03 13:43:19.000000000 +0100 1.1
>@@ -0,0 +1,154 @@
>+/*
>+(C) 2002 Advanced Micro Devices, Inc.
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
>+   AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
>+   LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
>+   THIS LIBRARY**
>+*/
>+
>+#include "libm_amd.h"
>+#include "libm_util_amd.h"
>+
>+#define USE_SPLITEXPF
>+#define USE_SCALEFLOAT_1
>+#define USE_SCALEFLOAT_2
>+#define USE_ZEROF_WITH_FLAGS
>+#define USE_INFINITYF_WITH_FLAGS
>+#include "libm_inlines_amd.h"
>+#undef USE_SPLITEXPF
>+#undef USE_SCALEFLOAT_1
>+#undef USE_SCALEFLOAT_2
>+#undef USE_ZEROF_WITH_FLAGS
>+#undef USE_INFINITYF_WITH_FLAGS
>+
>+/* Range-error helpers: fill in a struct exception, pick the return value, set ERANGE (POSIX mode, or when matherr returns 0) */
>+#include "libm_errno_amd.h"
>+static inline float retval_errno_erange_overflow(float x)
>+{
>+  struct exception exc;
>+  exc.arg1 = (double)x;
>+  exc.arg2 = (double)x;
>+  exc.type = OVERFLOW;
>+  exc.name = (char *)"expf";
>+  if (_LIB_VERSION == _SVID_)
>+    exc.retval = HUGE;
>+  else
>+    exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
>+  if (_LIB_VERSION == _POSIX_)
>+    __set_errno(ERANGE);
>+  else if (!matherr(&exc))
>+    __set_errno(ERANGE);
>+  return exc.retval;
>+}
>+
>+static inline float retval_errno_erange_underflow(float x)
>+{
>+  struct exception exc;
>+  exc.arg1 = (double)x;
>+  exc.arg2 = (double)x;
>+  exc.type = UNDERFLOW;
>+  exc.name = (char *)"expf";
>+  exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
>+  if (_LIB_VERSION == _POSIX_)
>+    __set_errno(ERANGE);
>+  else if (!matherr(&exc))
>+    __set_errno(ERANGE);
>+  return exc.retval;
>+}
>+
>+float __expf(float x)
>+{
>+  static const float
>+    max_exp_arg = 8.8722839355E+01, /* 0x42B17218 */
>+    min_exp_arg = -1.0327893066E+02, /* 0xC2CE8ED0 */
>+    thirtytwo_by_log2 = 4.6166240692E+01, /* 0x4238AA3B */
>+    log2_by_32_lead = 2.1659851074E-02, /* 0x3CB17000 */
>+    log2_by_32_tail = 9.9831822808E-07; /* 0x3585FDF4 */
>+
>+  float z1, z2, z;
>+  int m;
>+  unsigned int ux, ax;
>+
>+  /*
>+    Computation of exp(x).
>+
>+    We compute the values m, z1, and z2 such that
>+    exp(x) = 2**m * (z1 + z2), where
>+    exp(x) is the natural exponential of x.
>+
>+    Computations needed in order to obtain m, z1, and z2
>+    involve three steps.
>+
>+    First, we reduce the argument x to the form
>+    x = n * log2/32 + remainder,
>+    where n has the value of an integer and |remainder| <= log2/64.
>+    The value of n = x * 32/log2 rounded to the nearest integer and
>+    the remainder = x - n*log2/32.
>+
>+    Second, we approximate exp(r1 + r2) - 1 where r1 is the leading
>+    part of the remainder and r2 is the trailing part of the remainder.
>+
>+    Third, we reconstruct the exponential of x so that
>+    exp(x) = 2**m * (z1 + z2).
>+  */
>+
>+  GET_BITS_SP32(x, ux);
>+  ax = ux & (~SIGNBIT_SP32);
>+
>+  if (ax >= 0x42B17218) /* abs(x) >= 88.7... */
>+    {
>+      if(ax >= 0x7f800000)
>+        {
>+          /* x is either NaN or infinity */
>+          if (ux & MANTBITS_SP32)
>+            /* x is NaN */
>+            return x + x; /* Raise invalid if it is a signalling NaN */
>+          else if (ux & SIGNBIT_SP32)
>+            /* x is negative infinity; return 0.0 with no flags */
>+            return 0.0;
>+          else
>+            /* x is positive infinity */
>+            return x;
>+        }
>+      if (x >= max_exp_arg)
>+        /* max_exp_arg (0x42B17218) lies just above 128*log(2), so it overflows too: +infinity with overflow flag */
>+        return retval_errno_erange_overflow(x);
>+      else if (x < min_exp_arg)
>+        /* x is negative. Return +zero with underflow and inexact flags */
>+        return retval_errno_erange_underflow(x);
>+    }
>+
>+  /* Handle small arguments separately */
>+  if (ax < 0x3c800000) /* abs(x) < 1/64 */
>+    {
>+      if (ax < 0x32800000) /* abs(x) < 2^(-26) */
>+        return 1.0 + x; /* Raises inexact if x is non-zero */
>+      else
>+        z = (((((((
>+                   1.0/5040)*x+
>+                  1.0/720)*x+
>+                 1.0/120)*x+
>+                1.0/24)*x+
>+               1.0/6)*x+
>+              1.0/2)*x+
>+             1.0)*x + 1.0;
>+    }
>+  else
>+    {
>+      /* Find m, z1 and z2 such that exp(x) = 2**m * (z1 + z2) */
>+
>+      splitexpf(x, 1.0, thirtytwo_by_log2, log2_by_32_lead,
>+                log2_by_32_tail, &m, &z1, &z2);
>+
>+      /* Scale (z1 + z2) by 2.0**m */
>+
>+      if (m >= EMIN_SP32 && m <= EMAX_SP32)
>+        z = scaleFloat_1((z1+z2),m);
>+      else
>+        z = scaleFloat_2((z1+z2),m);
>+    }
>+  return z;
>+}
>+
>+weak_alias (__expf, expf)
>+weak_alias (__expf, __ieee754_expf)
>============================================================
>Index: sysdeps/x86_64/fpu/w_fmod.c
>--- sysdeps/x86_64/fpu/w_fmod.c created
>+++ sysdeps/x86_64/fpu/w_fmod.c 2002-12-03 13:43:19.000000000 +0100 1.1
>@@ -0,0 +1,2 @@
>+#define COMPILING_FMOD
>+#include <w_remainder.c>
>============================================================
>Index: sysdeps/x86_64/fpu/w_fmodf.c
>--- sysdeps/x86_64/fpu/w_fmodf.c created
>+++ sysdeps/x86_64/fpu/w_fmodf.c 2002-12-03 13:43:19.000000000 +0100 1.1
>@@ -0,0 +1,2 @@
>+#define COMPILING_FMOD
>+#include <w_remainderf.c>
>============================================================
>Index: sysdeps/x86_64/fpu/w_hypot.c
>--- sysdeps/x86_64/fpu/w_hypot.c created
>+++ sysdeps/x86_64/fpu/w_hypot.c 2002-12-03 13:43:20.000000000 +0100 1.1
>@@ -0,0 +1,190 @@
>+/*
>+(C) 2002 Advanced Micro Devices, Inc.
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SCALEDOUBLE_1 >+#define USE_INFINITY_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_SCALEDOUBLE_1 >+#undef USE_INFINITY_WITH_FLAGS >+ >+/* Deal with errno for out-of-range result */ >+#include "libm_errno_amd.h" >+static inline double retval_errno_erange_overflow(double x, double y) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = y; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"hypot"; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+double __hypot(double x, double y) >+{ >+ /* Returns sqrt(x*x + y*y) with no overflow or underflow unless >+ the result warrants it */ >+ >+ const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */ >+ >+ double u, r, retval, hx, tx, x2, hy, ty, y2, hs, ts; >+ unsigned long xexp, yexp, ux, uy, ut; >+ int dexp, expadjust; >+ >+ GET_BITS_DP64(x, ux); >+ ux &= ~SIGNBIT_DP64; >+ GET_BITS_DP64(y, uy); >+ uy &= ~SIGNBIT_DP64; >+ xexp = (ux >> EXPSHIFTBITS_DP64); >+ yexp = (uy >> EXPSHIFTBITS_DP64); >+ >+ if (xexp == BIASEDEMAX_DP64 + 1 || yexp == BIASEDEMAX_DP64 + 1) >+ { >+ /* One or both of the arguments are NaN or infinity. The >+ result will also be NaN or infinity. */ >+ retval = x*x + y*y; >+ if (((xexp == BIASEDEMAX_DP64 + 1) && !(ux & MANTBITS_DP64)) || >+ ((yexp == BIASEDEMAX_DP64 + 1) && !(uy & MANTBITS_DP64))) >+ /* x or y is infinity. ISO C99 defines that we must >+ return +infinity, even if the other argument is NaN. 
>+ Note that the computation of x*x + y*y above will already >+ have raised invalid if either x or y is a signalling NaN. */ >+ return infinity_with_flags(0); >+ else >+ /* One or both of x or y is NaN, and neither is infinity. >+ Raise invalid if it's a signalling NaN */ >+ return retval; >+ } >+ >+ /* Set x = abs(x) and y = abs(y) */ >+ PUT_BITS_DP64(ux, x); >+ PUT_BITS_DP64(uy, y); >+ >+ /* The difference in exponents between x and y */ >+ dexp = xexp - yexp; >+ expadjust = 0; >+ >+ if (ux == 0) >+ /* x is zero */ >+ return y; >+ else if (uy == 0) >+ /* y is zero */ >+ return x; >+ else if (dexp > MANTLENGTH_DP64 + 1 || dexp < -MANTLENGTH_DP64 - 1) >+ /* One of x and y is insignificant compared to the other */ >+ return x + y; /* Raise inexact */ >+ else if (xexp > EXPBIAS_DP64 + 500 || yexp > EXPBIAS_DP64 + 500) >+ { >+ /* Danger of overflow; scale down by 2**600. */ >+ expadjust = 600; >+ ux -= 0x2580000000000000; >+ PUT_BITS_DP64(ux, x); >+ uy -= 0x2580000000000000; >+ PUT_BITS_DP64(uy, y); >+ } >+ else if (xexp < EXPBIAS_DP64 - 500 || yexp < EXPBIAS_DP64 - 500) >+ { >+ /* Danger of underflow; scale up by 2**600. 
*/ >+ expadjust = -600; >+ if (xexp == 0) >+ { >+ /* x is denormal - handle by adding 601 to the exponent >+ and then subtracting a correction for the implicit bit */ >+ PUT_BITS_DP64(ux + 0x2590000000000000, x); >+ x -= 9.23297861778573578076e-128; /* 0x2590000000000000 */ >+ GET_BITS_DP64(x, ux); >+ } >+ else >+ { >+ /* x is normal - just increase the exponent by 600 */ >+ ux += 0x2580000000000000; >+ PUT_BITS_DP64(ux, x); >+ } >+ if (yexp == 0) >+ { >+ PUT_BITS_DP64(uy + 0x2590000000000000, y); >+ y -= 9.23297861778573578076e-128; /* 0x2590000000000000 */ >+ GET_BITS_DP64(y, uy); >+ } >+ else >+ { >+ uy += 0x2580000000000000; >+ PUT_BITS_DP64(uy, y); >+ } >+ } >+ >+ >+#ifdef FAST_BUT_GREATER_THAN_ONE_ULP >+ /* Not awful, but results in accuracy loss larger than 1 ulp */ >+ r = x*x + y*y >+#else >+ /* Slower but more accurate */ >+ >+ /* Sort so that x is greater than y */ >+ if (x < y) >+ { >+ u = y; >+ y = x; >+ x = u; >+ ut = ux; >+ ux = uy; >+ uy = ut; >+ } >+ >+ /* Split x into hx and tx, head and tail */ >+ PUT_BITS_DP64(ux & 0xfffffffff8000000, hx); >+ tx = x - hx; >+ >+ PUT_BITS_DP64(uy & 0xfffffffff8000000, hy); >+ ty = y - hy; >+ >+ /* Compute r = x*x + y*y with extra precision */ >+ x2 = x*x; >+ y2 = y*y; >+ hs = x2 + y2; >+ >+ if (dexp == 0) >+ /* We take most care when x and y have equal exponents, >+ i.e. are almost the same size */ >+ ts = (((x2 - hs) + y2) + >+ ((hx * hx - x2) + 2 * hx * tx) + tx * tx) + >+ ((hy * hy - y2) + 2 * hy * ty) + ty * ty; >+ else >+ ts = (((x2 - hs) + y2) + >+ ((hx * hx - x2) + 2 * hx * tx) + tx * tx); >+ >+ r = hs + ts; >+#endif >+ >+ /* The sqrt can introduce another half ulp error. */ >+ /* Hammer sqrt instruction */ >+ asm volatile ("sqrtsd %1, %0" : "=x" (retval) : "x" (r)); >+ >+ /* If necessary scale the result back. This may lead to >+ overflow but if so that's the correct result. */ >+ retval = scaleDouble_1(retval, expadjust); >+ >+ if (retval > large) >+ /* The result overflowed. Deal with errno. 
*/ >+ return retval_errno_erange_overflow(x, y); >+ >+ return retval; >+} >+ >+weak_alias (__hypot, hypot) >+weak_alias (__hypot, __ieee754_hypot) >============================================================ >Index: sysdeps/x86_64/fpu/w_hypotf.c >--- sysdeps/x86_64/fpu/w_hypotf.c created >+++ sysdeps/x86_64/fpu/w_hypotf.c 2002-12-03 13:43:20.000000000 +0100 1.1 >@@ -0,0 +1,98 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#if USE_SOFTWARE_SQRT >+#define USE_SQRTF_AMD_INLINE >+#endif >+#define USE_INFINITYF_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#if USE_SOFTWARE_SQRT >+#undef USE_SQRTF_AMD_INLINE >+#endif >+#undef USE_INFINITYF_WITH_FLAGS >+ >+/* Deal with errno for out-of-range result */ >+#include "libm_errno_amd.h" >+static inline float retval_errno_erange_overflow(float x, float y) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)y; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"hypotf"; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+float __hypotf(float x, float y) >+{ >+ /* Returns sqrt(x*x + y*y) with no overflow or underflow unless >+ the result warrants it */ >+ >+ /* Do intermediate computations in double precision >+ and use sqrt instruction from chip if available. 
*/ >+ double dx = x, dy = y, dr, retval; >+ >+ /* The largest finite float, stored as a double */ >+ const double large = 3.40282346638528859812e+38; /* 0x47efffffe0000000 */ >+ >+ >+ unsigned long ux, uy, avx, avy; >+ >+ GET_BITS_DP64(x, avx); >+ avx &= ~SIGNBIT_DP64; >+ GET_BITS_DP64(y, avy); >+ avy &= ~SIGNBIT_DP64; >+ ux = (avx >> EXPSHIFTBITS_DP64); >+ uy = (avy >> EXPSHIFTBITS_DP64); >+ >+ if (ux == BIASEDEMAX_DP64 + 1 || uy == BIASEDEMAX_DP64 + 1) >+ { >+ retval = x*x + y*y; >+ /* One or both of the arguments are NaN or infinity. The >+ result will also be NaN or infinity. */ >+ if (((ux == BIASEDEMAX_DP64 + 1) && !(avx & MANTBITS_DP64)) || >+ ((uy == BIASEDEMAX_DP64 + 1) && !(avy & MANTBITS_DP64))) >+ /* x or y is infinity. ISO C99 defines that we must >+ return +infinity, even if the other argument is NaN. >+ Note that the computation of x*x + y*y above will already >+ have raised invalid if either x or y is a signalling NaN. */ >+ return infinityf_with_flags(0); >+ else >+ /* One or both of x or y is NaN, and neither is infinity. >+ Raise invalid if it's a signalling NaN */ >+ return retval; >+ } >+ >+ dr = (dx*dx + dy*dy); >+ >+#if USE_SOFTWARE_SQRT >+ retval = sqrtf_amd_inline(r); >+#else >+ /* Hammer sqrt instruction */ >+ asm volatile ("sqrtsd %1, %0" : "=x" (retval) : "x" (dr)); >+#endif >+ >+ if (retval > large) >+ return retval_errno_erange_overflow(x, y); >+ else >+ return retval; >+ } >+ >+weak_alias (__hypotf, hypotf) >+weak_alias (__hypotf, __ieee754_hypotf) >============================================================ >Index: sysdeps/x86_64/fpu/w_log.c >--- sysdeps/x86_64/fpu/w_log.c created >+++ sysdeps/x86_64/fpu/w_log.c 2002-12-03 15:35:29.000000000 +0100 1.1 >@@ -0,0 +1,489 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
>+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
>+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
>+ THIS LIBRARY**
>+*/
>+
>+#include "libm_amd.h"
>+#include "libm_util_amd.h"
>+
>+#define USE_NAN_WITH_FLAGS
>+#define USE_INFINITY_WITH_FLAGS
>+#include "libm_inlines_amd.h"
>+#undef USE_NAN_WITH_FLAGS
>+#undef USE_INFINITY_WITH_FLAGS
>+/* Helper used below when x is +/-zero.  Fills in a SING exception record and returns -HUGE in SVID mode,
>+   otherwise -infinity_with_flags(AMD_F_DIVBYZERO).  errno becomes ERANGE in POSIX mode or when matherr() returns 0. */
>+#include "libm_errno_amd.h"
>+static inline double retval_errno_erange_overflow(double x)
>+{
>+  struct exception exc;
>+  exc.arg1 = x;
>+  exc.arg2 = x;
>+  exc.type = SING;
>+#if defined(COMPILING_LOG10)
>+  exc.name = (char *)"log10";
>+#elif defined(COMPILING_LOG2)
>+  exc.name = (char *)"log2";
>+#else
>+  exc.name = (char *)"log";
>+#endif
>+  if (_LIB_VERSION == _SVID_)
>+    exc.retval = -HUGE;
>+  else
>+    exc.retval = -infinity_with_flags(AMD_F_DIVBYZERO);
>+  if (_LIB_VERSION == _POSIX_)
>+    __set_errno(ERANGE);
>+  else if (!matherr(&exc))
>+    __set_errno(ERANGE);
>+  return exc.retval;
>+}
>+/* Helper used below for negative x (including -infinity).  Fills in a DOMAIN exception record and returns -HUGE in SVID mode,
>+   otherwise nan_with_flags(AMD_F_INVALID).  Sets errno to EDOM unless matherr() handles it; SVID mode also prints a message to stderr. */
>+static inline double retval_errno_edom(double x)
>+{
>+  struct exception exc;
>+  exc.arg1 = x;
>+  exc.arg2 = x;
>+  exc.type = DOMAIN;
>+#if defined(COMPILING_LOG10)
>+  exc.name = (char *)"log10";
>+#elif defined(COMPILING_LOG2)
>+  exc.name = (char *)"log2";
>+#else
>+  exc.name = (char *)"log";
>+#endif
>+  if (_LIB_VERSION == _SVID_)
>+    exc.retval = -HUGE;
>+  else
>+    exc.retval = nan_with_flags(AMD_F_INVALID);
>+  if (_LIB_VERSION == _POSIX_)
>+    __set_errno(EDOM);
>+  else if (!matherr(&exc))
>+    {
>+      if(_LIB_VERSION == _SVID_)
>+#if defined(COMPILING_LOG10)
>+        (void)fputs("log10: DOMAIN error\n", stderr);
>+#elif defined(COMPILING_LOG2)
>+        (void)fputs("log2: DOMAIN error\n", stderr);
>+#else
>+        (void)fputs("log: DOMAIN error\n", stderr);
>+#endif
>+      __set_errno(EDOM);
>+    }
>+  return exc.retval;
>+}
>+/* One body, three entry points: natural log by default, log10 or log2 when the including file defines COMPILING_LOG10 or COMPILING_LOG2. */
>+#if defined(COMPILING_LOG10)
>+double __log10(double x)
>+#elif defined(COMPILING_LOG2)
>+double __log2(double x)
>+#else
>+double __log(double x)
>+#endif
>+{
>+
>+  int expadjust, xexp;
>+  double r, r1, r2, correction, f, f1, f2, q, u, v, z1, z2, poly;
>+  int index;
>+  unsigned long ux;
>+#if defined(COMPILING_LOG10) || defined (COMPILING_LOG2)
>+  unsigned long ut;
>+#endif
>+
>+  /*
>+    Computes natural log(x), then converts to base 10 or base 2 in the
>+    COMPILING_LOG10 / COMPILING_LOG2 builds. Algorithm based on:
>+    Ping-Tak Peter Tang
>+    "Table-driven implementation of the logarithm function in IEEE
>+    floating-point arithmetic"
>+    ACM Transactions on Mathematical Software (TOMS)
>+    Volume 16, Issue 4 (December 1990)
>+  */
>+
>+/* Arrays ln_lead_table and ln_tail_table contain
>+   leading and trailing parts respectively of precomputed
>+   values of natural log(1+i/64), for i = 0, 1, ..., 64.
>+   ln_lead_table contains the first 24 bits of precision,
>+   and ln_tail_table contains a further 53 bits precision. */
>+
>+  static const double ln_lead_table[65] = {
>+    0.00000000000000000000e+00, /* 0x0000000000000000 */
>+    1.55041813850402832031e-02, /* 0x3f8fc0a800000000 */
>+    3.07716131210327148438e-02, /* 0x3f9f829800000000 */
>+    4.58095073699951171875e-02, /* 0x3fa7745800000000 */
>+    6.06245994567871093750e-02, /* 0x3faf0a3000000000 */
>+    7.52233862876892089844e-02, /* 0x3fb341d700000000 */
>+    8.96121263504028320312e-02, /* 0x3fb6f0d200000000 */
>+    1.03796780109405517578e-01, /* 0x3fba926d00000000 */
>+    1.17783010005950927734e-01, /* 0x3fbe270700000000 */
>+    1.31576299667358398438e-01, /* 0x3fc0d77e00000000 */
>+    1.45181953907012939453e-01, /* 0x3fc2955280000000 */
>+    1.58604979515075683594e-01, /* 0x3fc44d2b00000000 */
>+    1.71850204467773437500e-01, /* 0x3fc5ff3000000000 */
>+    1.84922337532043457031e-01, /* 0x3fc7ab8900000000 */
>+    1.97825729846954345703e-01, /* 0x3fc9525a80000000 */
>+    2.10564732551574707031e-01, /* 0x3fcaf3c900000000 */
>+    2.23143517971038818359e-01, /* 0x3fcc8ff780000000 */
>+    2.35566020011901855469e-01, /* 0x3fce270700000000 */
>+    2.47836112976074218750e-01, /* 0x3fcfb91800000000 */
>+    2.59957492351531982422e-01, /* 0x3fd0a324c0000000 */
>+    2.71933674812316894531e-01, /* 0x3fd1675c80000000 */
>+    2.83768117427825927734e-01, /* 0x3fd22941c0000000 */
>+    2.95464158058166503906e-01, /* 0x3fd2e8e280000000 */
>+    3.07025015354156494141e-01, /* 0x3fd3a64c40000000 */
>+    3.18453729152679443359e-01, /* 0x3fd4618bc0000000 */
>+    3.29753279685974121094e-01, /* 0x3fd51aad80000000 */
>+    3.40926527976989746094e-01, /* 0x3fd5d1bd80000000 */
>+    3.51976394653320312500e-01, /* 0x3fd686c800000000 */
>+    3.62905442714691162109e-01, /* 0x3fd739d7c0000000 */
>+    3.73716354370117187500e-01, /* 0x3fd7eaf800000000 */
>+    3.84411692619323730469e-01, /* 0x3fd89a3380000000 */
>+    3.94993782043457031250e-01, /* 0x3fd9479400000000 */
>+    4.05465066432952880859e-01, /* 0x3fd9f323c0000000 */
>+    4.15827870368957519531e-01, /* 0x3fda9cec80000000 */
>+    4.26084339618682861328e-01, /* 0x3fdb44f740000000 */
>+    4.36236739158630371094e-01, /* 0x3fdbeb4d80000000 */
>+    4.46287095546722412109e-01, /* 0x3fdc8ff7c0000000 */
>+    4.56237375736236572266e-01, /* 0x3fdd32fe40000000 */
>+    4.66089725494384765625e-01, /* 0x3fddd46a00000000 */
>+    4.75845873355865478516e-01, /* 0x3fde744240000000 */
>+    4.85507786273956298828e-01, /* 0x3fdf128f40000000 */
>+    4.95077252388000488281e-01, /* 0x3fdfaf5880000000 */
>+    5.04556000232696533203e-01, /* 0x3fe02552a0000000 */
>+    5.13945698738098144531e-01, /* 0x3fe0723e40000000 */
>+    5.23248136043548583984e-01, /* 0x3fe0be72e0000000 */
>+    5.32464742660522460938e-01, /* 0x3fe109f380000000 */
>+    5.41597247123718261719e-01, /* 0x3fe154c3c0000000 */
>+    5.50647079944610595703e-01, /* 0x3fe19ee6a0000000 */
>+    5.59615731239318847656e-01, /* 0x3fe1e85f40000000 */
>+    5.68504691123962402344e-01, /* 0x3fe23130c0000000 */
>+    5.77315330505371093750e-01, /* 0x3fe2795e00000000 */
>+    5.86049020290374755859e-01, /* 0x3fe2c0e9e0000000 */
>+    5.94707071781158447266e-01, /* 0x3fe307d720000000 */
>+    6.03290796279907226562e-01, /* 0x3fe34e2880000000 */
>+    6.11801505088806152344e-01, /* 0x3fe393e0c0000000 */
>+    6.20240390300750732422e-01, /* 0x3fe3d90260000000 */
>+    6.28608644008636474609e-01, /* 0x3fe41d8fe0000000 */
>+    6.36907458305358886719e-01, /* 0x3fe4618bc0000000 */
>+    6.45137906074523925781e-01, /* 0x3fe4a4f840000000 */
>+    6.53301239013671875000e-01, /* 0x3fe4e7d800000000 */
>+    6.61398470401763916016e-01, /* 0x3fe52a2d20000000 */
>+    6.69430613517761230469e-01, /* 0x3fe56bf9c0000000 */
>+    6.77398800849914550781e-01, /* 0x3fe5ad4040000000 */
>+    6.85303986072540283203e-01, /* 0x3fe5ee02a0000000 */
>+    6.93147122859954833984e-01}; /* 0x3fe62e42e0000000 */
>+
>+  static const double ln_tail_table[65] = {
>+    0.00000000000000000000e+00, /* 0x0000000000000000 */
>+    5.15092497094772879206e-09, /* 0x3e361f807c79f3db */
>+    4.55457209735272790188e-08, /* 0x3e6873c1980267c8 */
>+    2.86612990859791781788e-08, /* 0x3e5ec65b9f88c69e */
>+    2.23596477332056055352e-08, /* 0x3e58022c54cc2f99 */
>+    3.49498983167142274770e-08, /* 0x3e62c37a3a125330 */
>+    3.23392843005887000414e-08, /* 0x3e615cad69737c93 */
>+    1.35722380472479366661e-08, /* 0x3e4d256ab1b285e9 */
>+    2.56504325268044191098e-08, /* 0x3e5b8abcb97a7aa2 */
>+    5.81213608741512136843e-08, /* 0x3e6f34239659a5dc */
>+    5.59374849578288093334e-08, /* 0x3e6e07fd48d30177 */
>+    5.06615629004996189970e-08, /* 0x3e6b32df4799f4f6 */
>+    5.24588857848400955725e-08, /* 0x3e6c29e4f4f21cf8 */
>+    9.61968535632653505972e-10, /* 0x3e1086c848df1b59 */
>+    1.34829655346594463137e-08, /* 0x3e4cf456b4764130 */
>+    3.65557749306383026498e-08, /* 0x3e63a02ffcb63398 */
>+    3.33431709374069198903e-08, /* 0x3e61e6a6886b0976 */
>+    5.13008650536088382197e-08, /* 0x3e6b8abcb97a7aa2 */
>+    5.09285070380306053751e-08, /* 0x3e6b578f8aa35552 */
>+    3.20853940845502057341e-08, /* 0x3e6139c871afb9fc */
>+    4.06713248643004200446e-08, /* 0x3e65d5d30701ce64 */
>+    5.57028186706125221168e-08, /* 0x3e6de7bcb2d12142 */
>+    5.48356693724804282546e-08, /* 0x3e6d708e984e1664 */
>+    1.99407553679345001938e-08, /* 0x3e556945e9c72f36 */
>+    1.96585517245087232086e-09, /* 0x3e20e2f613e85bda */
>+    6.68649386072067321503e-09, /* 0x3e3cb7e0b42724f6 */
>+    5.89936034642113390002e-08, /* 0x3e6fac04e52846c7 */
>+    2.85038578721554472484e-08, /* 0x3e5e9b14aec442be */
>+    5.09746772910284482606e-08, /* 0x3e6b5de8034e7126 */
>+    5.54234668933210171467e-08, /* 0x3e6dc157e1b259d3 */
>+    6.29100830926604004874e-09, /* 0x3e3b05096ad69c62 */
>+    2.61974119468563937716e-08, /* 0x3e5c2116faba4cdd */
>+    4.16752115011186398935e-08, /* 0x3e665fcc25f95b47 */
>+    2.47747534460820790327e-08, /* 0x3e5a9a08498d4850 */
>+    5.56922172017964209793e-08, /* 0x3e6de647b1465f77 */
>+    2.76162876992552906035e-08, /* 0x3e5da71b7bf7861d */
>+    7.08169709942321478061e-09, /* 0x3e3e6a6886b09760 */
>+    5.77453510221151779025e-08, /* 0x3e6f0075eab0ef64 */
>+    4.43021445893361960146e-09, /* 0x3e33071282fb989b */
>+    3.15140984357495864573e-08, /* 0x3e60eb43c3f1bed2 */
>+    2.95077445089736670973e-08, /* 0x3e5faf06ecb35c84 */
>+    1.44098510263167149349e-08, /* 0x3e4ef1e63db35f68 */
>+    1.05196987538551827693e-08, /* 0x3e469743fb1a71a5 */
>+    5.23641361722697546261e-08, /* 0x3e6c1cdf404e5796 */
>+    7.72099925253243069458e-09, /* 0x3e4094aa0ada625e */
>+    5.62089493829364197156e-08, /* 0x3e6e2d4c96fde3ec */
>+    3.53090261098577946927e-08, /* 0x3e62f4d5e9a98f34 */
>+    3.80080516835568242269e-08, /* 0x3e6467c96ecc5cbe */
>+    5.66961038386146408282e-08, /* 0x3e6e7040d03dec5a */
>+    4.42287063097349852717e-08, /* 0x3e67bebf4282de36 */
>+    3.45294525105681104660e-08, /* 0x3e6289b11aeb783f */
>+    2.47132034530447431509e-08, /* 0x3e5a891d1772f538 */
>+    3.59655343422487209774e-08, /* 0x3e634f10be1fb591 */
>+    5.51581770357780862071e-08, /* 0x3e6d9ce1d316eb93 */
>+    3.60171867511861372793e-08, /* 0x3e63562a19a9c442 */
>+    1.94511067964296180547e-08, /* 0x3e54e2adf548084c */
>+    1.54137376631349347838e-08, /* 0x3e508ce55cc8c97a */
>+    3.93171034490174464173e-09, /* 0x3e30e2f613e85bda */
>+    5.52990607758839766440e-08, /* 0x3e6db03ebb0227bf */
>+    3.29990737637586136511e-08, /* 0x3e61b75bb09cb098 */
>+    1.18436010922446096216e-08, /* 0x3e496f16abb9df22 */
>+    4.04248680368301346709e-08, /* 0x3e65b3f399411c62 */
>+    2.27418915900284316293e-08, /* 0x3e586b3e59f65355 */
>+    1.70263791333409206020e-08, /* 0x3e52482ceae1ac12 */
>+    5.76999904754328540596e-08}; /* 0x3e6efa39ef35793c */
>+
>+#ifndef COMPILING_LOG2
>+  /* log2_lead and log2_tail sum to an extra-precise version
>+     of log(2) */
>+  static const double
>+    log2_lead = 6.93147122859954833984e-01, /* 0x3fe62e42e0000000 */
>+    log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */
>+#endif
>+
>+  static const double
>+    /* Approximating polynomial coefficients for x near 1.0 */
>+    ca_1 = 8.33333333333317923934e-02, /* 0x3fb55555555554e6 */
>+    ca_2 = 1.25000000037717509602e-02, /* 0x3f89999999bac6d4 */
>+    ca_3 = 2.23213998791944806202e-03, /* 0x3f62492307f1519f */
>+    ca_4 = 4.34887777707614552256e-04, /* 0x3f3c8034c85dfff0 */
>+
>+    /* Approximating polynomial coefficients for other x */
>+    cb_1 = 8.33333333333333593622e-02, /* 0x3fb5555555555557 */
>+    cb_2 = 1.24999999978138668903e-02, /* 0x3f89999999865ede */
>+    cb_3 = 2.23219810758559851206e-03; /* 0x3f6249423bd94741 */
>+
>+#if defined(COMPILING_LOG10)
>+  /* log10e_lead and log10e_tail sum to an extra-precision
>+     version of log10(e) (19 bits in lead) */
>+  static const double
>+    log10e_lead = 4.34293746948242187500e-01, /* 0x3fdbcb7800000000 */
>+    log10e_tail = 7.3495500964015109100644e-7; /* 0x3ea8a93728719535 */
>+#elif defined(COMPILING_LOG2)
>+  /* log2e_lead and log2e_tail sum to an extra-precision
>+     version of log2(e) (19 bits in lead) */
>+  static const double
>+    log2e_lead = 1.44269180297851562500E+00, /* 0x3FF7154400000000 */
>+    log2e_tail = 3.23791044778235969970E-06; /* 0x3ECB295C17F0BBBE */
>+#endif
>+
>+  static const unsigned long
>+    log_thresh1 = 0x3fee0faa00000000,
>+    log_thresh2 = 0x3ff1082c00000000;
>+
>+
>+  GET_BITS_DP64(x, ux);
>+
>+  if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
>+    {
>+      /* x is either NaN or infinity */
>+      if (ux & MANTBITS_DP64)
>+        /* x is NaN */
>+        return x + x; /* Raise invalid if it is a signalling NaN */
>+      else
>+        {
>+          /* x is infinity */
>+          if (ux & SIGNBIT_DP64)
>+            /* x is negative infinity. Return a NaN. */
>+            return retval_errno_edom(x);
>+          else
>+            return x;
>+        }
>+    }
>+  else if (!(ux & ~SIGNBIT_DP64))
>+    /* x is +/-zero. Return -infinity with div-by-zero flag. */
>+    return retval_errno_erange_overflow(x);
>+  else if (ux & SIGNBIT_DP64)
>+    /* x is negative. Return a NaN. */
>+    return retval_errno_edom(x);
>+
>+  /* From here on x is finite and positive, so comparing raw bit patterns orders values the same way as comparing the doubles. */
>+  /* log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000
>+     log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000 */
>+  if (ux >= log_thresh1 && ux <= log_thresh2)
>+    {
>+      /* Arguments close to 1.0 are handled separately to maintain
>+         accuracy.
>+
>+         The approximation in this region exploits the identity
>+             log( 1 + r ) = log( 1 + u/2 )  -  log( 1 - u/2 ), where
>+             u  = 2r / (2+r).
>+         Note that the right hand side has an odd Taylor series expansion
>+         which converges much faster than the Taylor series expansion of
>+         log( 1 + r ) in r. Thus, we approximate log( 1 + r ) by
>+             u + A1 * u^3 + A2 * u^5 + ... + An * u^(2n+1).
>+
>+         One subtlety is that since u cannot be calculated from
>+         r exactly, the rounding error in the first u should be
>+         avoided if possible. To accomplish this, we observe that
>+                       u  =  r  -  r*r/(2+r).
>+         Since x (=1+r) is the input argument, and thus presumed exact,
>+         the formula above approximates u accurately because
>+                       u  =  r  -  correction,
>+         and the magnitude of "correction" (of the order of r*r)
>+         is small.
>+         With these observations, we will approximate log( 1 + r ) by
>+            r + ( (A1*u^3 + ... + An*u^(2n+1)) - correction ).
>+
>+         We approximate log(1+r) by an odd polynomial in u, where
>+                  u = 2r/(2+r) = r - r*r/(2+r).
>+      */
>+      r = x - 1.0;
>+      u = r / (2.0 + r);
>+      correction = r * u;
>+      u = u + u;
>+      v = u * u;
>+      r1 = r;
>+      r2 = (u * v * (ca_1 + v * (ca_2 + v * (ca_3 + v * ca_4))) - correction);
>+#if defined(COMPILING_LOG10)
>+      /* At this point r1,r2 is an extra-precise approximation to
>+         natural log(x). Convert it to log10(x) by multiplying
>+         carefully by log10(e).
>+         Shift some bits from r1 to r2 so that log10e_lead*r1
>+         can be computed without rounding error */
>+      r = r1;
>+      GET_BITS_DP64(r1, ut);
>+      PUT_BITS_DP64(ut & 0xffffffff00000000, r1);
>+      r2 = r2 + (r - r1);
>+      return (((log10e_tail*r2) + log10e_tail*r1) + log10e_lead*r2) +
>+        log10e_lead*r1;
>+#elif defined(COMPILING_LOG2)
>+      /* Similarly handle log2(x) by multiplying carefully by log2(e). */
>+      r = r1;
>+      GET_BITS_DP64(r1, ut);
>+      PUT_BITS_DP64(ut & 0xffffffff00000000, r1);
>+      r2 = r2 + (r - r1);
>+      return (((log2e_tail*r2) + log2e_tail*r1) + log2e_lead*r2) +
>+        log2e_lead*r1;
>+#else
>+      return r1 + r2;
>+#endif
>+    }
>+  else
>+    {
>+      /*
>+        First, we decompose the argument x to the form
>+        x  =  2**M  *  (F1  +  F2),
>+        where  1 <= F1+F2 < 2, M has the value of an integer,
>+        F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128.
>+
>+        Second, we approximate log( 1 + F2/F1 ) by an odd polynomial
>+        in U, where U  =  2 F2 / (2 F1 + F2).
>+        Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ).
>+        The core approximation calculates
>+        Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U   -   1.
>+        Note that  log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ),
>+        thus, Poly =  2 arctanh( U/2 ) / U  -  1.
>+
>+        It is not hard to see that
>+        log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ).
>+        Hence, we return Z1 = log(F1), and  Z2 = log( 1 + F2/F1).
>+        The values of log(F1) are calculated beforehand and stored
>+        in the program.
>+      */
>+
>+      if (ux < IMPBIT_DP64)
>+        {
>+          /* The input argument x is denormalized */
>+          /* Normalize f by increasing the exponent by 60
>+             and subtracting a correction to account for the implicit
>+             bit. This replaces a slow denormalized
>+             multiplication by a fast normal subtraction. */
>+          static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */
>+          PUT_BITS_DP64(ux | 0x03d0000000000000, f);
>+          f -= corr;
>+          GET_BITS_DP64(f, ux);
>+          expadjust = 60;
>+        }
>+      else
>+        {
>+          f = x;
>+          expadjust = 0;
>+        }
>+
>+      /* Store the exponent of x in xexp and put
>+         f into the range [0.5,1) */
>+      xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust;
>+      PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f);
>+
>+      /* Now x = 2**xexp * 2f, 1/2 <= f < 1. NOTE(review): this read "2**xexp * f", which disagrees with the log(2f) table lookup below; assumes EXPBIAS_DP64 is the plain exponent bias - confirm. */
>+
>+      /* Set index to be the nearest integer to 128*f */
>+      /*
>+        r = 128.0 * f;
>+        index = (int)(r + 0.5);
>+      */
>+      /* This code instead of the above can save several cycles.
>+         It only works because 64 <= r < 128, so
>+         the nearest integer is always contained in exactly
>+         7 bits, and the right shift is always the same. */
>+      index = (((ux & 0x000fc00000000000) | 0x0010000000000000) >> 46)
>+        + ((ux & 0x0000200000000000) >> 45);
>+
>+      z1 = ln_lead_table[index-64];
>+      q = ln_tail_table[index-64];
>+      f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
>+      f2 = f - f1;
>+      /* At this point, f = f1 + f2 (so x = 2**xexp * 2*( f1 + f2 ), see the review note above) where
>+         f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */
>+
>+      /* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5*f2 ) */
>+      u = f2 / (f1 + 0.5 * f2);
>+
>+      /* Here, |u| <= 2(exp(1/16)-1) / (exp(1/16)+1).
>+         The core approximation calculates
>+         poly = [log(1 + u/2) - log(1 - u/2)]/u  -  1  */
>+      v = u * u;
>+      poly = (v * (cb_1 + v * (cb_2 + v * cb_3)));
>+      z2 = q + (u + u * poly);
>+
>+      /* Now z1,z2 is an extra-precise approximation of log(2f). */
>+
>+#if defined (COMPILING_LOG10)
>+      /* Add xexp * log(2) to z1,z2 to get log(x). */
>+      r1 = (xexp * log2_lead + z1);
>+      r2 = (xexp * log2_tail + z2);
>+      /* At this point r1,r2 is an extra-precise approximation to
>+         natural log(x). Convert it to log10(x) by multiplying
>+         carefully by log10(e). */
>+      return (((log10e_tail*r2) + log10e_tail*r1) + log10e_lead*r2) +
>+        log10e_lead*r1;
>+#elif defined(COMPILING_LOG2)
>+      /* Convert to log2(x) by multiplying carefully by log2(e)
>+         and adding xexp. */
>+      r1 = xexp + log2e_lead*z1;
>+      r2 = (((log2e_tail*z2) + log2e_tail*z1) + log2e_lead*z2);
>+      return r1 + r2;
>+#else
>+      /* Add xexp * log(2) to z1,z2 to get the result log(x).
>+         The computed r1 is not subject to rounding error because
>+         xexp has at most 10 significant bits, log(2) has 24 significant
>+         bits, and z1 has up to 24 bits; and the exponents of z1
>+         and z2 differ by at most 6. */
>+      r1 = (xexp * log2_lead + z1);
>+      r2 = (xexp * log2_tail + z2);
>+      /* Natural log(x) */
>+      return r1 + r2;
>+#endif
>+    }
>+}
>+
>+#if defined(COMPILING_LOG10)
>+weak_alias (__log10, log10)
>+weak_alias (__log10, __ieee754_log10)
>+#elif defined(COMPILING_LOG2)
>+weak_alias (__log2, log2)
>+weak_alias (__log2, __ieee754_log2)
>+#else
>+weak_alias (__log, log)
>+weak_alias (__log, __ieee754_log)
>+#endif
>============================================================
>Index: sysdeps/x86_64/fpu/w_log10.c
>--- sysdeps/x86_64/fpu/w_log10.c created
>+++ sysdeps/x86_64/fpu/w_log10.c 2002-12-03 13:43:22.000000000 +0100 1.1
>@@ -0,0 +1,2 @@
>+#define COMPILING_LOG10 1
>+#include <w_log.c>
>============================================================
>Index: sysdeps/x86_64/fpu/w_log10f.c
>--- sysdeps/x86_64/fpu/w_log10f.c created
>+++ sysdeps/x86_64/fpu/w_log10f.c 2002-12-03 13:43:22.000000000 +0100 1.1
>@@ -0,0 +1,2 @@
>+#define COMPILING_LOG10 1
>+#include <w_logf.c>
>============================================================
>Index: sysdeps/x86_64/fpu/w_log2.c
>--- sysdeps/x86_64/fpu/w_log2.c created
>+++ sysdeps/x86_64/fpu/w_log2.c 2002-12-03 13:43:22.000000000 +0100 1.1
>@@ -0,0 +1,2 @@
>+#define COMPILING_LOG2 1
>+#include <w_log.c>
>============================================================
>Index: sysdeps/x86_64/fpu/w_log2f.c
>--- sysdeps/x86_64/fpu/w_log2f.c created
>+++ 
sysdeps/x86_64/fpu/w_log2f.c 2002-12-03 13:43:22.000000000 +0100 1.1 >@@ -0,0 +1,2 @@ >+#define COMPILING_LOG2 1 >+#include <w_logf.c> >============================================================ >Index: sysdeps/x86_64/fpu/w_logf.c >--- sysdeps/x86_64/fpu/w_logf.c created >+++ sysdeps/x86_64/fpu/w_logf.c 2002-12-03 15:35:44.000000000 +0100 1.1 >@@ -0,0 +1,375 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_NANF_WITH_FLAGS >+#define USE_INFINITYF_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_NANF_WITH_FLAGS >+#undef USE_INFINITYF_WITH_FLAGS >+ >+/* Deal with errno for out-of-range result */ >+#include "libm_errno_amd.h" >+static inline float retval_errno_erange_overflow(float x) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)x; >+ exc.type = SING; >+#if defined(COMPILING_LOG10) >+ exc.name = (char *)"log10f"; >+#elif defined(COMPILING_LOG2) >+ exc.name = (char *)"log2f"; >+#else >+ exc.name = (char *)"logf"; >+#endif >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = -HUGE; >+ else >+ exc.retval = -infinityf_with_flags(AMD_F_DIVBYZERO); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+/* Deal with errno for out-of-range argument */ >+static inline float retval_errno_edom(float x) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)x; >+ exc.type = DOMAIN; >+#if defined(COMPILING_LOG10) >+ exc.name = (char *)"log10f"; >+#elif defined(COMPILING_LOG2) >+ exc.name = (char *)"log2f"; >+#else >+ exc.name = (char *)"logf"; >+#endif >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = -HUGE; >+ else >+ exc.retval = nanf_with_flags(AMD_F_INVALID); >+ if (_LIB_VERSION == 
_POSIX_) >+ __set_errno(EDOM); >+ else if (!matherr(&exc)) >+ { >+ if(_LIB_VERSION == _SVID_) >+#if defined(COMPILING_LOG10) >+ (void)fputs("log10f: DOMAIN error\n", stderr); >+#elif defined(COMPILING_LOG2) >+ (void)fputs("log2f: DOMAIN error\n", stderr); >+#else >+ (void)fputs("logf: DOMAIN error\n", stderr); >+#endif >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+ >+#if defined(COMPILING_LOG10) >+float __log10f(float fx) >+#elif defined(COMPILING_LOG2) >+float __log2f(float fx) >+#else >+float __logf(float fx) >+#endif >+{ >+ >+ double x = fx; >+ >+ int xexp; >+ double r, f, f1, f2, q, u, v, z1, z2, poly; >+ int index; >+ unsigned long ux; >+ >+ /* >+ Computes natural log(x) for float arguments. Algorithm is >+ basically a promotion of the arguments to double followed >+ by an inlined version of the double algorithm, simplified >+ for efficiency (see log_amd.c). Simplifications include: >+ * Special algorithm for arguments near 1.0 not required >+ * Scaling of denormalised arguments not required >+ * Shorter core series approximations used >+ */ >+ >+/* Arrays ln_lead_table and ln_tail_table contain >+ leading and trailing parts respectively of precomputed >+ values of natural log(1+i/64), for i = 0, 1, ..., 64. >+ ln_lead_table contains the first 24 bits of precision, >+ and ln_tail_table contains a further 53 bits precision. 
*/ >+ >+ static const double ln_lead_table[65] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 1.55041813850402832031e-02, /* 0x3f8fc0a800000000 */ >+ 3.07716131210327148438e-02, /* 0x3f9f829800000000 */ >+ 4.58095073699951171875e-02, /* 0x3fa7745800000000 */ >+ 6.06245994567871093750e-02, /* 0x3faf0a3000000000 */ >+ 7.52233862876892089844e-02, /* 0x3fb341d700000000 */ >+ 8.96121263504028320312e-02, /* 0x3fb6f0d200000000 */ >+ 1.03796780109405517578e-01, /* 0x3fba926d00000000 */ >+ 1.17783010005950927734e-01, /* 0x3fbe270700000000 */ >+ 1.31576299667358398438e-01, /* 0x3fc0d77e00000000 */ >+ 1.45181953907012939453e-01, /* 0x3fc2955280000000 */ >+ 1.58604979515075683594e-01, /* 0x3fc44d2b00000000 */ >+ 1.71850204467773437500e-01, /* 0x3fc5ff3000000000 */ >+ 1.84922337532043457031e-01, /* 0x3fc7ab8900000000 */ >+ 1.97825729846954345703e-01, /* 0x3fc9525a80000000 */ >+ 2.10564732551574707031e-01, /* 0x3fcaf3c900000000 */ >+ 2.23143517971038818359e-01, /* 0x3fcc8ff780000000 */ >+ 2.35566020011901855469e-01, /* 0x3fce270700000000 */ >+ 2.47836112976074218750e-01, /* 0x3fcfb91800000000 */ >+ 2.59957492351531982422e-01, /* 0x3fd0a324c0000000 */ >+ 2.71933674812316894531e-01, /* 0x3fd1675c80000000 */ >+ 2.83768117427825927734e-01, /* 0x3fd22941c0000000 */ >+ 2.95464158058166503906e-01, /* 0x3fd2e8e280000000 */ >+ 3.07025015354156494141e-01, /* 0x3fd3a64c40000000 */ >+ 3.18453729152679443359e-01, /* 0x3fd4618bc0000000 */ >+ 3.29753279685974121094e-01, /* 0x3fd51aad80000000 */ >+ 3.40926527976989746094e-01, /* 0x3fd5d1bd80000000 */ >+ 3.51976394653320312500e-01, /* 0x3fd686c800000000 */ >+ 3.62905442714691162109e-01, /* 0x3fd739d7c0000000 */ >+ 3.73716354370117187500e-01, /* 0x3fd7eaf800000000 */ >+ 3.84411692619323730469e-01, /* 0x3fd89a3380000000 */ >+ 3.94993782043457031250e-01, /* 0x3fd9479400000000 */ >+ 4.05465066432952880859e-01, /* 0x3fd9f323c0000000 */ >+ 4.15827870368957519531e-01, /* 0x3fda9cec80000000 */ >+ 4.26084339618682861328e-01, /* 
0x3fdb44f740000000 */ >+ 4.36236739158630371094e-01, /* 0x3fdbeb4d80000000 */ >+ 4.46287095546722412109e-01, /* 0x3fdc8ff7c0000000 */ >+ 4.56237375736236572266e-01, /* 0x3fdd32fe40000000 */ >+ 4.66089725494384765625e-01, /* 0x3fddd46a00000000 */ >+ 4.75845873355865478516e-01, /* 0x3fde744240000000 */ >+ 4.85507786273956298828e-01, /* 0x3fdf128f40000000 */ >+ 4.95077252388000488281e-01, /* 0x3fdfaf5880000000 */ >+ 5.04556000232696533203e-01, /* 0x3fe02552a0000000 */ >+ 5.13945698738098144531e-01, /* 0x3fe0723e40000000 */ >+ 5.23248136043548583984e-01, /* 0x3fe0be72e0000000 */ >+ 5.32464742660522460938e-01, /* 0x3fe109f380000000 */ >+ 5.41597247123718261719e-01, /* 0x3fe154c3c0000000 */ >+ 5.50647079944610595703e-01, /* 0x3fe19ee6a0000000 */ >+ 5.59615731239318847656e-01, /* 0x3fe1e85f40000000 */ >+ 5.68504691123962402344e-01, /* 0x3fe23130c0000000 */ >+ 5.77315330505371093750e-01, /* 0x3fe2795e00000000 */ >+ 5.86049020290374755859e-01, /* 0x3fe2c0e9e0000000 */ >+ 5.94707071781158447266e-01, /* 0x3fe307d720000000 */ >+ 6.03290796279907226562e-01, /* 0x3fe34e2880000000 */ >+ 6.11801505088806152344e-01, /* 0x3fe393e0c0000000 */ >+ 6.20240390300750732422e-01, /* 0x3fe3d90260000000 */ >+ 6.28608644008636474609e-01, /* 0x3fe41d8fe0000000 */ >+ 6.36907458305358886719e-01, /* 0x3fe4618bc0000000 */ >+ 6.45137906074523925781e-01, /* 0x3fe4a4f840000000 */ >+ 6.53301239013671875000e-01, /* 0x3fe4e7d800000000 */ >+ 6.61398470401763916016e-01, /* 0x3fe52a2d20000000 */ >+ 6.69430613517761230469e-01, /* 0x3fe56bf9c0000000 */ >+ 6.77398800849914550781e-01, /* 0x3fe5ad4040000000 */ >+ 6.85303986072540283203e-01, /* 0x3fe5ee02a0000000 */ >+ 6.93147122859954833984e-01}; /* 0x3fe62e42e0000000 */ >+ >+ static const double ln_tail_table[65] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 5.15092497094772879206e-09, /* 0x3e361f807c79f3db */ >+ 4.55457209735272790188e-08, /* 0x3e6873c1980267c8 */ >+ 2.86612990859791781788e-08, /* 0x3e5ec65b9f88c69e */ >+ 
2.23596477332056055352e-08, /* 0x3e58022c54cc2f99 */ >+ 3.49498983167142274770e-08, /* 0x3e62c37a3a125330 */ >+ 3.23392843005887000414e-08, /* 0x3e615cad69737c93 */ >+ 1.35722380472479366661e-08, /* 0x3e4d256ab1b285e9 */ >+ 2.56504325268044191098e-08, /* 0x3e5b8abcb97a7aa2 */ >+ 5.81213608741512136843e-08, /* 0x3e6f34239659a5dc */ >+ 5.59374849578288093334e-08, /* 0x3e6e07fd48d30177 */ >+ 5.06615629004996189970e-08, /* 0x3e6b32df4799f4f6 */ >+ 5.24588857848400955725e-08, /* 0x3e6c29e4f4f21cf8 */ >+ 9.61968535632653505972e-10, /* 0x3e1086c848df1b59 */ >+ 1.34829655346594463137e-08, /* 0x3e4cf456b4764130 */ >+ 3.65557749306383026498e-08, /* 0x3e63a02ffcb63398 */ >+ 3.33431709374069198903e-08, /* 0x3e61e6a6886b0976 */ >+ 5.13008650536088382197e-08, /* 0x3e6b8abcb97a7aa2 */ >+ 5.09285070380306053751e-08, /* 0x3e6b578f8aa35552 */ >+ 3.20853940845502057341e-08, /* 0x3e6139c871afb9fc */ >+ 4.06713248643004200446e-08, /* 0x3e65d5d30701ce64 */ >+ 5.57028186706125221168e-08, /* 0x3e6de7bcb2d12142 */ >+ 5.48356693724804282546e-08, /* 0x3e6d708e984e1664 */ >+ 1.99407553679345001938e-08, /* 0x3e556945e9c72f36 */ >+ 1.96585517245087232086e-09, /* 0x3e20e2f613e85bda */ >+ 6.68649386072067321503e-09, /* 0x3e3cb7e0b42724f6 */ >+ 5.89936034642113390002e-08, /* 0x3e6fac04e52846c7 */ >+ 2.85038578721554472484e-08, /* 0x3e5e9b14aec442be */ >+ 5.09746772910284482606e-08, /* 0x3e6b5de8034e7126 */ >+ 5.54234668933210171467e-08, /* 0x3e6dc157e1b259d3 */ >+ 6.29100830926604004874e-09, /* 0x3e3b05096ad69c62 */ >+ 2.61974119468563937716e-08, /* 0x3e5c2116faba4cdd */ >+ 4.16752115011186398935e-08, /* 0x3e665fcc25f95b47 */ >+ 2.47747534460820790327e-08, /* 0x3e5a9a08498d4850 */ >+ 5.56922172017964209793e-08, /* 0x3e6de647b1465f77 */ >+ 2.76162876992552906035e-08, /* 0x3e5da71b7bf7861d */ >+ 7.08169709942321478061e-09, /* 0x3e3e6a6886b09760 */ >+ 5.77453510221151779025e-08, /* 0x3e6f0075eab0ef64 */ >+ 4.43021445893361960146e-09, /* 0x3e33071282fb989b */ >+ 3.15140984357495864573e-08, /* 
0x3e60eb43c3f1bed2 */ >+ 2.95077445089736670973e-08, /* 0x3e5faf06ecb35c84 */ >+ 1.44098510263167149349e-08, /* 0x3e4ef1e63db35f68 */ >+ 1.05196987538551827693e-08, /* 0x3e469743fb1a71a5 */ >+ 5.23641361722697546261e-08, /* 0x3e6c1cdf404e5796 */ >+ 7.72099925253243069458e-09, /* 0x3e4094aa0ada625e */ >+ 5.62089493829364197156e-08, /* 0x3e6e2d4c96fde3ec */ >+ 3.53090261098577946927e-08, /* 0x3e62f4d5e9a98f34 */ >+ 3.80080516835568242269e-08, /* 0x3e6467c96ecc5cbe */ >+ 5.66961038386146408282e-08, /* 0x3e6e7040d03dec5a */ >+ 4.42287063097349852717e-08, /* 0x3e67bebf4282de36 */ >+ 3.45294525105681104660e-08, /* 0x3e6289b11aeb783f */ >+ 2.47132034530447431509e-08, /* 0x3e5a891d1772f538 */ >+ 3.59655343422487209774e-08, /* 0x3e634f10be1fb591 */ >+ 5.51581770357780862071e-08, /* 0x3e6d9ce1d316eb93 */ >+ 3.60171867511861372793e-08, /* 0x3e63562a19a9c442 */ >+ 1.94511067964296180547e-08, /* 0x3e54e2adf548084c */ >+ 1.54137376631349347838e-08, /* 0x3e508ce55cc8c97a */ >+ 3.93171034490174464173e-09, /* 0x3e30e2f613e85bda */ >+ 5.52990607758839766440e-08, /* 0x3e6db03ebb0227bf */ >+ 3.29990737637586136511e-08, /* 0x3e61b75bb09cb098 */ >+ 1.18436010922446096216e-08, /* 0x3e496f16abb9df22 */ >+ 4.04248680368301346709e-08, /* 0x3e65b3f399411c62 */ >+ 2.27418915900284316293e-08, /* 0x3e586b3e59f65355 */ >+ 1.70263791333409206020e-08, /* 0x3e52482ceae1ac12 */ >+ 5.76999904754328540596e-08}; /* 0x3e6efa39ef35793c */ >+ >+ static const double >+ log2 = 6.931471805599453e-01, /* 0x3fe62e42fefa39ef */ >+ >+ /* Approximating polynomial coefficients */ >+ cb_1 = 8.33333333333333593622e-02, /* 0x3fb5555555555557 */ >+ cb_2 = 1.24999999978138668903e-02; /* 0x3f89999999865ede */ >+ >+#if defined(COMPILING_LOG10) >+ static const double >+ log10e = 4.34294481903251827651e-01; /* 0x3fdbcb7b1526e50e */ >+#elif defined(COMPILING_LOG2) >+ static const double >+ log2e = 1.44269504088896340735e+00; /* 0x3ff71547652b82fe */ >+#endif >+ >+ >+ GET_BITS_DP64(x, ux); >+ >+#if !defined(COMPILING_LOG10) 
&& !defined(COMPILING_LOG2) >+ if (ux == 0x4005bf0a80000000) >+ /* Treat this, the number closest to e in float arithmetic, >+ as a special case and return 1.0 */ >+ return 1.0F; >+#endif >+ >+ if ((ux & EXPBITS_DP64) == EXPBITS_DP64) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return fx + fx; /* Raise invalid if it is a signalling NaN */ >+ else >+ { >+ /* x is infinity */ >+ if (ux & SIGNBIT_DP64) >+ /* x is negative infinity. Return a NaN. */ >+ return retval_errno_edom(fx); >+ else >+ return fx; >+ } >+ } >+ else if (!(ux & ~SIGNBIT_DP64)) >+ /* x is +/-zero. Return -infinity with div-by-zero flag. */ >+ return retval_errno_erange_overflow(fx); >+ else if (ux & SIGNBIT_DP64) >+ /* x is negative. Return a NaN. */ >+ return retval_errno_edom(fx); >+ >+ >+ /* >+ First, we decompose the argument x to the form >+ x = 2**M * (F1 + F2), >+ where 1 <= F1+F2 < 2, M has the value of an integer, >+ F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128. >+ >+ Second, we approximate log( 1 + F2/F1 ) by an odd polynomial >+ in U, where U = 2 F2 / (2 F2 + F1). >+ Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ). >+ The core approximation calculates >+ Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U - 1. >+ Note that log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ), >+ thus, Poly = 2 arctanh( U/2 ) / U - 1. >+ >+ It is not hard to see that >+ log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ). >+ Hence, we return Z1 = log(F1), and Z2 = log( 1 + F2/F1). >+ The values of log(F1) are calculated beforehand and stored >+ in the program. >+ */ >+ >+ f = x; >+ >+ /* Store the exponent of x in xexp and put >+ f into the range [0.5,1) */ >+ xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; >+ PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f); >+ >+ /* Now x = 2**xexp * f, 1/2 <= f < 1. 
*/ >+ >+ /* Set index to be the nearest integer to 128*f */ >+ /* >+ r = 128.0 * f; >+ index = (int)(r + 0.5); >+ */ >+ /* This code instead of the above can save several cycles. >+ It only works because 64 <= r < 128, so >+ the nearest integer is always contained in exactly >+ 7 bits, and the right shift is always the same. */ >+ index = (((ux & 0x000fc00000000000) | 0x0010000000000000) >> 46) >+ + ((ux & 0x0000200000000000) >> 45); >+ z1 = ln_lead_table[index-64]; >+ q = ln_tail_table[index-64]; >+ f1 = index * 0.0078125; /* 0.0078125 = 1/128 */ >+ f2 = f - f1; >+ /* At this point, x = 2**xexp * ( f1 + f2 ) where >+ f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */ >+ >+ /* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5*f2 ) */ >+ /* u = f2 / (f1 + 0.5 * f2); */ >+ u = f2 / (f1 + 0.5 * f2); >+ >+ /* Here, |u| <= 2(exp(1/16)-1) / (exp(1/16)+1). >+ The core approximation calculates >+ poly = [log(1 + u/2) - log(1 - u/2)]/u - 1 */ >+ v = u * u; >+ poly = (v * (cb_1 + v * cb_2)); >+ z2 = q + (u + u * poly); >+ >+ /* Now z1,z2 is an extra-precise approximation of log(f). >+ Add xexp * log(2) to z1, z2 to get the result log(x). */ >+ >+ r = xexp*log2 + z1 + z2; >+#if defined (COMPILING_LOG10) >+ return log10e*r; >+#elif defined(COMPILING_LOG2) >+ return log2e*r; >+#else >+ return r; >+#endif >+} >+ >+#if defined(COMPILING_LOG10) >+weak_alias (__log10f, log10f) >+weak_alias (__log10f, __ieee754_log10f) >+#elif defined(COMPILING_LOG2) >+weak_alias (__log2f, log2f) >+weak_alias (__log2f, __ieee754_log2f) >+#else >+weak_alias (__logf, logf) >+weak_alias (__logf, __ieee754_logf) >+#endif >============================================================ >Index: sysdeps/x86_64/fpu/w_pow.c >--- sysdeps/x86_64/fpu/w_pow.c created >+++ sysdeps/x86_64/fpu/w_pow.c 2002-12-03 13:43:25.000000000 +0100 1.1 >@@ -0,0 +1,807 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SPLITEXP >+#define USE_SCALEDOUBLE_2 >+#define USE_SCALEDOUBLE_3 >+#define USE_SCALEDOWNDOUBLE >+#define USE_INFINITY_WITH_FLAGS >+#define USE_ZERO_WITH_FLAGS >+#define USE_NAN_WITH_FLAGS >+#define USE_VAL_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_SPLITEXP >+#undef USE_SCALEDOUBLE_2 >+#undef USE_SCALEDOUBLE_3 >+#undef USE_SCALEDOWNDOUBLE >+#undef USE_INFINITY_WITH_FLAGS >+#undef USE_ZERO_WITH_FLAGS >+#undef USE_NAN_WITH_FLAGS >+#undef USE_VAL_WITH_FLAGS >+ >+/* Deal with errno for out-of-range result */ >+#include "libm_errno_amd.h" >+static inline double retval_errno_erange_overflow(double x, double y, int sign) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = y; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"pow"; >+ if (_LIB_VERSION == _SVID_) >+ { >+ if (sign == 1) >+ exc.retval = HUGE; >+ else /* sign = -1 */ >+ exc.retval = -HUGE; >+ } >+ else >+ { >+ if (sign == 1) >+ exc.retval = infinity_with_flags(AMD_F_OVERFLOW); >+ else /* sign == -1 */ >+ exc.retval = -infinity_with_flags(AMD_F_OVERFLOW); >+ } >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+static inline double retval_errno_erange_underflow(double x, double y, int sign) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = y; >+ exc.type = UNDERFLOW; >+ exc.name = (char *)"pow"; >+ if (sign == 1) >+ exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT); >+ else /* sign == -1 */ >+ exc.retval = -zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+/* Deal with errno for out-of-range 
arguments */ >+static inline double retval_errno_edom(double x, double y, int type) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = y; >+ exc.type = DOMAIN; >+ exc.name = (char *)"pow"; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = 0.0; >+ else if (type == 1) >+ exc.retval = infinity_with_flags(AMD_F_DIVBYZERO); >+ else if (type == 2) >+ exc.retval = -infinity_with_flags(AMD_F_DIVBYZERO); >+ else /* type == 3 */ >+ exc.retval = nan_with_flags(AMD_F_INVALID); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno (EDOM); >+ if (!matherr(&exc)) >+ { >+ if (_LIB_VERSION == _SVID_) >+ (void)fputs("pow: DOMAIN error\n", stderr); >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+ >+/* Computes the exact product of x and y, the result being the >+ nearly doublelength number (z,zz) */ >+static void mul12(double x, double y, >+ double *z, double *zz) >+{ >+ double hx, tx, hy, ty; >+ /* Split x into hx (head) and tx (tail). Do the same for y. */ >+ unsigned long u; >+ GET_BITS_DP64(x, u); >+ u &= 0xfffffffff8000000; >+ PUT_BITS_DP64(u, hx); >+ tx = x - hx; >+ GET_BITS_DP64(y, u); >+ u &= 0xfffffffff8000000; >+ PUT_BITS_DP64(u, hy); >+ ty = y - hy; >+ *z = x * y; >+ *zz = (((hx * hy - *z) + hx * ty) + tx * hy) + tx * ty; >+} >+ >+/* Computes the doublelength product of (x,xx) and (y,yy), the result >+ being the doublelength number (z,zz) */ >+static void mul2(double x, double xx, double y, double yy, >+ double *z, double *zz) >+{ >+ double c, cc; >+ mul12(x, y, &c, &cc); >+ cc = x * yy + xx * y + cc; >+ *z = c + cc; >+ *zz = (c - *z) + cc; >+} >+ >+double __pow(double x, double y) >+{ >+ >+ /* Arrays log2_lead_table and log2_tail_table contain >+ leading and trailing parts respectively of precomputed >+ values of natural log2(1+i/64), for i = 0, 1, ..., 64. >+ ln_lead_table contains the first 24 bits of precision, >+ and ln_tail_table contains a further 53 bits precision. 
*/ >+ >+ static const double log2_lead_table[65] = { >+ 0.00000000000000000000E+00, /* 0x0000000000000000 */ >+ 2.23678126931190490723E-02, /* 0x3F96E79680000000 */ >+ 4.43941168487071990967E-02, /* 0x3FA6BAD360000000 */ >+ 6.60891830921173095703E-02, /* 0x3FB0EB3880000000 */ >+ 8.74628350138664245605E-02, /* 0x3FB663F6E0000000 */ >+ 1.08524456620216369629E-01, /* 0x3FBBC84240000000 */ >+ 1.29283010959625244141E-01, /* 0x3FC08C5880000000 */ >+ 1.49747118353843688965E-01, /* 0x3FC32AE9E0000000 */ >+ 1.69924989342689514160E-01, /* 0x3FC5C01A20000000 */ >+ 1.89824551343917846680E-01, /* 0x3FC84C2BC0000000 */ >+ 2.09453359246253967285E-01, /* 0x3FCACF5E20000000 */ >+ 2.28818684816360473633E-01, /* 0x3FCD49EE40000000 */ >+ 2.47927501797676086426E-01, /* 0x3FCFBC16A0000000 */ >+ 2.66786515712738037109E-01, /* 0x3FD11307C0000000 */ >+ 2.85402208566665649414E-01, /* 0x3FD24407A0000000 */ >+ 3.03780734539031982422E-01, /* 0x3FD37124C0000000 */ >+ 3.21928083896636962891E-01, /* 0x3FD49A7840000000 */ >+ 3.39849978685379028320E-01, /* 0x3FD5C01A20000000 */ >+ 3.57551991939544677734E-01, /* 0x3FD6E221C0000000 */ >+ 3.75039428472518920898E-01, /* 0x3FD800A560000000 */ >+ 3.92317414283752441406E-01, /* 0x3FD91BBA80000000 */ >+ 4.09390926361083984375E-01, /* 0x3FDA337600000000 */ >+ 4.26264733076095581055E-01, /* 0x3FDB47EBE0000000 */ >+ 4.42943483591079711914E-01, /* 0x3FDC592FA0000000 */ >+ 4.59431618452072143555E-01, /* 0x3FDD6753E0000000 */ >+ 4.75733429193496704102E-01, /* 0x3FDE726AA0000000 */ >+ 4.91853088140487670898E-01, /* 0x3FDF7A8560000000 */ >+ 5.07794618606567382812E-01, /* 0x3FE03FDA80000000 */ >+ 5.23561954498291015625E-01, /* 0x3FE0C10500000000 */ >+ 5.39158761501312255859E-01, /* 0x3FE140C9E0000000 */ >+ 5.54588794708251953125E-01, /* 0x3FE1BF3100000000 */ >+ 5.69855570793151855469E-01, /* 0x3FE23C41C0000000 */ >+ 5.84962487220764160156E-01, /* 0x3FE2B80340000000 */ >+ 5.99912822246551513672E-01, /* 0x3FE3327C60000000 */ >+ 6.14709794521331787109E-01, /* 
0x3FE3ABB3E0000000 */ >+ 6.29356563091278076172E-01, /* 0x3FE423B060000000 */ >+ 6.43856167793273925781E-01, /* 0x3FE49A7840000000 */ >+ 6.58211469650268554688E-01, /* 0x3FE5101180000000 */ >+ 6.72425329685211181641E-01, /* 0x3FE5848220000000 */ >+ 6.86500489711761474609E-01, /* 0x3FE5F7CFE0000000 */ >+ 7.00439691543579101562E-01, /* 0x3FE66A0080000000 */ >+ 7.14245498180389404297E-01, /* 0x3FE6DB1960000000 */ >+ 7.27920413017272949219E-01, /* 0x3FE74B1FC0000000 */ >+ 7.41466939449310302734E-01, /* 0x3FE7BA18E0000000 */ >+ 7.54887461662292480469E-01, /* 0x3FE82809C0000000 */ >+ 7.68184304237365722656E-01, /* 0x3FE894F740000000 */ >+ 7.81359672546386718750E-01, /* 0x3FE900E600000000 */ >+ 7.94415831565856933594E-01, /* 0x3FE96BDAC0000000 */ >+ 8.07354867458343505859E-01, /* 0x3FE9D5D9E0000000 */ >+ 8.20178925991058349609E-01, /* 0x3FEA3EE7E0000000 */ >+ 8.32889974117279052734E-01, /* 0x3FEAA708E0000000 */ >+ 8.45490038394927978516E-01, /* 0x3FEB0E4120000000 */ >+ 8.57980966567993164062E-01, /* 0x3FEB749480000000 */ >+ 8.70364665985107421875E-01, /* 0x3FEBDA0700000000 */ >+ 8.82643043994903564453E-01, /* 0x3FEC3E9CA0000000 */ >+ 8.94817709922790527344E-01, /* 0x3FECA258C0000000 */ >+ 9.06890571117401123047E-01, /* 0x3FED053F60000000 */ >+ 9.18863236904144287109E-01, /* 0x3FED6753E0000000 */ >+ 9.30737316608428955078E-01, /* 0x3FEDC899A0000000 */ >+ 9.42514479160308837891E-01, /* 0x3FEE291420000000 */ >+ 9.54196274280548095703E-01, /* 0x3FEE88C6A0000000 */ >+ 9.65784251689910888672E-01, /* 0x3FEEE7B460000000 */ >+ 9.77279901504516601562E-01, /* 0x3FEF45E080000000 */ >+ 9.88684654235839843750E-01, /* 0x3FEFA34E00000000 */ >+ 1.00000000000000000000E+00}; /* 0x3FF0000000000000 */ >+ >+ static const double log2_tail_table[65] = { >+ 0.00000000000000000000E+00, /* 0x0000000000000000 */ >+ 3.35335459194866276130E-10, /* 0x3DF70B48A629B89C */ >+ 2.50974623855642191448E-09, /* 0x3E258EFD87313606 */ >+ 7.36565512335896390543E-09, /* 0x3E3FA29F9AB3CF74 */ >+ 
6.23647298369351871453E-09, /* 0x3E3AC913167CCC53 */ >+ 1.57952684118980398844E-10, /* 0x3DE5B5774C7658A0 */ >+ 5.98534121117166302413E-09, /* 0x3E39B4F3C72C4F78 */ >+ 1.15083837244190179789E-09, /* 0x3E13C570D0FA8F90 */ >+ 1.20996228487473215213E-08, /* 0x3E49FBD6879FA00B */ >+ 7.53609938318432874467E-09, /* 0x3E402F03B2FDD224 */ >+ 6.38269581457264661091E-09, /* 0x3E3B69D927DFC23C */ >+ 5.67952040356156465017E-09, /* 0x3E3864B2DF91E96A */ >+ 1.16459094073677371864E-08, /* 0x3E4902680A23A8D9 */ >+ 2.49821633265319561946E-08, /* 0x3E5AD30B75CB0970 */ >+ 1.02955826924364881206E-08, /* 0x3E461C0E73048B72 */ >+ 1.36380709420054099385E-08, /* 0x3E4D499BD9B32266 */ >+ 1.09907253849796912371E-08, /* 0x3E479A3715FC9256 */ >+ 2.41992456974946430426E-08, /* 0x3E59FBD6879FA00B */ >+ 1.26785390154315961619E-08, /* 0x3E4B3A19BCAF1AA4 */ >+ 2.87440583546118995874E-09, /* 0x3E28B0E2A19575B0 */ >+ 8.49500784748945819113E-09, /* 0x3E423E2E1169656A */ >+ 9.77661777174938265384E-09, /* 0x3E44FEC0A13AF880 */ >+ 2.16260023578294509223E-08, /* 0x3E573882A0A4146E */ >+ 1.22576485902594488001E-08, /* 0x3E4A52B6ACFCFDCA */ >+ 1.85225112644675216321E-10, /* 0x3DE975077F1F5F00 */ >+ 1.77290105086271740075E-09, /* 0x3E1E754D20C519E0 */ >+ 8.18918703987935816281E-09, /* 0x3E41960D9D9C3262 */ >+ 2.15921288850262793860E-08, /* 0x3E572F32FE672868 */ >+ 1.55872185666914610882E-09, /* 0x3E1AC754CB104AE0 */ >+ 4.96067191344004864525E-08, /* 0x3E6AA1E5439E15A5 */ >+ 5.69693854190458063634E-08, /* 0x3E6E95D00DE3B513 */ >+ 3.75377959861950863279E-08, /* 0x3E642727C8080ECC */ >+ 1.35003920212974864041E-08, /* 0x3E4CFDEB43CFD004 */ >+ 1.99405761661543437744E-08, /* 0x3E5569394D90D724 */ >+ 4.95938764277745619566E-08, /* 0x3E6AA02166CCCAB2 */ >+ 5.69883315429349605246E-08, /* 0x3E6E986AA9670761 */ >+ 2.19814507699593824742E-08, /* 0x3E579A3715FC9256 */ >+ 1.31015261824841576777E-08, /* 0x3E4C22A3E377A524 */ >+ 1.22862844080671745121E-08, /* 0x3E4A6274CF0E362C */ >+ 3.74714569064514928410E-08, /* 
0x3E641E09AEB8CB1A */ >+ 2.65975130588343109077E-08, /* 0x3E5C8F11979A5DB6 */ >+ 1.94857332324691494283E-08, /* 0x3E54EC3293B2FBE0 */ >+ 4.15459262300620263689E-08, /* 0x3E664E0753C6E578 */ >+ 4.69518366451302198484E-08, /* 0x3E693502E409EAB7 */ >+ 4.05011760638924658298E-08, /* 0x3E65BE7072DBDC04 */ >+ 2.05395606358225316367E-08, /* 0x3E560DDF1680DD44 */ >+ 4.09782728853196822622E-08, /* 0x3E660002CCFE43F5 */ >+ 3.47842490297177925737E-08, /* 0x3E62ACB5F5EFEC49 */ >+ 5.45992606015825934783E-08, /* 0x3E6D5010B3666559 */ >+ 3.64241293587091694274E-08, /* 0x3E638E181ED0798D */ >+ 4.00474626225128781862E-08, /* 0x3E658014D37CDE37 */ >+ 1.25494472416488406547E-08, /* 0x3E4AF321AF5E9BB4 */ >+ 2.85595789566572715872E-08, /* 0x3E5EAA65B49696E2 */ >+ 5.35982971014292903334E-08, /* 0x3E6CC67E6DB516DE */ >+ 5.36693769435427990824E-09, /* 0x3E370D02A99B4C58 */ >+ 5.33851529883522815863E-08, /* 0x3E6CA9331635FEE3 */ >+ 2.44911174062771809500E-08, /* 0x3E5A4C112CE6312E */ >+ 3.70450225289350432643E-10, /* 0x3DF975077F1F5F00 */ >+ 2.09544573213940723936E-08, /* 0x3E567FEAD8BCCE74 */ >+ 2.61789310367290825660E-08, /* 0x3E5C1C02803F7554 */ >+ 3.61063271131029934309E-08, /* 0x3E63626A72AA21A3 */ >+ 3.29721761549390770201E-08, /* 0x3E61B3A9507D6DC1 */ >+ 2.19953998687869412865E-08, /* 0x3E579E0CAA9C9AB6 */ >+ 3.25363260095299997864E-08, /* 0x3E6177C23362928B */ >+ 0.00000000000000000000E+00}; /* 0x0000000000000000 */ >+ >+ static const double >+ /* Reciprocal of log(2.0) */ >+ reclog2_lead = 1.44269504088896338700E+00, /* 0x3FF71547652B82FE */ >+ reclog2_tail = 2.03552737409310207851E-17; /* 0x3C7777D0FFDA0D20 */ >+ >+ const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */ >+ >+ unsigned long ux, ax, uy, ay, mask; >+ int yexp, inty, xpos, ypos, negateres; >+ double r, u1, u2, w, w1, w2; >+ volatile int dummy; >+ >+ double u, r1, r2, f, z1, z2, q, f1, f2, poly; >+ int xexp, expadjust, index, iw; >+ >+ double argx = x, argy = y; >+ >+ GET_BITS_DP64(x, ux); >+ ax = 
ux & (~SIGNBIT_DP64); >+ xpos = ax == ux; >+ GET_BITS_DP64(y, uy); >+ ay = uy & (~SIGNBIT_DP64); >+ ypos = ay == uy; >+ >+ if (ux == 0x3ff0000000000000) >+ { >+ /* x = +1.0. Return +1.0 for all y, even NaN, >+ raising invalid only if y is a signalling NaN */ >+ if (y + 1.0 == 2.0) dummy = 1; >+ return 1.0; >+ } >+ else if (ay == 0) >+ { >+ /* y is zero. Return 1.0, even if x is infinity or NaN, >+ raising invalid only if x is a signalling NaN */ >+ if (x + 1.0 == 2.0) dummy = 1; >+ return 1.0; >+ } >+ else if (((ax & EXPBITS_DP64) == EXPBITS_DP64) && >+ (ax & MANTBITS_DP64)) >+ /* x is NaN. Return NaN, with invalid exception if it's >+ a signalling NaN. */ >+ return x + x; >+ else if (((ay & EXPBITS_DP64) == EXPBITS_DP64) && >+ (ay & MANTBITS_DP64)) >+ /* y is NaN. Return NaN, with invalid exception if y >+ is a signalling NaN. */ >+ return y + y; >+ else if (uy == 0x3ff0000000000000) >+ /* y is 1.0; return x */ >+ return x; >+ else if ((ay & EXPBITS_DP64) > 0x43e0000000000000) >+ { >+ /* y is infinite or so large that the result would >+ overflow or underflow. Flags should be raised >+ unless y is an exact infinity. */ >+ int yinf = (ay == EXPBITS_DP64); >+ if (ypos) >+ { >+ /* y is +ve */ >+ if (ax == 0) >+ /* abs(x) = 0.0. */ >+ return 0.0; >+ else if (ax < 0x3ff0000000000000) >+ { >+ /* abs(x) < 1.0 */ >+ if (yinf) >+ return 0.0; >+ else >+ return retval_errno_erange_underflow(argx, argy, 1); >+ } >+ else if (ax == 0x3ff0000000000000) >+ /* abs(x) = 1.0. */ >+ return 1.0; >+ else >+ { >+ /* abs(x) > 1.0 */ >+ if (yinf) >+ return infinity_with_flags(0); >+ else >+ return retval_errno_erange_overflow(argx, argy, 1); >+ } >+ } >+ else >+ { >+ /* y is -ve */ >+ if (ax == 0) >+ /* abs(x) = 0.0. Return +infinity. */ >+ return retval_errno_edom(argx, argy, 1); >+ else if (ax < 0x3ff0000000000000) >+ { >+ /* abs(x) < 1.0; return +infinity. 
*/ >+ if (yinf) >+ return infinity_with_flags(0); >+ else >+ return retval_errno_erange_overflow(argx, argy, 1); >+ } >+ else if (ax == 0x3ff0000000000000) >+ /* abs(x) = 1.0. */ >+ return 1.0; >+ else >+ { >+ /* abs(x) > 1.0 */ >+ if (yinf) >+ return 0.0; >+ else >+ return retval_errno_erange_underflow(argx, argy, 1); >+ } >+ } >+ } >+ >+ /* See whether y is an integer. >+ inty = 0 means not an integer. >+ inty = 1 means odd integer. >+ inty = 2 means even integer. >+ */ >+ yexp = ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 + 1; >+ if (yexp < 1) >+ inty = 0; >+ else if (yexp > 53) >+ inty = 2; >+ else /* 1 <= yexp <= 53 */ >+ { >+ /* Mask out the bits of r that we don't want */ >+ mask = (1L << (53 - yexp)) - 1; >+ if ((uy & mask) != 0) >+ inty = 0; >+ else if (((uy & ~mask) >> (53 - yexp)) & 0x0000000000000001) >+ inty = 1; >+ else >+ inty = 2; >+ } >+ >+ if ((ax & EXPBITS_DP64) == EXPBITS_DP64) >+ { >+ /* x is infinity (NaN was already ruled out). */ >+ if (xpos) >+ { >+ /* x is +infinity */ >+ if (ypos) >+ /* y > 0.0 */ >+ return x; >+ else >+ return 0.0; >+ } >+ else >+ { >+ /* x is -infinity */ >+ if (inty == 1) >+ { >+ /* y is an odd integer */ >+ if (ypos) >+ /* Result is -infinity */ >+ return x; >+ else >+ return -zero_with_flags(0); >+ } >+ else >+ { >+ if (ypos) >+ /* Result is +infinity */ >+ return -x; >+ else >+ return 0.0; >+ } >+ } >+ } >+ else if (ax == 0) >+ { >+ /* x is zero */ >+ if (xpos) >+ { >+ /* x is +0.0 */ >+ if (ypos) >+ /* y is positive; return +0.0 for all cases */ >+ return x; >+ else >+ /* y is negative; return +infinity with div-by-zero >+ for all cases */ >+ return retval_errno_edom(argx, argy, 1); >+ } >+ else >+ { >+ /* x is -0.0 */ >+ if (ypos) >+ { >+ /* y is positive */ >+ if (inty == 1) >+ /* -0.0 raised to a positive odd integer returns -0.0 */ >+ return x; >+ else >+ /* Return +0.0 */ >+ return -x; >+ } >+ else >+ { >+ /* y is negative */ >+ if (inty == 1) >+ /* -0.0 raised to a negative odd integer returns 
-infinity >+ with div-by-zero */ >+ return retval_errno_edom(argx, argy, 2); >+ else >+ /* Return +infinity with div-by-zero */ >+ return retval_errno_edom(argx, argy, 1); >+ } >+ } >+ } >+ >+ negateres = 0; >+ if (!xpos) >+ { >+ /* x is negative */ >+ if (inty) >+ { >+ /* It's OK because y is an integer. */ >+ ux = ax; >+ PUT_BITS_DP64(ux, x); /* x = abs(x) */ >+ /* If y is odd, the result will be negative */ >+ negateres = (inty == 1); >+ } >+ else >+ /* y is not an integer. Return a NaN. */ >+ return retval_errno_edom(argx, argy, 3); >+ } >+ >+ if (ay < 0x3c00000000000000) /* abs(y) < 2^(-63) */ >+ { >+ /* y is close enough to zero for the result to be 1.0 >+ no matter what the size of x */ >+ return 1.0 + y; >+ } >+ >+ /* >+ Calculate log2(x) >+ >+ First, we decompose the argument x to the form >+ x = 2**M * (F1 + F2), >+ where 1 <= F1+F2 < 2, M has the value of an integer, >+ F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128. >+ >+ Second, we approximate log2( 1 + F2/F1 ) by a polynomial >+ in U, where U = 2 F2 / (2 F1 + F2). >+ Note that log2( 1 + F2/F1 ) = log2( 1 + U/2 ) - log2( 1 - U/2 ). >+ The core approximation calculates >+ Poly = [log2( 1 + U/2 ) - log2( 1 - U/2 )]/U - 1. >+ >+ It is not hard to see that >+ log2(x) = M + log2(F1) + log2( 1 + F2/F1 ). >+ Hence, we return Z1 = log2(F1), and Z2 = log2( 1 + F2/F1). >+ The values of log2(F1) are calculated beforehand and stored >+ in the program. >+ */ >+ >+ if (ux < IMPBIT_DP64) >+ { >+ /* The input argument x is denormalized */ >+ /* Normalize f by increasing the exponent by 60 >+ and subtracting a correction to account for the implicit >+ bit. This replaces a slow denormalized >+ multiplication by a fast normal subtraction. 
*/ >+ static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */ >+ PUT_BITS_DP64(ux | 0x03d0000000000000, f); >+ f -= corr; >+ GET_BITS_DP64(f, ux); >+ expadjust = 60; >+ } >+ else >+ { >+ f = x; >+ expadjust = 0; >+ } >+ >+ /* Store the exponent of x in xexp and put >+ f into the range [0.5,1) */ >+ xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust; >+ PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f); >+ >+ /* Now x = 2**xexp * f, 1/2 <= f < 1. */ >+ >+ /* Set index to be the nearest integer to 128*f */ >+ /* >+ r = 128.0 * f; >+ index = (int)(r + 0.5); >+ */ >+ /* This code instead of the above can save several cycles. >+ It only works because 64 <= r < 128, so >+ the nearest integer is always contained in exactly >+ 7 bits, and the right shift is always the same. */ >+ index = (((ux & 0x000fc00000000000) | 0x0010000000000000) >> 46) >+ + ((ux & 0x0000200000000000) >> 45); >+ >+ z1 = log2_lead_table[index-64]; >+ q = log2_tail_table[index-64]; >+ f1 = index * 0.0078125; /* 0.0078125 = 1/128 */ >+ f2 = f - f1; >+ /* At this point, x = 2**xexp * ( f1 + f2 ) where >+ f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. 
*/ >+ >+ /* Compute z2 from the Taylor series of log(1+u), u = f2/f1 (natural log); the mul2 by reclog2 below converts it to log2(1+f2/f1) */ >+ >+ u = f2/f1; >+#if 0 >+ div2(f2,0.0,f1,0.0,&r1,&r2); >+#else >+ { >+ double cc, h, hh; >+ mul12(u, f1, &h, &hh); >+ cc = ((f2 - h) - hh) / f1; >+ r1 = u + cc; >+ r2 = (u - r1) + cc; >+ } >+#endif >+ poly = -u*(u*(1./2.-u*(1./3.-u* >+ (1./4.-u*(1./5.-u*(1./6.-u* >+ (1./7.-u*(1./8.)))))))); >+#if 0 >+ add2(r1,r2,poly,0.0,&r1,&r2); >+#else >+ { >+ double r, s; >+ r = r1 + poly; >+ s = ((r1 - r) + poly) + r2; >+ r1 = r + s; >+ r2 = (r - r1) + s; >+ } >+#endif >+ >+ mul2(reclog2_lead,reclog2_tail,r1,r2,&r1,&r2); >+ >+#if 0 >+ add2(r1,r2,q,0.0,&r1,&r2); >+#else >+ { >+ if (r1 == 0.0) >+ r1 = q; >+ else if (q != 0.0) >+ { >+ double r, s; >+ r = r1 + q; >+ s = ((r1 - r) + q) + r2; >+ r1 = r + s; >+ r2 = (r - r1) + s; >+ } >+ } >+#endif >+ >+#if 0 >+ add2(z1,0.0,r1,r2,&z1,&z2); >+#else >+ { >+ double r, s; >+ r = z1 + r1; >+ s = ((z1 - r) + r1) + r2; >+ z1 = r + s; >+ z2 = (r - z1) + s; >+ } >+#endif >+ >+ /* Now z1,z2 is an extra-precise approximation of log2(2f). >+ Add xexp to z1,z2 to get the result log2(x). >+ The computed r1 is not subject to rounding error because >+ xexp has at most 10 significant bits, log(2) has 24 significant >+ bits, and z1 has up to 24 bits; and the exponents of z1 >+ and z2 differ by at most 6. */ >+ >+#if 0 >+ add2(z1,z2,xexp,0.0,&u1,&u2); >+#else >+ { >+ double r, s; >+ r = z1 + xexp; >+ s = ((xexp - r) + z1) + z2; >+ u1 = r + s; >+ u2 = (r - u1) + s; >+ } >+#endif >+ >+ /* end of log2(x) calculation*/ >+ >+ /* Test for overflow and underflow due to y*log2(x) >+ being too large or small. 
*/ >+ >+ if ((u1+u2)*y > 1025) >+ { >+ if (negateres) >+ return retval_errno_erange_overflow(argx, argy, -1); >+ else >+ return retval_errno_erange_overflow(argx, argy, 1); >+ } >+ else if ((u1+u2)*y < -1074) >+ { >+ if (negateres) >+ return retval_errno_erange_underflow(argx, argy, -1); >+ else >+ return retval_errno_erange_underflow(argx, argy, 1); >+ } >+ >+ /* Carefully compute log2(x) * y */ >+#if 0 >+ mul2(u1, u2, y, 0.0, &w1, &w2); >+#else >+ { >+ double c, cc; >+ mul12(u1, y, &c, &cc); >+ cc = u2 * y + cc; >+ w1 = c + cc; >+ w2 = (c - w1) + cc; >+ } >+#endif >+ >+ w = w1 + w2; >+ iw = (int)(w); >+ >+#if 0 >+ sub2(w1, w2, (double)iw, 0.0, &w1, &w2); >+#else >+ { >+ double a, b; >+ a = w1 - iw; >+ b = ((w1 - a) - iw) + w2; >+ w1 = a + b; >+ w2 = (a - w1) + b; >+ } >+#endif >+ >+ w = w1 + w2; >+ >+ /* The following code computes r = exp2(w) */ >+ >+ { >+ static const double >+ log2 = 6.931471805599453094178e-01, /* 0x3fe62e42fefa39ef */ >+ log2_lead = 6.93147167563438415527E-01, /* 0x3fe62e42f8000000 */ >+ log2_tail = 1.29965068938898869640E-08, /* 0x3e4be8e7bcd5e4f1 */ >+ one_by_32_lead = 0.03125; >+ >+ double p, z1, z2, z, hx, tx, y1, y2; >+ int m, n; >+ >+ GET_BITS_DP64(w, ux); >+ ax = ux & (~SIGNBIT_DP64); >+ >+ /* Handle small arguments separately */ >+ if (ax < 0x3fb7154764ee6c2f) /* abs(x) < 1/(16*log2) */ >+ { >+ if (ax < 0x3c00000000000000) /* abs(x) < 2^(-63) */ >+ z = 1.0 + w; /* Raises inexact if x is non-zero */ >+ else >+ { >+ /* Split x into hx (head) and tx (tail). */ >+ unsigned long u; >+ hx = w; >+ GET_BITS_DP64(hx, u); >+ u &= 0xfffffffff8000000; >+ PUT_BITS_DP64(u, hx); >+ tx = w - hx; >+ /* Carefully multiply x by log2. 
y1 is the most significant >+ part of the result, and y2 the least significant part */ >+ y1 = w * log2_lead; >+ y2 = (((hx * log2_lead - y1) + hx * log2_tail) + >+ tx * log2_lead) + tx * log2_tail; >+ >+ p = y1 + y2; >+ z = (9.99564649780173690e-1 + >+ (1.61251249355268050e-5 + >+ (2.37986978239838493e-2 + >+ 2.68724774856111190e-7*p)*p)*p)/ >+ (9.99564649780173692e-1 + >+ (-4.99766199765151309e-1 + >+ (1.070876894098586184e-1 + >+ (-1.189773642681502232e-2 + >+ 5.9480622371960190616e-4*p)*p)*p)*p); >+ >+ z = ((z * y1) + (z * y2)) + 1.0; >+ } >+ r = scaleDouble_2(z, iw); >+ } >+ else >+ { >+ /* Find m, z1 and z2 such that exp2(x) = 2**m * (z1 + z2) */ >+ splitexp(w, log2, 32.0, one_by_32_lead, 0.0, &m, &z1, &z2); >+ >+ /* Scale (z1 + z2) by 2.0**(m + iw) */ >+ >+ n = m+iw; >+ z = z1+z2; >+ >+ if (n < -1022) >+ { /* Result will be denormalised after scaling >+ down by 2**n. Using scaleDownDouble instead >+ of scaleDouble_3 is faster in this case. */ >+ GET_BITS_DP64(z, ux); >+ scaleDownDouble(ux, -n, &ux); >+ PUT_BITS_DP64(ux, r); >+ } >+ else >+ r = scaleDouble_3(z, n); >+ } >+ } >+ >+ /* If r overflowed or underflowed we need to deal with errno */ >+ if (r > large) >+ { >+ /* Result has overflowed. */ >+ if (negateres) >+ return retval_errno_erange_overflow(argx, argy, -1); >+ else >+ return retval_errno_erange_overflow(argx, argy, 1); >+ } >+ else if (r == 0.0) >+ { >+ /* Result has underflowed. */ >+ if (negateres) >+ return retval_errno_erange_underflow(argx, argy, -1); >+ else >+ return retval_errno_erange_underflow(argx, argy, 1); >+ } >+ else >+ { >+ if (negateres) >+ return -r; >+ else >+ return r; >+ } >+} >+ >+weak_alias (__pow, pow) >+weak_alias (__pow, __ieee754_pow) >============================================================ >Index: sysdeps/x86_64/fpu/w_powf.c >--- sysdeps/x86_64/fpu/w_powf.c created >+++ sysdeps/x86_64/fpu/w_powf.c 2002-12-03 13:43:26.000000000 +0100 1.1 >@@ -0,0 +1,358 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_ZEROF_WITH_FLAGS >+#define USE_INFINITYF_WITH_FLAGS >+#define USE_NANF_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_ZEROF_WITH_FLAGS >+#undef USE_INFINITYF_WITH_FLAGS >+#undef USE_NANF_WITH_FLAGS >+ >+/* Deal with errno for out-of-range result. The overflow helper returns HUGE in SVID mode and otherwise an infinity that raises the overflow flag, signed according to sign (1 or -1). ERANGE is set directly in POSIX mode; in the other modes it is set only when matherr() returns 0. */ >+#include "libm_errno_amd.h" >+static inline float retval_errno_erange_overflow(float x, float y, int sign) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)y; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"powf"; >+ if (_LIB_VERSION == _SVID_) >+ { >+ if (sign == 1) >+ exc.retval = HUGE; >+ else /* sign = -1 */ >+ exc.retval = -HUGE; >+ } >+ else >+ { >+ if (sign == 1) >+ exc.retval = infinityf_with_flags(AMD_F_OVERFLOW); >+ else /* sign == -1 */ >+ exc.retval = -infinityf_with_flags(AMD_F_OVERFLOW); >+ } >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+/* Underflow counterpart: returns a zero signed according to sign (1 or -1) that raises the underflow and inexact flags, with the same errno/matherr policy as the overflow helper. */ >+static inline float retval_errno_erange_underflow(float x, float y, int sign) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)y; >+ exc.type = UNDERFLOW; >+ exc.name = (char *)"powf"; >+ if (sign == 1) >+ exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT); >+ else /* sign == -1 */ >+ exc.retval = -zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+/* Deal with errno for out-of-range arguments. SVID mode returns 0.0; otherwise type selects the result: 1 = +infinity with divide-by-zero, 2 = -infinity with divide-by-zero, 3 = NaN with invalid. EDOM is set directly in POSIX mode; in the other modes matherr() is consulted first, exactly as in the ERANGE helpers, and SVID mode also prints a message on stderr when matherr() returns 0. */ >+static inline float retval_errno_edom(float x, float y, int type) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)y; >+ exc.type = DOMAIN; >+ exc.name = (char *)"powf"; >+ if (_LIB_VERSION ==
_SVID_) >+ exc.retval = 0.0; >+ else if (type == 1) >+ exc.retval = infinityf_with_flags(AMD_F_DIVBYZERO); >+ else if (type == 2) >+ exc.retval = -infinityf_with_flags(AMD_F_DIVBYZERO); >+ else /* type == 3 */ >+ exc.retval = nanf_with_flags(AMD_F_INVALID); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno (EDOM); >+ else if (!matherr(&exc)) >+ { >+ if (_LIB_VERSION == _SVID_) >+ (void)fputs("pow: DOMAIN error\n", stderr); >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+/* Single-precision x**y. Special operands (x == 1, y == 0, NaN, y == 1, very large or infinite y, infinite or zero x, negative x) are resolved from the bit patterns; all remaining cases are evaluated in double precision as exp2(y * log2(x)) and range-checked against the float limits, with errno handled by the helpers above. */ >+float __powf(float x, float y) >+{ >+ unsigned int ux, ax, uy, ay, mask; >+ int yexp, inty, xpos, ypos, negateres; >+ double dx, dy, dw, dlog2, dr; >+ volatile int dummy; >+ >+ /* Largest float, stored as a double */ >+ const double large = 3.40282346638528859812e+38; /* 0x47efffffe0000000 */ >+ >+ /* Smallest float, stored as a double */ >+ const double tiny = 1.40129846432481707092e-45; /* 0x36a0000000000000 */ >+ >+ GET_BITS_SP32(x, ux); >+ ax = ux & (~SIGNBIT_SP32); >+ xpos = ax == ux; >+ GET_BITS_SP32(y, uy); >+ ay = uy & (~SIGNBIT_SP32); >+ ypos = ay == uy; >+ >+ if (ux == 0x3f800000) >+ { >+ /* x = +1.0. Return +1.0 for all y, even NaN, >+ raising invalid only if y is a signalling NaN */ >+ if (y + 1.0F == 2.0F) dummy = 1; >+ return 1.0F; >+ } >+ else if (ay == 0) >+ { >+ /* y is zero. Return 1.0, even if x is infinity or NaN, >+ raising invalid only if x is a signalling NaN */ >+ if (x + 1.0F == 2.0F) dummy = 1; >+ return 1.0F; >+ } >+ else if (((ax & EXPBITS_SP32) == EXPBITS_SP32) && >+ (ax & MANTBITS_SP32)) >+ /* x is NaN. Return NaN, with invalid exception if it's >+ a signalling NaN. */ >+ return x + x; >+ else if (((ay & EXPBITS_SP32) == EXPBITS_SP32) && >+ (ay & MANTBITS_SP32)) >+ /* y is NaN. Return NaN, with invalid exception if y >+ is a signalling NaN. */ >+ return y + y; >+ else if (uy == 0x3f800000) >+ /* y is 1.0; return x */ >+ return x; >+ else if ((ay & EXPBITS_SP32) > 0x4f000000) >+ { >+ /* y is infinite or so large that the result would >+ overflow or underflow.
Flags should be raised >+ unless y is an exact infinity. */ >+ int yinf = (ay == EXPBITS_SP32); >+ if (ypos) >+ { >+ /* y is +ve */ >+ if (ax == 0) >+ /* abs(x) = 0.0. */ >+ return 0.0F; >+ else if (ax < 0x3f800000) >+ { >+ /* abs(x) < 1.0 */ >+ if (yinf) >+ return 0.0F; >+ else >+ return retval_errno_erange_underflow(x, y, 1); >+ } >+ else if (ax == 0x3f800000) >+ /* abs(x) = 1.0. */ >+ return 1.0F; >+ else >+ { >+ /* abs(x) > 1.0; an infinite x gives an exact +infinity, so no flags or errno in that case either */ >+ if (yinf || ax == EXPBITS_SP32) >+ return infinityf_with_flags(0); >+ else >+ return retval_errno_erange_overflow(x, y, 1); >+ } >+ } >+ else >+ { >+ /* y is -ve */ >+ if (ax == 0) >+ /* abs(x) = 0.0. Return +infinity. */ >+ return retval_errno_edom(x, y, 1); >+ else if (ax < 0x3f800000) >+ { >+ /* abs(x) < 1.0; return +infinity. */ >+ if (yinf) >+ return infinityf_with_flags(0); >+ else >+ return retval_errno_erange_overflow(x, y, 1); >+ } >+ else if (ax == 0x3f800000) >+ /* abs(x) = 1.0. */ >+ return 1.0F; >+ else >+ { >+ /* abs(x) > 1.0; an infinite x gives an exact zero, so no flags or errno in that case either */ >+ if (yinf || ax == EXPBITS_SP32) >+ return 0.0F; >+ else >+ return retval_errno_erange_underflow(x, y, 1); >+ } >+ } >+ } >+ >+ /* See whether y is an integer. >+ inty = 0 means not an integer. >+ inty = 1 means odd integer. >+ inty = 2 means even integer. >+ */ >+ yexp = ((uy & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32 + 1; >+ if (yexp < 1) >+ inty = 0; >+ else if (yexp > 24) >+ inty = 2; >+ else /* 1 <= yexp <= 24 */ >+ { >+ /* Mask selecting the fractional bits of y */ >+ mask = (1 << (24 - yexp)) - 1; >+ if ((uy & mask) != 0) >+ inty = 0; >+ else if (((uy & ~mask) >> (24 - yexp)) & 0x00000001) >+ inty = 1; >+ else >+ inty = 2; >+ } >+ >+ if ((ax & EXPBITS_SP32) == EXPBITS_SP32) >+ { >+ /* x is infinity (NaN was already ruled out).
*/ >+ if (xpos) >+ { >+ /* x is +infinity */ >+ if (ypos) >+ /* y > 0.0 */ >+ return x; >+ else >+ return 0.0F; >+ } >+ else >+ { >+ /* x is -infinity */ >+ if (inty == 1) >+ { >+ /* y is an odd integer */ >+ if (ypos) >+ /* Result is -infinity */ >+ return x; >+ else >+ return -0.0F; >+ } >+ else >+ { >+ if (ypos) >+ /* Result is +infinity */ >+ return -x; >+ else >+ return 0.0F; >+ } >+ } >+ } >+ else if (ax == 0) >+ { >+ /* x is zero */ >+ if (xpos) >+ { >+ /* x is +0.0 */ >+ if (ypos) >+ /* y is positive; return +0.0 for all cases */ >+ return x; >+ else >+ /* y is negative; return +infinity with div-by-zero >+ for all cases */ >+ return retval_errno_edom(x, y, 1); >+ } >+ else >+ { >+ /* x is -0.0 */ >+ if (ypos) >+ { >+ /* y is positive */ >+ if (inty == 1) >+ /* -0.0 raised to a positive odd integer returns -0.0 */ >+ return x; >+ else >+ /* Return +0.0 */ >+ return -x; >+ } >+ else >+ { >+ /* y is negative */ >+ if (inty == 1) >+ /* -0.0 raised to a negative odd integer returns -infinity >+ with div-by-zero */ >+ return retval_errno_edom(x, y, 2); >+ else >+ /* Return +infinity with div-by-zero */ >+ return retval_errno_edom(x, y, 1); >+ } >+ } >+ } >+ >+ negateres = 0; >+ if (!xpos) >+ { >+ /* x is negative */ >+ if (inty) >+ { >+ /* It's OK because y is an integer. */ >+ ux = ax; >+ PUT_BITS_SP32(ux, x); /* x = abs(x) */ >+ /* If y is odd, the result will be negative */ >+ negateres = (inty == 1); >+ } >+ else >+ /* y is not an integer. Return a NaN.
*/ >+ return retval_errno_edom(x, y, 3); >+ } >+ >+ if (ay < 0x2e800000) /* abs(y) < 2^(-34) */ >+ { >+ /* y is close enough to zero for the result to be 1.0 >+ no matter what the size of x */ >+ return 1.0F + y; >+ } >+ >+ /* Simply use double precision for computation of log2(x), >+ y*log2(x) and exp2(y*log2(x)) */ >+ dx = x; >+ dy = y; >+ dlog2 = __log2(dx); >+ dw = dy * dlog2; >+ dr = __exp2(dw); >+ >+ /* If dr overflowed or underflowed we need to deal with errno */ >+ if (dr > large) >+ { >+ /* Double dr has overflowed range of float. */ >+ if (negateres) >+ return retval_errno_erange_overflow(x, y, -1); >+ else >+ return retval_errno_erange_overflow(x, y, 1); >+ } >+ else if (dr < tiny) >+ { >+ /* Double dr has underflowed range of float. */ >+ if (negateres) >+ return retval_errno_erange_underflow(x, y, -1); >+ else >+ return retval_errno_erange_underflow(x, y, 1); >+ } >+ else >+ { >+ if (negateres) >+ return -dr; >+ else >+ return dr; >+ } >+} >+ >+weak_alias (__powf, powf) >+weak_alias (__powf, __ieee754_powf) >============================================================ >Index: sysdeps/x86_64/fpu/w_remainder.c >--- sysdeps/x86_64/fpu/w_remainder.c created >+++ sysdeps/x86_64/fpu/w_remainder.c 2002-12-03 13:43:26.000000000 +0100 1.1 >@@ -0,0 +1,270 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc.
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_NAN_WITH_FLAGS >+#define USE_SCALEDOUBLE_3 >+#define USE_GET_FPSW_INLINE >+#define USE_SET_FPSW_INLINE >+#include "libm_inlines_amd.h" >+#undef USE_NAN_WITH_FLAGS >+#undef USE_SCALEDOUBLE_3 >+#undef USE_GET_FPSW_INLINE >+#undef USE_SET_FPSW_INLINE >+ >+/* Computes the exact product of x and y, the result being the >+ nearly doublelength number (z,zz) */ >+static inline void dekker_mul12(double x, double y, >+ double *z, double *zz) >+{ >+ double hx, tx, hy, ty; >+ /* Split x into hx (head) and tx (tail). Do the same for y. */ >+ unsigned long u; >+ GET_BITS_DP64(x, u); >+ u &= 0xfffffffff8000000; >+ PUT_BITS_DP64(u, hx); >+ tx = x - hx; >+ GET_BITS_DP64(y, u); >+ u &= 0xfffffffff8000000; >+ PUT_BITS_DP64(u, hy); >+ ty = y - hy; >+ *z = x * y; >+ *zz = (((hx * hy - *z) + hx * ty) + tx * hy) + tx * ty; >+} >+ >+/* Shared implementation of fmod (when COMPILING_FMOD is defined) and remainder. NaN, infinite and zero operands are resolved from the bit patterns. Otherwise abs(x) is reduced by abs(y) through a sequence of exact partial remainders and the sign of x is applied to the result; remainder additionally moves the result into the half-interval nearest zero, choosing the even quotient on ties. */ >+#if defined(COMPILING_FMOD) >+double __fmod(double x, double y) >+#else >+double __remainder(double x, double y) >+#endif >+{ >+ double dx, dy, scale, w, t, v, c, cc; >+ int i, ntimes, xexp, yexp; >+ unsigned long u, ux, uy, ax, ay, todd; >+ unsigned int sw; >+ >+ dx = x; >+ dy = y; >+ >+ GET_BITS_DP64(dx, ux); >+ GET_BITS_DP64(dy, uy); >+ ax = ux & ~SIGNBIT_DP64; >+ ay = uy & ~SIGNBIT_DP64; >+ xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ yexp = ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ >+ if (xexp < 1 || xexp > BIASEDEMAX_DP64 || >+ yexp < 1 || yexp > BIASEDEMAX_DP64) >+ { >+ /* x or y is zero, denormalized, NaN or infinity */ >+ if (xexp > BIASEDEMAX_DP64) >+ { >+ /* x is NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return dx + dx; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* x is infinity; result is NaN. A NaN y is propagated (invalid only if it is signalling); any other y raises invalid */ >+ return (yexp > BIASEDEMAX_DP64 && (uy & MANTBITS_DP64)) ? dy + dy : nan_with_flags(AMD_F_INVALID);
>+ } >+ else if (yexp > BIASEDEMAX_DP64) >+ { >+ /* y is NaN or infinity */ >+ if (uy & MANTBITS_DP64) >+ /* y is NaN */ >+ return dy + dy; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* y is infinity; result is x */ >+ return dx; >+ } >+ else if (ax == 0x0000000000000000) >+ { >+ /* x is zero */ >+ if (ay == 0x0000000000000000) >+ /* y is zero */ >+ return nan_with_flags(AMD_F_INVALID); >+ else >+ return dx; >+ } >+ else if (ay == 0x0000000000000000) >+ /* y is zero */ >+ return nan_with_flags(AMD_F_INVALID); >+ >+ /* We've exhausted all other possibilities. One or both of x and >+ y must be denormalized */ >+ if (xexp < 1) >+ { >+ /* x is denormalized. Figure out its exponent. */ >+ u = ax; >+ while (u < IMPBIT_DP64) >+ { >+ xexp--; >+ u <<= 1; >+ } >+ } >+ if (yexp < 1) >+ { >+ /* y is denormalized. Figure out its exponent. */ >+ u = ay; >+ while (u < IMPBIT_DP64) >+ { >+ yexp--; >+ u <<= 1; >+ } >+ } >+ } >+ else if (ax == ay) >+ { >+ /* abs(x) == abs(y); return zero with the sign of x */ >+ PUT_BITS_DP64(ux & SIGNBIT_DP64, dx); >+ return dx; >+ } >+ >+ /* Set x = abs(x), y = abs(y) */ >+ PUT_BITS_DP64(ax, dx); >+ PUT_BITS_DP64(ay, dy); >+ >+ if (ax < ay) >+ { >+ /* abs(x) < abs(y). For remainder, subtract y when x is more than half of y. The test uses dx + dx, which is exact, because 0.5*dy is rounded when dy is a small denormal; 0.5*dy is kept only for dy >= 2^1022, where it is exact and dx + dx could overflow. */ >+#if !defined(COMPILING_FMOD) >+ if (ay < 0x7fd0000000000000 ? dx + dx > dy : dx > 0.5*dy) >+ dx -= dy; >+#endif >+ return x < 0.0? -dx : dx; >+ } >+ >+ /* Save the current floating-point status word. We need >+ to do this because the remainder function is always >+ exact for finite arguments, but our algorithm causes >+ the inexact flag to be raised. We therefore need to >+ restore the entry status before exiting. */ >+ sw = get_fpsw_inline(); >+ >+ /* Set ntimes to the number of times we need to do a >+ partial remainder. If the exponent of x is an exact multiple >+ of 52 larger than the exponent of y, and the mantissa of x is >+ less than the mantissa of y, ntimes will be one too large >+ but it doesn't matter - it just means that we'll go round >+ the loop below one extra time.
*/ >+ if (xexp <= yexp) >+ ntimes = 0; >+ else >+ ntimes = (xexp - yexp) / 52; >+ >+ if (ntimes == 0) >+ { >+ w = dy; >+ scale = 1.0; >+ } >+ else >+ { >+ /* Set w = y * 2^(52*ntimes) */ >+ w = scaleDouble_3(dy, ntimes * 52); >+ >+ /* Set scale = 2^(-52) */ >+ PUT_BITS_DP64((unsigned long)(-52 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, >+ scale); >+ } >+ >+ /* Each time round the loop we compute a partial remainder. >+ This is done by subtracting a large multiple of w >+ from x each time, where w is a scaled up version of y. >+ The subtraction must be performed exactly in quad >+ precision, though the result at each stage can >+ fit exactly in a double precision number. */ >+ for (i = 0; i < ntimes; i++) >+ { >+ /* t is the integer multiple of w that we will subtract. >+ We use a truncated value for t. >+ >+ N.B. w has been chosen so that the integer t will have >+ at most 52 significant bits. This is the amount by >+ which the exponent of the partial remainder dx gets reduced >+ every time around the loop. In theory we could use >+ 53 bits in t, but the quad precision multiplication >+ routine dekker_mul12 does not allow us to do that because >+ it loses the last (106th) bit of its quad precision result. */ >+ >+ /* Set dx = dx - w * t, where t is equal to trunc(dx/w). */ >+ t = (double)(long)(dx / w); >+ /* At this point, t may be one too large due to >+ rounding of dx/w */ >+ >+ /* Compute w * t in quad precision */ >+ dekker_mul12(w, t, &c, &cc); >+ >+ /* Subtract w * t from dx */ >+ v = dx - c; >+ dx = v + (((dx - v) - c) - cc); >+ >+ /* If t was one too large, dx will be negative. Add back >+ one w */ >+ /* It might be possible to speed up this loop by finding >+ a way to compute correctly truncated t directly from dx and w. >+ We would then avoid the need for this check on negative dx.
*/ >+ if (dx < 0.0) >+ dx += w; >+ >+ /* Scale w down by 2^(-52) for the next iteration */ >+ w *= scale; >+ } >+ >+ /* One more time */ >+ /* Variable todd says whether the integer t is odd or not */ >+ t = (double)(long)(dx / w); >+ todd = ((long)(dx / w)) & 1; >+ dekker_mul12(w, t, &c, &cc); >+ v = dx - c; >+ dx = v + (((dx - v) - c) - cc); >+ if (dx < 0.0) >+ { >+ todd = !todd; >+ dx += w; >+ } >+ >+ /* At this point, dx lies in the range [0,dy) */ >+#if !defined(COMPILING_FMOD) >+ /* For the fmod function, we're done apart from setting >+ the correct sign. */ >+ /* For the remainder function, we need to adjust dx >+ so that it lies in the range (-y/2, y/2] by carefully >+ subtracting w (== dy == y) if necessary. The rigmarole >+ with todd is to get the correct sign of the result >+ when x/y lies exactly half way between two integers, >+ when we need to choose the even integer. */ >+ if (ay < 0x7fd0000000000000) >+ { >+ if (dx + dx > w || (todd && (dx + dx == w))) >+ dx -= w; >+ } >+ else if (dx > 0.5 * w || (todd && (dx == 0.5 * w))) >+ dx -= w; >+ >+#endif >+ >+ /* **** N.B. for some reason this breaks the 32 bit version >+ of remainder when compiling with optimization. */ >+ /* Restore the entry status flags */ >+ set_fpsw_inline(sw); >+ >+ /* Set the result sign according to input argument x */ >+ return x < 0.0? -dx : dx; >+ >+} >+ >+#if defined(COMPILING_FMOD) >+weak_alias (__fmod, fmod) >+weak_alias (__fmod, __ieee754_fmod) >+#else >+weak_alias (__remainder, remainder) >+weak_alias (__remainder, __ieee754_remainder) >+#endif >============================================================ >Index: sysdeps/x86_64/fpu/w_remainder_piby2.c >--- sysdeps/x86_64/fpu/w_remainder_piby2.c created >+++ sysdeps/x86_64/fpu/w_remainder_piby2.c 2002-12-03 13:43:28.000000000 +0100 1.1 >@@ -0,0 +1,541 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc.
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+/* Define this to get debugging print statements activated */ >+#define DEBUGGING_PRINT >+#undef DEBUGGING_PRINT >+ >+ >+#ifdef DEBUGGING_PRINT >+#include <stdio.h> >+char *d2b(int d, int bitsper, int point) >+{ >+ static char buff[50]; /* static storage: the returned pointer refers to this buffer, so each call overwrites the previous result */ >+ int i, j; >+ j = bitsper; >+ if (point >= 0 && point <= bitsper) >+ j++; >+ buff[j] = '\0'; >+ for (i = bitsper - 1; i >= 0; i--) >+ { >+ j--; >+ if (d % 2 == 1) >+ buff[j] = '1'; >+ else >+ buff[j] = '0'; >+ if (i == point) >+ { >+ j--; >+ buff[j] = '.'; >+ } >+ d /= 2; >+ } >+ return buff; >+} >+#endif >+ >+/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using >+ extra precision, and return the result in r, rr. >+ Return value "region" tells how many lots of pi/2 were subtracted >+ from x to put it in the range [-pi/4,pi/4], mod 4. r receives the leading part of the reduced argument and rr the trailing correction. Three methods are used: for x up to 11*pi/4 a two-part multiple of pi/2 is subtracted directly; for x up to 1.0e5 x is multiplied by 2/pi in extra precision and the nearest integer removed; every other x, and any case where one of the first two methods leaves a result no larger than cancellationThresh, goes through the multi-precision method driven by the pibits table.
*/ >+void __remainder_piby2(double x, double *r, double *rr, int *region) >+{ >+ >+ /* eleven_piby4 is the closest machine number BELOW 11*pi/4 */ >+ static const double >+ eleven_piby4 = 8.6393797973719301808159e+00; /* 0x4021475cc9eedf00 */ >+ >+ static const double >+ piby2_lead = 1.57079632679489655800e+00, /* 0x3ff921fb54442d18 */ >+ piby2_tail = 6.12323399573676480327e-17, /* 0x3c91a62633145c06 */ >+ pi_lead = 3.14159265358979311600e+00, /* 0x400921fb54442d18 */ >+ pi_tail = 1.22464679914735296065e-16, /* 0x3ca1a62633145c06 */ >+ three_piby2_lead = 4.71238898038468967400e+00, /* 0x4012d97c7f3321d2 */ >+ three_piby2_tail = 1.83697019872102919446e-16, /* 0x3caa79394c9e8a08 */ >+ two_pi_lead = 6.28318530717958623200e+00, /* 0x401921fb54442d18 */ >+ two_pi_tail = 2.44929359829470592131e-16, /* 0x3cb1a62633145c06 */ >+ five_piby2_lead = 7.85398163397448278999e+00, /* 0x401f6a7a2955385e */ >+ five_piby2_tail = 3.06161699786838264816e-16; /* 0x3cb60fafbfd97308 */ >+ >+ /* Each of these threshold values is the closest machine >+ number BELOW a multiple of pi/4, i.e. they are not >+ rounded to nearest. thresh1 is 1*pi/4, thresh2 is 2*pi/4, etc. >+ This ensures that we end up in precisely the correct region.
*/ >+ static const double >+ thresh1 = 7.8539816339744827899949e-01, /* 0x3fe921fb54442d18 */ >+ thresh2 = 1.5707963267948965579989e+00, /* 0x3ff921fb54442d18 */ >+ thresh3 = 2.3561944901923448369984e+00, /* 0x4002d97c7f3321d2 */ >+ thresh4 = 3.1415926535897931159979e+00, /* 0x400921fb54442d18 */ >+ thresh5 = 3.9269908169872413949974e+00, /* 0x400f6a7a2955385e */ >+ thresh6 = 4.7123889803846896739969e+00, /* 0x4012d97c7f3321d2 */ >+ thresh7 = 5.4977871437821379529964e+00, /* 0x4015fdbbe9bba775 */ >+ thresh8 = 6.2831853071795862319959e+00, /* 0x401921fb54442d18 */ >+ thresh9 = 7.0685834705770345109954e+00, /* 0x401c463abeccb2bb */ >+ thresh10 = 7.8539816339744827899949e+00; /* 0x401f6a7a2955385e */ >+ >+ static const double >+ twobypi = 6.36619772367581271411E-01, /* 0x3FE45F306DC9C882 */ >+ twobypi_part1 = 6.36619761586189270020e-01, /* 0x3fe45f3068000000 */ >+ twobypi_part2 = 1.07813920013910546913e-08, /* 0x3e47272208000000 */ >+ twobypi_part3 = 7.16649491121506946045e-17, /* 0x3c94a7f09d5f47d6 */ >+ piby2_part1 = 1.57079631090164184570e+00, /* 0x3ff921fb50000000 */ >+ piby2_part2 = 1.58932547122958567343e-08, /* 0x3e5110b460000000 */ >+ piby2_part3 = 6.12323399573676480327e-17; /* 0x3c91a62633145c06 */ >+ >+ static const double cancellationThresh = 1.0e-12; >+ int done = 0; >+ >+ /* For small values of x, up to 11*pi/4, we do quad precision >+ subtraction of the relevant multiple of pi/2 */ >+ if (x <= eleven_piby4) >+ { >+ double s, t, ctest; >+ if (x <= thresh1) /* x < pi/4 */ >+ { >+ /* Quick return if x is already less than pi/4 */ >+ *r = x; >+ *rr = 0.0; >+ *region = 0; >+ return; >+ } >+ else if (x <= thresh2) /* x < 2*pi/4 */ >+ { >+ t = x - piby2_lead; >+ s = ((-piby2_lead - t) + x) - piby2_tail; >+ *region = 1; >+ } >+ else if (x <= thresh3) /* x < 3*pi/4 */ >+ { >+ t = x - piby2_lead; >+ s = ((x - t) - piby2_lead) - piby2_tail; >+ *region = 1; >+ } >+ else if (x <= thresh4) /* x < 4*pi/4 */ >+ { >+ t = x - pi_lead; >+ s = ((-pi_lead - t) + x) -
pi_tail; >+ *region = 2; >+ } >+ else if (x <= thresh5) /* x < 5*pi/4 */ >+ { >+ t = x - pi_lead; >+ s = ((x - t) - pi_lead) - pi_tail; >+ *region = 2; >+ } >+ else if (x <= thresh6) /* x < 6*pi/4 */ >+ { >+ t = x - three_piby2_lead; >+ s = ((-three_piby2_lead - t) + x) - three_piby2_tail; >+ *region = 3; >+ } >+ else if (x <= thresh7) /* x < 7*pi/4 */ >+ { >+ t = x - three_piby2_lead; >+ s = ((x - t) - three_piby2_lead) - three_piby2_tail; >+ *region = 3; >+ } >+ else if (x <= thresh8) /* x < 8*pi/4 */ >+ { >+ t = x - two_pi_lead; >+ s = ((-two_pi_lead - t) + x) - two_pi_tail; >+ *region = 0; >+ } >+ else if (x <= thresh9) /* x < 9*pi/4 */ >+ { >+ t = x - two_pi_lead; >+ s = ((x - t) - two_pi_lead) - two_pi_tail; >+ *region = 0; >+ } >+ else if (x <= thresh10) /* x < 10*pi/4 */ >+ { >+ t = x - five_piby2_lead; >+ s = ((-five_piby2_lead - t) + x) - five_piby2_tail; >+ *region = 1; >+ } >+ else /* x < 11*pi/4 */ >+ { >+ t = x - five_piby2_lead; >+ s = ((x - t) - five_piby2_lead) - five_piby2_tail; >+ *region = 1; >+ } >+ >+ *r = t + s; >+ *rr = (t - *r) + s; >+ >+ /* Check for massive cancellation which may happen very close >+ to multiples of pi/2 */ >+ if (*r < 0.0) >+ ctest = -(*r); >+ else >+ ctest = *r; >+#ifdef DEBUGGING_PRINT >+ printf("Cancellation threshold test = (%g > %g)\n", >+ ctest, cancellationThresh); >+#endif >+ >+ /* Check if cancellation error was not too large */ >+ if (ctest > cancellationThresh) >+ done = 1; >+ /* Otherwise fall through to the expensive method */ >+ } >+ else if (x <= 1.0e5) >+ { >+ /* This range reduction is accurate enough for x up to >+ approximately 2**(20) except near multiples of pi/2 */ >+ >+ /* We perform quad precision arithmetic to find the >+ nearest multiple of pi/2 to x */ >+ >+ int reg, it; >+ double hx, tx, z, zz, w, ww, dreg, s, t, c, cc, ctest; >+ >+ /* Split x into head and tail, hx and tx */ >+ unsigned long u; >+ GET_BITS_DP64(x, u); >+ u &= 0xfffffffff8000000; >+ PUT_BITS_DP64(u, hx); >+ tx = x - hx; >+ >+
/* Multiply x by 2/pi in extra precision, result in (z, zz) */ >+ c = x * twobypi; >+ cc = ((((hx * twobypi_part1 - c) + hx * twobypi_part2) + >+ tx * twobypi_part1) + tx * twobypi_part2) + x * twobypi_part3; >+ z = c + cc; >+ zz = (c - z) + cc; >+ >+#ifdef DEBUGGING_PRINT >+ printf("z = %30.20e = %s\n", z, double2hex(&z)); >+ printf("zz = %30.20e = %s\n", zz, double2hex(&zz)); >+#endif >+ >+ /* Find reg, the nearest integer to (z, zz). We need to be >+ careful when (z,zz) is very near an odd multiple of 0.5. >+ The simple formula >+ reg = (int)((zz + 0.5) + z); >+ fails in that case because the double rounding may >+ lead us astray. */ >+ t = z + z; >+ it = (int)t; >+ if (it == t && it & 1) >+ { >+ /* z is an odd multiple of 0.5; we must use zz >+ to discriminate */ >+ if (zz > 0.0) >+ reg = (int)z + 1; >+ else >+ reg = (int)z; >+ } >+ else >+ reg = (int)(z + 0.5); >+ >+#ifdef DEBUGGING_PRINT >+ printf("reg = %d\n", reg); >+#endif >+ >+ /* Carefully subtract reg from (z, zz), result in (w, ww) */ >+ dreg = reg; >+ s = z - dreg; >+ if (z > dreg) >+ t = ((z - s) - dreg) + zz; >+ else >+ t = ((-dreg - s) + z) + zz; >+ w = s + t; >+ ww = (s - w) + t; >+ >+#ifdef DEBUGGING_PRINT >+ printf("w = %30.20e = %s\n", w, double2hex(&w)); >+ printf("ww = %30.20e = %s\n", ww, double2hex(&ww)); >+#endif >+ >+ /* Check for massive cancellation which may happen very close >+ to multiples of pi/2 */ >+ if (w < 0.0) >+ ctest = -w; >+ else >+ ctest = w; >+ >+ /* If cancellation is not too severe, continue with this method.
>+ Otherwise we fall through to the expensive, accurate method */ >+ if (ctest > cancellationThresh) >+ { >+ /* Split w into (hx, tx) */ >+ GET_BITS_DP64(w, u); >+ u &= 0xfffffffff8000000; >+ PUT_BITS_DP64(u, hx); >+ tx = w - hx; >+ >+ /* Carefully multiply (w, ww) by pi/2 */ >+ c = piby2_lead * w; >+ cc = ((((piby2_part1 * hx - c) + piby2_part1 * tx) + >+ piby2_part2 * hx) + piby2_part2 * tx) + >+ (piby2_lead * ww + piby2_part3 * w); >+ *r = c + cc; >+ *rr = (c - *r) + cc; >+ >+ *region = reg & 3; >+ >+#ifdef DEBUGGING_PRINT >+ printf("r = %30.20e = %s\n", *r, double2hex(r)); >+ printf("rr = %30.20e = %s\n", *rr, double2hex(rr)); >+#endif >+ done = 1; >+ } >+ } >+ >+ if (!done) >+ { >+ /* This method simulates multi-precision floating-point >+ arithmetic and is accurate for all 1 <= x < infinity */ >+ const int bitsper = 10; >+ unsigned long res[500]; /* only res[0] to res[19] are written by the unrolled multiplication below */ >+ unsigned long ux, u, carry, mask, mant, highbitsrr; >+ int first, last, i, rexp, xexp, resexp, ltb, determ; >+ double xx, t; >+ static unsigned long pibits[] = /* chunks of the bits of 2/pi consumed by the long multiplication below; every entry fits in bitsper (10) bits and the first six entries are zero */ >+ { >+ 0, 0, 0, 0, 0, 0, >+ 162, 998, 54, 915, 580, 84, 671, 777, 855, 839, >+ 851, 311, 448, 877, 553, 358, 316, 270, 260, 127, >+ 593, 398, 701, 942, 965, 390, 882, 283, 570, 265, >+ 221, 184, 6, 292, 750, 642, 465, 584, 463, 903, >+ 491, 114, 786, 617, 830, 930, 35, 381, 302, 749, >+ 72, 314, 412, 448, 619, 279, 894, 260, 921, 117, >+ 569, 525, 307, 637, 156, 529, 504, 751, 505, 160, >+ 945, 1022, 151, 1023, 480, 358, 15, 956, 753, 98, >+ 858, 41, 721, 987, 310, 507, 242, 498, 777, 733, >+ 244, 399, 870, 633, 510, 651, 373, 158, 940, 506, >+ 997, 965, 947, 833, 825, 990, 165, 164, 746, 431, >+ 949, 1004, 287, 565, 464, 533, 515, 193, 111, 798 >+ }; >+ >+ GET_BITS_DP64(x, ux); >+ >+#ifdef DEBUGGING_PRINT >+ printf("On entry, x = %25.20e = %s\n", x, double2hex(&x)); >+#endif >+ >+ xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64); >+ ux = (ux & MANTBITS_DP64) | IMPBIT_DP64; >+ >+ /* Now ux is the mantissa bit pattern of x as a long
integer */ >+ carry = 0; >+ mask = (1L << bitsper) - 1; >+ >+ /* Set first and last to the positions of the first >+ and last chunks of 2/pi that we need */ >+ first = xexp / bitsper; >+ resexp = xexp - first * bitsper; >+ /* 180 is the theoretical maximum number of bits (actually >+ 175 for IEEE double precision) that we need to extract >+ from the middle of 2/pi to compute the reduced argument >+ accurately enough for our purposes */ >+ last = first + 180 / bitsper; >+ >+ /* Do a long multiplication of the bits of 2/pi by the >+ integer mantissa */ >+#if 0 >+ for (i = last; i >= first; i--) >+ { >+ u = pibits[i] * ux + carry; >+ res[i - first] = u & mask; >+ carry = u >> bitsper; >+ } >+ res[last - first + 1] = 0; >+#else >+ /* Unroll the loop. This is only correct because we know >+ that bitsper is fixed as 10. */ >+ res[19] = 0; >+ u = pibits[last] * ux; >+ res[18] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-1] * ux + carry; >+ res[17] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-2] * ux + carry; >+ res[16] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-3] * ux + carry; >+ res[15] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-4] * ux + carry; >+ res[14] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-5] * ux + carry; >+ res[13] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-6] * ux + carry; >+ res[12] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-7] * ux + carry; >+ res[11] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-8] * ux + carry; >+ res[10] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-9] * ux + carry; >+ res[9] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-10] * ux + carry; >+ res[8] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-11] * ux + carry; >+ res[7] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-12] * ux + carry; >+ res[6] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-13] * ux + carry; >+ res[5] = u & mask; >+ carry = u
>> bitsper; >+ u = pibits[last-14] * ux + carry; >+ res[4] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-15] * ux + carry; >+ res[3] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-16] * ux + carry; >+ res[2] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-17] * ux + carry; >+ res[1] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last-18] * ux + carry; >+ res[0] = u & mask; >+#endif >+ >+#ifdef DEBUGGING_PRINT >+ printf("resexp = %d\n", resexp); >+ printf("Significant part of x * 2/pi with binary" >+ " point in correct place:\n"); >+ for (i = 0; i <= last - first; i++) >+ { >+ if (i > 0 && i % 5 == 0) >+ printf("\n "); >+ if (i == 1) >+ printf("%s ", d2b((int)res[i], bitsper, resexp)); >+ else >+ printf("%s ", d2b((int)res[i], bitsper, -1)); >+ } >+ printf("\n"); >+#endif >+ >+ /* Reconstruct the result */ >+ ltb = (int)((((res[0] << bitsper) | res[1]) >+ >> (bitsper - 1 - resexp)) & 7); >+ >+ /* determ says whether the fractional part is >= 0.5 */ >+ determ = ltb & 1; >+ >+#ifdef DEBUGGING_PRINT >+ printf("ltb = %d (last two bits before binary point" >+ " and first bit after)\n", ltb); >+ printf("determ = %d (1 means need to negate because the fractional\n" >+ " part of x * 2/pi is greater than 0.5)\n", determ); >+#endif >+ >+ i = 1; >+ if (determ) >+ { >+ /* The mantissa is >= 0.5.
We want to subtract it >+ from 1.0 by negating all the bits */ >+ *region = ((ltb >> 1) + 1) & 3; >+ mant = ~(res[1]) & ((1L << (bitsper - resexp)) - 1); >+ while (mant < 0x0020000000000000) >+ { >+ i++; >+ mant = (mant << bitsper) | (~(res[i]) & mask); >+ } >+ highbitsrr = ~(res[i + 1]) << (64 - bitsper); >+ } >+ else >+ { >+ *region = (ltb >> 1); >+ mant = res[1] & ((1L << (bitsper - resexp)) - 1); >+ while (mant < 0x0020000000000000) >+ { >+ i++; >+ mant = (mant << bitsper) | res[i]; >+ } >+ highbitsrr = res[i + 1] << (64 - bitsper); >+ } >+ >+ rexp = 52 + resexp - i * bitsper; >+ >+ while (mant >= 0x0020000000000000) >+ { >+ rexp++; >+ highbitsrr = (highbitsrr >> 1) | ((mant & 1) << 63); >+ mant >>= 1; >+ } >+ >+#ifdef DEBUGGING_PRINT >+ printf("Normalised mantissa = 0x%016lx\n", mant); >+ printf("High bits of rest of mantissa = 0x%016lx\n", highbitsrr); >+ printf("Exponent to be inserted on mantissa = rexp = %d\n", rexp); >+#endif >+ >+ /* Put the result exponent rexp onto the mantissa pattern */ >+ u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64; >+ ux = (mant & MANTBITS_DP64) | u; >+ if (determ) >+ /* If we negated the mantissa we negate x too */ >+ ux |= SIGNBIT_DP64; >+ PUT_BITS_DP64(ux, x); >+ >+ /* Create the bit pattern for rr */ >+ highbitsrr >>= 12; /* Note this is shifted one place too far */ >+ u = ((unsigned long)rexp + EXPBIAS_DP64 - 53) << EXPSHIFTBITS_DP64; >+ PUT_BITS_DP64(u, t); >+ u |= highbitsrr; >+ PUT_BITS_DP64(u, xx); >+ >+ /* Subtract the implicit bit we accidentally added */ >+ xx -= t; >+ /* Set the correct sign, and double to account for the >+ "one place too far" shift */ >+ if (determ) >+ xx *= -2.0; >+ else >+ xx *= 2.0; >+ >+#ifdef DEBUGGING_PRINT >+ printf("(lead part of x*2/pi) = %25.20e = %s\n", x, double2hex(&x)); >+ printf("(tail part of x*2/pi) = %25.20e = %s\n", xx, double2hex(&xx)); >+#endif >+ >+ /* (x,xx) is an extra-precise version of the fractional part of >+ x * 2 / pi.
Multiply (x,xx) by pi/2 in extra precision >+ to get the reduced argument (r,rr). */ >+ { >+ double hx, tx, c, cc; >+ /* Split x into hx (head) and tx (tail) */ >+ GET_BITS_DP64(x, ux); >+ ux &= 0xfffffffff8000000; >+ PUT_BITS_DP64(ux, hx); >+ tx = x - hx; >+ >+ c = piby2_lead * x; >+ cc = ((((piby2_part1 * hx - c) + piby2_part1 * tx) + >+ piby2_part2 * hx) + piby2_part2 * tx) + >+ (piby2_lead * xx + piby2_part3 * x); >+ *r = c + cc; >+ *rr = (c - *r) + cc; >+ } >+ >+#ifdef DEBUGGING_PRINT >+ printf(" (r,rr) = lead and tail parts of frac(x*2/pi) * pi/2:\n"); >+ printf(" r = %25.20e = %s\n", *r, double2hex(r)); >+ printf("rr = %25.20e = %s\n", *rr, double2hex(rr)); >+ printf("region = (number of pi/2 subtracted from x) mod 4 = %d\n", >+ *region); >+#endif >+ } >+} >============================================================ >Index: sysdeps/x86_64/fpu/w_remainder_piby2f.c >--- sysdeps/x86_64/fpu/w_remainder_piby2f.c created >+++ sysdeps/x86_64/fpu/w_remainder_piby2f.c 2002-12-03 13:43:29.000000000 +0100 1.1 >@@ -0,0 +1,386 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc.
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+/* Define this to get debugging print statements activated */ >+#define DEBUGGING_PRINT >+#undef DEBUGGING_PRINT >+ >+ >+#ifdef DEBUGGING_PRINT >+#include <stdio.h> >+char *d2b(long d, int bitsper, int point) >+{ >+ static char buff[200]; >+ int i, j; >+ j = bitsper; >+ if (point >= 0 && point <= bitsper) >+ j++; >+ buff[j] = '\0'; >+ for (i = bitsper - 1; i >= 0; i--) >+ { >+ j--; >+ if (d % 2 == 1) >+ buff[j] = '1'; >+ else >+ buff[j] = '0'; >+ if (i == point) >+ { >+ j--; >+ buff[j] = '.'; >+ } >+ d /= 2; >+ } >+ return buff; >+} >+#endif >+ >+/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using >+ extra precision, and return the result in r. >+ Return value "region" tells how many lots of pi/2 were subtracted >+ from x to put it in the range [-pi/4,pi/4], mod 4. */ >+void __remainder_piby2f(float x, double *r, int *region) >+{ >+ >+ /* eleven_piby4 is the closest machine number BELOW 11*pi/4 */ >+ static const double >+ eleven_piby4 = 8.6393797973719301808159e+00; /* 0x4021475cc9eedf00 */ >+ >+ static const double >+ piby2 = 1.57079632679489655800e+00, /* 0x3ff921fb54442d18 */ >+ twobypi = 6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */ >+ pi = 3.14159265358979311600e+00, /* 0x400921fb54442d18 */ >+ three_piby2 = 4.71238898038468967400e+00, /* 0x4012d97c7f3321d2 */ >+ two_pi = 6.28318530717958623200e+00, /* 0x401921fb54442d18 */ >+ five_piby2 = 7.85398163397448278999e+00; /* 0x401f6a7a2955385e */ >+ >+ /* Each of these threshold values is the closest machine >+ number BELOW a multiple of pi/4, i.e. they are not >+ rounded to nearest. thresh1 is 1*pi/4, thresh3 is 3*pi/4, etc. >+ This ensures that we end up in precisely the correct region. 
*/ >+ static const double >+ thresh1 = 7.8539816339744827899949e-01, /* 0x3fe921fb54442d18 */ >+ thresh3 = 2.3561944901923448369984e+00, /* 0x4002d97c7f3321d2 */ >+ thresh5 = 3.9269908169872413949974e+00, /* 0x400f6a7a2955385e */ >+ thresh7 = 5.4977871437821379529964e+00, /* 0x4015fdbbe9bba775 */ >+ thresh9 = 7.0685834705770345109954e+00; /* 0x401c463abeccb2bb */ >+ >+ static const double cancellationThresh = 1.0e-5; >+ int done = 0; >+ double dx; >+ >+ dx = x; >+ >+ /* For small values of x, up to 11*pi/4, we do double precision >+ subtraction of the relevant multiple of pi/2 */ >+ if (dx <= eleven_piby4) /* x <= 11*pi/4 */ >+ { >+ double t, ctest; >+ >+ if (dx <= thresh5) /* x < 5*pi/4 */ >+ { >+ if (dx <= thresh1) /* x < pi/4 */ >+ { >+ /* Quick return if x is already less than pi/4 */ >+ *r = dx; >+ *region = 0; >+ return; >+ } >+ else if (dx <= thresh3) /* x < 3*pi/4 */ >+ { >+ t = dx - piby2; >+ *region = 1; >+ } >+ else /* x < 5*pi/4 */ >+ { >+ t = dx - pi; >+ *region = 2; >+ } >+ } >+ else >+ { >+ if (dx <= thresh7) /* x < 7*pi/4 */ >+ { >+ t = dx - three_piby2; >+ *region = 3; >+ } >+ else if (dx <= thresh9) /* x < 9*pi/4 */ >+ { >+ t = dx - two_pi; >+ *region = 0; >+ } >+ else /* x < 11*pi/4 */ >+ { >+ t = dx - five_piby2; >+ *region = 1; >+ } >+ } >+ >+ /* Check for massive cancellation which may happen very close >+ to multiples of pi/2 */ >+ if (t < 0.0) >+ ctest = -t; >+ else >+ ctest = t; >+#ifdef DEBUGGING_PRINT >+ printf("Cancellation threshold test = (%g > %g)\n", >+ ctest, cancellationThresh); >+#endif >+ >+ /* Check if cancellation error was not too large */ >+ if (ctest > cancellationThresh) >+ { >+ *r = t; >+ done = 1; >+ } >+ /* Otherwise fall through to the expensive method */ >+ } >+ else if (dx <= 1.0e6) >+ { >+ /* This range reduction is accurate enough for x up to >+ approximately 2**(20) except near multiples of pi/2 */ >+ >+ /* We perform double precision arithmetic to find the >+ nearest multiple of pi/2 to x */ >+ int reg; >+ double 
z, w, c, ctest; >+ >+ /* Multiply x by 2/pi in double precision, result in z */ >+ z = dx * twobypi; >+ >+#ifdef DEBUGGING_PRINT >+ printf("z = %30.20e = %s\n", z, double2hex(&z)); >+#endif >+ >+ /* Find reg, the nearest integer to z */ >+ reg = (int)(z + 0.5); >+ >+#ifdef DEBUGGING_PRINT >+ printf("reg = %d\n", reg); >+#endif >+ >+ /* Subtract reg from z, result in w */ >+ w = z - reg; >+ >+#ifdef DEBUGGING_PRINT >+ printf("w = %30.20e = %s\n", w, double2hex(&w)); >+#endif >+ >+ /* Check for massive cancellation which may happen very close >+ to multiples of pi/2 */ >+ if (w < 0.0) >+ ctest = -w; >+ else >+ ctest = w; >+ >+ /* If cancellation is not too severe, continue with this method. >+ Otherwise we fall through to the expensive, accurate method */ >+ if (ctest > cancellationThresh) >+ { >+ /* Multiply w by pi/2 */ >+ c = w * piby2; >+ *r = c; >+ *region = reg & 3; >+ >+#ifdef DEBUGGING_PRINT >+ printf("r = %30.20e = %s\n", *r, double2hex(r)); >+#endif >+ done = 1; >+ } >+ } >+ >+ if (!done) >+ { >+ /* This method simulates multi-precision floating-point >+ arithmetic and is accurate for all 1 <= x < infinity */ >+#if 0 >+ const int bitsper = 36; >+#else >+#define bitsper 36 >+#endif >+ unsigned long res[10]; >+ unsigned long u, carry, mask, mant, nextbits; >+ unsigned long ux; >+ int first, last, i, rexp, xexp, resexp, ltb, determ, bc; >+ static const double >+ piby2 = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */ >+ static unsigned long pibits[] = >+ { >+ 0L, >+ 5215L, 13000023176L, 11362338026L, 67174558139L, >+ 34819822259L, 10612056195L, 67816420731L, 57840157550L, >+ 19558516809L, 50025467026L, 25186875954L, 18152700886L >+ }; >+ >+#ifdef DEBUGGING_PRINT >+ printf("On entry, x = %25.20e = %s\n", dx, double2hex(&dx)); >+#endif >+ >+ >+ GET_BITS_DP64(dx, ux); >+ >+ xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64); >+ ux = ((ux & MANTBITS_DP64) | IMPBIT_DP64) >> 29; >+ >+#ifdef DEBUGGING_PRINT >+ printf("ux = %s\n", d2b(ux, 64, 
-1)); >+#endif >+ >+ /* Now ux is the mantissa bit pattern of x as a long integer */ >+ mask = (1L << bitsper) - 1; >+ >+ /* Set first and last to the positions of the first >+ and last chunks of 2/pi that we need */ >+ first = xexp / bitsper; >+ resexp = xexp - first * bitsper; >+ /* 120 is the theoretical maximum number of bits (actually >+ 115 for IEEE single precision) that we need to extract >+ from the middle of 2/pi to compute the reduced argument >+ accurately enough for our purposes */ >+ last = first + 120 / bitsper; >+ >+#ifdef DEBUGGING_PRINT >+ printf("first = %d, last = %d\n", first, last); >+#endif >+ >+ /* Do a long multiplication of the bits of 2/pi by the >+ integer mantissa */ >+#if 0 >+ for (i = last; i >= first; i--) >+ { >+ u = pibits[i] * ux + carry; >+ res[i - first] = u & mask; >+ carry = u >> bitsper; >+ } >+ res[last - first + 1] = 0; >+#else >+ /* Unroll the loop. This is only correct because we know >+ that bitsper is fixed as 36. */ >+ res[4] = 0; >+ u = pibits[last] * ux; >+ res[3] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last - 1] * ux + carry; >+ res[2] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[last - 2] * ux + carry; >+ res[1] = u & mask; >+ carry = u >> bitsper; >+ u = pibits[first] * ux + carry; >+ res[0] = u & mask; >+#endif >+ >+#ifdef DEBUGGING_PRINT >+ printf("resexp = %d\n", resexp); >+ printf("Significant part of x * 2/pi with binary" >+ " point in correct place:\n"); >+ for (i = 0; i <= last - first; i++) >+ { >+ if (i > 0 && i % 5 == 0) >+ printf("\n "); >+ if (i == 1) >+ printf("%s ", d2b(res[i], bitsper, resexp)); >+ else >+ printf("%s ", d2b(res[i], bitsper, -1)); >+ } >+ printf("\n"); >+#endif >+ >+ /* Reconstruct the result */ >+ ltb = (int)((((res[0] << bitsper) | res[1]) >+ >> (bitsper - 1 - resexp)) & 7); >+ >+ /* determ says whether the fractional part is >= 0.5 */ >+ determ = ltb & 1; >+ >+#ifdef DEBUGGING_PRINT >+ printf("ltb = %d (last two bits before binary point" >+ " and first bit 
after)\n", ltb); >+ printf("determ = %d (1 means need to negate because the fractional\n" >+ " part of x * 2/pi is greater than 0.5)\n", determ); >+#endif >+ >+ i = 1; >+ if (determ) >+ { >+ /* The mantissa is >= 0.5. We want to subtract it >+ from 1.0 by negating all the bits */ >+ *region = ((ltb >> 1) + 1) & 3; >+ mant = ~(res[1]) & ((1L << (bitsper - resexp)) - 1); >+ while (mant < 0x0000000000010000) >+ { >+ i++; >+ mant = (mant << bitsper) | (~(res[i]) & mask); >+ } >+ nextbits = (~(res[i+1]) & mask); >+ } >+ else >+ { >+ *region = (ltb >> 1); >+ mant = res[1] & ((1L << (bitsper - resexp)) - 1); >+ while (mant < 0x0000000000010000) >+ { >+ i++; >+ mant = (mant << bitsper) | res[i]; >+ } >+ nextbits = res[i+1]; >+ } >+ >+#ifdef DEBUGGING_PRINT >+ printf("First bits of mant = %s\n", d2b(mant, bitsper, -1)); >+#endif >+ >+ /* Normalize the mantissa. The shift value 6 here, determined by >+ trial and error, seems to give optimal speed. */ >+ bc = 0; >+ while (mant < 0x0000400000000000) >+ { >+ bc += 6; >+ mant <<= 6; >+ } >+ while (mant < 0x0010000000000000) >+ { >+ bc++; >+ mant <<= 1; >+ } >+ mant |= nextbits >> (bitsper - bc); >+ >+ rexp = 52 + resexp - bc - i * bitsper; >+ >+#ifdef DEBUGGING_PRINT >+ printf("Normalised mantissa = 0x%016lx\n", mant); >+ printf("Exponent to be inserted on mantissa = rexp = %d\n", rexp); >+#endif >+ >+ /* Put the result exponent rexp onto the mantissa pattern */ >+ u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64; >+ ux = (mant & MANTBITS_DP64) | u; >+ if (determ) >+ /* If we negated the mantissa we negate x too */ >+ ux |= SIGNBIT_DP64; >+ PUT_BITS_DP64(ux, dx); >+ >+#ifdef DEBUGGING_PRINT >+ printf("(x*2/pi) = %25.20e = %s\n", dx, double2hex(&dx)); >+#endif >+ >+ /* x is a double precision version of the fractional part of >+ x * 2 / pi. Multiply x by pi/2 in double precision >+ to get the reduced argument r. 
*/ >+ *r = dx * piby2; >+ >+#ifdef DEBUGGING_PRINT >+ printf(" r = frac(x*2/pi) * pi/2:\n"); >+ printf(" r = %25.20e = %s\n", *r, double2hex(r)); >+ printf("region = (number of pi/2 subtracted from x) mod 4 = %d\n", >+ *region); >+#endif >+ } >+} >============================================================ >Index: sysdeps/x86_64/fpu/w_remainderf.c >--- sysdeps/x86_64/fpu/w_remainderf.c created >+++ sysdeps/x86_64/fpu/w_remainderf.c 2002-12-03 13:43:29.000000000 +0100 1.1 >@@ -0,0 +1,188 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_NAN_WITH_FLAGS >+#define USE_SCALEDOUBLE_1 >+#define USE_GET_FPSW_INLINE >+#define USE_SET_FPSW_INLINE >+#include "libm_inlines_amd.h" >+#undef USE_NAN_WITH_FLAGS >+#undef USE_SCALEDOUBLE_1 >+#undef USE_GET_FPSW_INLINE >+#undef USE_SET_FPSW_INLINE >+ >+#if defined(COMPILING_FMOD) >+float __fmodf(float x, float y) >+#else >+float __remainderf(float x, float y) >+#endif >+{ >+ double dx, dy, scale, w, t; >+ int i, ntimes, xexp, yexp; >+ unsigned long ux, uy, ax, ay; >+ >+ unsigned int sw; >+ >+ dx = x; >+ dy = y; >+ >+ GET_BITS_DP64(dx, ux); >+ GET_BITS_DP64(dy, uy); >+ ax = ux & ~SIGNBIT_DP64; >+ ay = uy & ~SIGNBIT_DP64; >+ xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ yexp = ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ >+ if (xexp < 1 || xexp > BIASEDEMAX_DP64 || >+ yexp < 1 || yexp > BIASEDEMAX_DP64) >+ { >+ /* x or y is zero, NaN or infinity (neither x nor y can be >+ denormalized because we promoted from float to double) */ >+ if (xexp > BIASEDEMAX_DP64) >+ { >+ /* x is NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return dx + dx; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* x is infinity; result is NaN */ >+ return 
nan_with_flags(AMD_F_INVALID); >+ } >+ else if (yexp > BIASEDEMAX_DP64) >+ { >+ /* y is NaN or infinity */ >+ if (uy & MANTBITS_DP64) >+ /* y is NaN */ >+ return dy + dy; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* y is infinity; result is x */ >+ return dx; >+ } >+ else if (xexp < 1) >+ { >+ /* x must be zero (cannot be denormalized) */ >+ if (yexp < 1) >+ /* y must be zero (cannot be denormalized) */ >+ return nan_with_flags(AMD_F_INVALID); >+ else >+ return dx; >+ } >+ else >+ /* y must be zero */ >+ return nan_with_flags(AMD_F_INVALID); >+ } >+ else if (ax == ay) >+ { >+ /* abs(x) == abs(y); return zero with the sign of x */ >+ PUT_BITS_DP64(ux & SIGNBIT_DP64, dx); >+ return dx; >+ } >+ >+ /* Set dx = abs(x), dy = abs(y) */ >+ PUT_BITS_DP64(ax, dx); >+ PUT_BITS_DP64(ay, dy); >+ >+ if (ax < ay) >+ { >+ /* abs(x) < abs(y) */ >+#if !defined(COMPILING_FMOD) >+ if (dx > 0.5*dy) >+ dx -= dy; >+#endif >+ return x < 0.0? -dx : dx; >+ } >+ >+ /* Save the current floating-point status word. We need >+ to do this because the remainder function is always >+ exact for finite arguments, but our algorithm causes >+ the inexact flag to be raised. We therefore need to >+ restore the entry status before exiting. */ >+ sw = get_fpsw_inline(); >+ >+ /* Set ntimes to the number of times we need to do a >+ partial remainder. If the exponent of x is an exact multiple >+ of 24 larger than the exponent of y, and the mantissa of x is >+ less than the mantissa of y, ntimes will be one too large >+ but it doesn't matter - it just means that we'll go round >+ the loop below one extra time. 
*/ >+ if (xexp <= yexp) >+ { >+ ntimes = 0; >+ w = dy; >+ scale = 1.0; >+ } >+ else >+ { >+ ntimes = (xexp - yexp) / 24; >+ >+ /* Set w = y * 2^(24*ntimes) */ >+ PUT_BITS_DP64((unsigned long)(ntimes * 24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, >+ scale); >+ w = scale * dy; >+ /* Set scale = 2^(-24) */ >+ PUT_BITS_DP64((unsigned long)(-24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, >+ scale); >+ } >+ >+ /* Each time round the loop we compute a partial remainder. >+ This is done by subtracting a large multiple of w >+ from x each time, where w is a scaled up version of y. >+ The subtraction can be performed exactly when performed >+ in double precision, and the result at each stage can >+ fit exactly in a single precision number. */ >+ for (i = 0; i < ntimes; i++) >+ { >+ /* t is the integer multiple of w that we will subtract. >+ We use a truncated value for t. */ >+ t = (double)((int)(dx / w)); >+ dx -= w * t; >+ /* Scale w down by 2^(-24) for the next iteration */ >+ w *= scale; >+ } >+ >+ /* One more time */ >+#if defined(COMPILING_FMOD) >+ t = (double)((int)(dx / w)); >+ dx -= w * t; >+#else >+ { >+ unsigned int todd; >+ /* Variable todd says whether the integer t is odd or not */ >+ t = (double)((int)(dx / w)); >+ todd = ((int)(dx / w)) & 1; >+ dx -= w * t; >+ >+ /* At this point, dx lies in the range [0,dy) */ >+ /* For the remainder function, we need to adjust dx >+ so that it lies in the range [-y/2, y/2] by carefully >+ subtracting w (== dy == y) if necessary. When dx is >+ exactly y/2 the subtraction is done only if t is odd >+ (todd set), so an exact tie can yield either +y/2 or >+ -y/2; anything strictly above y/2 is always reduced. */ >+ if (dx > 0.5 * w || ((dx == 0.5 * w) && todd)) >+ dx -= w; >+ } >+#endif >+ >+ /* **** N.B. for some reason this breaks the 32 bit version >+ of remainder when compiling with optimization. */ >+ /* Restore the entry status flags */ >+ set_fpsw_inline(sw); >+ >+ /* Set the result sign according to input argument x */ >+ return x < 0.0?
-dx : dx; >+ >+} >+ >+#if defined(COMPILING_FMOD) >+weak_alias (__fmodf, fmodf) >+weak_alias (__fmodf, __ieee754_fmodf) >+#else >+weak_alias (__remainderf, remainderf) >+weak_alias (__remainderf, __ieee754_remainderf) >+#endif >============================================================ >Index: sysdeps/x86_64/fpu/w_sinh.c >--- sysdeps/x86_64/fpu/w_sinh.c created >+++ sysdeps/x86_64/fpu/w_sinh.c 2002-12-03 13:43:30.000000000 +0100 1.1 >@@ -0,0 +1,335 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SPLITEXP >+#define USE_SCALEDOUBLE_1 >+#define USE_SCALEDOUBLE_2 >+#define USE_INFINITY_WITH_FLAGS >+#define USE_VAL_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_SPLITEXP >+#undef USE_SCALEDOUBLE_1 >+#undef USE_SCALEDOUBLE_2 >+#undef USE_INFINITY_WITH_FLAGS >+#undef USE_VAL_WITH_FLAGS >+ >+/* Deal with errno for out-of-range result. >+ retval_errno_erange() fills in a struct exception of type OVERFLOW >+ named sinh, with both arguments set to x. The value returned is >+ +/-HUGE when _LIB_VERSION is _SVID_, and otherwise >+ +/-infinity_with_flags(AMD_F_OVERFLOW); it is negated when xneg >+ is non-zero. errno is set to ERANGE when _LIB_VERSION is _POSIX_; >+ in the other modes matherr() is called and errno is set to ERANGE >+ only if it returns zero. */ >+#include "libm_errno_amd.h" >+static inline double retval_errno_erange(double x, int xneg) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = x; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"sinh"; >+ if (_LIB_VERSION == _SVID_) >+ { >+ if (xneg) >+ exc.retval = -HUGE; >+ else >+ exc.retval = HUGE; >+ } >+ else >+ { >+ if (xneg) >+ exc.retval = -infinity_with_flags(AMD_F_OVERFLOW); >+ else >+ exc.retval = infinity_with_flags(AMD_F_OVERFLOW); >+ } >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+double __sinh(double x) >+{ >+ /* >+ After dealing with special cases the computation is split into >+ regions as follows: >+ >+ abs(x) >= max_sinh_arg: >+ sinh(x) = sign(x)*Inf >+ >+ abs(x) >= small_threshold: >+ sinh(x) = sign(x)*exp(abs(x))/2 computed using the >+ splitexp and
scaleDouble functions as for exp_amd(). >+ >+ abs(x) < small_threshold: >+ split y = abs(x) into its integer part y0 and the >+ increment dy = y - y0, then compute >+ z = sinh(y0)*cosh(dy) + cosh(y0)*sinh(dy) >+ using the lead/tail tables of sinh(y0) and cosh(y0) below >+ and polynomial approximations in dy; >+ sinh(x) is then sign(x)*z. */ >+ >+ static const double >+ max_sinh_arg = 7.10475860073943977113e+02, /* 0x408633ce8fb9f87e */ >+ thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ >+ log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ >+ log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ >+ small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; >+ /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ >+ >+ /* Lead and tail tabulated values of sinh(i) and cosh(i) >+ for i = 0,...,36. The lead part has 26 leading bits. */ >+ >+ static const double sinh_lead[ 37] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 1.17520117759704589844e+00, /* 0x3ff2cd9fc0000000 */ >+ 3.62686038017272949219e+00, /* 0x400d03cf60000000 */ >+ 1.00178747177124023438e+01, /* 0x40240926e0000000 */ >+ 2.72899169921875000000e+01, /* 0x403b4a3800000000 */ >+ 7.42032089233398437500e+01, /* 0x40528d0160000000 */ >+ 2.01713153839111328125e+02, /* 0x406936d228000000 */ >+ 5.48316116333007812500e+02, /* 0x4081228768000000 */ >+ 1.49047882080078125000e+03, /* 0x409749ea50000000 */ >+ 4.05154187011718750000e+03, /* 0x40afa71570000000 */ >+ 1.10132326660156250000e+04, /* 0x40c5829dc8000000 */ >+ 2.99370708007812500000e+04, /* 0x40dd3c4488000000 */ >+ 8.13773945312500000000e+04, /* 0x40f3de1650000000 */ >+ 2.21206695312500000000e+05, /* 0x410b00b590000000 */ >+ 6.01302140625000000000e+05, /* 0x412259ac48000000 */ >+ 1.63450865625000000000e+06, /* 0x4138f0cca8000000 */ >+ 4.44305525000000000000e+06, /* 0x4150f2ebd0000000 */ >+ 1.20774762500000000000e+07, /* 0x4167093488000000 */ >+ 3.28299845000000000000e+07, /* 0x417f4f2208000000 */ >+ 8.92411500000000000000e+07, /* 0x419546d8f8000000 */ >+ 2.42582596000000000000e+08, /* 0x41aceb0888000000 */ >+
6.59407856000000000000e+08, /* 0x41c3a6e1f8000000 */ >+ 1.79245641600000000000e+09, /* 0x41dab5adb8000000 */ >+ 4.87240166400000000000e+09, /* 0x41f226af30000000 */ >+ 1.32445608960000000000e+10, /* 0x4208ab7fb0000000 */ >+ 3.60024494080000000000e+10, /* 0x4220c3d390000000 */ >+ 9.78648043520000000000e+10, /* 0x4236c93268000000 */ >+ 2.66024116224000000000e+11, /* 0x424ef822f0000000 */ >+ 7.23128516608000000000e+11, /* 0x42650bba30000000 */ >+ 1.96566712320000000000e+12, /* 0x427c9aae40000000 */ >+ 5.34323724288000000000e+12, /* 0x4293704708000000 */ >+ 1.45244246507520000000e+13, /* 0x42aa6b7658000000 */ >+ 3.94814795284480000000e+13, /* 0x42c1f43fc8000000 */ >+ 1.07321789251584000000e+14, /* 0x42d866f348000000 */ >+ 2.91730863685632000000e+14, /* 0x42f0953e28000000 */ >+ 7.93006722514944000000e+14, /* 0x430689e220000000 */ >+ 2.15561576592179200000e+15}; /* 0x431ea215a0000000 */ >+ >+ static const double sinh_tail[ 37] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 1.60467555584448807892e-08, /* 0x3e513ae6096a0092 */ >+ 2.76742892754807136947e-08, /* 0x3e5db70cfb79a640 */ >+ 2.09697499555224576530e-07, /* 0x3e8c2526b66dc067 */ >+ 2.04940252448908240062e-07, /* 0x3e8b81b18647f380 */ >+ 1.65444891522700935932e-06, /* 0x3ebbc1cdd1e1eb08 */ >+ 3.53116789999998198721e-06, /* 0x3ecd9f201534fb09 */ >+ 6.94023870987375490695e-06, /* 0x3edd1c064a4e9954 */ >+ 4.98876893611587449271e-06, /* 0x3ed4eca65d06ea74 */ >+ 3.19656024605152215752e-05, /* 0x3f00c259bcc0ecc5 */ >+ 2.08687768377236501204e-04, /* 0x3f2b5a6647cf9016 */ >+ 4.84668088325403796299e-05, /* 0x3f09691adefb0870 */ >+ 1.17517985422733832468e-03, /* 0x3f53410fc29cde38 */ >+ 6.90830086959560562415e-04, /* 0x3f46a31a50b6fb3c */ >+ 1.45697262451506548420e-03, /* 0x3f57defc71805c40 */ >+ 2.99859023684906737806e-02, /* 0x3f9eb49fd80e0bab */ >+ 1.02538800507941396667e-02, /* 0x3f84fffc7bcd5920 */ >+ 1.26787628407699110022e-01, /* 0x3fc03a93b6c63435 */ >+ 6.86652479544033744752e-02, /* 0x3fb1940bb255fd1c 
*/ >+ 4.81593627621056619148e-01, /* 0x3fded26e14260b50 */ >+ 1.70489513795397629181e+00, /* 0x3ffb47401fc9f2a2 */ >+ 1.12416073482258713767e+01, /* 0x40267bb3f55634f1 */ >+ 7.06579578070110514432e+00, /* 0x401c435ff8194ddc */ >+ 5.91244512999659974639e+01, /* 0x404d8fee052ba63a */ >+ 1.68921736147050694399e+02, /* 0x40651d7edccde3f6 */ >+ 2.60692936262073658327e+02, /* 0x40704b1644557d1a */ >+ 3.62419382134885609048e+02, /* 0x4076a6b5ca0a9dc4 */ >+ 4.07689930834187271103e+03, /* 0x40afd9cc72249aba */ >+ 1.55377375868385224749e+04, /* 0x40ce58de693edab5 */ >+ 2.53720210371943067003e+04, /* 0x40d8c70158ac6363 */ >+ 4.78822310734952334315e+04, /* 0x40e7614764f43e20 */ >+ 1.81871712615542812273e+05, /* 0x4106337db36fc718 */ >+ 5.62892347580489004031e+05, /* 0x41212d98b1f611e2 */ >+ 6.41374032312148716301e+05, /* 0x412392bc108b37cc */ >+ 7.57809544070145115256e+06, /* 0x415ce87bdc3473dc */ >+ 3.64177136406482197344e+06, /* 0x414bc8d5ae99ad14 */ >+ 7.63580561355670914054e+06}; /* 0x415d20d76744835c */ >+ >+ static const double cosh_lead[ 37] = { >+ 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ >+ 1.54308062791824340820e+00, /* 0x3ff8b07550000000 */ >+ 3.76219564676284790039e+00, /* 0x400e18fa08000000 */ >+ 1.00676617622375488281e+01, /* 0x402422a490000000 */ >+ 2.73082327842712402344e+01, /* 0x403b4ee858000000 */ >+ 7.42099475860595703125e+01, /* 0x40528d6fc8000000 */ >+ 2.01715633392333984375e+02, /* 0x406936e678000000 */ >+ 5.48317031860351562500e+02, /* 0x4081228948000000 */ >+ 1.49047915649414062500e+03, /* 0x409749eaa8000000 */ >+ 4.05154199218750000000e+03, /* 0x40afa71580000000 */ >+ 1.10132329101562500000e+04, /* 0x40c5829dd0000000 */ >+ 2.99370708007812500000e+04, /* 0x40dd3c4488000000 */ >+ 8.13773945312500000000e+04, /* 0x40f3de1650000000 */ >+ 2.21206695312500000000e+05, /* 0x410b00b590000000 */ >+ 6.01302140625000000000e+05, /* 0x412259ac48000000 */ >+ 1.63450865625000000000e+06, /* 0x4138f0cca8000000 */ >+ 4.44305525000000000000e+06, /* 
0x4150f2ebd0000000 */ >+ 1.20774762500000000000e+07, /* 0x4167093488000000 */ >+ 3.28299845000000000000e+07, /* 0x417f4f2208000000 */ >+ 8.92411500000000000000e+07, /* 0x419546d8f8000000 */ >+ 2.42582596000000000000e+08, /* 0x41aceb0888000000 */ >+ 6.59407856000000000000e+08, /* 0x41c3a6e1f8000000 */ >+ 1.79245641600000000000e+09, /* 0x41dab5adb8000000 */ >+ 4.87240166400000000000e+09, /* 0x41f226af30000000 */ >+ 1.32445608960000000000e+10, /* 0x4208ab7fb0000000 */ >+ 3.60024494080000000000e+10, /* 0x4220c3d390000000 */ >+ 9.78648043520000000000e+10, /* 0x4236c93268000000 */ >+ 2.66024116224000000000e+11, /* 0x424ef822f0000000 */ >+ 7.23128516608000000000e+11, /* 0x42650bba30000000 */ >+ 1.96566712320000000000e+12, /* 0x427c9aae40000000 */ >+ 5.34323724288000000000e+12, /* 0x4293704708000000 */ >+ 1.45244246507520000000e+13, /* 0x42aa6b7658000000 */ >+ 3.94814795284480000000e+13, /* 0x42c1f43fc8000000 */ >+ 1.07321789251584000000e+14, /* 0x42d866f348000000 */ >+ 2.91730863685632000000e+14, /* 0x42f0953e28000000 */ >+ 7.93006722514944000000e+14, /* 0x430689e220000000 */ >+ 2.15561576592179200000e+15}; /* 0x431ea215a0000000 */ >+ >+ static const double cosh_tail[ 37] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 6.89700037027478056904e-09, /* 0x3e3d9f5504c2bd28 */ >+ 4.43207835591715833630e-08, /* 0x3e67cb66f0a4c9fd */ >+ 2.33540217013828929694e-07, /* 0x3e8f58617928e588 */ >+ 5.17452463948269748331e-08, /* 0x3e6bc7d000c38d48 */ >+ 9.38728274131605919153e-07, /* 0x3eaf7f9d4e329998 */ >+ 2.73012191010840495544e-06, /* 0x3ec6e6e464885269 */ >+ 3.29486051438996307950e-06, /* 0x3ecba3a8b946c154 */ >+ 4.75803746362771416375e-06, /* 0x3ed3f4e76110d5a4 */ >+ 3.33050940471947692369e-05, /* 0x3f017622515a3e2b */ >+ 9.94707313972136215365e-06, /* 0x3ee4dc4b528af3d0 */ >+ 6.51685096227860253398e-05, /* 0x3f11156278615e10 */ >+ 1.18132406658066663359e-03, /* 0x3f535ad50ed821f5 */ >+ 6.93090416366541877541e-04, /* 0x3f46b61055f2935c */ >+ 
1.45780415323416845386e-03, /* 0x3f57e2794a601240 */ >+ 2.99862082708111758744e-02, /* 0x3f9eb4b45f6aadd3 */ >+ 1.02539925859688602072e-02, /* 0x3f85000b967b3698 */ >+ 1.26787669807076286421e-01, /* 0x3fc03a940fadc092 */ >+ 6.86652631843830962843e-02, /* 0x3fb1940bf3bf874c */ >+ 4.81593633223853068159e-01, /* 0x3fded26e1a2a2110 */ >+ 1.70489514001513020602e+00, /* 0x3ffb4740205796d6 */ >+ 1.12416073489841270572e+01, /* 0x40267bb3f55cb85d */ >+ 7.06579578098005001152e+00, /* 0x401c435ff81e18ac */ >+ 5.91244513000686140458e+01, /* 0x404d8fee052bdea4 */ >+ 1.68921736147088438429e+02, /* 0x40651d7edccde926 */ >+ 2.60692936262087528121e+02, /* 0x40704b1644557e0e */ >+ 3.62419382134890611269e+02, /* 0x4076a6b5ca0a9e1c */ >+ 4.07689930834187453002e+03, /* 0x40afd9cc72249abe */ >+ 1.55377375868385224749e+04, /* 0x40ce58de693edab5 */ >+ 2.53720210371943103382e+04, /* 0x40d8c70158ac6364 */ >+ 4.78822310734952334315e+04, /* 0x40e7614764f43e20 */ >+ 1.81871712615542812273e+05, /* 0x4106337db36fc718 */ >+ 5.62892347580489004031e+05, /* 0x41212d98b1f611e2 */ >+ 6.41374032312148716301e+05, /* 0x412392bc108b37cc */ >+ 7.57809544070145115256e+06, /* 0x415ce87bdc3473dc */ >+ 3.64177136406482197344e+06, /* 0x414bc8d5ae99ad14 */ >+ 7.63580561355670914054e+06}; /* 0x415d20d76744835c */ >+ >+ unsigned long ux, aux, xneg; >+ double y, z, z1, z2; >+ int m; >+ >+ /* Special cases */ >+ >+ GET_BITS_DP64(x, ux); >+ aux = ux & ~SIGNBIT_DP64; >+ if (aux < 0x3e30000000000000) /* |x| small enough that sinh(x) = x */ >+ { >+ if (aux == 0) >+ /* with no inexact */ >+ return x; >+ else >+ return val_with_flags(x, AMD_F_INEXACT); >+ } >+ else if (aux >= 0x7ff0000000000000) /* |x| is NaN or Inf */ >+ return x + x; >+ >+ xneg = (aux != ux); >+ >+ y = x; >+ if (xneg) y = -x; >+ >+ if (y >= max_sinh_arg) >+ /* Return +/-infinity with overflow flag */ >+ return retval_errno_erange(x, xneg); >+ else if (y >= small_threshold) >+ { >+ /* In this range y is large enough so that >+ the negative exponential is 
negligible, >+ so sinh(y) is approximated by sign(x)*exp(y)/2. The >+ code below is an inlined version of that from >+ exp() with two changes (it operates on >+ y instead of x, and the division by 2 is >+ done by reducing m by 1). */ >+ >+ splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, >+ log2_by_32_tail, &m, &z1, &z2); >+ m -= 1; >+ >+ if (m >= EMIN_DP64 && m <= EMAX_DP64) >+ z = scaleDouble_1((z1+z2),m); >+ else >+ z = scaleDouble_2((z1+z2),m); >+ } >+ else >+ { >+ /* In this range we find the integer part y0 of y >+ and the increment dy = y - y0. We then compute >+ >+ z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) >+ >+ where sinh(y0) and cosh(y0) are tabulated above. */ >+ >+ int ind; >+ double dy, dy2, sdy, cdy, sdy1, sdy2; >+ >+ ind = (int)y; >+ dy = y - ind; >+ >+ dy2 = dy*dy; >+ sdy = dy*dy2*(0.166666666666666667013899e0 + >+ (0.833333333333329931873097e-2 + >+ (0.198412698413242405162014e-3 + >+ (0.275573191913636406057211e-5 + >+ (0.250521176994133472333666e-7 + >+ (0.160576793121939886190847e-9 + >+ 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); >+ >+ cdy = dy2*(0.500000000000000005911074e0 + >+ (0.416666666666660876512776e-1 + >+ (0.138888888889814854814536e-2 + >+ (0.248015872460622433115785e-4 + >+ (0.275573350756016588011357e-6 + >+ (0.208744349831471353536305e-8 + >+ 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); >+ >+ /* At this point sinh(dy) is approximated by dy + sdy. >+ Shift some significant bits from dy to sdy. 
*/ >+ >+ GET_BITS_DP64(dy, ux); >+ ux &= 0xfffffffff8000000; >+ PUT_BITS_DP64(ux, sdy1); >+ sdy2 = sdy + (dy - sdy1); >+ >+ z = ((((((cosh_tail[ind]*sdy2 + sinh_tail[ind]*cdy) >+ + cosh_tail[ind]*sdy1) + sinh_tail[ind]) >+ + cosh_lead[ind]*sdy2) + sinh_lead[ind]*cdy) >+ + cosh_lead[ind]*sdy1) + sinh_lead[ind]; >+ } >+ >+ if (xneg) z = - z; >+ return z; >+} >+ >+weak_alias (__sinh, sinh) >+weak_alias (__sinh, __ieee754_sinh) >============================================================ >Index: sysdeps/x86_64/fpu/w_sinhf.c >--- sysdeps/x86_64/fpu/w_sinhf.c created >+++ sysdeps/x86_64/fpu/w_sinhf.c 2002-12-03 13:43:31.000000000 +0100 1.1 >@@ -0,0 +1,250 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SPLITEXP >+#define USE_SCALEDOUBLE_1 >+#define USE_SCALEDOUBLE_2 >+#define USE_INFINITY_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_SPLITEXP >+#undef USE_SCALEDOUBLE_1 >+#undef USE_SCALEDOUBLE_2 >+#undef USE_INFINITY_WITH_FLAGS >+ >+/* Deal with errno for out-of-range result */ >+#include "libm_errno_amd.h" >+static inline float retval_errno_erange(float x, int xneg) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)x; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"sinh"; >+ if (_LIB_VERSION == _SVID_) >+ { >+ if (xneg) >+ exc.retval = -HUGE; >+ else >+ exc.retval = HUGE; >+ } >+ else >+ { >+ if (xneg) >+ exc.retval = -infinity_with_flags(AMD_F_OVERFLOW); >+ else >+ exc.retval = infinity_with_flags(AMD_F_OVERFLOW); >+ } >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+float __sinhf(float fx) >+{ >+ /* >+ After dealing with special cases the computation is split into >+ regions as follows: 
>+ >+ abs(x) >= max_sinh_arg: >+ sinh(x) = sign(x)*Inf >+ >+ abs(x) >= small_threshold: >+ sinh(x) = sign(x)*exp(abs(x))/2 computed using the >+ splitexp and scaleDouble functions as for exp_amd(). >+ >+ abs(x) < small_threshold: >+ compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0))) >+ sinh(x) is then sign(x)*z. */ >+ >+ static const double >+ /* The max argument of sinhf, but stored as a double */ >+ max_sinh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */ >+ thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ >+ log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ >+ log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ >+ small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; >+ /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ >+ >+ /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */ >+ >+ static const double sinh_lead[ 37] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 1.17520119364380137839e+00, /* 0x3ff2cd9fc44eb982 */ >+ 3.62686040784701857476e+00, /* 0x400d03cf63b6e19f */ >+ 1.00178749274099008204e+01, /* 0x40240926e70949ad */ >+ 2.72899171971277496596e+01, /* 0x403b4a3803703630 */ >+ 7.42032105777887522891e+01, /* 0x40528d0166f07374 */ >+ 2.01713157370279219549e+02, /* 0x406936d22f67c805 */ >+ 5.48316123273246489589e+02, /* 0x408122876ba380c9 */ >+ 1.49047882578955000099e+03, /* 0x409749ea514eca65 */ >+ 4.05154190208278987484e+03, /* 0x40afa7157430966f */ >+ 1.10132328747033916443e+04, /* 0x40c5829dced69991 */ >+ 2.99370708492480553105e+04, /* 0x40dd3c4488cb48d6 */ >+ 8.13773957064298447222e+04, /* 0x40f3de1654d043f0 */ >+ 2.21206696003330085659e+05, /* 0x410b00b5916a31a5 */ >+ 6.01302142081972560845e+05, /* 0x412259ac48bef7e3 */ >+ 1.63450868623590236530e+06, /* 0x4138f0ccafad27f6 */ >+ 4.44305526025387924165e+06, /* 0x4150f2ebd0a7ffe3 */ >+ 1.20774763767876271158e+07, /* 0x416709348c0ea4ed */ >+ 3.28299845686652474105e+07, 
/* 0x417f4f22091940bb */ >+ 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ >+ 2.42582597704895108938e+08, /* 0x41aceb088b68e803 */ >+ 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ >+ 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ >+ 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ >+ 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ >+ 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ >+ 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ >+ 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ >+ 7.23128532145737548828e+11, /* 0x42650bba3796379a */ >+ 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ >+ 5.34323729076223046875e+12, /* 0x429370470aec28ec */ >+ 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ >+ 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ >+ 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ >+ 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ >+ 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ >+ 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ >+ >+ static const double cosh_lead[ 37] = { >+ 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ >+ 1.54308063481524371241e+00, /* 0x3ff8b07551d9f550 */ >+ 3.76219569108363138810e+00, /* 0x400e18fa0df2d9bc */ >+ 1.00676619957777653269e+01, /* 0x402422a497d6185e */ >+ 2.73082328360164865444e+01, /* 0x403b4ee858de3e80 */ >+ 7.42099485247878334349e+01, /* 0x40528d6fcbeff3a9 */ >+ 2.01715636122455890700e+02, /* 0x406936e67db9b919 */ >+ 5.48317035155212010977e+02, /* 0x4081228949ba3a8b */ >+ 1.49047916125217807348e+03, /* 0x409749eaa93f4e76 */ >+ 4.05154202549259389343e+03, /* 0x40afa715845d8894 */ >+ 1.10132329201033226127e+04, /* 0x40c5829dd053712d */ >+ 2.99370708659497577173e+04, /* 0x40dd3c4489115627 */ >+ 8.13773957125740562333e+04, /* 0x40f3de1654d6b543 */ >+ 2.21206696005590405548e+05, /* 0x410b00b5916b6105 */ >+ 6.01302142082804115489e+05, /* 0x412259ac48bf13ca */ >+ 1.63450868623620807193e+06, /* 0x4138f0ccafad2d17 */ >+ 
4.44305526025399193168e+06, /* 0x4150f2ebd0a8005c */ >+ 1.20774763767876680940e+07, /* 0x416709348c0ea503 */ >+ 3.28299845686652623117e+07, /* 0x417f4f22091940bf */ >+ 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ >+ 2.42582597704895138741e+08, /* 0x41aceb088b68e804 */ >+ 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ >+ 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ >+ 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ >+ 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ >+ 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ >+ 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ >+ 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ >+ 7.23128532145737548828e+11, /* 0x42650bba3796379a */ >+ 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ >+ 5.34323729076223046875e+12, /* 0x429370470aec28ec */ >+ 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ >+ 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ >+ 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ >+ 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ >+ 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ >+ 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ >+ >+ unsigned long ux, aux, xneg; >+ double x = fx, y, z, z1, z2; >+ int m; >+ >+ /* Special cases */ >+ >+ GET_BITS_DP64(x, ux); >+ aux = ux & ~SIGNBIT_DP64; >+ if (aux < 0x3f10000000000000) /* |x| small enough that sinh(x) = x */ >+ { >+ if (aux == 0) return x; /* with no inexact */ >+ if (LAMBDA_DP64 + x > 1.0) return x; /* with inexact */ >+ } >+ else if (aux >= 0x7ff0000000000000) /* |x| is NaN or Inf */ >+ return x + x; >+ >+ xneg = (aux != ux); >+ >+ y = x; >+ if (xneg) y = -x; >+ >+ if (y >= max_sinh_arg) >+ { >+ /* Return infinity with overflow flag. 
*/ >+#if 0 >+ /* This way handles non-POSIX behaviour but weirdly causes >+ sinhf to run half as fast for all arguments on Hammer */ >+ return retval_errno_erange(fx, xneg); >+#else >+ /* This handles POSIX behaviour */ >+ __set_errno(ERANGE); >+ z = infinity_with_flags(AMD_F_OVERFLOW); >+#endif >+ } >+ else if (y >= small_threshold) >+ { >+ /* In this range y is large enough so that >+ the negative exponential is negligible, >+ so sinh(y) is approximated by sign(x)*exp(y)/2. The >+ code below is an inlined version of that from >+ exp() with two changes (it operates on >+ y instead of x, and the division by 2 is >+ done by reducing m by 1). */ >+ >+ splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, >+ log2_by_32_tail, &m, &z1, &z2); >+ m -= 1; >+ /* scaleDouble_1 is always safe because the argument x was >+ float, rather than double */ >+ z = scaleDouble_1((z1+z2),m); >+ } >+ else >+ { >+ /* In this range we find the integer part y0 of y >+ and the increment dy = y - y0. We then compute >+ >+ z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) >+ >+ where sinh(y0) and cosh(y0) are tabulated above. 
*/ >+ >+ int ind; >+ double dy, dy2, sdy, cdy; >+ >+ ind = (int)y; >+ dy = y - ind; >+ >+ dy2 = dy*dy; >+ >+ sdy = dy + dy*dy2*(0.166666666666666667013899e0 + >+ (0.833333333333329931873097e-2 + >+ (0.198412698413242405162014e-3 + >+ (0.275573191913636406057211e-5 + >+ (0.250521176994133472333666e-7 + >+ (0.160576793121939886190847e-9 + >+ 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); >+ >+ cdy = 1 + dy2*(0.500000000000000005911074e0 + >+ (0.416666666666660876512776e-1 + >+ (0.138888888889814854814536e-2 + >+ (0.248015872460622433115785e-4 + >+ (0.275573350756016588011357e-6 + >+ (0.208744349831471353536305e-8 + >+ 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); >+ >+ z = sinh_lead[ind]*cdy + cosh_lead[ind]*sdy; >+ } >+ >+ if (xneg) z = - z; >+ return z; >+} >+ >+weak_alias (__sinhf, sinhf) >+weak_alias (__sinhf, __ieee754_sinhf) >============================================================ >Index: sysdeps/x86_64/fpu/s_copysignf.S >--- sysdeps/x86_64/fpu/s_copysignf.S 2002-07-23 21:36:49.000000000 +0200 1.3 >+++ sysdeps/x86_64/fpu/s_copysignf.S removed >@@ -1,49 +0,0 @@ >-/* copy sign, double version. >- Copyright (C) 2002 Free Software Foundation, Inc. >- This file is part of the GNU C Library. >- Contributed by Andreas Jaeger <aj@suse.de>, 2002. >- >- The GNU C Library is free software; you can redistribute it and/or >- modify it under the terms of the GNU Lesser General Public >- License as published by the Free Software Foundation; either >- version 2.1 of the License, or (at your option) any later version. >- >- The GNU C Library is distributed in the hope that it will be useful, >- but WITHOUT ANY WARRANTY; without even the implied warranty of >- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >- Lesser General Public License for more details. 
>- >- You should have received a copy of the GNU Lesser General Public >- License along with the GNU C Library; if not, write to the Free >- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA >- 02111-1307 USA. */ >- >-#include <machine/asm.h> >- >-#ifdef __ELF__ >- .section .rodata >-#else >- .text >-#endif >- >- .align ALIGNARG(4) >- ASM_TYPE_DIRECTIVE(mask,@object) >-mask: >- .byte 0xff, 0xff, 0xff, 0x7f >- ASM_SIZE_DIRECTIVE(mask) >- >-#ifdef PIC >-#define MO(op) op##(%rip) >-#else >-#define MO(op) op >-#endif >- >-ENTRY(__copysignf) >- movss MO(mask),%xmm3 >- andps %xmm3,%xmm0 >- andnps %xmm1,%xmm3 >- orps %xmm3,%xmm0 >- retq >-END (__copysignf) >- >-weak_alias (__copysignf, copysignf) >============================================================ >Index: sysdeps/x86_64/fpu/s_copysign.S >--- sysdeps/x86_64/fpu/s_copysign.S 2002-09-01 07:30:27.000000000 +0200 1.3 >+++ sysdeps/x86_64/fpu/s_copysign.S removed >@@ -1,52 +0,0 @@ >-/* copy sign, double version. >- Copyright (C) 2002 Free Software Foundation, Inc. >- This file is part of the GNU C Library. >- Contributed by Andreas Jaeger <aj@suse.de>, 2002. >- >- The GNU C Library is free software; you can redistribute it and/or >- modify it under the terms of the GNU Lesser General Public >- License as published by the Free Software Foundation; either >- version 2.1 of the License, or (at your option) any later version. >- >- The GNU C Library is distributed in the hope that it will be useful, >- but WITHOUT ANY WARRANTY; without even the implied warranty of >- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >- Lesser General Public License for more details. >- >- You should have received a copy of the GNU Lesser General Public >- License along with the GNU C Library; if not, write to the Free >- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA >- 02111-1307 USA. 
*/ >- >-#include <machine/asm.h> >- >-#ifdef __ELF__ >- .section .rodata >-#else >- .text >-#endif >- >- .align ALIGNARG(4) >- ASM_TYPE_DIRECTIVE(signmask,@object) >-signmask: >- .byte 0, 0, 0, 0, 0, 0, 0, 0x80 >- .byte 0, 0, 0, 0, 0, 0, 0, 0 >-othermask: >- .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f >- .byte 0, 0, 0, 0, 0, 0, 0, 0 >- ASM_SIZE_DIRECTIVE(othermask) >- >-#ifdef PIC >-#define MO(op) op##(%rip) >-#else >-#define MO(op) op >-#endif >- >-ENTRY(__copysign) >- andpd MO(othermask),%xmm0 >- andpd MO(signmask),%xmm1 >- orpd %xmm1,%xmm0 >- ret >-END (__copysign) >- >-weak_alias (__copysign, copysign) >============================================================ >Index: sysdeps/x86_64/fpu/w_cosh.c >--- sysdeps/x86_64/fpu/w_cosh.c created >+++ sysdeps/x86_64/fpu/w_cosh.c 2003-06-24 14:49:17.000000000 +0200 1.1 >@@ -0,0 +1,347 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SPLITEXP >+#define USE_SCALEDOUBLE_1 >+#define USE_SCALEDOUBLE_2 >+#define USE_INFINITY_WITH_FLAGS >+#define USE_VAL_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_SPLITEXP >+#undef USE_SCALEDOUBLE_1 >+#undef USE_SCALEDOUBLE_2 >+#undef USE_INFINITY_WITH_FLAGS >+#undef USE_VAL_WITH_FLAGS >+ >+#include "libm_errno_amd.h" >+ >+/* Deal with errno for out-of-range result */ >+static inline double retval_errno_erange(double x) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = x; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"cosh"; >+ if (_LIB_VERSION == _SVID_) >+ { >+ exc.retval = HUGE; >+ } >+ else >+ { >+ exc.retval = infinity_with_flags(AMD_F_OVERFLOW); >+ } >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+ >+double 
__cosh(double x) >+{ >+ /* >+ Derived from sinh subroutine >+ >+ After dealing with special cases the computation is split into >+ regions as follows: >+ >+ abs(x) >= max_cosh_arg: >+ cosh(x) overflows: +Inf (HUGE under _SVID_); cosh is even, so no sign >+ >+ abs(x) >= small_threshold: >+ cosh(x) = exp(abs(x))/2 computed using the >+ splitexp and scaleDouble functions as for exp_amd(). >+ >+ abs(x) < small_threshold: >+ with y0 = (int)abs(x) and dy = abs(x) - y0, cosh(x) is >+ cosh(y0)*cosh(dy) + sinh(y0)*sinh(dy), y0 terms tabulated. */ >+ >+ static const double >+ max_cosh_arg = 7.10475860073943977113e+02, /* 0x408633ce8fb9f87e */ >+ thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ >+ log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ >+ log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ >+// small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; >+ small_threshold = 20.0; >+ /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ >+ >+ /* Lead and tail tabulated values of sinh(i) and cosh(i) >+ for i = 0,...,36. The lead part has 26 leading bits. 
*/ >+ >+ static const double sinh_lead[ 37] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 1.17520117759704589844e+00, /* 0x3ff2cd9fc0000000 */ >+ 3.62686038017272949219e+00, /* 0x400d03cf60000000 */ >+ 1.00178747177124023438e+01, /* 0x40240926e0000000 */ >+ 2.72899169921875000000e+01, /* 0x403b4a3800000000 */ >+ 7.42032089233398437500e+01, /* 0x40528d0160000000 */ >+ 2.01713153839111328125e+02, /* 0x406936d228000000 */ >+ 5.48316116333007812500e+02, /* 0x4081228768000000 */ >+ 1.49047882080078125000e+03, /* 0x409749ea50000000 */ >+ 4.05154187011718750000e+03, /* 0x40afa71570000000 */ >+ 1.10132326660156250000e+04, /* 0x40c5829dc8000000 */ >+ 2.99370708007812500000e+04, /* 0x40dd3c4488000000 */ >+ 8.13773945312500000000e+04, /* 0x40f3de1650000000 */ >+ 2.21206695312500000000e+05, /* 0x410b00b590000000 */ >+ 6.01302140625000000000e+05, /* 0x412259ac48000000 */ >+ 1.63450865625000000000e+06, /* 0x4138f0cca8000000 */ >+ 4.44305525000000000000e+06, /* 0x4150f2ebd0000000 */ >+ 1.20774762500000000000e+07, /* 0x4167093488000000 */ >+ 3.28299845000000000000e+07, /* 0x417f4f2208000000 */ >+ 8.92411500000000000000e+07, /* 0x419546d8f8000000 */ >+ 2.42582596000000000000e+08, /* 0x41aceb0888000000 */ >+ 6.59407856000000000000e+08, /* 0x41c3a6e1f8000000 */ >+ 1.79245641600000000000e+09, /* 0x41dab5adb8000000 */ >+ 4.87240166400000000000e+09, /* 0x41f226af30000000 */ >+ 1.32445608960000000000e+10, /* 0x4208ab7fb0000000 */ >+ 3.60024494080000000000e+10, /* 0x4220c3d390000000 */ >+ 9.78648043520000000000e+10, /* 0x4236c93268000000 */ >+ 2.66024116224000000000e+11, /* 0x424ef822f0000000 */ >+ 7.23128516608000000000e+11, /* 0x42650bba30000000 */ >+ 1.96566712320000000000e+12, /* 0x427c9aae40000000 */ >+ 5.34323724288000000000e+12, /* 0x4293704708000000 */ >+ 1.45244246507520000000e+13, /* 0x42aa6b7658000000 */ >+ 3.94814795284480000000e+13, /* 0x42c1f43fc8000000 */ >+ 1.07321789251584000000e+14, /* 0x42d866f348000000 */ >+ 2.91730863685632000000e+14, /* 
0x42f0953e28000000 */ >+ 7.93006722514944000000e+14, /* 0x430689e220000000 */ >+ 2.15561576592179200000e+15}; /* 0x431ea215a0000000 */ >+ >+ static const double sinh_tail[ 37] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 1.60467555584448807892e-08, /* 0x3e513ae6096a0092 */ >+ 2.76742892754807136947e-08, /* 0x3e5db70cfb79a640 */ >+ 2.09697499555224576530e-07, /* 0x3e8c2526b66dc067 */ >+ 2.04940252448908240062e-07, /* 0x3e8b81b18647f380 */ >+ 1.65444891522700935932e-06, /* 0x3ebbc1cdd1e1eb08 */ >+ 3.53116789999998198721e-06, /* 0x3ecd9f201534fb09 */ >+ 6.94023870987375490695e-06, /* 0x3edd1c064a4e9954 */ >+ 4.98876893611587449271e-06, /* 0x3ed4eca65d06ea74 */ >+ 3.19656024605152215752e-05, /* 0x3f00c259bcc0ecc5 */ >+ 2.08687768377236501204e-04, /* 0x3f2b5a6647cf9016 */ >+ 4.84668088325403796299e-05, /* 0x3f09691adefb0870 */ >+ 1.17517985422733832468e-03, /* 0x3f53410fc29cde38 */ >+ 6.90830086959560562415e-04, /* 0x3f46a31a50b6fb3c */ >+ 1.45697262451506548420e-03, /* 0x3f57defc71805c40 */ >+ 2.99859023684906737806e-02, /* 0x3f9eb49fd80e0bab */ >+ 1.02538800507941396667e-02, /* 0x3f84fffc7bcd5920 */ >+ 1.26787628407699110022e-01, /* 0x3fc03a93b6c63435 */ >+ 6.86652479544033744752e-02, /* 0x3fb1940bb255fd1c */ >+ 4.81593627621056619148e-01, /* 0x3fded26e14260b50 */ >+ 1.70489513795397629181e+00, /* 0x3ffb47401fc9f2a2 */ >+ 1.12416073482258713767e+01, /* 0x40267bb3f55634f1 */ >+ 7.06579578070110514432e+00, /* 0x401c435ff8194ddc */ >+ 5.91244512999659974639e+01, /* 0x404d8fee052ba63a */ >+ 1.68921736147050694399e+02, /* 0x40651d7edccde3f6 */ >+ 2.60692936262073658327e+02, /* 0x40704b1644557d1a */ >+ 3.62419382134885609048e+02, /* 0x4076a6b5ca0a9dc4 */ >+ 4.07689930834187271103e+03, /* 0x40afd9cc72249aba */ >+ 1.55377375868385224749e+04, /* 0x40ce58de693edab5 */ >+ 2.53720210371943067003e+04, /* 0x40d8c70158ac6363 */ >+ 4.78822310734952334315e+04, /* 0x40e7614764f43e20 */ >+ 1.81871712615542812273e+05, /* 0x4106337db36fc718 */ >+ 
5.62892347580489004031e+05, /* 0x41212d98b1f611e2 */ >+ 6.41374032312148716301e+05, /* 0x412392bc108b37cc */ >+ 7.57809544070145115256e+06, /* 0x415ce87bdc3473dc */ >+ 3.64177136406482197344e+06, /* 0x414bc8d5ae99ad14 */ >+ 7.63580561355670914054e+06}; /* 0x415d20d76744835c */ >+ >+ static const double cosh_lead[ 37] = { >+ 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ >+ 1.54308062791824340820e+00, /* 0x3ff8b07550000000 */ >+ 3.76219564676284790039e+00, /* 0x400e18fa08000000 */ >+ 1.00676617622375488281e+01, /* 0x402422a490000000 */ >+ 2.73082327842712402344e+01, /* 0x403b4ee858000000 */ >+ 7.42099475860595703125e+01, /* 0x40528d6fc8000000 */ >+ 2.01715633392333984375e+02, /* 0x406936e678000000 */ >+ 5.48317031860351562500e+02, /* 0x4081228948000000 */ >+ 1.49047915649414062500e+03, /* 0x409749eaa8000000 */ >+ 4.05154199218750000000e+03, /* 0x40afa71580000000 */ >+ 1.10132329101562500000e+04, /* 0x40c5829dd0000000 */ >+ 2.99370708007812500000e+04, /* 0x40dd3c4488000000 */ >+ 8.13773945312500000000e+04, /* 0x40f3de1650000000 */ >+ 2.21206695312500000000e+05, /* 0x410b00b590000000 */ >+ 6.01302140625000000000e+05, /* 0x412259ac48000000 */ >+ 1.63450865625000000000e+06, /* 0x4138f0cca8000000 */ >+ 4.44305525000000000000e+06, /* 0x4150f2ebd0000000 */ >+ 1.20774762500000000000e+07, /* 0x4167093488000000 */ >+ 3.28299845000000000000e+07, /* 0x417f4f2208000000 */ >+ 8.92411500000000000000e+07, /* 0x419546d8f8000000 */ >+ 2.42582596000000000000e+08, /* 0x41aceb0888000000 */ >+ 6.59407856000000000000e+08, /* 0x41c3a6e1f8000000 */ >+ 1.79245641600000000000e+09, /* 0x41dab5adb8000000 */ >+ 4.87240166400000000000e+09, /* 0x41f226af30000000 */ >+ 1.32445608960000000000e+10, /* 0x4208ab7fb0000000 */ >+ 3.60024494080000000000e+10, /* 0x4220c3d390000000 */ >+ 9.78648043520000000000e+10, /* 0x4236c93268000000 */ >+ 2.66024116224000000000e+11, /* 0x424ef822f0000000 */ >+ 7.23128516608000000000e+11, /* 0x42650bba30000000 */ >+ 1.96566712320000000000e+12, /* 0x427c9aae40000000 
*/ >+ 5.34323724288000000000e+12, /* 0x4293704708000000 */ >+ 1.45244246507520000000e+13, /* 0x42aa6b7658000000 */ >+ 3.94814795284480000000e+13, /* 0x42c1f43fc8000000 */ >+ 1.07321789251584000000e+14, /* 0x42d866f348000000 */ >+ 2.91730863685632000000e+14, /* 0x42f0953e28000000 */ >+ 7.93006722514944000000e+14, /* 0x430689e220000000 */ >+ 2.15561576592179200000e+15}; /* 0x431ea215a0000000 */ >+ >+ static const double cosh_tail[ 37] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 6.89700037027478056904e-09, /* 0x3e3d9f5504c2bd28 */ >+ 4.43207835591715833630e-08, /* 0x3e67cb66f0a4c9fd */ >+ 2.33540217013828929694e-07, /* 0x3e8f58617928e588 */ >+ 5.17452463948269748331e-08, /* 0x3e6bc7d000c38d48 */ >+ 9.38728274131605919153e-07, /* 0x3eaf7f9d4e329998 */ >+ 2.73012191010840495544e-06, /* 0x3ec6e6e464885269 */ >+ 3.29486051438996307950e-06, /* 0x3ecba3a8b946c154 */ >+ 4.75803746362771416375e-06, /* 0x3ed3f4e76110d5a4 */ >+ 3.33050940471947692369e-05, /* 0x3f017622515a3e2b */ >+ 9.94707313972136215365e-06, /* 0x3ee4dc4b528af3d0 */ >+ 6.51685096227860253398e-05, /* 0x3f11156278615e10 */ >+ 1.18132406658066663359e-03, /* 0x3f535ad50ed821f5 */ >+ 6.93090416366541877541e-04, /* 0x3f46b61055f2935c */ >+ 1.45780415323416845386e-03, /* 0x3f57e2794a601240 */ >+ 2.99862082708111758744e-02, /* 0x3f9eb4b45f6aadd3 */ >+ 1.02539925859688602072e-02, /* 0x3f85000b967b3698 */ >+ 1.26787669807076286421e-01, /* 0x3fc03a940fadc092 */ >+ 6.86652631843830962843e-02, /* 0x3fb1940bf3bf874c */ >+ 4.81593633223853068159e-01, /* 0x3fded26e1a2a2110 */ >+ 1.70489514001513020602e+00, /* 0x3ffb4740205796d6 */ >+ 1.12416073489841270572e+01, /* 0x40267bb3f55cb85d */ >+ 7.06579578098005001152e+00, /* 0x401c435ff81e18ac */ >+ 5.91244513000686140458e+01, /* 0x404d8fee052bdea4 */ >+ 1.68921736147088438429e+02, /* 0x40651d7edccde926 */ >+ 2.60692936262087528121e+02, /* 0x40704b1644557e0e */ >+ 3.62419382134890611269e+02, /* 0x4076a6b5ca0a9e1c */ >+ 4.07689930834187453002e+03, /* 
0x40afd9cc72249abe */ >+ 1.55377375868385224749e+04, /* 0x40ce58de693edab5 */ >+ 2.53720210371943103382e+04, /* 0x40d8c70158ac6364 */ >+ 4.78822310734952334315e+04, /* 0x40e7614764f43e20 */ >+ 1.81871712615542812273e+05, /* 0x4106337db36fc718 */ >+ 5.62892347580489004031e+05, /* 0x41212d98b1f611e2 */ >+ 6.41374032312148716301e+05, /* 0x412392bc108b37cc */ >+ 7.57809544070145115256e+06, /* 0x415ce87bdc3473dc */ >+ 3.64177136406482197344e+06, /* 0x414bc8d5ae99ad14 */ >+ 7.63580561355670914054e+06}; /* 0x415d20d76744835c */ >+ >+ unsigned long ux, aux, xneg; >+ double y, z, z1, z2; >+ int m; >+ >+ /* Special cases */ >+ >+ GET_BITS_DP64(x, ux); >+ aux = ux & ~SIGNBIT_DP64; >+ if (aux < 0x3e30000000000000) /* |x| small enough that cosh(x) = 1 */ >+ { >+ if (aux == 0) >+ /* with no inexact */ >+ return 1.0; >+ else >+ return val_with_flags(1.0, AMD_F_INEXACT); >+ } >+ else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */ >+ { >+ if (aux > PINFBITPATT_DP64) /* |x| is a NaN? */ >+ return x + x; >+ else /* x is infinity */ >+ return infinity_with_flags(0); >+ } >+ >+ xneg = (aux != ux); >+ >+ y = x; >+ if (xneg) y = -x; >+ >+ if (y >= max_cosh_arg) >+ { >+ /* Return +infinity (HUGE under _SVID_) with overflow flag */ >+ return retval_errno_erange(x); >+ } >+ else if (y >= small_threshold) >+ { >+ /* In this range y is large enough so that >+ the negative exponential is negligible, >+ so cosh(y) is approximated by exp(y)/2. The >+ code below is an inlined version of that from >+ exp() with two changes (it operates on >+ y instead of x, and the division by 2 is >+ done by reducing m by 1). */ >+ >+ splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, >+ log2_by_32_tail, &m, &z1, &z2); >+ m -= 1; >+ >+ if (m >= EMIN_DP64 && m <= EMAX_DP64) >+ z = scaleDouble_1((z1+z2),m); >+ else >+ z = scaleDouble_2((z1+z2),m); >+ } >+ else >+ { >+ /* In this range we find the integer part y0 of y >+ and the increment dy = y - y0. 
We then compute >+ >+ z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) >+ z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy) >+ >+ where sinh(y0) and cosh(y0) are tabulated above. */ >+ >+ int ind; >+ double dy, dy2, sdy, cdy; >+ >+ ind = (int)y; >+ dy = y - ind; >+ >+ dy2 = dy*dy; >+ sdy = dy*dy2*(0.166666666666666667013899e0 + >+ (0.833333333333329931873097e-2 + >+ (0.198412698413242405162014e-3 + >+ (0.275573191913636406057211e-5 + >+ (0.250521176994133472333666e-7 + >+ (0.160576793121939886190847e-9 + >+ 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); >+ >+ cdy = dy2*(0.500000000000000005911074e0 + >+ (0.416666666666660876512776e-1 + >+ (0.138888888889814854814536e-2 + >+ (0.248015872460622433115785e-4 + >+ (0.275573350756016588011357e-6 + >+ (0.208744349831471353536305e-8 + >+ 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); >+ >+ /* At this point sinh(dy) is approximated by dy + sdy, and cosh(dy) is approximated by 1 + cdy. >+ The disabled variant below moves the low-order bits of dy into sdy (as sdy2). */ >+#if 0 >+ double sdy1,sdy2; >+ GET_BITS_DP64(dy, ux); >+ ux &= 0xfffffffff8000000; >+ PUT_BITS_DP64(ux, sdy1); // sdy1 is upper 53-27=26 significant bits of dy. >+ sdy2 = sdy + (dy - sdy1); // sdy2 is sdy + lower bits of dy >+ >+ z = ((((((cosh_tail[ind]*cdy + sinh_tail[ind]*sdy2) >+ + sinh_tail[ind]*sdy1) + cosh_tail[ind]) >+ + cosh_lead[ind]*cdy) + sinh_lead[ind]*sdy2) >+ + sinh_lead[ind]*sdy1) + cosh_lead[ind]; >+#else >+ z = ((((((cosh_tail[ind]*cdy + sinh_tail[ind]*sdy) >+ + sinh_tail[ind]*dy) + cosh_tail[ind]) >+ + cosh_lead[ind]*cdy) + sinh_lead[ind]*sdy) >+ + sinh_lead[ind]*dy) + cosh_lead[ind]; >+#endif >+ } >+ >+ return z; >+} >+ >+weak_alias (__cosh, cosh) >============================================================ >Index: sysdeps/x86_64/fpu/w_coshf.c >--- sysdeps/x86_64/fpu/w_coshf.c created >+++ sysdeps/x86_64/fpu/w_coshf.c 2003-06-24 14:49:17.000000000 +0200 1.1 >@@ -0,0 +1,256 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SPLITEXP >+#define USE_SCALEDOUBLE_1 >+#define USE_SCALEDOUBLE_2 >+#define USE_INFINITYF_WITH_FLAGS >+#define USE_VALF_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_SPLITEXP >+#undef USE_SCALEDOUBLE_1 >+#undef USE_SCALEDOUBLE_2 >+#undef USE_INFINITYF_WITH_FLAGS >+#undef USE_VALF_WITH_FLAGS >+ >+#include "libm_errno_amd.h" >+ >+/* Deal with errno for out-of-range result */ >+static inline float retval_errno_erange(float x) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)x; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"coshf"; >+ if (_LIB_VERSION == _SVID_) >+ { >+ exc.retval = HUGE; >+ } >+ else >+ { >+ exc.retval = infinityf_with_flags(AMD_F_OVERFLOW); >+ } >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+ >+float __coshf(float fx) >+{ >+ /* >+ After dealing with special cases the computation is split into >+ regions as follows: >+ >+ abs(x) >= max_cosh_arg: >+ cosh(x) overflows: +Inf with errno = ERANGE; cosh is even, so no sign >+ >+ abs(x) >= small_threshold: >+ cosh(x) = exp(abs(x))/2 computed using the >+ splitexp and scaleDouble functions as for exp_amd(). >+ >+ abs(x) < small_threshold: >+ with y0 = (int)abs(x) and dy = abs(x) - y0, cosh(x) is >+ cosh(y0)*cosh(dy) + sinh(y0)*sinh(dy), y0 terms tabulated. 
*/ >+ >+ static const double >+ /* The max argument of coshf, but stored as a double */ >+ max_cosh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */ >+ thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ >+ log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ >+ log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ >+ >+ small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889; >+// small_threshold = 20.0; >+ /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */ >+ >+ /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */ >+ >+ static const double sinh_lead[ 37] = { >+ 0.00000000000000000000e+00, /* 0x0000000000000000 */ >+ 1.17520119364380137839e+00, /* 0x3ff2cd9fc44eb982 */ >+ 3.62686040784701857476e+00, /* 0x400d03cf63b6e19f */ >+ 1.00178749274099008204e+01, /* 0x40240926e70949ad */ >+ 2.72899171971277496596e+01, /* 0x403b4a3803703630 */ >+ 7.42032105777887522891e+01, /* 0x40528d0166f07374 */ >+ 2.01713157370279219549e+02, /* 0x406936d22f67c805 */ >+ 5.48316123273246489589e+02, /* 0x408122876ba380c9 */ >+ 1.49047882578955000099e+03, /* 0x409749ea514eca65 */ >+ 4.05154190208278987484e+03, /* 0x40afa7157430966f */ >+ 1.10132328747033916443e+04, /* 0x40c5829dced69991 */ >+ 2.99370708492480553105e+04, /* 0x40dd3c4488cb48d6 */ >+ 8.13773957064298447222e+04, /* 0x40f3de1654d043f0 */ >+ 2.21206696003330085659e+05, /* 0x410b00b5916a31a5 */ >+ 6.01302142081972560845e+05, /* 0x412259ac48bef7e3 */ >+ 1.63450868623590236530e+06, /* 0x4138f0ccafad27f6 */ >+ 4.44305526025387924165e+06, /* 0x4150f2ebd0a7ffe3 */ >+ 1.20774763767876271158e+07, /* 0x416709348c0ea4ed */ >+ 3.28299845686652474105e+07, /* 0x417f4f22091940bb */ >+ 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ >+ 2.42582597704895108938e+08, /* 0x41aceb088b68e803 */ >+ 6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ >+ 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ >+ 4.87240172312445068359e+09, /* 
0x41f226af33b1fdc0 */ >+ 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ >+ 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ >+ 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ >+ 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ >+ 7.23128532145737548828e+11, /* 0x42650bba3796379a */ >+ 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ >+ 5.34323729076223046875e+12, /* 0x429370470aec28ec */ >+ 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ >+ 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ >+ 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ >+ 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ >+ 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ >+ 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ >+ >+ static const double cosh_lead[ 37] = { >+ 1.00000000000000000000e+00, /* 0x3ff0000000000000 */ >+ 1.54308063481524371241e+00, /* 0x3ff8b07551d9f550 */ >+ 3.76219569108363138810e+00, /* 0x400e18fa0df2d9bc */ >+ 1.00676619957777653269e+01, /* 0x402422a497d6185e */ >+ 2.73082328360164865444e+01, /* 0x403b4ee858de3e80 */ >+ 7.42099485247878334349e+01, /* 0x40528d6fcbeff3a9 */ >+ 2.01715636122455890700e+02, /* 0x406936e67db9b919 */ >+ 5.48317035155212010977e+02, /* 0x4081228949ba3a8b */ >+ 1.49047916125217807348e+03, /* 0x409749eaa93f4e76 */ >+ 4.05154202549259389343e+03, /* 0x40afa715845d8894 */ >+ 1.10132329201033226127e+04, /* 0x40c5829dd053712d */ >+ 2.99370708659497577173e+04, /* 0x40dd3c4489115627 */ >+ 8.13773957125740562333e+04, /* 0x40f3de1654d6b543 */ >+ 2.21206696005590405548e+05, /* 0x410b00b5916b6105 */ >+ 6.01302142082804115489e+05, /* 0x412259ac48bf13ca */ >+ 1.63450868623620807193e+06, /* 0x4138f0ccafad2d17 */ >+ 4.44305526025399193168e+06, /* 0x4150f2ebd0a8005c */ >+ 1.20774763767876680940e+07, /* 0x416709348c0ea503 */ >+ 3.28299845686652623117e+07, /* 0x417f4f22091940bf */ >+ 8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */ >+ 2.42582597704895138741e+08, /* 0x41aceb088b68e804 */ >+ 
6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */ >+ 1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */ >+ 4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */ >+ 1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */ >+ 3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */ >+ 9.78648047144193725586e+10, /* 0x4236c932696a6b5c */ >+ 2.66024120300899291992e+11, /* 0x424ef822f7f6731c */ >+ 7.23128532145737548828e+11, /* 0x42650bba3796379a */ >+ 1.96566714857202099609e+12, /* 0x427c9aae4631c056 */ >+ 5.34323729076223046875e+12, /* 0x429370470aec28ec */ >+ 1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */ >+ 3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */ >+ 1.07321789892958031250e+14, /* 0x42d866f34a725782 */ >+ 2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */ >+ 7.93006726156715250000e+14, /* 0x430689e221bc8d5a */ >+ 2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */ >+ >+ unsigned long ux, aux, xneg; >+ double x = fx, y, z, z1, z2; >+ int m; >+ >+ /* Special cases */ >+ >+ GET_BITS_DP64(x, ux); >+ aux = ux & ~SIGNBIT_DP64; >+ if (aux < 0x3f10000000000000) /* |x| small enough that cosh(x) = 1 */ >+ { >+ if (aux == 0) return (float)1.0; /* with no inexact */ >+ else return valf_with_flags((float)1.0, AMD_F_INEXACT); /* with inexact */ >+ } >+ else if (aux >= PINFBITPATT_DP64) /* |x| is NaN or Inf */ >+ { >+ if (aux > PINFBITPATT_DP64) /* |x| is a NaN? */ >+ return fx + fx; >+ else /* x is infinity */ >+ return infinityf_with_flags(0); >+ } >+ >+ xneg = (aux != ux); >+ >+ y = x; >+ if (xneg) y = -x; >+ >+ if (y >= max_cosh_arg) >+ { >+ /* Return infinity with overflow flag. 
*/ >+#if 0 >+ /* This way handles non-POSIX behaviour but weirdly causes >+ sinhf to run half as fast for all arguments on Hammer (note carried over from sinhf; presumably also true of coshf -- TODO confirm) */ >+ return retval_errno_erange(fx); >+#else >+ /* This handles POSIX behaviour */ >+ __set_errno(ERANGE); >+ z = infinityf_with_flags(AMD_F_OVERFLOW); >+#endif >+ } >+ else if (y >= small_threshold) >+ { >+ /* In this range y is large enough so that >+ the negative exponential is negligible, >+ so cosh(y) is approximated by exp(y)/2. The >+ code below is an inlined version of that from >+ exp() with two changes (it operates on >+ y instead of x, and the division by 2 is >+ done by reducing m by 1). */ >+ >+ splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead, >+ log2_by_32_tail, &m, &z1, &z2); >+ m -= 1; >+ /* scaleDouble_1 is always safe because the argument x was >+ float, rather than double */ >+ >+ z = scaleDouble_1((z1+z2),m); >+ } >+ else >+ { >+ /* In this range we find the integer part y0 of y >+ and the increment dy = y - y0. We then compute >+ >+ z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) >+ z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy) >+ >+ where sinh(y0) and cosh(y0) are tabulated above. 
*/ >+ >+ int ind; >+ double dy, dy2, sdy, cdy; >+ >+ ind = (int)y; >+ dy = y - ind; >+ >+ dy2 = dy*dy; >+ >+ sdy = dy + dy*dy2*(0.166666666666666667013899e0 + >+ (0.833333333333329931873097e-2 + >+ (0.198412698413242405162014e-3 + >+ (0.275573191913636406057211e-5 + >+ (0.250521176994133472333666e-7 + >+ (0.160576793121939886190847e-9 + >+ 0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); >+ >+ cdy = 1 + dy2*(0.500000000000000005911074e0 + >+ (0.416666666666660876512776e-1 + >+ (0.138888888889814854814536e-2 + >+ (0.248015872460622433115785e-4 + >+ (0.275573350756016588011357e-6 + >+ (0.208744349831471353536305e-8 + >+ 0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2); >+ >+ z = cosh_lead[ind]*cdy + sinh_lead[ind]*sdy; >+ } >+ >+// if (xneg) z = - z; >+ return (float)z; >+} >+ >+weak_alias (__coshf, coshf) >============================================================ >Index: sysdeps/x86_64/fpu/s_atan.c >--- sysdeps/x86_64/fpu/s_atan.c created >+++ sysdeps/x86_64/fpu/s_atan.c 2003-06-24 14:51:39.000000000 +0200 1.1 >@@ -0,0 +1,146 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_VAL_WITH_FLAGS >+#define USE_NAN_WITH_FLAGS >+#define USE_HANDLE_ERROR >+#include "libm_inlines_amd.h" >+#undef USE_VAL_WITH_FLAGS >+#undef USE_NAN_WITH_FLAGS >+#undef USE_HANDLE_ERROR >+ >+#include "libm_errno_amd.h" >+ >+/* Deal with errno for out-of-range argument */ >+static inline double retval_errno_edom(double x) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = x; >+ exc.name = (char *)"atan"; >+ exc.type = DOMAIN; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = nan_with_flags(AMD_F_INVALID); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(EDOM); >+ else if (!matherr(&exc)) >+ { >+ if(_LIB_VERSION == _SVID_) >+ (void)fputs("atan: DOMAIN error\n", stderr); >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+ >+ >+double __atan (double x) >+{ >+ >+ /* Some constants and split constants. */ >+ >+ static double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */ >+ double chi, clo, v, s, q, z; >+ >+ /* Find properties of argument x. */ >+ >+ unsigned long ux, aux, xneg; >+ GET_BITS_DP64(x, ux); >+ aux = ux & ~SIGNBIT_DP64; >+ xneg = (ux != aux); >+ >+ if (xneg) v = -x; >+ else v = x; >+ >+ /* if |x| less than 1.490116119385e-8, then atan(x) = x */ >+ if (aux < 0x3E50000000000000) >+ { >+ if (aux == 0) /* if x=0, then result is precise */ >+ return x; >+ else >+ return val_with_flags(x, AMD_F_INEXACT); >+ } >+ >+ /* Argument reduction to range [-7/16,7/16] */ >+ >+ if (aux > 0x4003800000000000) /* v > 39./16. 
*/ >+ { >+ >+ if (aux > PINFBITPATT_DP64) >+ { >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it's a signalling NaN */ >+ } >+ else if (aux > 0x4370000000000000) >+ { /* abs(x) > 2^56 => arctan(1/x) is >+ insignificant compared to piby2 */ >+ if (xneg) >+ return val_with_flags(-piby2, AMD_F_INEXACT); >+ else >+ return val_with_flags(piby2, AMD_F_INEXACT); >+ } >+ >+ x = -1.0/v; >+ /* (chi + clo) = arctan(infinity) */ >+ chi = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */ >+ clo = 6.12323399573676480327e-17; /* 0x3c91a62633145c06 */ >+ } >+ else if (aux > 0x3ff3000000000000) /* 39./16. > v > 19./16. */ >+ { >+ x = (v-1.5)/(1.0+1.5*v); >+ /* (chi + clo) = arctan(1.5) */ >+ chi = 9.82793723247329054082e-01; /* 0x3fef730bd281f69b */ >+ clo = 1.39033110312309953701e-17; /* 0x3c7007887af0cbbc */ >+ } >+ else if (aux > 0x3fe6000000000000) /* 19./16. > v > 11./16. */ >+ { >+ x = (v-1.0)/(1.0+v); >+ /* (chi + clo) = arctan(1.) */ >+ chi = 7.85398163397448278999e-01; /* 0x3fe921fb54442d18 */ >+ clo = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */ >+ } >+ else if (aux > 0x3fdc000000000000) /* 11./16. > v > 7./16. */ >+ { >+ x = (2.0*v-1.0)/(2.0+v); >+ /* (chi + clo) = arctan(0.5) */ >+ chi = 4.63647609000806093515e-01; /* 0x3fddac670561bb4f */ >+ clo = 2.26987774529616809294e-17; /* 0x3c7a2b7f222f65e0 */ >+ } >+ else /* v < 7./16. 
*/ >+ { >+ x = v; >+ chi = 0.0; >+ clo = 0.0; >+ } >+ >+ /* Core approximation: Remez(4,4) on [-7/16,7/16] */ >+ >+ s = x*x; >+ q = x*s* >+ (0.268297920532545909e0 + >+ (0.447677206805497472e0 + >+ (0.220638780716667420e0 + >+ (0.304455919504853031e-1 + >+ 0.142316903342317766e-3*s)*s)*s)*s)/ >+ (0.804893761597637733e0 + >+ (0.182596787737507063e1 + >+ (0.141254259931958921e1 + >+ (0.424602594203847109e0 + >+ 0.389525873944742195e-1*s)*s)*s)*s); >+ >+ z = chi - ((q - clo) - x); >+ >+ if (xneg) z = -z; >+ return z; >+} >+ >+weak_alias (__atan, atan) >============================================================ >Index: sysdeps/x86_64/fpu/s_atanf.c >--- sysdeps/x86_64/fpu/s_atanf.c created >+++ sysdeps/x86_64/fpu/s_atanf.c 2003-06-24 14:51:45.000000000 +0200 1.1 >@@ -0,0 +1,143 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_VALF_WITH_FLAGS >+#define USE_NAN_WITH_FLAGS >+#define USE_HANDLE_ERRORF >+#include "libm_inlines_amd.h" >+#undef USE_VALF_WITH_FLAGS >+#undef USE_NAN_WITH_FLAGS >+#undef USE_HANDLE_ERRORF >+ >+#include "libm_errno_amd.h" >+ >+/* Deal with errno for out-of-range argument */ >+static inline float retval_errno_edom(float x) >+{ >+ struct exception exc; >+ exc.arg1 = (float)x; >+ exc.arg2 = (float)x; >+ exc.name = (char *)"atanf"; >+ exc.type = DOMAIN; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = nan_with_flags(AMD_F_INVALID); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(EDOM); >+ else if (!matherr(&exc)) >+ { >+ if(_LIB_VERSION == _SVID_) >+ (void)fputs("atanf: DOMAIN error\n", stderr); >+ __set_errno(EDOM); >+ } >+ return exc.retval; >+} >+ >+ >+float __atanf (float fx) >+{ >+ >+ /* Some constants and split constants. 
*/ >+ >+ static double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */ >+ >+ double c, v, s, q, z; >+ unsigned int xnan; >+ >+ double x = fx; >+ >+ /* Find properties of argument fx. */ >+ >+ unsigned long ux, aux, xneg; >+ >+ GET_BITS_DP64(x, ux); >+ aux = ux & ~SIGNBIT_DP64; >+ xneg = ux & SIGNBIT_DP64; >+ >+ if (aux < 0x3ec0000000000000) /* v < 1.9073486328125e-06 */ >+ { >+ if (aux == 0) /* if x=0, then result is precise */ >+ return fx; >+ else >+ return valf_with_flags(fx, AMD_F_INEXACT); >+ } >+ >+ v = x; >+ if (xneg) v = -x; >+ >+ /* Argument reduction to range [-7/16,7/16] */ >+ >+ if (aux < 0x3fdc000000000000) /* v < 7./16. */ >+ { >+ x = v; >+ c = 0.0; >+ } >+ else if (aux < 0x3fe6000000000000) /* v < 11./16. */ >+ { >+ x = (2.0*v-1.0)/(2.0+v); >+ /* c = arctan(0.5) */ >+ c = 4.63647609000806093515e-01; /* 0x3fddac670561bb4f */ >+ } >+ else if (aux < 0x3ff3000000000000) /* v < 19./16. */ >+ { >+ x = (v-1.0)/(1.0+v); >+ /* c = arctan(1.) */ >+ c = 7.85398163397448278999e-01; /* 0x3fe921fb54442d18 */ >+ } >+ else if (aux < 0x4003800000000000) /* v < 39./16. 
*/ >+ { >+ x = (v-1.5)/(1.0+1.5*v); >+ /* c = arctan(1.5) */ >+ c = 9.82793723247329054082e-01; /* 0x3fef730bd281f69b */ >+ } >+ else >+ { >+ >+ xnan = (aux > PINFBITPATT_DP64); >+ >+ if (xnan) >+ { >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it's a signalling NaN */ >+ } >+ else if (aux > 0x4190000000000000) >+ { /* abs(x) > 2^26 => arctan(1/x) is >+ insignificant compared to piby2 */ >+ if (xneg) >+ return valf_with_flags((float)-piby2, AMD_F_INEXACT); >+ else >+ return valf_with_flags((float)piby2, AMD_F_INEXACT); >+ } >+ >+ x = -1.0/v; >+ /* c = arctan(infinity) */ >+ c = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */ >+ } >+ >+ /* Core approximation: Remez(2,2) on [-7/16,7/16] */ >+ >+ s = x*x; >+ q = x*s* >+ (0.296528598819239217902158651186e0 + >+ (0.192324546402108583211697690500e0 + >+ 0.470677934286149214138357545549e-2*s)*s)/ >+ (0.889585796862432286486651434570e0 + >+ (0.111072499995399550138837673349e1 + >+ 0.299309699959659728404442796915e0*s)*s); >+ >+ z = c - (q - x); >+ >+ if (xneg) z = -z; >+ return (float)z; >+} >+ >+weak_alias (__atanf, atanf) >============================================================ >Index: sysdeps/x86_64/fpu/s_tan.c >--- sysdeps/x86_64/fpu/s_tan.c created >+++ sysdeps/x86_64/fpu/s_tan.c 2003-06-24 14:49:17.000000000 +0200 1.1 >@@ -0,0 +1,220 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_NAN_WITH_FLAGS >+#define USE_VAL_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_NAN_WITH_FLAGS >+#undef USE_VAL_WITH_FLAGS >+ >+ >+/* tan(x + xx) approximation valid on the interval [-pi/4,pi/4]. >+ If recip is true return -1/tan(x + xx) instead. 
*/ >+static inline double tan_piby4(double x, double xx, int recip, int extra) >+{ >+ double r, t1, t2, xl; >+ int transform = 0; >+ static const double >+ piby4_lead = 7.85398163397448278999e-01, /* 0x3fe921fb54442d18 */ >+ piby4_tail = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */ >+ >+ /* In order to maintain relative precision transform using the identity: >+ tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4. >+ Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. */ >+ >+ if (x > 0.68) >+ { >+ transform = 1; >+ x = piby4_lead - x; >+ xl = piby4_tail - xx; >+ x += xl; >+ xx = 0.0; >+ } >+ else if (x < -0.68) >+ { >+ transform = -1; >+ x = piby4_lead + x; >+ xl = piby4_tail + xx; >+ x += xl; >+ xx = 0.0; >+ } >+ >+ /* Core Remez [2,3] approximation to tan(x+xx) on the >+ interval [0,0.68]. */ >+ >+ r = x*x + 2.0 * x * xx; >+ t1 = x; >+ t2 = xx + x*r* >+ (0.372379159759792203640806338901e0 + >+ (-0.229345080057565662883358588111e-1 + >+ 0.224044448537022097264602535574e-3*r)*r)/ >+ (0.111713747927937668539901657944e1 + >+ (-0.515658515729031149329237816945e0 + >+ (0.260656620398645407524064091208e-1 - >+ 0.232371494088563558304549252913e-3*r)*r)*r); >+ >+ /* Reconstruct tan(x) in the transformed case. 
*/ >+ >+ if (transform) >+ { >+ double t; >+ t = t1 + t2; >+ if (recip) >+ return transform*(2*t/(t-1) - 1.0); >+ else >+ return transform*(1.0 - 2*t/(1+t)); >+ } >+ >+ if (recip) >+ { >+ if (extra) >+ { >+ /* Compute -1.0/(t1 + t2) accurately */ >+ double trec, trec_top, z1, z2, t; >+ unsigned long u; >+ t = t1 + t2; >+ GET_BITS_DP64(t, u); >+ u &= 0xffffffff00000000; >+ PUT_BITS_DP64(u, z1); >+ z2 = t2 - (z1 - t1); >+ trec = -1.0 / t; >+ GET_BITS_DP64(trec, u); >+ u &= 0xffffffff00000000; >+ PUT_BITS_DP64(u, trec_top); >+ return trec_top + trec * ((1.0 + trec_top * z1) + trec_top * z2); >+ } >+ else >+ return -1.0/(t1 + t2); >+ >+ } >+ else >+ return t1 + t2; >+} >+ >+ >+double __tan(double x) >+{ >+ double r, rr; >+ int region, xneg; >+ int extra = 0 ; // does tan_piby4 need extra accuracy on reciprocal? >+ >+ unsigned long ux, ax; >+ GET_BITS_DP64(x, ux); >+ ax = (ux & ~SIGNBIT_DP64); >+ if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ >+ { >+ if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ >+ { >+ if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */ >+ { >+ if (ax == 0x0000000000000000) return x; >+ else return val_with_flags(x, AMD_F_INEXACT); >+ } >+ else >+ { >+ return x + x*x*x*0.333333333333333333; >+ } >+ } >+ else >+ return tan_piby4(x, 0.0, 0, 0); >+ } >+ else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else >+ /* x is infinity. 
Return a NaN */ >+ return nan_with_flags(AMD_F_INVALID); >+ } >+ xneg = (ax != ux); >+ >+ >+ if (xneg) >+ x = -x; >+ >+ if (x < 5.0e5) >+ { >+ /* For these size arguments we can just carefully subtract the >+ appropriate multiple of pi/2, using extra precision where >+ x is close to an exact multiple of pi/2 */ >+ static const double >+ twobypi = 6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */ >+ piby2_1 = 1.57079632673412561417e+00, /* 0x3ff921fb54400000 */ >+ piby2_1tail = 6.07710050650619224932e-11, /* 0x3dd0b4611a626331 */ >+ piby2_2 = 6.07710050630396597660e-11, /* 0x3dd0b4611a600000 */ >+ piby2_2tail = 2.02226624879595063154e-21, /* 0x3ba3198a2e037073 */ >+ piby2_3 = 2.02226624871116645580e-21, /* 0x3ba3198a2e000000 */ >+ piby2_3tail = 8.47842766036889956997e-32; /* 0x397b839a252049c1 */ >+ double t, rhead, rtail; >+ int npi2; >+ unsigned long uy, xexp, expdiff; >+ xexp = ax >> EXPSHIFTBITS_DP64; >+ /* How many pi/2 is x a multiple of? */ >+ if (ax <= 0x400f6a7a2955385e) /* 5pi/4 */ >+ { >+ if (ax <= 0x4002d97c7f3321d2) /* 3pi/4 */ >+ npi2 = 1; >+ else >+ npi2 = 2; >+ } >+ else if (ax <= 0x401c463abeccb2bb) /* 9pi/4 */ >+ { >+ if (ax <= 0x4015fdbbe9bba775) /* 7pi/4 */ >+ npi2 = 3; >+ else >+ npi2 = 4; >+ } >+ else >+ npi2 = (int)(x * twobypi + 0.5); >+ /* Subtract the multiple from x to get an extra-precision remainder */ >+ rhead = x - npi2 * piby2_1; >+ rtail = npi2 * piby2_1tail; >+ GET_BITS_DP64(rhead, uy); >+ expdiff = xexp - ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); >+ if (expdiff > 15) >+ { >+ /* The remainder is pretty small compared with x, which >+ implies that x is a near multiple of pi/2 >+ (x matches the multiple to at least 15 bits) */ >+ t = rhead; >+ rtail = npi2 * piby2_2; >+ rhead = t - rtail; >+ rtail = npi2 * piby2_2tail - ((t - rhead) - rtail); >+ if (expdiff > 48) >+ { >+ /* x matches a pi/2 multiple to at least 48 bits */ >+ t = rhead; >+ rtail = npi2 * piby2_3; >+ rhead = t - rtail; >+ rtail = npi2 * piby2_3tail - ((t - rhead) - 
rtail); >+ } >+ } >+ r = rhead - rtail; >+ rr = (rhead - r) - rtail; >+ region = npi2 & 3; >+ } >+ else >+ { >+ /* Reduce x into range [-pi/4,pi/4] */ >+ __remainder_piby2(x, &r, &rr, &region); >+ extra = 1; >+ } >+ >+ if (xneg) >+ return -tan_piby4(r, rr, region & 1, extra); >+ else >+ return tan_piby4(r, rr, region & 1, extra); >+} >+ >+weak_alias (__tan, tan) >============================================================ >Index: sysdeps/x86_64/fpu/w_exp10f.c >--- sysdeps/x86_64/fpu/w_exp10f.c created >+++ sysdeps/x86_64/fpu/w_exp10f.c 2004-03-26 12:37:37.020163455 +0100 1.1 >@@ -0,0 +1,157 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. >+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SPLITEXPF >+#define USE_SCALEFLOAT_1 >+#define USE_SCALEFLOAT_2 >+#define USE_ZEROF_WITH_FLAGS >+#define USE_INFINITYF_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_SPLITEXPF >+#undef USE_SCALEFLOAT_1 >+#undef USE_SCALEFLOAT_2 >+#undef USE_ZEROF_WITH_FLAGS >+#undef USE_INFINITYF_WITH_FLAGS >+ >+/* Deal with errno for out-of-range result */ >+#include "libm_errno_amd.h" >+static inline float retval_errno_erange_overflow(float x) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)x; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"exp10f"; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+static inline float retval_errno_erange_underflow(float x) >+{ >+ struct exception exc; >+ exc.arg1 = (double)x; >+ exc.arg2 = (double)x; >+ exc.type = UNDERFLOW; >+ exc.name = (char *)"exp10f"; >+ exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | 
AMD_F_INEXACT); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+float __exp10f(float x) >+{ >+ static const float >+ max_exp10_arg = 3.8531841278E+01F, /* 0x421A209B */ >+ min_exp10_arg =-4.4853469848E+01F, /* 0xC23369F4 */ >+ log10 = 2.3025850929E+00F, /* 0x40135D8E */ >+ thirtytwo_by_log10of2 = 1.0630169677E+02F, /* 0x42D49A78 */ >+ log10of2_by_32_lead = 9.4070434570E-03F, /* 0x3C1A2000 */ >+ log10of2_by_32_tail = 1.4390730030E-07F; /* 0x341A84F0 */ >+ >+ float y, z1, z2, z; >+ int m; >+ unsigned int ux, ax; >+ >+ /* >+ Computation of exp10f (x). >+ >+ We compute the values m, z1, and z2 such that >+ exp10f(x) = 2**m * (z1 + z2), where exp10f(x) is 10**x. >+ >+ Computations needed in order to obtain m, z1, and z2 >+ involve three steps. >+ >+ First, we reduce the argument x to the form >+ x = n * log10of2/32 + remainder, >+ where n has the value of an integer and |remainder| <= log10of2/64. >+ The value of n = x * 32/log10of2 rounded to the nearest integer and >+ the remainder = x - n*log10of2/32. >+ >+ Second, we approximate exp10f(r1 + r2) - 1 where r1 is the leading >+ part of the remainder and r2 is the trailing part of the remainder. >+ >+ Third, we reconstruct exp10f(x) so that >+ exp10f(x) = 2**m * (z1 + z2). >+ */ >+ >+ GET_BITS_SP32(x, ux); >+ ax = ux & (~SIGNBIT_SP32); >+ >+ if (ax >= 0x421A209B) /* abs(x) >= 38.5... */ >+ { >+ if(ax >= 0x7f800000) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_SP32) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else if (ux & SIGNBIT_SP32) >+ /* x is negative infinity; return 0.0 with no flags. */ >+ return 0.0F; >+ else >+ /* x is positive infinity */ >+ return x; >+ } >+ if (x > max_exp10_arg) >+ /* Return +infinity with overflow flag */ >+ return retval_errno_erange_overflow(x); >+ else if (x < min_exp10_arg) >+ /* x is negative. 
Return +zero with underflow and inexact flags */ >+ return retval_errno_erange_underflow(x); >+ } >+ >+ /* Handle small arguments separately */ >+ if (ax < 0x3bde5bd9) /* abs(x) < 1/(64*log10) */ >+ { >+ if (ax < 0x32800000) /* abs(x) < 2^(-26) */ >+ return 1.0F + x; /* Raises inexact if x is non-zero */ >+ else >+ y = log10*x; >+ z = (((((((( >+ 1.0F/40320)*x+ >+ 1.0F/5040)*y+ >+ 1.0F/720)*y+ >+ 1.0F/120)*y+ >+ 1.0F/24)*y+ >+ 1.0F/6)*y+ >+ 1.0F/2)*y+ >+ 1.0F)*y + 1.0; >+ } >+ else >+ { >+ /* Find m, z1 and z2 such that exp10f(x) = 2**m * (z1 + z2) */ >+ >+ splitexpf(x, log10, thirtytwo_by_log10of2, log10of2_by_32_lead, >+ log10of2_by_32_tail, &m, &z1, &z2); >+ >+ /* Scale (z1 + z2) by 2.0**m */ >+ >+ if (m >= EMIN_SP32 && m <= EMAX_SP32) >+ z = scaleFloat_1((z1+z2),m); >+ else >+ z = scaleFloat_2((z1+z2),m); >+ } >+ return z; >+} >+ >+weak_alias (__exp10f, exp10f) >+strong_alias (__exp10f, __pow10f) >+weak_alias (__pow10f, pow10f) >============================================================ >Index: sysdeps/x86_64/fpu/w_exp10.c >--- sysdeps/x86_64/fpu/w_exp10.c created >+++ sysdeps/x86_64/fpu/w_exp10.c 2004-03-26 12:37:47.013847218 +0100 1.1 >@@ -0,0 +1,160 @@ >+/* >+(C) 2002 Advanced Micro Devices, Inc. 
>+** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS >+ AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC >+ LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH >+ THIS LIBRARY** >+*/ >+ >+#include "libm_amd.h" >+#include "libm_util_amd.h" >+ >+#define USE_SPLITEXP >+#define USE_SCALEDOUBLE_1 >+#define USE_SCALEDOUBLE_2 >+#define USE_ZERO_WITH_FLAGS >+#define USE_INFINITY_WITH_FLAGS >+#include "libm_inlines_amd.h" >+#undef USE_SPLITEXP >+#undef USE_SCALEDOUBLE_1 >+#undef USE_SCALEDOUBLE_2 >+#undef USE_ZERO_WITH_FLAGS >+#undef USE_INFINITY_WITH_FLAGS >+ >+/* Deal with errno for out-of-range result */ >+#include "libm_errno_amd.h" >+static inline double retval_errno_erange_overflow(double x) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = x; >+ exc.type = OVERFLOW; >+ exc.name = (char *)"exp10"; >+ if (_LIB_VERSION == _SVID_) >+ exc.retval = HUGE; >+ else >+ exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+static inline double retval_errno_erange_underflow(double x) >+{ >+ struct exception exc; >+ exc.arg1 = x; >+ exc.arg2 = x; >+ exc.type = UNDERFLOW; >+ exc.name = (char *)"exp10"; >+ exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT); >+ if (_LIB_VERSION == _POSIX_) >+ __set_errno(ERANGE); >+ else if (!matherr(&exc)) >+ __set_errno(ERANGE); >+ return exc.retval; >+} >+ >+double __exp10(double x) >+{ >+ static const double >+ max_exp10_arg = 3.0825471555991674677e+02, /* 0x40734413509f79ff */ >+ min_exp10_arg = -3.2330621534311580944e+02, /* 0xc07434e6420f4374 */ >+ log10 = 2.30258509299404568401e+00, /* 0x40026bb1bbb55516 */ >+ thirtytwo_by_log10of2 = 1.06301699036395595131e+02, /* 0x405a934f0979a371 */ >+ log10of2_by_32_lead = 9.40718688070774078369e-03, /* 0x3F83441340000000 */ >+ log10of2_by_32_trail = 4.83791671566737916758e-10; /* 0x3E009F79FEF311F1 */ >+ >+ double y, z1, z2, z; 
>+ int m; >+ unsigned long ux, ax; >+ >+ /* >+ Computation of exp10(x). >+ >+ We compute the values m, z1, and z2 such that >+ exp10(x) = 2**m * (z1 + z2), where exp10(x) is 10**x. >+ >+ Computations needed in order to obtain m, z1, and z2 >+ involve three steps. >+ >+ First, we reduce the argument x to the form >+ x = n * log10of2/32 + remainder, >+ where n has the value of an integer and |remainder| <= log10of2/64. >+ The value of n = x * 32/log10of2 rounded to the nearest integer and >+ the remainder = x - n*log10of2/32. >+ >+ Second, we approximate exp10(r1 + r2) - 1 where r1 is the leading >+ part of the remainder and r2 is the trailing part of the remainder. >+ >+ Third, we reconstruct exp10(x) so that >+ exp10(x) = 2**m * (z1 + z2). >+ */ >+ >+ >+ GET_BITS_DP64(x, ux); >+ ax = ux & (~SIGNBIT_DP64); >+ >+ if (ax >= 0x40734413509f79ff) /* abs(x) >= 308.25... */ >+ { >+ if(ax >= 0x7ff0000000000000) >+ { >+ /* x is either NaN or infinity */ >+ if (ux & MANTBITS_DP64) >+ /* x is NaN */ >+ return x + x; /* Raise invalid if it is a signalling NaN */ >+ else if (ux & SIGNBIT_DP64) >+ /* x is negative infinity; return 0.0 with no flags. */ >+ return 0.0; >+ else >+ /* x is positive infinity */ >+ return x; >+ } >+ if (x > max_exp10_arg) >+ /* Return +infinity with overflow flag */ >+ return retval_errno_erange_overflow(x); >+ else if (x < min_exp10_arg) >+ /* x is negative. 
Return +zero with underflow and inexact flags */ >+ return retval_errno_erange_underflow(x); >+ } >+ >+ >+ /* Handle small arguments separately */ >+ if (ax < 0x3f9bcb7b131bbb9d) /* abs(x) < 1/(16*log10) */ >+ { >+ if (ax < 0x3c00000000000000) /* abs(x) < 2^(-63) */ >+ return 1.0 + x; /* Raises inexact if x is non-zero */ >+ else >+ y = log10*x; >+ z = (((((((((( >+ 1.0/3628800)*y+ >+ 1.0/362880)*y+ >+ 1.0/40320)*y+ >+ 1.0/5040)*y+ >+ 1.0/720)*y+ >+ 1.0/120)*y+ >+ 1.0/24)*y+ >+ 1.0/6)*y+ >+ 1.0/2)*y+ >+ 1.0)*y + 1.0; >+ } >+ else >+ { >+ /* Find m, z1 and z2 such that exp10(x) = 2**m * (z1 + z2) */ >+ >+ splitexp(x, log10, thirtytwo_by_log10of2, log10of2_by_32_lead, >+ log10of2_by_32_trail, &m, &z1, &z2); >+ >+ /* Scale (z1 + z2) by 2.0**m */ >+ if (m > EMIN_DP64 && m < EMAX_DP64) >+ return scaleDouble_1((z1+z2),m); >+ else >+ return scaleDouble_2((z1+z2),m); >+ } >+ return z; >+} >+ >+weak_alias (__exp10, exp10) >+strong_alias (__exp10, __pow10) >+weak_alias (__pow10, pow10)
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 100289
:
64301
|
64302
|
64303
|
64304
|
64305
|
64306
|
65840
|
81968
|
81969