Attachment #64301 for bug #100289

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/Makefile.x86_64-new-libm (+3 lines)
	1	ifeq ($(subdir),math)
	2	libm-sysdep_routines += w_remainder_piby2 w_remainder_piby2f
	3	endif

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_acos.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_acosf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_asin.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_asinf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_exp.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_exp10.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_exp10f.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_exp2.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_exp2f.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_expf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_fmod.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_fmodf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_hypot.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_hypotf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_log.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_log10.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_log10f.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_log2.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_log2f.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_logf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_pow.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_powf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_remainder.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_remainderf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_sinh.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_sinhf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#ifndef LIBM_AMD_H_INCLUDED
#define LIBM_AMD_H_INCLUDED 1

/* The following definition of weak_alias is extracted from
   libc-symbols.h */

/* Define ALIASNAME as a weak alias for NAME.
   Only the weak form is provided in this header; there is no strong-alias
   fallback here.  The definition relies on the __typeof and
   __attribute__ ((weak, alias)) compiler extensions.
   weak_alias forwards to _weak_alias so that both arguments are
   macro-expanded before #name stringifies the target.
   Note that _weak_alias expands to a complete declaration, including the
   trailing semicolon, so an invocation needs no semicolon of its own.  */
#  define weak_alias(name, aliasname) _weak_alias (name, aliasname)
#  define _weak_alias(name, aliasname) \
  extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));

#include <math.h>

/* Sign helpers, double and float variants; defined outside this header.
   NOTE(review): judging by the name these return x with its sign
   changed - confirm against the implementation. */
extern double chgsign(double x);
extern float chgsignf(float x);

/* Prototypes for fma/fmaf.  NOTE(review): <math.h>, included just above,
   may already declare these depending on the language mode; the
   redeclarations here use the standard signatures. */
extern double fma(double x, double y, double z);
extern float fmaf(float x, float y, float z);

/* Argument-reduction helpers; defined outside this header.
   NOTE(review): judging by the names and signatures, x is presumably
   reduced modulo pi/2 with the remainder stored in *r (plus a tail in *rr
   for the double version) and the quadrant in *region - confirm against
   the implementation. */
extern void __remainder_piby2(double x, double *r, double *rr, int *region);
extern void __remainder_piby2f(float x, double *r, int *region);

#endif /* LIBM_AMD_H_INCLUDED */




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#ifndef LIBM_ERRNO_AMD_H_INCLUDED
#define LIBM_ERRNO_AMD_H_INCLUDED 1

#include <stdio.h>
#include <errno.h>
/* Fallback definition of __set_errno, used only when the including
   environment has not already provided one.  Stores x in errno.
   The expansion is fully parenthesized so that the macro behaves as a
   single expression wherever it is used (e.g. as an operand of a unary
   or binary operator), instead of being re-associated with neighbouring
   tokens. */
#ifndef __set_errno
#define __set_errno(x) (errno = (x))
#endif

#endif /* LIBM_ERRNO_AMD_H_INCLUDED */




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#ifndef LIBM_INLINES_AMD_H_INCLUDED
#define LIBM_INLINES_AMD_H_INCLUDED 1

#include "libm_util_amd.h"

/* When building for WIN32, map the `inline' keyword used throughout this
   header onto `__inline' (presumably the spelling that compiler
   accepts - confirm). */
#ifdef WIN32
#define inline __inline
#endif

/* Set defines for inline functions calling other inlines.
   Each *_WITH_FLAGS helper in this header calls raise_fpsw_flags(), so
   requesting any of them forces USE_RAISE_FPSW_FLAGS on.
   NOTE(review): the two SQRT inlines are listed here as well, presumably
   because they also raise flags - confirm against their bodies. */
#if defined(USE_VAL_WITH_FLAGS) || defined(USE_VALF_WITH_FLAGS) || \
    defined(USE_ZERO_WITH_FLAGS) || defined(USE_ZEROF_WITH_FLAGS) || \
    defined(USE_NAN_WITH_FLAGS) || defined(USE_NANF_WITH_FLAGS) || \
    defined(USE_INFINITY_WITH_FLAGS) || defined(USE_INFINITYF_WITH_FLAGS) || \
    defined(USE_SQRT_AMD_INLINE) || defined(USE_SQRTF_AMD_INLINE)
#undef USE_RAISE_FPSW_FLAGS
#define USE_RAISE_FPSW_FLAGS 1
#endif

#if defined(USE_SPLITDOUBLE)
/* Decompose x as m * 2**e with 0.5 <= abs(m) < 1.0, storing the exponent
   in *e and the scaled mantissa in *m.
   x must be a normal, finite, non-zero number; none of this is verified. */
static inline void splitDouble(double x, int *e, double *m)
{
  unsigned long bits, biased_exp, frac_bits;
  GET_BITS_DP64(x, bits);
  /* Isolate the biased exponent field and move it to the low bits */
  biased_exp = (bits & EXPBITS_DP64) >> EXPSHIFTBITS_DP64;
  *e = (int)biased_exp - EXPBIAS_DP64 + 1;
  /* Keep sign and mantissa, replacing the exponent field with
     HALFEXPBITS_DP64 so the value lands in [0.5, 1.0) */
  frac_bits = (bits & (SIGNBIT_DP64 | MANTBITS_DP64)) | HALFEXPBITS_DP64;
  PUT_BITS_DP64(frac_bits, x);
  *m = x;
}
#endif /* USE_SPLITDOUBLE */


#if defined(USE_SPLITDOUBLE_2)
/* Decompose x as m * 2**e with 1.0 <= abs(m) < 4.0 and e even, storing
   the exponent in *e and the scaled mantissa in *m.
   x must be a normal, finite, non-zero number; none of this is verified.
   The evenness of e relies on EXPBIAS_DP64 being odd. */
static inline void splitDouble_2(double x, int *e, double *m)
{
  unsigned long bits, biased_exp, sign_and_mant;
  GET_BITS_DP64(x, bits);
  biased_exp = (bits & EXPBITS_DP64) >> EXPSHIFTBITS_DP64;
  sign_and_mant = bits & (SIGNBIT_DP64 | MANTBITS_DP64);
  if (biased_exp & 1)
    {
      /* Odd biased exponent: mantissa goes to [1,2), exponent unchanged */
      sign_and_mant |= ONEEXPBITS_DP64;
      *e = biased_exp - EXPBIAS_DP64;
    }
  else
    {
      /* Even biased exponent: mantissa goes to [2,4), exponent drops by 1 */
      sign_and_mant |= TWOEXPBITS_DP64;
      *e = biased_exp - EXPBIAS_DP64 - 1;
    }
  PUT_BITS_DP64(sign_and_mant, x);
  *m = x;
}
#endif /* USE_SPLITDOUBLE_2 */


#if defined(USE_SPLITFLOAT)
/* Decompose x as m * 2**e with 0.5 <= abs(m) < 1.0, storing the exponent
   in *e and the scaled mantissa in *m.
   x must be a normal, finite, non-zero number; none of this is verified. */
static inline void splitFloat(float x, int *e, float *m)
{
  unsigned int bits, biased_exp, frac_bits;
  GET_BITS_SP32(x, bits);
  /* Isolate the biased exponent field and move it to the low bits */
  biased_exp = (bits & EXPBITS_SP32) >> EXPSHIFTBITS_SP32;
  *e = (int)biased_exp - EXPBIAS_SP32 + 1;
  /* Keep sign and mantissa, replacing the exponent field with
     HALFEXPBITS_SP32 so the value lands in [0.5, 1.0) */
  frac_bits = (bits & (SIGNBIT_SP32 | MANTBITS_SP32)) | HALFEXPBITS_SP32;
  PUT_BITS_SP32(frac_bits, x);
  *m = x;
}
#endif /* USE_SPLITFLOAT */


#if defined(USE_SCALEDOUBLE_1)
/* Returns x * 2.0**n using a single multiplication.
   n must satisfy EMIN <= n <= EMAX; this is not checked. */
static inline double scaleDouble_1(double x, int n)
{
  double pow2n;
  /* A biased exponent of n + EXPBIAS_DP64 with a zero mantissa is 2.0**n */
  PUT_BITS_DP64(((long)n + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, pow2n);
  return x * pow2n;
}
#endif /* USE_SCALEDOUBLE_1 */


#if defined(USE_SCALEDOUBLE_2)
/* Returns x * 2.0**n, applied as two successive multiplications by powers
   of two whose exponents sum to n.
   n must satisfy 2*EMIN <= n <= 2*EMAX; this is not checked. */
static inline double scaleDouble_2(double x, int n)
{
  double pow_a, pow_b;
  int exp_a, exp_b;
  /* Split n into two parts: exp_a + exp_b == n */
  exp_a = n / 2;
  exp_b = n - exp_a;
  /* Build 2.0**exp_a and 2.0**exp_b directly from their bit patterns */
  PUT_BITS_DP64(((long)exp_a + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, pow_a);
  PUT_BITS_DP64(((long)exp_b + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, pow_b);
  return (x * pow_a) * pow_b;
}
#endif /* USE_SCALEDOUBLE_2 */


#if defined(USE_SCALEDOUBLE_3)
/* Returns x * 2.0**n, applied as three successive multiplications by
   powers of two whose exponents sum to n.
   n must satisfy 3*EMIN <= n <= 3*EMAX; this is not checked. */
static inline double scaleDouble_3(double x, int n)
{
  double pow_a, pow_b, pow_c;
  int exp_a, exp_b, exp_c;
  /* Split n into three parts: exp_a + exp_b + exp_c == n */
  exp_a = n / 3;
  exp_b = (n - exp_a) / 2;
  exp_c = n - exp_a - exp_b;
  /* Build the three powers of two directly from their bit patterns */
  PUT_BITS_DP64(((long)exp_a + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, pow_a);
  PUT_BITS_DP64(((long)exp_b + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, pow_b);
  PUT_BITS_DP64(((long)exp_c + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, pow_c);
  return ((x * pow_a) * pow_b) * pow_c;
}
#endif /* USE_SCALEDOUBLE_3 */


#if defined(USE_SCALEFLOAT_1)
/* Returns x * 2.0**n using a single float multiplication.
   n must satisfy EMIN <= n <= EMAX; this is not checked.
   Note that the declared return type is double although the product is
   formed from float operands. */
static inline double scaleFloat_1(float x, int n)
{
  float pow2n;
  /* A biased exponent of n + EXPBIAS_SP32 with a zero mantissa is 2.0**n */
  PUT_BITS_SP32((n + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, pow2n);
  return x * pow2n;
}
#endif /* USE_SCALEFLOAT_1 */


#if defined(USE_SCALEFLOAT_2)
/* Returns x * 2.0**n, applied as two successive multiplications by powers
   of two whose exponents sum to n.
   n must satisfy 2*EMIN <= n <= 2*EMAX; this is not checked. */
static inline float scaleFloat_2(float x, int n)
{
  float pow_a, pow_b;
  int exp_a, exp_b;
  /* Split n into two parts: exp_a + exp_b == n */
  exp_a = n / 2;
  exp_b = n - exp_a;
  /* Build 2.0**exp_a and 2.0**exp_b directly from their bit patterns */
  PUT_BITS_SP32((exp_a + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, pow_a);
  PUT_BITS_SP32((exp_b + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, pow_b);
  return (x * pow_a) * pow_b;
}
#endif /* USE_SCALEFLOAT_2 */


#if defined(USE_SCALEFLOAT_3)
/* Returns x * 2.0**n, applied as three successive multiplications by
   powers of two whose exponents sum to n.
   n must satisfy 3*EMIN <= n <= 3*EMAX; this is not checked.
   Note that the declared return type is double although the product is
   formed from float operands. */
static inline double scaleFloat_3(float x, int n)
{
  float pow_a, pow_b, pow_c;
  int exp_a, exp_b, exp_c;
  /* Split n into three parts: exp_a + exp_b + exp_c == n */
  exp_a = n / 3;
  exp_b = (n - exp_a) / 2;
  exp_c = n - exp_a - exp_b;
  /* Build the three powers of two directly from their bit patterns */
  PUT_BITS_SP32((exp_a + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, pow_a);
  PUT_BITS_SP32((exp_b + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, pow_b);
  PUT_BITS_SP32((exp_c + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, pow_c);
  return ((x * pow_a) * pow_b) * pow_c;
}
#endif /* USE_SCALEFLOAT_3 */

#if defined(USE_SETPRECISIONDOUBLE)
/* Switch the floating-point unit to double rounding precision and return
   the previous control word so that it can later be handed to
   restorePrecision().
   Only the WIN32 build touches the hardware; on every other target the
   function does nothing and returns 0.
   Declared static inline, like the other helpers in this header, so that
   including the header from several translation units does not produce
   duplicate external definitions. */
static inline unsigned int setPrecisionDouble(void)
{
  unsigned int cwold = 0;
#if defined(WIN32)
  unsigned int cw;
  __asm fstcw cwold;
  cw = cwold & (~0x00000300); /* These two bits control rounding precision */
  cw |= AMD_F_DOUBLE;
  __asm fldcw cw;
#elif defined(linux)
  /* There is no precision control on Hammer */
#else
  /* Do nowt */
#endif
  return cwold;
}
#endif /* USE_SETPRECISIONDOUBLE */

#if defined(USE_RESTOREPRECISION)
/* Reload a control word previously obtained from setPrecisionDouble().
   Only the WIN32 build touches the hardware; on every other target
   (including linux, where there is no precision control on Hammer) the
   argument is ignored and the function does nothing.
   Declared static inline, like the other helpers in this header, so that
   including the header from several translation units does not produce
   duplicate external definitions. */
static inline void restorePrecision(unsigned int cwold)
{
#if defined(WIN32)
  __asm fldcw cwold;
#else
  /* Nothing to restore; mark the parameter as deliberately unused */
  (void)cwold;
#endif
}
#endif /* USE_RESTOREPRECISION */


#if defined(USE_CLEAR_FPSW_FLAGS)
/* Clear the given floating-point status flags.
   flags is the bitwise OR of the AMD_F_* flag values to clear, e.g.
     clear_fpsw_flags(AMD_F_INEXACT | AMD_F_INVALID);
 */
static inline void clear_fpsw_flags(int flags)
{
#if defined(WIN32)
  fpenv_type env;
  /* Fetch the floating-point environment, drop the requested bits
     from its status word, and load it back */
  __asm fnstenv env;
  env.status_word &= (~flags);
  __asm fldenv env;
#elif defined(linux)
  unsigned int mxcsr;
  /* Read-modify-write of the register stored by STMXCSR */
  asm volatile ("STMXCSR %0" : "=m" (mxcsr));
  mxcsr &= (~flags);
  asm volatile ("LDMXCSR %0" : : "m" (mxcsr));
#else
#error Unknown machine
#endif
}
#endif /* USE_CLEAR_FPSW_FLAGS */


#if defined(USE_RAISE_FPSW_FLAGS)
/* Set the given floating-point status flags.
   flags is the bitwise OR of the AMD_F_* flag values to raise, e.g.
     raise_fpsw_flags(AMD_F_INEXACT | AMD_F_INVALID);
 */
static inline void raise_fpsw_flags(int flags)
{
#if defined(WIN32)
  fpenv_type env;
  /* Fetch the floating-point environment, OR the requested bits
     into its status word, and load it back */
  __asm fnstenv env;
  env.status_word |= flags;
  __asm fldenv env;
#elif defined(linux)
  unsigned int mxcsr;
  /* Read-modify-write of the register stored by STMXCSR */
  asm volatile ("STMXCSR %0" : "=m" (mxcsr));
  mxcsr |= flags;
  asm volatile ("LDMXCSR %0" : : "m" (mxcsr));
#else
#error Unknown machine
#endif
}
#endif /* USE_RAISE_FPSW_FLAGS */


#if defined(USE_GET_FPSW_INLINE)
/* Read and return the current floating-point status word.
   WIN32: the 16-bit word stored by fstsw, widened to unsigned int.
   linux: the full 32-bit value stored by STMXCSR. */
static inline unsigned int get_fpsw_inline(void)
{
#if defined(WIN32)
  unsigned short status;
  __asm fstsw status;
  return (unsigned int)status;
#elif defined(linux)
  unsigned int mxcsr;
  asm volatile ("STMXCSR %0" : "=m" (mxcsr));
  return mxcsr;
#else
#error Unknown machine
#endif
}
#endif /* USE_GET_FPSW_INLINE */

#if defined(USE_SET_FPSW_INLINE)
/* Install sw as the floating-point status word.
   WIN32: only the status word of the saved environment is replaced
   (sw truncated to 16 bits).
   linux: sw is loaded as a whole with LDMXCSR. */
static inline void set_fpsw_inline(unsigned int sw)
{
#if defined(WIN32)
  fpenv_type env;
  /* Fetch the environment, overwrite its status word, load it back */
  __asm fnstenv env;
  env.status_word = (unsigned short)sw;
  __asm fldenv env;
#elif defined(linux)
  /* Load sw directly into the control/status register */
  asm volatile ("LDMXCSR %0" : : "m" (sw));
#else
#error Unknown machine
#endif
}
#endif /* USE_SET_FPSW_INLINE */

#if defined(USE_CLEAR_FPSW_INLINE)
/* Reset the exception flags in the floating-point status word.
   WIN32: the whole status word is zeroed.
   linux: only the inexact, underflow, overflow, divide-by-zero and
   invalid bits are cleared; every other bit is left as it was. */
static inline void clear_fpsw_inline(void)
{
#if defined(WIN32)
  fpenv_type env;
  /* Fetch the environment, zero its status word, load it back */
  __asm fnstenv env;
  env.status_word = 0;
  __asm fldenv env;
#elif defined(linux)
  unsigned int mxcsr;
  /* Read-modify-write of the register stored by STMXCSR */
  asm volatile ("STMXCSR %0" : "=m" (mxcsr));
  mxcsr &= ~(AMD_F_INEXACT | AMD_F_UNDERFLOW | AMD_F_OVERFLOW |
             AMD_F_DIVBYZERO | AMD_F_INVALID);
  asm volatile ("LDMXCSR %0" : : "m" (mxcsr));
#else
#error Unknown machine
#endif
}
#endif /* USE_CLEAR_FPSW_INLINE */


#if defined(USE_VAL_WITH_FLAGS)
/* Raise the given status flags, then hand back val unchanged,
  e.g.  val_with_flags(x, AMD_F_INEXACT);
 */
static inline double val_with_flags(double val, int flags)
{
  const double result = val;
  raise_fpsw_flags(flags);
  return result;
}
#endif /* USE_VAL_WITH_FLAGS */

#if defined(USE_VALF_WITH_FLAGS)
/* Raise the given status flags, then hand back val unchanged,
  e.g.  valf_with_flags(x, AMD_F_INEXACT);
 */
static inline float valf_with_flags(float val, int flags)
{
  const float result = val;
  raise_fpsw_flags(flags);
  return result;
}
#endif /* USE_VALF_WITH_FLAGS */


#if defined(USE_ZERO_WITH_FLAGS)
/* Raise the given status flags and return a double +0.0,
  e.g.  zero_with_flags(AMD_F_INEXACT | AMD_F_INVALID);
 */
static inline double zero_with_flags(int flags)
{
  const double pos_zero = 0.0;
  raise_fpsw_flags(flags);
  return pos_zero;
}
#endif /* USE_ZERO_WITH_FLAGS */


#if defined(USE_ZEROF_WITH_FLAGS)
/* Raise the given status flags and return a float +0.0,
  e.g.  zerof_with_flags(AMD_F_INEXACT | AMD_F_INVALID);
 */
static inline float zerof_with_flags(int flags)
{
  const float pos_zero = 0.0F;
  raise_fpsw_flags(flags);
  return pos_zero;
}
#endif /* USE_ZEROF_WITH_FLAGS */


#if defined(USE_NAN_WITH_FLAGS)
/* Raise the given status flags and return a positive quiet double NaN,
   e.g.  nan_with_flags(AMD_F_INVALID);
*/
static inline double nan_with_flags(int flags)
{
  double qnan;
  raise_fpsw_flags(flags);
  /* Sign clear, exponent all ones, top mantissa bit set */
  PUT_BITS_DP64(0x7ff8000000000000, qnan);
  return qnan;
}
#endif /* USE_NAN_WITH_FLAGS */

#if defined(USE_NANF_WITH_FLAGS)
/* Raise the given status flags and return a positive quiet float NaN,
   e.g.  nanf_with_flags(AMD_F_INVALID);
*/
static inline float nanf_with_flags(int flags)
{
  float qnan;
  raise_fpsw_flags(flags);
  /* Sign clear, exponent all ones, top mantissa bit set */
  PUT_BITS_SP32(0x7fc00000, qnan);
  return qnan;
}
#endif /* USE_NANF_WITH_FLAGS */


#ifdef USE_INFINITY_WITH_FLAGS
/* Raise the given status flags and return double +infinity,
   e.g.  infinity_with_flags(AMD_F_OVERFLOW);
*/
static inline double infinity_with_flags(int flags)
{
  double pos_inf;
  raise_fpsw_flags(flags);
  /* One past the largest biased exponent, with a zero mantissa */
  PUT_BITS_DP64((unsigned long)(BIASEDEMAX_DP64 + 1) << EXPSHIFTBITS_DP64, pos_inf);
  return pos_inf;
}
#endif /* USE_INFINITY_WITH_FLAGS */

#ifdef USE_INFINITYF_WITH_FLAGS
/* Raise the given status flags and return float +infinity,
   e.g.  infinityf_with_flags(AMD_F_OVERFLOW);
*/
static inline float infinityf_with_flags(int flags)
{
  float pos_inf;
  raise_fpsw_flags(flags);
  /* One past the largest biased exponent, with a zero mantissa */
  PUT_BITS_SP32((BIASEDEMAX_SP32 + 1) << EXPSHIFTBITS_SP32, pos_inf);
  return pos_inf;
}
#endif /* USE_INFINITYF_WITH_FLAGS */


#if defined(USE_SPLITEXP)
/* Compute the values m, z1, and z2 such that base**x = 2**m * (z1 + z2).
   Small arguments abs(x) < 1/(16*ln(base)) and extreme arguments
   abs(x) > large/(ln(base)) (where large is the largest representable
   floating point number) should be handled separately instead of calling
   this function. This function is called by exp_amd, exp2_amd, exp10_amd,
   cosh_amd and sinh_amd.

   Parameters:
     x                        the exponent argument
     logbase                  ln(base); multiplies the reduced argument
                              before the polynomial for exp(r)-1 is applied
     thirtytwo_by_logbaseof2  32 / log_base(2), used to find n
     logbaseof2_by_32_lead    leading part of log_base(2)/32
     logbaseof2_by_32_trail   trailing part of log_base(2)/32
     m                        out: the power of two, (n - j) / 32
     z1                       out: leading table entry for 2**(j/32)
     z2                       out: trailing table entry plus the
                              polynomial correction */
static inline void splitexp(double x, double logbase,
                            double thirtytwo_by_logbaseof2,
                            double logbaseof2_by_32_lead,
                            double logbaseof2_by_32_trail,
                            int *m, double *z1, double *z2)
{
  double q, r, r1, r2, f1, f2;
  int n, j;

/* Arrays two_to_jby32_lead_table and two_to_jby32_trail_table contain
   leading and trailing parts respectively of precomputed
   values of pow(2.0,j/32.0), for j = 0, 1, ..., 31.
   two_to_jby32_lead_table contains the first 25 bits of precision,
   and two_to_jby32_trail_table contains a further 53 bits precision. */

  static const double two_to_jby32_lead_table[32] = {
    1.00000000000000000000e+00,   /* 0x3ff0000000000000 */
    1.02189713716506958008e+00,   /* 0x3ff059b0d0000000 */
    1.04427373409271240234e+00,   /* 0x3ff0b55860000000 */
    1.06714040040969848633e+00,   /* 0x3ff11301d0000000 */
    1.09050768613815307617e+00,   /* 0x3ff172b830000000 */
    1.11438673734664916992e+00,   /* 0x3ff1d48730000000 */
    1.13878858089447021484e+00,   /* 0x3ff2387a60000000 */
    1.16372483968734741211e+00,   /* 0x3ff29e9df0000000 */
    1.18920707702636718750e+00,   /* 0x3ff306fe00000000 */
    1.21524733304977416992e+00,   /* 0x3ff371a730000000 */
    1.24185776710510253906e+00,   /* 0x3ff3dea640000000 */
    1.26905095577239990234e+00,   /* 0x3ff44e0860000000 */
    1.29683953523635864258e+00,   /* 0x3ff4bfdad0000000 */
    1.32523661851882934570e+00,   /* 0x3ff5342b50000000 */
    1.35425549745559692383e+00,   /* 0x3ff5ab07d0000000 */
    1.38390988111495971680e+00,   /* 0x3ff6247eb0000000 */
    1.41421353816986083984e+00,   /* 0x3ff6a09e60000000 */
    1.44518077373504638672e+00,   /* 0x3ff71f75e0000000 */
    1.47682613134384155273e+00,   /* 0x3ff7a11470000000 */
    1.50916439294815063477e+00,   /* 0x3ff8258990000000 */
    1.54221081733703613281e+00,   /* 0x3ff8ace540000000 */
    1.57598084211349487305e+00,   /* 0x3ff93737b0000000 */
    1.61049032211303710938e+00,   /* 0x3ff9c49180000000 */
    1.64575546979904174805e+00,   /* 0x3ffa5503b0000000 */
    1.68179279565811157227e+00,   /* 0x3ffae89f90000000 */
    1.71861928701400756836e+00,   /* 0x3ffb7f76f0000000 */
    1.75625211000442504883e+00,   /* 0x3ffc199bd0000000 */
    1.79470902681350708008e+00,   /* 0x3ffcb720d0000000 */
    1.83400803804397583008e+00,   /* 0x3ffd5818d0000000 */
    1.87416762113571166992e+00,   /* 0x3ffdfc9730000000 */
    1.91520655155181884766e+00,   /* 0x3ffea4afa0000000 */
    1.95714408159255981445e+00};  /* 0x3fff507650000000 */

  static const double two_to_jby32_trail_table[32] = {
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    1.14890470981563546737e-08,   /* 0x3e48ac2ba1d73e2a */
    4.83347014379782142328e-08,   /* 0x3e69f3121ec53172 */
    2.67125131841396124714e-10,   /* 0x3df25b50a4ebbf1b */
    4.65271045830351350190e-08,   /* 0x3e68faa2f5b9bef9 */
    5.24924336638693782574e-09,   /* 0x3e368b9aa7805b80 */
    5.38622214388600821910e-08,   /* 0x3e6ceac470cd83f6 */
    1.90902301017041969782e-08,   /* 0x3e547f7b84b09745 */
    3.79763538792174980894e-08,   /* 0x3e64636e2a5bd1ab */
    2.69306947081946450986e-08,   /* 0x3e5ceaa72a9c5154 */
    4.49683815095311756138e-08,   /* 0x3e682468446b6824 */
    1.41933332021066904914e-09,   /* 0x3e18624b40c4dbd0 */
    1.94146510233556266402e-08,   /* 0x3e54d8a89c750e5e */
    2.46409119489264118569e-08,   /* 0x3e5a753e077c2a0f */
    4.94812958044698886494e-08,   /* 0x3e6a90a852b19260 */
    8.48872238075784476136e-10,   /* 0x3e0d2ac258f87d03 */
    2.42032342089579394887e-08,   /* 0x3e59fcef32422cbf */
    3.32420002333182569170e-08,   /* 0x3e61d8bee7ba46e2 */
    1.45956577586525322754e-08,   /* 0x3e4f580c36bea881 */
    3.46452721050003920866e-08,   /* 0x3e62999c25159f11 */
    8.07090469079979051284e-09,   /* 0x3e415506dadd3e2a */
    2.99439161340839520436e-09,   /* 0x3e29b8bc9e8a0388 */
    9.83621719880452147153e-09,   /* 0x3e451f8480e3e236 */
    8.35492309647188080486e-09,   /* 0x3e41f12ae45a1224 */
    3.48493175137966283582e-08,   /* 0x3e62b5a75abd0e6a */
    1.11084703472699692902e-08,   /* 0x3e47daf237553d84 */
    5.03688744342840346564e-08,   /* 0x3e6b0aa538444196 */
    4.81896001063495806249e-08,   /* 0x3e69df20d22a0798 */
    4.83653666334089557746e-08,   /* 0x3e69f7490e4bb40b */
    1.29745882314081237628e-08,   /* 0x3e4bdcdaf5cb4656 */
    9.84532844621636118964e-09,   /* 0x3e452486cc2c7b9d */
    4.25828404545651943883e-08};  /* 0x3e66dc8a80ce9f09 */

    /*
      Step 1. Reduce the argument.

      To perform argument reduction, we find the integer n such that
      x = n * logbaseof2/32 + remainder, |remainder| <= logbaseof2/64.
      n is defined by round-to-nearest-integer( x*32/logbaseof2 ) and
      remainder by x - n*logbaseof2/32. The calculation of n is
      straightforward whereas the computation of x - n*logbaseof2/32
      must be carried out carefully.
      logbaseof2/32 is so represented in two pieces that
      (1) logbaseof2/32 is known to extra precision, (2) the product
      of n and the leading piece is a model number and is hence
      calculated without error, and (3) the subtraction of the value
      obtained in (2) from x is a model number and is hence again
      obtained without error.
    */

    r = x * thirtytwo_by_logbaseof2;
    /* Set n = nearest integer to r */
    /* This is faster on Hammer */
    /* The (int) cast truncates toward zero, so adding +/-0.5 first
       rounds to nearest with ties going away from zero. */
    if (r > 0)
      n = (int)(r + 0.5);
    else
      n = (int)(r - 0.5);

    /* r1 + r2 is the remainder, kept as a leading and a trailing part */
    r1 = x - n * logbaseof2_by_32_lead;
    r2 =   - n * logbaseof2_by_32_trail;

    /* Set j = n mod 32:   5 mod 32 = 5,   -5 mod 32 = 27,  etc. */
    /* j = n % 32;
       if (j < 0) j += 32; */
    /* The mask gives the non-negative residue for negative n as well,
       which relies on a two's-complement representation of int. */
    j = n & 0x0000001f;

    f1 = two_to_jby32_lead_table[j];
    f2 = two_to_jby32_trail_table[j];

    /* n - j is a multiple of 32, so this division is exact */
    *m = (n - j) / 32;

    /* Step 2. The following is the core approximation. We approximate
       exp(r1+r2)-1 by a polynomial. */

    /* Convert the remainder from base-`base' units to natural-log units */
    r1 *= logbase; r2 *= logbase;

    r = r1 + r2;
    q = r1 + (r2 +
              r*r*( 5.00000000000000008883e-01 +
                      r*( 1.66666666665260878863e-01 +
                      r*( 4.16666666662260795726e-02 +
                      r*( 8.33336798434219616221e-03 +
                      r*( 1.38889490863777199667e-03 ))))));

    /* Step 3. Function value reconstruction.
       We now reconstruct the exponential of the input argument
       so that exp(x) = 2**m * (z1 + z2).
       The order of the computation below must be strictly observed. */

    *z1 = f1;
    *z2 = f2 + ((f1 + f2) * q);
}
#endif /* USE_SPLITEXP */


#if defined(USE_SPLITEXPF)
/* Compute the values m, z1, and z2 such that base**x = 2**m * (z1 + z2).
   Small arguments abs(x) < 1/(16*ln(base)) and extreme arguments
   abs(x) > large/(ln(base)) (where large is the largest representable
   floating point number) should be handled separately instead of calling
   this function. This function is called by exp_amd, exp2_amd, exp10_amd,
   cosh_amd and sinh_amd.
   NOTE(review): the caller list above is identical to the one on
   splitexp; presumably the single-precision routines are meant - confirm.

   Parameters:
     x                        the exponent argument
     logbase                  ln(base); multiplies the reduced argument
                              before the polynomial for exp(r)-1 is applied
     thirtytwo_by_logbaseof2  32 / log_base(2), used to find n
     logbaseof2_by_32_lead    leading part of log_base(2)/32
     logbaseof2_by_32_trail   trailing part of log_base(2)/32
     m                        out: the power of two, (n - j) / 32
     z1                       out: leading table entry for 2**(j/32)
     z2                       out: trailing table entry plus the
                              polynomial correction */
static inline void splitexpf(float x, float logbase,
                             float thirtytwo_by_logbaseof2,
                             float logbaseof2_by_32_lead,
                             float logbaseof2_by_32_trail,
                             int *m, float *z1, float *z2)
{
  float q, r, r1, r2, f1, f2;
  int n, j;

/* Arrays two_to_jby32_lead_table and two_to_jby32_trail_table contain
   leading and trailing parts respectively of precomputed
   values of pow(2.0,j/32.0), for j = 0, 1, ..., 31.
   two_to_jby32_lead_table contains the first 10 bits of precision,
   and two_to_jby32_trail_table contains a further 24 bits precision. */

  static const float two_to_jby32_lead_table[32] = {
    1.0000000000E+00F,  /* 0x3F800000 */
    1.0214843750E+00F,  /* 0x3F82C000 */
    1.0429687500E+00F,  /* 0x3F858000 */
    1.0664062500E+00F,  /* 0x3F888000 */
    1.0898437500E+00F,  /* 0x3F8B8000 */
    1.1132812500E+00F,  /* 0x3F8E8000 */
    1.1386718750E+00F,  /* 0x3F91C000 */
    1.1621093750E+00F,  /* 0x3F94C000 */
    1.1875000000E+00F,  /* 0x3F980000 */
    1.2148437500E+00F,  /* 0x3F9B8000 */
    1.2402343750E+00F,  /* 0x3F9EC000 */
    1.2675781250E+00F,  /* 0x3FA24000 */
    1.2949218750E+00F,  /* 0x3FA5C000 */
    1.3242187500E+00F,  /* 0x3FA98000 */
    1.3535156250E+00F,  /* 0x3FAD4000 */
    1.3828125000E+00F,  /* 0x3FB10000 */
    1.4140625000E+00F,  /* 0x3FB50000 */
    1.4433593750E+00F,  /* 0x3FB8C000 */
    1.4765625000E+00F,  /* 0x3FBD0000 */
    1.5078125000E+00F,  /* 0x3FC10000 */
    1.5410156250E+00F,  /* 0x3FC54000 */
    1.5742187500E+00F,  /* 0x3FC98000 */
    1.6093750000E+00F,  /* 0x3FCE0000 */
    1.6445312500E+00F,  /* 0x3FD28000 */
    1.6816406250E+00F,  /* 0x3FD74000 */
    1.7167968750E+00F,  /* 0x3FDBC000 */
    1.7558593750E+00F,  /* 0x3FE0C000 */
    1.7929687500E+00F,  /* 0x3FE58000 */
    1.8339843750E+00F,  /* 0x3FEAC000 */
    1.8730468750E+00F,  /* 0x3FEFC000 */
    1.9140625000E+00F,  /* 0x3FF50000 */
    1.9570312500E+00F}; /* 0x3FFA8000 */

  static const float two_to_jby32_trail_table[32] = {
    0.0000000000E+00F,  /* 0x00000000 */
    4.1277357377E-04F,  /* 0x39D86988 */
    1.3050324051E-03F,  /* 0x3AAB0D9F */
    7.3415064253E-04F,  /* 0x3A407404 */
    6.6398258787E-04F,  /* 0x3A2E0F1E */
    1.1054925853E-03F,  /* 0x3A90E62D */
    1.1675967835E-04F,  /* 0x38F4DCE0 */
    1.6154836630E-03F,  /* 0x3AD3BEA3 */
    1.7071149778E-03F,  /* 0x3ADFC146 */
    4.0360994171E-04F,  /* 0x39D39B9C */
    1.6234370414E-03F,  /* 0x3AD4C982 */
    1.4728321694E-03F,  /* 0x3AC10C0C */
    1.9176795613E-03F,  /* 0x3AFB5AA6 */
    1.0178930825E-03F,  /* 0x3A856AD3 */
    7.3992193211E-04F,  /* 0x3A41F752 */
    1.0973819299E-03F,  /* 0x3A8FD607 */
    1.5106226783E-04F,  /* 0x391E6678 */
    1.8214319134E-03F,  /* 0x3AEEBD1D */
    2.6364589576E-04F,  /* 0x398A39F4 */
    1.3519275235E-03F,  /* 0x3AB13329 */
    1.1952003697E-03F,  /* 0x3A9CA845 */
    1.7620950239E-03F,  /* 0x3AE6F619 */
    1.1153318919E-03F,  /* 0x3A923054 */
    1.2242280645E-03F,  /* 0x3AA07647 */
    1.5220546629E-04F,  /* 0x391F9958 */
    1.8224230735E-03F,  /* 0x3AEEDE5F */
    3.9278529584E-04F,  /* 0x39CDEEC0 */
    1.7403248930E-03F,  /* 0x3AE41B9D */
    2.3711356334E-05F,  /* 0x37C6E7C0 */
    1.1207590578E-03F,  /* 0x3A92E66F */
    1.1440613307E-03F,  /* 0x3A95F454 */
    1.1287408415E-04F}; /* 0x38ECB6D0 */

    /*
      Step 1. Reduce the argument.

      To perform argument reduction, we find the integer n such that
      x = n * logbaseof2/32 + remainder, |remainder| <= logbaseof2/64.
      n is defined by round-to-nearest-integer( x*32/logbaseof2 ) and
      remainder by x - n*logbaseof2/32. The calculation of n is
      straightforward whereas the computation of x - n*logbaseof2/32
      must be carried out carefully.
      logbaseof2/32 is so represented in two pieces that
      (1) logbaseof2/32 is known to extra precision, (2) the product
      of n and the leading piece is a model number and is hence
      calculated without error, and (3) the subtraction of the value
      obtained in (2) from x is a model number and is hence again
      obtained without error.
    */

    r = x * thirtytwo_by_logbaseof2;
    /* Set n = nearest integer to r */
    /* This is faster on Hammer */
    /* The (int) cast truncates toward zero, so adding +/-0.5 first
       rounds to nearest with ties going away from zero. */
    if (r > 0)
      n = (int)(r + 0.5F);
    else
      n = (int)(r - 0.5F);

    /* r1 + r2 is the remainder, kept as a leading and a trailing part */
    r1 = x - n * logbaseof2_by_32_lead;
    r2 =   - n * logbaseof2_by_32_trail;

    /* Set j = n mod 32:   5 mod 32 = 5,   -5 mod 32 = 27,  etc. */
    /* j = n % 32;
       if (j < 0) j += 32; */
    /* The mask gives the non-negative residue for negative n as well,
       which relies on a two's-complement representation of int. */
    j = n & 0x0000001f;

    f1 = two_to_jby32_lead_table[j];
    f2 = two_to_jby32_trail_table[j];

    /* n - j is a multiple of 32, so this division is exact */
    *m = (n - j) / 32;

    /* Step 2. The following is the core approximation. We approximate
       exp(r1+r2)-1 by a polynomial. */

    /* Convert the remainder from base-`base' units to natural-log units */
    r1 *= logbase; r2 *= logbase;

    r = r1 + r2;
    /* The coefficient literals carry more digits than a float can hold;
       the F suffix makes them float constants. */
    q = r1 + (r2 +
              r*r*( 5.00000000000000008883e-01F +
                      r*( 1.66666666665260878863e-01F )));

    /* Step 3. Function value reconstruction.
       We now reconstruct the exponential of the input argument
       so that exp(x) = 2**m * (z1 + z2).
       The order of the computation below must be strictly observed. */

    *z1 = f1;
    *z2 = f2 + ((f1 + f2) * q);
}
#endif /* USE_SPLITEXPF */


#if defined(USE_SCALEUPDOUBLE1024)
/* Multiply a double by 2**1024, working on bit patterns: ux is the bit
   pattern of the input (normal or denormal) and *ur receives the bit
   pattern of the result. Nothing checks that the input can actually be
   scaled by that amount. */
static inline void scaleUpDouble1024(unsigned long ux, unsigned long *ur)
{
  unsigned long scaled;
  double y;

  if ((ux & EXPBITS_DP64) != 0)
    {
      /* Normal input: bump the exponent field by 1024 */
      scaled = ux + 0x4000000000000000;
    }
  else
    {
      /* Denormal input: give it the exponent of 4.0, which introduces an
         implicit leading bit worth 4.0 in magnitude, then remove that
         contribution again with the matching sign */
      PUT_BITS_DP64(ux | 0x4010000000000000, y);
      y = (ux & SIGNBIT_DP64) ? y + 4.0 : y - 4.0;
      GET_BITS_DP64(y, scaled);
    }

  *ur = scaled;
}

#endif /* USE_SCALEUPDOUBLE1024 */


#if defined(USE_SCALEDOWNDOUBLE)
/* Divide a double by 2**k, working on bit patterns: ux is the bit pattern
   of the input and *ur receives the bit pattern of the result.
   Nothing checks that the input can actually be scaled by that amount. */
static inline void scaleDownDouble(unsigned long ux, int k,
                                   unsigned long *ur)
{
  unsigned long sign, magnitude, fraction, result;
  int new_exp, shift;
  sign = ux & SIGNBIT_DP64;
  magnitude = ux & ~SIGNBIT_DP64;
  fraction = magnitude & ~EXPBITS_DP64;
  new_exp = ((magnitude & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - k;
  if (new_exp > 0)
    {
      /* Result is still normal: just store the reduced exponent */
      result = fraction | ((unsigned long)new_exp << EXPSHIFTBITS_DP64);
    }
  else
    {
      /* Result is denormal: make the implicit bit explicit and shift
         the significand right */
      shift = (1 - new_exp);
      if (shift > MANTLENGTH_DP64 + 1)
        {
          /* Everything would be shifted out; avoid an over-wide shift */
          result = 0;
        }
      else
        {
          /* Shift all but the last place, then round on the final bit */
          result = (fraction | 0x0010000000000000) >> (shift - 1);
          result = (result >> 1) + (result & 1);
        }
    }
  *ur = result | sign;
}

#endif /* USE_SCALEDOWNDOUBLE */


#if defined(USE_SCALEUPFLOAT128)
/* Multiply a float by 2**128, working on bit patterns: ux is the bit
   pattern of the input (normal or denormal) and *ur receives the bit
   pattern of the result. Nothing checks that the input can actually be
   scaled by that amount. */
static inline void scaleUpFloat128(unsigned int ux, unsigned int *ur)
{
  unsigned int scaled;
  float y;

  if ((ux & EXPBITS_SP32) != 0)
    {
      /* Normal input: bump the exponent field by 128 */
      scaled = ux + 0x40000000;
    }
  else
    {
      /* Denormal input: give it the exponent of 4.0, which introduces an
         implicit leading bit worth 4.0 in magnitude, then remove that
         contribution again with the matching sign */
      PUT_BITS_SP32(ux | 0x40800000, y);
      y = (ux & SIGNBIT_SP32) ? y + 4.0F : y - 4.0F;
      GET_BITS_SP32(y, scaled);
    }
  *ur = scaled;
}
#endif /* USE_SCALEUPFLOAT128 */


#if defined(USE_SCALEDOWNFLOAT)
/* Divide a float by 2**k, working on bit patterns: ux is the bit pattern
   of the input and *ur receives the bit pattern of the result.
   Nothing checks that the input can actually be scaled by that amount. */
static inline void scaleDownFloat(unsigned int ux, int k,
                                  unsigned int *ur)
{
  unsigned int sign, magnitude, fraction, result;
  int new_exp, shift;

  sign = ux & SIGNBIT_SP32;
  magnitude = ux & ~SIGNBIT_SP32;
  fraction = magnitude & ~EXPBITS_SP32;
  new_exp = ((magnitude & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - k;
  if (new_exp > 0)
    {
      /* Result is still normal: just store the reduced exponent */
      result = fraction | ((unsigned int)new_exp << EXPSHIFTBITS_SP32);
    }
  else
    {
      /* Result is denormal: make the implicit bit explicit and shift
         the significand right */
      shift = (1 - new_exp);
      if (shift > MANTLENGTH_SP32 + 1)
        {
          /* Everything would be shifted out; avoid an over-wide shift */
          result = 0;
        }
      else
        {
          /* Shift all but the last place, then round on the final bit */
          result = (fraction | 0x00800000) >> (shift - 1);
          result = (result >> 1) + (result & 1);
        }
    }
  *ur = result | sign;
}
#endif /* SCALEDOWNFLOAT */


#if defined(USE_SQRT_AMD_INLINE)
/* Returns the square root of the double x.
   Special cases, as coded below:
     - NaN returns x + x;
     - +infinity, +0 and -0 are returned unchanged;
     - -infinity and every other negative input return
       nan_with_flags(AMD_F_INVALID). */
static inline double sqrt_amd_inline(double x)
{
  /*
     Computes the square root of x.

     The calculation is carried out in three steps.

     Step 1. Reduction.
     The input argument is scaled to the interval [1, 4) by
     computing
               x = 2^e * y, where y in [1,4).
     Furthermore y is decomposed as y = c + t where
               c = 1 + j/32, j = 0,1,..,96; and |t| <= 1/64.

     Step 2. Approximation.
     An approximation q = sqrt(1 + (t/c)) - 1  is obtained
     from a basic series expansion using precomputed values
     stored in rt_jby32_lead_table_dbl and rt_jby32_trail_table_dbl.

     Step 3. Reconstruction.
     The value of sqrt(x) is reconstructed via
       sqrt(x) = 2^(e/2) * sqrt(y)
               = 2^(e/2) * sqrt(c) * sqrt(y/c)
               = 2^(e/2) * sqrt(c) * sqrt(1 + t/c)
               = 2^(e/2) * [ sqrt(c) + sqrt(c)*q ]
    */

  unsigned long ux, ax, u;
  double c, y, p, q, r, twop, z, rtc, rtc_lead, rtc_trail;
  int e, denorm = 0, index;

/* Arrays rt_jby32_lead_table_dbl and rt_jby32_trail_table_dbl contain
   leading and trailing parts respectively of precomputed
   values of sqrt(j/32), for j = 32, 33, ..., 128.
   rt_jby32_lead_table_dbl contains the first 21 bits of precision,
   and rt_jby32_trail_table_dbl contains a further 53 bits precision. */

  static const double rt_jby32_lead_table_dbl[97] = {
    1.00000000000000000000e+00,   /* 0x3ff0000000000000 */
    1.01550388336181640625e+00,   /* 0x3ff03f8100000000 */
    1.03077602386474609375e+00,   /* 0x3ff07e0f00000000 */
    1.04582500457763671875e+00,   /* 0x3ff0bbb300000000 */
    1.06065940856933593750e+00,   /* 0x3ff0f87600000000 */
    1.07528972625732421875e+00,   /* 0x3ff1346300000000 */
    1.08972454071044921875e+00,   /* 0x3ff16f8300000000 */
    1.10396957397460937500e+00,   /* 0x3ff1a9dc00000000 */
    1.11803340911865234375e+00,   /* 0x3ff1e37700000000 */
    1.13192272186279296875e+00,   /* 0x3ff21c5b00000000 */
    1.14564323425292968750e+00,   /* 0x3ff2548e00000000 */
    1.15920162200927734375e+00,   /* 0x3ff28c1700000000 */
    1.17260360717773437500e+00,   /* 0x3ff2c2fc00000000 */
    1.18585395812988281250e+00,   /* 0x3ff2f94200000000 */
    1.19895744323730468750e+00,   /* 0x3ff32eee00000000 */
    1.21191978454589843750e+00,   /* 0x3ff3640600000000 */
    1.22474479675292968750e+00,   /* 0x3ff3988e00000000 */
    1.23743629455566406250e+00,   /* 0x3ff3cc8a00000000 */
    1.25000000000000000000e+00,   /* 0x3ff4000000000000 */
    1.26243782043457031250e+00,   /* 0x3ff432f200000000 */
    1.27475452423095703125e+00,   /* 0x3ff4656500000000 */
    1.28695297241210937500e+00,   /* 0x3ff4975c00000000 */
    1.29903793334960937500e+00,   /* 0x3ff4c8dc00000000 */
    1.31101036071777343750e+00,   /* 0x3ff4f9e600000000 */
    1.32287502288818359375e+00,   /* 0x3ff52a7f00000000 */
    1.33463478088378906250e+00,   /* 0x3ff55aaa00000000 */
    1.34629058837890625000e+00,   /* 0x3ff58a6800000000 */
    1.35784721374511718750e+00,   /* 0x3ff5b9be00000000 */
    1.36930561065673828125e+00,   /* 0x3ff5e8ad00000000 */
    1.38066959381103515625e+00,   /* 0x3ff6173900000000 */
    1.39194107055664062500e+00,   /* 0x3ff6456400000000 */
    1.40312099456787109375e+00,   /* 0x3ff6732f00000000 */
    1.41421318054199218750e+00,   /* 0x3ff6a09e00000000 */
    1.42521858215332031250e+00,   /* 0x3ff6cdb200000000 */
    1.43614006042480468750e+00,   /* 0x3ff6fa6e00000000 */
    1.44697952270507812500e+00,   /* 0x3ff726d400000000 */
    1.45773792266845703125e+00,   /* 0x3ff752e500000000 */
    1.46841716766357421875e+00,   /* 0x3ff77ea300000000 */
    1.47901916503906250000e+00,   /* 0x3ff7aa1000000000 */
    1.48954677581787109375e+00,   /* 0x3ff7d52f00000000 */
    1.50000000000000000000e+00,   /* 0x3ff8000000000000 */
    1.51038074493408203125e+00,   /* 0x3ff82a8500000000 */
    1.52068996429443359375e+00,   /* 0x3ff854bf00000000 */
    1.53093051910400390625e+00,   /* 0x3ff87eb100000000 */
    1.54110336303710937500e+00,   /* 0x3ff8a85c00000000 */
    1.55120849609375000000e+00,   /* 0x3ff8d1c000000000 */
    1.56124877929687500000e+00,   /* 0x3ff8fae000000000 */
    1.57122516632080078125e+00,   /* 0x3ff923bd00000000 */
    1.58113861083984375000e+00,   /* 0x3ff94c5800000000 */
    1.59099006652832031250e+00,   /* 0x3ff974b200000000 */
    1.60078048706054687500e+00,   /* 0x3ff99ccc00000000 */
    1.61051177978515625000e+00,   /* 0x3ff9c4a800000000 */
    1.62018489837646484375e+00,   /* 0x3ff9ec4700000000 */
    1.62979984283447265625e+00,   /* 0x3ffa13a900000000 */
    1.63935947418212890625e+00,   /* 0x3ffa3ad100000000 */
    1.64886283874511718750e+00,   /* 0x3ffa61be00000000 */
    1.65831184387207031250e+00,   /* 0x3ffa887200000000 */
    1.66770744323730468750e+00,   /* 0x3ffaaeee00000000 */
    1.67705059051513671875e+00,   /* 0x3ffad53300000000 */
    1.68634128570556640625e+00,   /* 0x3ffafb4100000000 */
    1.69558238983154296875e+00,   /* 0x3ffb211b00000000 */
    1.70477199554443359375e+00,   /* 0x3ffb46bf00000000 */
    1.71391296386718750000e+00,   /* 0x3ffb6c3000000000 */
    1.72300529479980468750e+00,   /* 0x3ffb916e00000000 */
    1.73204994201660156250e+00,   /* 0x3ffbb67a00000000 */
    1.74104785919189453125e+00,   /* 0x3ffbdb5500000000 */
    1.75000000000000000000e+00,   /* 0x3ffc000000000000 */
    1.75890541076660156250e+00,   /* 0x3ffc247a00000000 */
    1.76776695251464843750e+00,   /* 0x3ffc48c600000000 */
    1.77658367156982421875e+00,   /* 0x3ffc6ce300000000 */
    1.78535652160644531250e+00,   /* 0x3ffc90d200000000 */
    1.79408740997314453125e+00,   /* 0x3ffcb49500000000 */
    1.80277538299560546875e+00,   /* 0x3ffcd82b00000000 */
    1.81142139434814453125e+00,   /* 0x3ffcfb9500000000 */
    1.82002735137939453125e+00,   /* 0x3ffd1ed500000000 */
    1.82859230041503906250e+00,   /* 0x3ffd41ea00000000 */
    1.83711719512939453125e+00,   /* 0x3ffd64d500000000 */
    1.84560203552246093750e+00,   /* 0x3ffd879600000000 */
    1.85404872894287109375e+00,   /* 0x3ffdaa2f00000000 */
    1.86245727539062500000e+00,   /* 0x3ffdcca000000000 */
    1.87082862854003906250e+00,   /* 0x3ffdeeea00000000 */
    1.87916183471679687500e+00,   /* 0x3ffe110c00000000 */
    1.88745784759521484375e+00,   /* 0x3ffe330700000000 */
    1.89571857452392578125e+00,   /* 0x3ffe54dd00000000 */
    1.90394306182861328125e+00,   /* 0x3ffe768d00000000 */
    1.91213226318359375000e+00,   /* 0x3ffe981800000000 */
    1.92028617858886718750e+00,   /* 0x3ffeb97e00000000 */
    1.92840576171875000000e+00,   /* 0x3ffedac000000000 */
    1.93649101257324218750e+00,   /* 0x3ffefbde00000000 */
    1.94454288482666015625e+00,   /* 0x3fff1cd900000000 */
    1.95256233215332031250e+00,   /* 0x3fff3db200000000 */
    1.96054744720458984375e+00,   /* 0x3fff5e6700000000 */
    1.96850109100341796875e+00,   /* 0x3fff7efb00000000 */
    1.97642326354980468750e+00,   /* 0x3fff9f6e00000000 */
    1.98431301116943359375e+00,   /* 0x3fffbfbf00000000 */
    1.99217128753662109375e+00,   /* 0x3fffdfef00000000 */
    2.00000000000000000000e+00};  /* 0x4000000000000000 */

  static const double rt_jby32_trail_table_dbl[97] = {
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    9.17217678638807524014e-07,   /* 0x3eaec6d70177881c */
    3.82539669043705364790e-07,   /* 0x3e99abfb41bd6b24 */
    2.85899577162227138140e-08,   /* 0x3e5eb2bf6bab55a2 */
    7.63210485349101216659e-07,   /* 0x3ea99bed9b2d8d0c */
    9.32123004127716212874e-07,   /* 0x3eaf46e029c1b296 */
    1.95174719169309219157e-07,   /* 0x3e8a3226fc42f30c */
    5.34316371481845492427e-07,   /* 0x3ea1edbe20701d73 */
    5.79631242504454563052e-07,   /* 0x3ea372fe94f82be7 */
    4.20404384109571705948e-07,   /* 0x3e9c367e08e7bb06 */
    6.89486030314147010716e-07,   /* 0x3ea722a3d0a66608 */
    6.89927685625314560328e-07,   /* 0x3ea7266f067ca1d6 */
    3.32778123013641425828e-07,   /* 0x3e965515a9b34850 */
    1.64433259436999584387e-07,   /* 0x3e8611e23ef6c1bd */
    4.37590875197899335723e-07,   /* 0x3e9d5dc1059ed8e7 */
    1.79808183816018617413e-07,   /* 0x3e88222982d0e4f4 */
    7.46386593615986477624e-08,   /* 0x3e7409212e7d0322 */
    5.72520794105201454728e-07,   /* 0x3ea335ea8a5fcf39 */
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    2.96860689431670420344e-07,   /* 0x3e93ec071e938bfe */
    3.54167239176257065345e-07,   /* 0x3e97c48bfd9862c6 */
    7.95211265664474710063e-07,   /* 0x3eaaaed010f74671 */
    1.72327048595145565621e-07,   /* 0x3e87211cbfeb62e0 */
    6.99494915996239297020e-07,   /* 0x3ea7789d9660e72d */
    6.32644111701500844315e-07,   /* 0x3ea53a5f1d36f1cf */
    6.20124838851440463844e-10,   /* 0x3e054eacff2057dc */
    6.13404719757812629969e-07,   /* 0x3ea4951b3e6a83cc */
    3.47654909777986407387e-07,   /* 0x3e9754aa76884c66 */
    7.83106177002392475763e-07,   /* 0x3eaa46d4b1de1074 */
    5.33337372440526357008e-07,   /* 0x3ea1e55548f92635 */
    2.01508648555298681765e-08,   /* 0x3e55a3070dd17788 */
    5.25472356925843939587e-07,   /* 0x3ea1a1c5eedb0801 */
    3.81831102861301692797e-07,   /* 0x3e999fcef32422cc */
    6.99220602161420018738e-07,   /* 0x3ea776425d6b0199 */
    6.01209702477462624811e-07,   /* 0x3ea42c5a1e0191a2 */
    9.01437000591944740554e-08,   /* 0x3e7832a0bdff1327 */
    5.10428680864685379950e-08,   /* 0x3e6b674743636676 */
    3.47895267104621031421e-07,   /* 0x3e9758cb90d2f714 */
    7.80735841510641848628e-07,   /* 0x3eaa3278459cde25 */
    1.35158752025506517690e-07,   /* 0x3e822404f4a103ee */
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    1.76523947728535489812e-09,   /* 0x3e1e539af6892ac5 */
    6.68280121328499932183e-07,   /* 0x3ea66c7b872c9cd0 */
    5.70135482405123276616e-07,   /* 0x3ea3216d2f43887d */
    1.37705134737562525897e-07,   /* 0x3e827b832cbedc0e */
    7.09655107074516613672e-07,   /* 0x3ea7cfe41579091d */
    7.20302724551461693011e-07,   /* 0x3ea82b5a713c490a */
    4.69926266058212796694e-07,   /* 0x3e9f8945932d872e */
    2.19244345915999437026e-07,   /* 0x3e8d6d2da9490251 */
    1.91141411617401877927e-07,   /* 0x3e89a791a3114e4a */
    5.72297665296622053774e-07,   /* 0x3ea333ffe005988d */
    5.61055484436830560103e-07,   /* 0x3ea2d36e0ed49ab1 */
    2.76225500213991506100e-07,   /* 0x3e92898498f55f9e */
    7.58466189522395692908e-07,   /* 0x3ea9732cca1032a3 */
    1.56893371256836029827e-07,   /* 0x3e850ed0b02a22d2 */
    4.06038997708867066507e-07,   /* 0x3e9b3fb265b1e40a */
    5.51305629612057435809e-07,   /* 0x3ea27fade682d1de */
    5.64778487026561123207e-07,   /* 0x3ea2f36906f707ba */
    3.92609705553556897517e-07,   /* 0x3e9a58fbbee883b6 */
    9.09698438776943827802e-07,   /* 0x3eae864005bca6d7 */
    1.05949774066016139743e-07,   /* 0x3e7c70d02300f263 */
    7.16578798392844784244e-07,   /* 0x3ea80b5d712d8e3e */
    6.86233073531233972561e-07,   /* 0x3ea706b27cc7d390 */
    7.99211473033494452908e-07,   /* 0x3eaad12c9d849a97 */
    8.65552275731027456121e-07,   /* 0x3ead0b09954e764b */
    6.75456120386058448618e-07,   /* 0x3ea6aa1fb7826cbd */
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    4.99167184520462138743e-07,   /* 0x3ea0bfd03f46763c */
    4.51720373502110930296e-10,   /* 0x3dff0abfb4adfb9e */
    1.28874162718371367439e-07,   /* 0x3e814c151f991b2e */
    5.85529267186999798656e-07,   /* 0x3ea3a5a879b09292 */
    1.01827770937125531924e-07,   /* 0x3e7b558d173f9796 */
    2.54736389177809626508e-07,   /* 0x3e9118567cd83fb8 */
    6.98925535290464831294e-07,   /* 0x3ea773b981896751 */
    1.20940735036524314513e-07,   /* 0x3e803b7df49f48a8 */
    5.43759351196479689657e-08,   /* 0x3e6d315f22491900 */
    1.11957989042397958409e-07,   /* 0x3e7e0db1c5bb84b2 */
    8.47006714134442661218e-07,   /* 0x3eac6bbb7644ff76 */
    8.92831044643427836228e-07,   /* 0x3eadf55c3afec01f */
    7.77828292464916501663e-07,   /* 0x3eaa197e81034da3 */
    6.48469316302918797451e-08,   /* 0x3e71683f4920555d */
    2.12579816658859849140e-07,   /* 0x3e8c882fd78bb0b0 */
    7.61222472580559138435e-07,   /* 0x3ea98ad9eb7b83ec */
    2.86488961857314189607e-07,   /* 0x3e9339d7c7777273 */
    2.14637363790165363515e-07,   /* 0x3e8ccee237cae6fe */
    5.44137005612605847831e-08,   /* 0x3e6d368fe324a146 */
    2.58378284856442408413e-07,   /* 0x3e9156e7b6d99b45 */
    3.15848939061134843091e-07,   /* 0x3e95323e5310b5c1 */
    6.60530466255089632309e-07,   /* 0x3ea629e9db362f5d */
    7.63436345535852301127e-07,   /* 0x3ea99dde4728d7ec */
    8.68233432860324345268e-08,   /* 0x3e774e746878544d */
    9.45465175398023087082e-07,   /* 0x3eafb97be873a87d */
    8.77499534786171267246e-07,   /* 0x3ead71a9e23c2f63 */
    2.74055432394999316135e-07,   /* 0x3e92643c89cda173 */
    4.72129009349126213532e-07,   /* 0x3e9faf1d57a4d56c */
    8.93777032327078947306e-07,   /* 0x3eadfd7c7ab7b282 */
    0.00000000000000000000e+00};  /* 0x0000000000000000 */


  /* Handle special arguments first */

  GET_BITS_DP64(x, ux);
  ax = ux & (~SIGNBIT_DP64);

  if(ax >= 0x7ff0000000000000)
    {
      /* x is either NaN or infinity */
      if (ux & MANTBITS_DP64)
        /* x is NaN */
        return x + x; /* Raise invalid if it is a signalling NaN */
      else if (ux & SIGNBIT_DP64)
        /* x is negative infinity */
        return nan_with_flags(AMD_F_INVALID);
      else
        /* x is positive infinity */
        return x;
    }
  else if (ux & SIGNBIT_DP64)
    {
      /* x is negative. */
      if (ux == SIGNBIT_DP64)
        /* Handle negative zero first */
        return x;
      else
        return nan_with_flags(AMD_F_INVALID);
    }
  else if (ux <= 0x000fffffffffffff)
    {
      /* x is denormalised or zero */
      if (ux == 0)
        /* x is zero */
        return x;
      else
        {
          /* x is denormalised; scale it up */
          /* Normalize x by increasing the exponent by 60
             and subtracting a correction to account for the implicit
             bit. This replaces a slow denormalized
             multiplication by a fast normal subtraction. */
          static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */
          denorm = 1;
          /* ux still holds the bit pattern of x read above, so it is
             used directly rather than being fetched a second time. */
          PUT_BITS_DP64(ux | 0x03d0000000000000, x);
          x -= corr;
          GET_BITS_DP64(x, ux);
        }
    }

  /* Main algorithm */

  /*
     Find y and e such that x = 2^e * y, where y in [1,4).
     This is done using an in-lined variant of splitDouble,
     which also ensures that e is even.
   */
  y = x;
  ux &= EXPBITS_DP64;
  ux >>= EXPSHIFTBITS_DP64;
  /* Keep the sign and mantissa of x; only the exponent field of y is
     replaced, with a value chosen by the parity of the biased exponent. */
  GET_BITS_DP64(y, u);
  u &= (SIGNBIT_DP64 | MANTBITS_DP64);
  if (ux & 1)
    {
      u |= ONEEXPBITS_DP64;
      e = ux - EXPBIAS_DP64;
    }
  else
    {
      /* One extra power of two stays in y, so e is one smaller. */
      u |= TWOEXPBITS_DP64;
      e = ux - EXPBIAS_DP64 - 1;
    }
  PUT_BITS_DP64(u, y);


  /* Find the index of the sub-interval of [1,4) in which y lies. */

  index = (int)(32.0*y+0.5);

  /* Look up the table values and compute c and r = t/c,
     where t = y - c. */

  rtc_lead = rt_jby32_lead_table_dbl[index-32];
  rtc_trail = rt_jby32_trail_table_dbl[index-32];
  c = 0.03125*index;
  r = (y - c)/c;

  /*
    Find q = sqrt(1+r) - 1.
    From one step of Newton on (q+1)^2 = 1+r
  */

  p = r*0.5 - r*r*(0.1250079870 - r*(0.6250522999E-01));
  twop = p + p;
  q = p - (p*p + (twop - r))/(twop + 2.0);

  /* Reconstruction */

  rtc = rtc_lead + rtc_trail;
  e >>= 1; /* e = e/2 */
  z = rtc_lead + (rtc*q+rtc_trail);

  /* A denormal input was scaled up by 2**60 above, so its square root
     must be scaled back down by 2**30. */
  if (denorm)
    e -= 30;

  /* Build the power of two 2**e directly from its bit pattern
     (r is reused as scratch) and apply it. */
  PUT_BITS_DP64(((long)e + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, r);
  z *= r;

  return z;

}
#endif /* SQRT_AMD_INLINE */

#if defined(USE_SQRTF_AMD_INLINE)

/* Returns the square root of the float x.
   Special cases, as coded below:
     - NaN returns x + x;
     - +infinity, +0 and -0 are returned unchanged;
     - -infinity and every other negative input return
       nanf_with_flags(AMD_F_INVALID). */
static inline float sqrtf_amd_inline(float x)
{
  /*
     Computes the square root of x.

     The calculation is carried out in three steps.

     Step 1. Reduction.
     The input argument is scaled to the interval [1, 4) by
     computing
               x = 2^e * y, where y in [1,4).
     Furthermore y is decomposed as y = c + t where
               c = 1 + j/32, j = 0,1,..,96; and |t| <= 1/64.

     Step 2. Approximation.
     An approximation q = sqrt(1 + (t/c)) - 1  is obtained
     from a basic series expansion using precomputed values
     stored in rt_jby32_lead_table_float and rt_jby32_trail_table_float.

     Step 3. Reconstruction.
     The value of sqrt(x) is reconstructed via
       sqrt(x) = 2^(e/2) * sqrt(y)
               = 2^(e/2) * sqrt(c) * sqrt(y/c)
               = 2^(e/2) * sqrt(c) * sqrt(1 + t/c)
               = 2^(e/2) * [ sqrt(c) + sqrt(c)*q ]
    */

  unsigned int ux, ax, u;
  float c, y, p, q, r, twop, z, rtc, rtc_lead, rtc_trail;
  int e, denorm = 0, index;

/* Arrays rt_jby32_lead_table_float and rt_jby32_trail_table_float contain
   leading and trailing parts respectively of precomputed
   values of sqrt(j/32), for j = 32, 33, ..., 128.
   rt_jby32_lead_table_float contains the first 13 bits of precision,
   and rt_jby32_trail_table_float contains a further 24 bits precision. */

  static const float rt_jby32_lead_table_float[97] = {
    1.00000000000000000000e+00F,   /* 0x3f800000 */
    1.01538085937500000000e+00F,   /* 0x3f81f800 */
    1.03076171875000000000e+00F,   /* 0x3f83f000 */
    1.04565429687500000000e+00F,   /* 0x3f85d800 */
    1.06054687500000000000e+00F,   /* 0x3f87c000 */
    1.07519531250000000000e+00F,   /* 0x3f89a000 */
    1.08959960937500000000e+00F,   /* 0x3f8b7800 */
    1.10375976562500000000e+00F,   /* 0x3f8d4800 */
    1.11791992187500000000e+00F,   /* 0x3f8f1800 */
    1.13183593750000000000e+00F,   /* 0x3f90e000 */
    1.14550781250000000000e+00F,   /* 0x3f92a000 */
    1.15917968750000000000e+00F,   /* 0x3f946000 */
    1.17236328125000000000e+00F,   /* 0x3f961000 */
    1.18579101562500000000e+00F,   /* 0x3f97c800 */
    1.19873046875000000000e+00F,   /* 0x3f997000 */
    1.21191406250000000000e+00F,   /* 0x3f9b2000 */
    1.22460937500000000000e+00F,   /* 0x3f9cc000 */
    1.23730468750000000000e+00F,   /* 0x3f9e6000 */
    1.25000000000000000000e+00F,   /* 0x3fa00000 */
    1.26220703125000000000e+00F,   /* 0x3fa19000 */
    1.27465820312500000000e+00F,   /* 0x3fa32800 */
    1.28686523437500000000e+00F,   /* 0x3fa4b800 */
    1.29882812500000000000e+00F,   /* 0x3fa64000 */
    1.31079101562500000000e+00F,   /* 0x3fa7c800 */
    1.32275390625000000000e+00F,   /* 0x3fa95000 */
    1.33447265625000000000e+00F,   /* 0x3faad000 */
    1.34619140625000000000e+00F,   /* 0x3fac5000 */
    1.35766601562500000000e+00F,   /* 0x3fadc800 */
    1.36914062500000000000e+00F,   /* 0x3faf4000 */
    1.38061523437500000000e+00F,   /* 0x3fb0b800 */
    1.39184570312500000000e+00F,   /* 0x3fb22800 */
    1.40307617187500000000e+00F,   /* 0x3fb39800 */
    1.41406250000000000000e+00F,   /* 0x3fb50000 */
    1.42504882812500000000e+00F,   /* 0x3fb66800 */
    1.43603515625000000000e+00F,   /* 0x3fb7d000 */
    1.44677734375000000000e+00F,   /* 0x3fb93000 */
    1.45751953125000000000e+00F,   /* 0x3fba9000 */
    1.46826171875000000000e+00F,   /* 0x3fbbf000 */
    1.47900390625000000000e+00F,   /* 0x3fbd5000 */
    1.48950195312500000000e+00F,   /* 0x3fbea800 */
    1.50000000000000000000e+00F,   /* 0x3fc00000 */
    1.51025390625000000000e+00F,   /* 0x3fc15000 */
    1.52050781250000000000e+00F,   /* 0x3fc2a000 */
    1.53076171875000000000e+00F,   /* 0x3fc3f000 */
    1.54101562500000000000e+00F,   /* 0x3fc54000 */
    1.55102539062500000000e+00F,   /* 0x3fc68800 */
    1.56103515625000000000e+00F,   /* 0x3fc7d000 */
    1.57104492187500000000e+00F,   /* 0x3fc91800 */
    1.58105468750000000000e+00F,   /* 0x3fca6000 */
    1.59082031250000000000e+00F,   /* 0x3fcba000 */
    1.60058593750000000000e+00F,   /* 0x3fcce000 */
    1.61035156250000000000e+00F,   /* 0x3fce2000 */
    1.62011718750000000000e+00F,   /* 0x3fcf6000 */
    1.62963867187500000000e+00F,   /* 0x3fd09800 */
    1.63916015625000000000e+00F,   /* 0x3fd1d000 */
    1.64868164062500000000e+00F,   /* 0x3fd30800 */
    1.65820312500000000000e+00F,   /* 0x3fd44000 */
    1.66748046875000000000e+00F,   /* 0x3fd57000 */
    1.67700195312500000000e+00F,   /* 0x3fd6a800 */
    1.68627929687500000000e+00F,   /* 0x3fd7d800 */
    1.69555664062500000000e+00F,   /* 0x3fd90800 */
    1.70458984375000000000e+00F,   /* 0x3fda3000 */
    1.71386718750000000000e+00F,   /* 0x3fdb6000 */
    1.72290039062500000000e+00F,   /* 0x3fdc8800 */
    1.73193359375000000000e+00F,   /* 0x3fddb000 */
    1.74096679687500000000e+00F,   /* 0x3fded800 */
    1.75000000000000000000e+00F,   /* 0x3fe00000 */
    1.75878906250000000000e+00F,   /* 0x3fe12000 */
    1.76757812500000000000e+00F,   /* 0x3fe24000 */
    1.77636718750000000000e+00F,   /* 0x3fe36000 */
    1.78515625000000000000e+00F,   /* 0x3fe48000 */
    1.79394531250000000000e+00F,   /* 0x3fe5a000 */
    1.80273437500000000000e+00F,   /* 0x3fe6c000 */
    1.81127929687500000000e+00F,   /* 0x3fe7d800 */
    1.81982421875000000000e+00F,   /* 0x3fe8f000 */
    1.82836914062500000000e+00F,   /* 0x3fea0800 */
    1.83691406250000000000e+00F,   /* 0x3feb2000 */
    1.84545898437500000000e+00F,   /* 0x3fec3800 */
    1.85400390625000000000e+00F,   /* 0x3fed5000 */
    1.86230468750000000000e+00F,   /* 0x3fee6000 */
    1.87060546875000000000e+00F,   /* 0x3fef7000 */
    1.87915039062500000000e+00F,   /* 0x3ff08800 */
    1.88745117187500000000e+00F,   /* 0x3ff19800 */
    1.89550781250000000000e+00F,   /* 0x3ff2a000 */
    1.90380859375000000000e+00F,   /* 0x3ff3b000 */
    1.91210937500000000000e+00F,   /* 0x3ff4c000 */
    1.92016601562500000000e+00F,   /* 0x3ff5c800 */
    1.92822265625000000000e+00F,   /* 0x3ff6d000 */
    1.93627929687500000000e+00F,   /* 0x3ff7d800 */
    1.94433593750000000000e+00F,   /* 0x3ff8e000 */
    1.95239257812500000000e+00F,   /* 0x3ff9e800 */
    1.96044921875000000000e+00F,   /* 0x3ffaf000 */
    1.96826171875000000000e+00F,   /* 0x3ffbf000 */
    1.97631835937500000000e+00F,   /* 0x3ffcf800 */
    1.98413085937500000000e+00F,   /* 0x3ffdf800 */
    1.99194335937500000000e+00F,   /* 0x3ffef800 */
    2.00000000000000000000e+00F};  /* 0x40000000 */

  static const float rt_jby32_trail_table_float[97] = {
    0.00000000000000000000e+00F,   /* 0x00000000 */
    1.23941208585165441036e-04F,   /* 0x3901f637 */
    1.46876545841223560274e-05F,   /* 0x37766aff */
    1.70736297150142490864e-04F,   /* 0x393307ad */
    1.13296780909877270460e-04F,   /* 0x38ed99bf */
    9.53458802541717886925e-05F,   /* 0x38c7f46e */
    1.25126505736261606216e-04F,   /* 0x39033464 */
    2.10342666832730174065e-04F,   /* 0x395c8f6e */
    1.14066875539720058441e-04F,   /* 0x38ef3730 */
    8.72047676239162683487e-05F,   /* 0x38b6e1b4 */
    1.36111237225122749805e-04F,   /* 0x390eb915 */
    2.26244374061934649944e-05F,   /* 0x37bdc99c */
    2.40658700931817293167e-04F,   /* 0x397c5954 */
    6.31069415248930454254e-05F,   /* 0x38845848 */
    2.27412077947519719601e-04F,   /* 0x396e7577 */
    5.90185391047270968556e-06F,   /* 0x36c6088a */
    1.35496389702893793583e-04F,   /* 0x390e1409 */
    1.32179571664892137051e-04F,   /* 0x390a99af */
    0.00000000000000000000e+00F,   /* 0x00000000 */
    2.31086043640971183777e-04F,   /* 0x39724fb0 */
    9.66752704698592424393e-05F,   /* 0x38cabe24 */
    8.85332483449019491673e-05F,   /* 0x38b9aaed */
    2.09980673389509320259e-04F,   /* 0x395c2e42 */
    2.20044588786549866199e-04F,   /* 0x3966bbc5 */
    1.21749282698146998882e-04F,   /* 0x38ff53a6 */
    1.62125259521417319775e-04F,   /* 0x392a002b */
    9.97955357888713479042e-05F,   /* 0x38d14952 */
    1.81545779923908412457e-04F,   /* 0x393e5d53 */
    1.65768768056295812130e-04F,   /* 0x392dd237 */
    5.48927710042335093021e-05F,   /* 0x38663caa */
    9.53875860432162880898e-05F,   /* 0x38c80ad2 */
    4.53481625299900770187e-05F,   /* 0x383e3438 */
    1.51062369695864617825e-04F,   /* 0x391e667f */
    1.70453247847035527229e-04F,   /* 0x3932bbb2 */
    1.05505387182347476482e-04F,   /* 0x38dd42c6 */
    2.02269104192964732647e-04F,   /* 0x39541833 */
    2.18442466575652360916e-04F,   /* 0x39650db4 */
    1.55796806211583316326e-04F,   /* 0x39235d63 */
    1.60395247803535312414e-05F,   /* 0x37868c9e */
    4.49578510597348213196e-05F,   /* 0x383c9120 */
    0.00000000000000000000e+00F,   /* 0x00000000 */
    1.26840444863773882389e-04F,   /* 0x39050079 */
    1.82820076588541269302e-04F,   /* 0x393fb364 */
    1.69370483490638434887e-04F,   /* 0x3931990b */
    8.78757418831810355186e-05F,   /* 0x38b849ee */
    1.83815121999941766262e-04F,   /* 0x3940be7f */
    2.14343352126888930798e-04F,   /* 0x3960c15b */
    1.80714370799250900745e-04F,   /* 0x393d7e25 */
    8.41425862745381891727e-05F,   /* 0x38b075b5 */
    1.69945167726837098598e-04F,   /* 0x3932334f */
    1.95121858268976211548e-04F,   /* 0x394c99a0 */
    1.60778334247879683971e-04F,   /* 0x3928969b */
    6.79871009197086095810e-05F,   /* 0x388e944c */
    1.61929419846273958683e-04F,   /* 0x3929cb99 */
    1.99474830878898501396e-04F,   /* 0x39512a1e */
    1.81604162207804620266e-04F,   /* 0x393e6cff */
    1.09270178654696792364e-04F,   /* 0x38e527fb */
    2.27539261686615645885e-04F,   /* 0x396e979b */
    4.90300008095800876617e-05F,   /* 0x384da590 */
    6.28985289949923753738e-05F,   /* 0x3883e864 */
    2.58551553997676819563e-05F,   /* 0x37d8e386 */
    1.82868374395184218884e-04F,   /* 0x393fc05b */
    4.64625991298817098141e-05F,   /* 0x3842e0d6 */
    1.05703387816902250051e-04F,   /* 0x38ddad13 */
    1.17213814519345760345e-04F,   /* 0x38f5d0b0 */
    8.17377731436863541603e-05F,   /* 0x38ab6aa2 */
    0.00000000000000000000e+00F,   /* 0x00000000 */
    1.16847433673683553934e-04F,   /* 0x38f50bfd */
    1.88827965757809579372e-04F,   /* 0x3946001f */
    2.16612941585481166840e-04F,   /* 0x39632298 */
    2.00857131858356297016e-04F,   /* 0x39529d2d */
    1.42199307447299361229e-04F,   /* 0x39151b56 */
    4.12627305195201188326e-05F,   /* 0x382d1185 */
    1.42796401632949709892e-04F,   /* 0x3915bb9e */
    2.03253570361994206905e-04F,   /* 0x39552077 */
    2.23214170546270906925e-04F,   /* 0x396a0e99 */
    2.03244591830298304558e-04F,   /* 0x39551e0e */
    1.43898156238719820976e-04F,   /* 0x3916e35e */
    4.57155256299301981926e-05F,   /* 0x383fbeac */
    1.53365719597786664963e-04F,   /* 0x3920d0cc */
    2.23224633373320102692e-04F,   /* 0x396a1168 */
    1.16566716314991936088e-05F,   /* 0x37439106 */
    7.43694272387074306607e-06F,   /* 0x36f98ada */
    2.11048507480882108212e-04F,   /* 0x395d4ce7 */
    1.34682719362899661064e-04F,   /* 0x390d399e */
    2.29425968427676707506e-05F,   /* 0x37c074da */
    1.20421340398024767637e-04F,   /* 0x38fc8ab7 */
    1.83421318070031702518e-04F,   /* 0x394054c9 */
    2.12376224226318299770e-04F,   /* 0x395eb14f */
    2.07710763788782060146e-04F,   /* 0x3959ccef */
    1.69840845046564936638e-04F,   /* 0x3932174e */
    9.91739216260612010956e-05F,   /* 0x38cffb98 */
    2.40249748458154499531e-04F,   /* 0x397beb8d */
    1.05178231024183332920e-04F,   /* 0x38dc9322 */
    1.82623916771262884140e-04F,   /* 0x393f7ebc */
    2.28821940254420042038e-04F,   /* 0x396fefec */
    0.00000000000000000000e+00F};  /* 0x00000000 */


  /* Handle special arguments first */

  GET_BITS_SP32(x, ux);
  ax = ux & (~SIGNBIT_SP32);

  if(ax >= 0x7f800000)
    {
      /* x is either NaN or infinity */
      if (ux & MANTBITS_SP32)
        /* x is NaN */
        return x + x; /* Raise invalid if it is a signalling NaN */
      else if (ux & SIGNBIT_SP32)
        /* x is negative infinity */
        return nanf_with_flags(AMD_F_INVALID);
      else
        /* x is positive infinity */
        return x;
    }
  else if (ux & SIGNBIT_SP32)
    {
      /* x is negative. */
      /* The bit pattern is compared, as sqrt_amd_inline does, so that
         the test does not depend on how the floating-point comparison
         treats denormal operands. */
      if (ux == SIGNBIT_SP32)
        /* Handle negative zero first */
        return x;
      else
        return nanf_with_flags(AMD_F_INVALID);
    }
  else if (ux <= 0x007fffff)
    {
      /* x is denormalised or zero */
      if (ux == 0)
        /* x is zero */
        return x;
      else
        {
          /* x is denormalised; scale it up */
          /* Normalize x by increasing the exponent by 26
             and subtracting a correction to account for the implicit
             bit. This replaces a slow denormalized
             multiplication by a fast normal subtraction. */
          static const float corr = 7.888609052210118054e-31F; /* 0x0d800000 */
          denorm = 1;
          /* ux still holds the bit pattern of x read above, so it is
             used directly rather than being fetched a second time. */
          PUT_BITS_SP32(ux | 0x0d800000, x);
          x -= corr;
          GET_BITS_SP32(x, ux);
        }
    }

  /* Main algorithm */

  /*
     Find y and e such that x = 2^e * y, where y in [1,4).
     This is done using an in-lined variant of splitFloat,
     which also ensures that e is even.
   */
  y = x;
  ux &= EXPBITS_SP32;
  ux >>= EXPSHIFTBITS_SP32;
  /* Keep the sign and mantissa of x; only the exponent field of y is
     replaced, with a value chosen by the parity of the biased exponent. */
  GET_BITS_SP32(y, u);
  u &= (SIGNBIT_SP32 | MANTBITS_SP32);
  if (ux & 1)
    {
      u |= ONEEXPBITS_SP32;
      e = ux - EXPBIAS_SP32;
    }
  else
    {
      /* One extra power of two stays in y, so e is one smaller. */
      u |= TWOEXPBITS_SP32;
      e = ux - EXPBIAS_SP32 - 1;
    }
  PUT_BITS_SP32(u, y);

  /* Find the index of the sub-interval of [1,4) in which y lies. */

  index = (int)(32.0F*y+0.5);

  /* Look up the table values and compute c and r = t/c,
     where t = y - c. */

  rtc_lead = rt_jby32_lead_table_float[index-32];
  rtc_trail = rt_jby32_trail_table_float[index-32];
  c = 0.03125F*index;
  r = (y - c)/c;

  /*
  Find q = sqrt(1+r) - 1.
  From one step of Newton on (q+1)^2 = 1+r
  */

  p = r*0.5F - r*r*(0.1250079870F - r*(0.6250522999e-01F));
  twop = p + p;
  /* The 2.0 literal is a double, so the division and the final
     subtraction are evaluated in double before being stored to q.
     It is left as is to keep results bit-identical. */
  q = p - (p*p + (twop - r))/(twop + 2.0);

  /* Reconstruction */

  rtc = rtc_lead + rtc_trail;
  e >>= 1; /* e = e/2 */
  z = rtc_lead + (rtc*q+rtc_trail);

  /* A denormal input was scaled up by 2**26 above, so its square root
     must be scaled back down by 2**13. */
  if (denorm)
    e -= 13;

  /* Build the power of two 2**e directly from its bit pattern
     (r is reused as scratch) and apply it. */
  PUT_BITS_SP32((e + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, r);
  z *= r;

  return z;

}
#endif /* SQRTF_AMD_INLINE */

#ifdef USE_LOG_KERNEL_AMD
/* Kernel shared by the natural-logarithm family.

   Inputs:
     x    - the argument.
     ux   - used below as the 64-bit IEEE bit pattern of x (it is compared
            against bit-pattern thresholds and has its exponent and
            mantissa fields extracted); presumably the caller obtained it
            with GET_BITS_DP64(x, ux) - confirm at the call sites.
   Outputs:
     *xexp - power-of-two exponent M (0 when x is close to 1.0).
     *r1   - leading part of the result.
     *r2   - trailing (small) part of the result.
   The outputs satisfy  log(x) ~= (*xexp)*log(2) + *r1 + *r2.

   NOTE(review): nothing here checks for zero, negative, infinite or NaN
   arguments; those are presumably filtered by the caller - confirm. */
static inline void log_kernel_amd64(double x, unsigned long ux, int *xexp, double *r1, double *r2)
{

  int expadjust;
  double r, z1, z2, correction, f, f1, f2, q, u, v, poly;
  int index;

  /*
    Computes natural log(x). Algorithm based on:
    Ping-Tak Peter Tang
    "Table-driven implementation of the logarithm function in IEEE
    floating-point arithmetic"
    ACM Transactions on Mathematical Software (TOMS)
    Volume 16, Issue 4 (December 1990)
  */

/* Arrays ln_lead_table and ln_tail_table contain
   leading and trailing parts respectively of precomputed
   values of natural log(1+i/64), for i = 0, 1, ..., 64.
   ln_lead_table contains the first 24 bits of precision,
   and ln_tail_table contains a further 53 bits precision.
   Both tables are indexed below with i = index - 64. */

  static const double ln_lead_table[65] = {
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    1.55041813850402832031e-02,   /* 0x3f8fc0a800000000 */
    3.07716131210327148438e-02,   /* 0x3f9f829800000000 */
    4.58095073699951171875e-02,   /* 0x3fa7745800000000 */
    6.06245994567871093750e-02,   /* 0x3faf0a3000000000 */
    7.52233862876892089844e-02,   /* 0x3fb341d700000000 */
    8.96121263504028320312e-02,   /* 0x3fb6f0d200000000 */
    1.03796780109405517578e-01,   /* 0x3fba926d00000000 */
    1.17783010005950927734e-01,   /* 0x3fbe270700000000 */
    1.31576299667358398438e-01,   /* 0x3fc0d77e00000000 */
    1.45181953907012939453e-01,   /* 0x3fc2955280000000 */
    1.58604979515075683594e-01,   /* 0x3fc44d2b00000000 */
    1.71850204467773437500e-01,   /* 0x3fc5ff3000000000 */
    1.84922337532043457031e-01,   /* 0x3fc7ab8900000000 */
    1.97825729846954345703e-01,   /* 0x3fc9525a80000000 */
    2.10564732551574707031e-01,   /* 0x3fcaf3c900000000 */
    2.23143517971038818359e-01,   /* 0x3fcc8ff780000000 */
    2.35566020011901855469e-01,   /* 0x3fce270700000000 */
    2.47836112976074218750e-01,   /* 0x3fcfb91800000000 */
    2.59957492351531982422e-01,   /* 0x3fd0a324c0000000 */
    2.71933674812316894531e-01,   /* 0x3fd1675c80000000 */
    2.83768117427825927734e-01,   /* 0x3fd22941c0000000 */
    2.95464158058166503906e-01,   /* 0x3fd2e8e280000000 */
    3.07025015354156494141e-01,   /* 0x3fd3a64c40000000 */
    3.18453729152679443359e-01,   /* 0x3fd4618bc0000000 */
    3.29753279685974121094e-01,   /* 0x3fd51aad80000000 */
    3.40926527976989746094e-01,   /* 0x3fd5d1bd80000000 */
    3.51976394653320312500e-01,   /* 0x3fd686c800000000 */
    3.62905442714691162109e-01,   /* 0x3fd739d7c0000000 */
    3.73716354370117187500e-01,   /* 0x3fd7eaf800000000 */
    3.84411692619323730469e-01,   /* 0x3fd89a3380000000 */
    3.94993782043457031250e-01,   /* 0x3fd9479400000000 */
    4.05465066432952880859e-01,   /* 0x3fd9f323c0000000 */
    4.15827870368957519531e-01,   /* 0x3fda9cec80000000 */
    4.26084339618682861328e-01,   /* 0x3fdb44f740000000 */
    4.36236739158630371094e-01,   /* 0x3fdbeb4d80000000 */
    4.46287095546722412109e-01,   /* 0x3fdc8ff7c0000000 */
    4.56237375736236572266e-01,   /* 0x3fdd32fe40000000 */
    4.66089725494384765625e-01,   /* 0x3fddd46a00000000 */
    4.75845873355865478516e-01,   /* 0x3fde744240000000 */
    4.85507786273956298828e-01,   /* 0x3fdf128f40000000 */
    4.95077252388000488281e-01,   /* 0x3fdfaf5880000000 */
    5.04556000232696533203e-01,   /* 0x3fe02552a0000000 */
    5.13945698738098144531e-01,   /* 0x3fe0723e40000000 */
    5.23248136043548583984e-01,   /* 0x3fe0be72e0000000 */
    5.32464742660522460938e-01,   /* 0x3fe109f380000000 */
    5.41597247123718261719e-01,   /* 0x3fe154c3c0000000 */
    5.50647079944610595703e-01,   /* 0x3fe19ee6a0000000 */
    5.59615731239318847656e-01,   /* 0x3fe1e85f40000000 */
    5.68504691123962402344e-01,   /* 0x3fe23130c0000000 */
    5.77315330505371093750e-01,   /* 0x3fe2795e00000000 */
    5.86049020290374755859e-01,   /* 0x3fe2c0e9e0000000 */
    5.94707071781158447266e-01,   /* 0x3fe307d720000000 */
    6.03290796279907226562e-01,   /* 0x3fe34e2880000000 */
    6.11801505088806152344e-01,   /* 0x3fe393e0c0000000 */
    6.20240390300750732422e-01,   /* 0x3fe3d90260000000 */
    6.28608644008636474609e-01,   /* 0x3fe41d8fe0000000 */
    6.36907458305358886719e-01,   /* 0x3fe4618bc0000000 */
    6.45137906074523925781e-01,   /* 0x3fe4a4f840000000 */
    6.53301239013671875000e-01,   /* 0x3fe4e7d800000000 */
    6.61398470401763916016e-01,   /* 0x3fe52a2d20000000 */
    6.69430613517761230469e-01,   /* 0x3fe56bf9c0000000 */
    6.77398800849914550781e-01,   /* 0x3fe5ad4040000000 */
    6.85303986072540283203e-01,   /* 0x3fe5ee02a0000000 */
    6.93147122859954833984e-01};  /* 0x3fe62e42e0000000 */

  static const double ln_tail_table[65] = {
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    5.15092497094772879206e-09,   /* 0x3e361f807c79f3db */
    4.55457209735272790188e-08,   /* 0x3e6873c1980267c8 */
    2.86612990859791781788e-08,   /* 0x3e5ec65b9f88c69e */
    2.23596477332056055352e-08,   /* 0x3e58022c54cc2f99 */
    3.49498983167142274770e-08,   /* 0x3e62c37a3a125330 */
    3.23392843005887000414e-08,   /* 0x3e615cad69737c93 */
    1.35722380472479366661e-08,   /* 0x3e4d256ab1b285e9 */
    2.56504325268044191098e-08,   /* 0x3e5b8abcb97a7aa2 */
    5.81213608741512136843e-08,   /* 0x3e6f34239659a5dc */
    5.59374849578288093334e-08,   /* 0x3e6e07fd48d30177 */
    5.06615629004996189970e-08,   /* 0x3e6b32df4799f4f6 */
    5.24588857848400955725e-08,   /* 0x3e6c29e4f4f21cf8 */
    9.61968535632653505972e-10,   /* 0x3e1086c848df1b59 */
    1.34829655346594463137e-08,   /* 0x3e4cf456b4764130 */
    3.65557749306383026498e-08,   /* 0x3e63a02ffcb63398 */
    3.33431709374069198903e-08,   /* 0x3e61e6a6886b0976 */
    5.13008650536088382197e-08,   /* 0x3e6b8abcb97a7aa2 */
    5.09285070380306053751e-08,   /* 0x3e6b578f8aa35552 */
    3.20853940845502057341e-08,   /* 0x3e6139c871afb9fc */
    4.06713248643004200446e-08,   /* 0x3e65d5d30701ce64 */
    5.57028186706125221168e-08,   /* 0x3e6de7bcb2d12142 */
    5.48356693724804282546e-08,   /* 0x3e6d708e984e1664 */
    1.99407553679345001938e-08,   /* 0x3e556945e9c72f36 */
    1.96585517245087232086e-09,   /* 0x3e20e2f613e85bda */
    6.68649386072067321503e-09,   /* 0x3e3cb7e0b42724f6 */
    5.89936034642113390002e-08,   /* 0x3e6fac04e52846c7 */
    2.85038578721554472484e-08,   /* 0x3e5e9b14aec442be */
    5.09746772910284482606e-08,   /* 0x3e6b5de8034e7126 */
    5.54234668933210171467e-08,   /* 0x3e6dc157e1b259d3 */
    6.29100830926604004874e-09,   /* 0x3e3b05096ad69c62 */
    2.61974119468563937716e-08,   /* 0x3e5c2116faba4cdd */
    4.16752115011186398935e-08,   /* 0x3e665fcc25f95b47 */
    2.47747534460820790327e-08,   /* 0x3e5a9a08498d4850 */
    5.56922172017964209793e-08,   /* 0x3e6de647b1465f77 */
    2.76162876992552906035e-08,   /* 0x3e5da71b7bf7861d */
    7.08169709942321478061e-09,   /* 0x3e3e6a6886b09760 */
    5.77453510221151779025e-08,   /* 0x3e6f0075eab0ef64 */
    4.43021445893361960146e-09,   /* 0x3e33071282fb989b */
    3.15140984357495864573e-08,   /* 0x3e60eb43c3f1bed2 */
    2.95077445089736670973e-08,   /* 0x3e5faf06ecb35c84 */
    1.44098510263167149349e-08,   /* 0x3e4ef1e63db35f68 */
    1.05196987538551827693e-08,   /* 0x3e469743fb1a71a5 */
    5.23641361722697546261e-08,   /* 0x3e6c1cdf404e5796 */
    7.72099925253243069458e-09,   /* 0x3e4094aa0ada625e */
    5.62089493829364197156e-08,   /* 0x3e6e2d4c96fde3ec */
    3.53090261098577946927e-08,   /* 0x3e62f4d5e9a98f34 */
    3.80080516835568242269e-08,   /* 0x3e6467c96ecc5cbe */
    5.66961038386146408282e-08,   /* 0x3e6e7040d03dec5a */
    4.42287063097349852717e-08,   /* 0x3e67bebf4282de36 */
    3.45294525105681104660e-08,   /* 0x3e6289b11aeb783f */
    2.47132034530447431509e-08,   /* 0x3e5a891d1772f538 */
    3.59655343422487209774e-08,   /* 0x3e634f10be1fb591 */
    5.51581770357780862071e-08,   /* 0x3e6d9ce1d316eb93 */
    3.60171867511861372793e-08,   /* 0x3e63562a19a9c442 */
    1.94511067964296180547e-08,   /* 0x3e54e2adf548084c */
    1.54137376631349347838e-08,   /* 0x3e508ce55cc8c97a */
    3.93171034490174464173e-09,   /* 0x3e30e2f613e85bda */
    5.52990607758839766440e-08,   /* 0x3e6db03ebb0227bf */
    3.29990737637586136511e-08,   /* 0x3e61b75bb09cb098 */
    1.18436010922446096216e-08,   /* 0x3e496f16abb9df22 */
    4.04248680368301346709e-08,   /* 0x3e65b3f399411c62 */
    2.27418915900284316293e-08,   /* 0x3e586b3e59f65355 */
    1.70263791333409206020e-08,   /* 0x3e52482ceae1ac12 */
    5.76999904754328540596e-08};  /* 0x3e6efa39ef35793c */

  /* Approximating polynomial coefficients for x near 1.0 */
  static const double
    ca_1 = 8.33333333333317923934e-02,  /* 0x3fb55555555554e6 */
    ca_2 = 1.25000000037717509602e-02,  /* 0x3f89999999bac6d4 */
    ca_3 = 2.23213998791944806202e-03,  /* 0x3f62492307f1519f */
    ca_4 = 4.34887777707614552256e-04;  /* 0x3f3c8034c85dfff0 */

  /* Approximating polynomial coefficients for other x */
  static const double
    cb_1 = 8.33333333333333593622e-02,  /* 0x3fb5555555555557 */
    cb_2 = 1.24999999978138668903e-02,  /* 0x3f89999999865ede */
    cb_3 = 2.23219810758559851206e-03;  /* 0x3f6249423bd94741 */

  /* Bit patterns bounding the "x close to 1.0" interval; they are
     compared directly against ux rather than against x. */
  static const unsigned long
    log_thresh1 = 0x3fee0faa00000000,
    log_thresh2 = 0x3ff1082c00000000;

  /* log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000
     log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000 */
  if (ux >= log_thresh1 && ux <= log_thresh2)
    {
      /* Arguments close to 1.0 are handled separately to maintain
         accuracy.

         The approximation in this region exploits the identity
             log( 1 + r ) = log( 1 + u/2 )  -  log( 1 - u/2 ), where
             u  = 2r / (2+r).
         Note that the right hand side has an odd Taylor series expansion
         which converges much faster than the Taylor series expansion of
         log( 1 + r ) in r. Thus, we approximate log( 1 + r ) by
             u + A1 * u^3 + A2 * u^5 + ... + An * u^(2n+1).

         One subtlety is that since u cannot be calculated from
         r exactly, the rounding error in the first u should be
         avoided if possible. To accomplish this, we observe that
                       u  =  r  -  r*r/(2+r).
         Since x (=1+r) is the input argument, and thus presumed exact,
         the formula above approximates u accurately because
                       u  =  r  -  correction,
         and the magnitude of "correction" (of the order of r*r)
         is small.
         With these observations, we will approximate log( 1 + r ) by
            r + (  (A1*u^3 + ... + An*u^(2n+1)) - correction ).

         We approximate log(1+r) by an odd polynomial in u, where
                  u = 2r/(2+r) = r - r*r/(2+r).
      */
      r = x - 1.0;
      /* u temporarily holds r/(2+r), i.e. half of the final u */
      u = r / (2.0 + r);
      correction = r * u;
      u = u + u;
      v = u * u;
      /* Leading part is r itself; everything else goes in the tail */
      z1 = r;
      z2 = (u * v * (ca_1 + v * (ca_2 + v * (ca_3 + v * ca_4))) - correction);
      *r1 = z1;
      *r2 = z2;
      *xexp = 0;
    }
  else
    {
      /*
        First, we decompose the argument x to the form
        x  =  2**M  *  (F1  +  F2),
        where  1 <= F1+F2 < 2, M has the value of an integer,
        F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128.

        Second, we approximate log( 1 + F2/F1 ) by an odd polynomial
        in U, where U  =  2 F2 / (2 F1 + F2).
        Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ).
        The core approximation calculates
        Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U   -   1.
        Note that  log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ),
        thus, Poly =  2 arctanh( U/2 ) / U  -  1.

        It is not hard to see that
          log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ).
        Hence, we return Z1 = log(F1), and  Z2 = log( 1 + F2/F1).
        The values of log(F1) are calculated beforehand and stored
        in the program.

        The code below works with f = (F1 + F2)/2 in [0.5,1) and
        f1 = F1/2, f2 = F2/2; the ratio f2/f1 equals F2/F1, so the
        polynomial argument is unaffected by the halving.
      */

      f = x;
      if (ux < IMPBIT_DP64)
        {
          /* The input argument x is denormalized */
          /* Normalize f by increasing the exponent by 60
             and subtracting a correction to account for the implicit
             bit. This replaces a slow denormalized
             multiplication by a fast normal subtraction. */
          static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */
          GET_BITS_DP64(f, ux);
          ux |= 0x03d0000000000000;
          PUT_BITS_DP64(ux, f);
          f -= corr;
          GET_BITS_DP64(f, ux);
          expadjust = 60;
        }
      else
        expadjust = 0;

      /* Store the exponent of x in xexp and put
         f into the range [0.5,1) */
      *xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust;
      PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f);

      /* Now  x = 2**(xexp+1)  * f,  1/2 <= f < 1
         (f carries the mantissa of x with the exponent of 0.5). */

      /* Set index to be the nearest integer to 128*f */
      r = 128.0 * f;
      index = (int)(r + 0.5);

      /* index lies in 64..128, so index-64 selects log(1 + (index-64)/64),
         which equals log(2*f1) for the f1 computed below */
      z1 = ln_lead_table[index-64];
      q = ln_tail_table[index-64];
      f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
      f2 = f - f1;
      /* At this point, x = 2**(xexp+1) * ( f1  +  f2 ) where
         f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */

      /* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5*f2 ) */
      /* u = f2 / (f1 + 0.5 * f2); */
      u = f2 / (f1 + 0.5 * f2);

      /* Here, |u| <= 2(exp(1/16)-1) / (exp(1/16)+1).
         The core approximation calculates
         poly = [log(1 + u/2) - log(1 - u/2)]/u  -  1  */
      v = u * u;
      poly = (v * (cb_1 + v * (cb_2 + v * cb_3)));
      /* Tail = low part of the table value plus log(1 + f2/f1) */
      z2 = q + (u + u * poly);
      *r1 = z1;
      *r2 = z2;
    }
  return;
}
#endif /* USE_LOG_KERNEL_AMD */

#if defined(USE_REMAINDER_PIBY2F_INLINE)
/* Define this to get debugging print statements activated */
#define DEBUGGING_PRINT
#undef DEBUGGING_PRINT


#ifdef DEBUGGING_PRINT
#include <stdio.h>
/* Debugging helper: render the low `bitsper` bits of d as a string of
   '0'/'1' characters, most significant bit first.

   d       - value to print; treated as a raw bit pattern, so negative
             values show their two's-complement bits.
   bitsper - number of bits to print; clamped to the range that fits
             in the internal buffer.
   point   - number of digits that appear to the left of a '.' inserted
             into the output (0 puts the '.' first, bitsper puts it
             last); any value outside 0..bitsper means "no point".

   Returns a pointer to a static buffer that is overwritten by the next
   call, so the function is not reentrant and the result must be
   consumed (e.g. printed) before d2b is called again. */
char *d2b(long d, int bitsper, int point)
{
  static char buff[200];
  /* Work on an unsigned copy: with a signed value, "d % 2 == 1" is
     false for negative odd numbers and "d /= 2" rounds toward zero,
     so negative inputs would be printed as all zeros. */
  unsigned long bits = (unsigned long)d;
  int i, j;

  /* Keep digits + optional '.' + terminator inside buff */
  if (bitsper < 0)
    bitsper = 0;
  if (bitsper > (int)sizeof(buff) - 2)
    bitsper = (int)sizeof(buff) - 2;

  j = bitsper;
  if (point >= 0 && point <= bitsper)
    j++;
  buff[j] = '\0';

  /* A point that follows every digit is never met inside the loop
     (which places the '.' in front of digit number `point`), so emit
     it here; otherwise buff[0] would be left unwritten. */
  if (point == bitsper)
    {
      j--;
      buff[j] = '.';
    }

  /* Fill from the least significant digit backwards */
  for (i = bitsper - 1; i >= 0; i--)
    {
      j--;
      buff[j] = (bits & 1UL) ? '1' : '0';
      if (i == point)
        {
          j--;
          buff[j] = '.';
        }
      bits >>= 1;
    }
  return buff;
}
#endif

/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using
   extra precision, and return the result in r.
   Return value "region" tells how many lots of pi/2 were subtracted
   from x to put it in the range [-pi/4,pi/4], mod 4.

   x      - argument to reduce.
   ux     - used below as the 64-bit IEEE bit pattern of x (its exponent
            and mantissa fields are extracted from it); presumably the
            caller obtained it with GET_BITS_DP64(x, ux) - confirm.
   r      - receives the reduced argument.
   region - receives the quadrant count, 0..3.

   Three strategies are tried in order of increasing cost:
     1. x <= 11*pi/4: subtract the appropriate multiple of pi/2 directly;
     2. x <= 1.0e6:   multiply by 2/pi, remove the nearest integer and
                      scale the remainder back by pi/2;
     3. otherwise, or whenever 1 or 2 suffers heavy cancellation:
        multiply the integer mantissa of x by the relevant 36-bit chunks
        of 2/pi and rebuild the fractional part by hand.

   NOTE(review): strategy 3 reads pibits[first .. first+3] from a table
   of 13 entries, so it needs the binary exponent of x to be at most 359.
   That holds for any finite value converted from a float; NaN, infinity
   and very large doubles are assumed to be filtered out by the caller -
   confirm at the call sites. */
static inline void __remainder_piby2f_inline(double x, unsigned long ux, double *r, int *region)
{

  /* eleven_piby4 is the closest machine number BELOW 11*pi/4 */
  static const double
    eleven_piby4 = 8.6393797973719301808159e+00; /* 0x4021475cc9eedf00 */

  static const double
    piby2 = 1.57079632679489655800e+00, /* 0x3ff921fb54442d18 */
    twobypi = 6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */
    pi = 3.14159265358979311600e+00, /* 0x400921fb54442d18 */
    three_piby2 = 4.71238898038468967400e+00, /* 0x4012d97c7f3321d2 */
    two_pi = 6.28318530717958623200e+00, /* 0x401921fb54442d18 */
    five_piby2 = 7.85398163397448278999e+00; /* 0x401f6a7a2955385e */

  /* Each of these threshold values is the closest machine
     number BELOW a multiple of pi/4, i.e. they are not
     rounded to nearest. thresh1 is 1*pi/4, thresh3 is 3*pi/4, etc.
     This ensures that we end up in precisely the correct region. */
  static const double
    thresh1 = 7.8539816339744827899949e-01, /* 0x3fe921fb54442d18 */
    thresh3 = 2.3561944901923448369984e+00, /* 0x4002d97c7f3321d2 */
    thresh5 = 3.9269908169872413949974e+00, /* 0x400f6a7a2955385e */
    thresh7 = 5.4977871437821379529964e+00, /* 0x4015fdbbe9bba775 */
    thresh9 = 7.0685834705770345109954e+00; /* 0x401c463abeccb2bb */

  /* A cheap result whose magnitude is not above this value is discarded
     and recomputed with the expensive method */
  static const double cancellationThresh = 1.0e-5;
  int done = 0;

  /* For small values of x, up to 11*pi/4, we do double precision
     subtraction of the relevant multiple of pi/2 */
  if (x <= eleven_piby4) /* x <= 11*pi/4 */
    {
      double t, ctest;

      if (x <= thresh5) /* x < 5*pi/4 */
        {
          if (x <= thresh1) /* x < pi/4 */
            {
              /* Quick return if x is already less than pi/4 */
              *r = x;
              *region = 0;
              return;
            }
          else if (x <= thresh3) /* x < 3*pi/4 */
            {
              t = x - piby2;
              *region = 1;
            }
          else /* x < 5*pi/4 */
            {
              t = x - pi;
              *region = 2;
            }
        }
      else
        {
          if (x <= thresh7) /* x < 7*pi/4 */
            {
              t = x - three_piby2;
              *region = 3;
            }
          else if (x <= thresh9) /* x < 9*pi/4 */
            {
              t = x - two_pi;
              *region = 0;
            }
          else /* x < 11*pi/4 */
            {
              t = x - five_piby2;
              *region = 1;
            }
        }

      /* Check for massive cancellation which may happen very close
         to multiples of pi/2 */
      if (t < 0.0)
        ctest = -t;
      else
        ctest = t;
#ifdef DEBUGGING_PRINT
      printf("Cancellation threshold test = (%g > %g)\n",
             ctest, cancellationThresh);
#endif

      /* Check if cancellation error was not too large */
      if (ctest > cancellationThresh)
        {
          *r = t;
          done = 1;
        }
      /* Otherwise fall through to the expensive method, which
         overwrites the *region value stored above */
    }
  else if (x <= 1.0e6)
    {
      /* This range reduction is accurate enough for x up to
         approximately 2**(20) except near multiples of pi/2 */

      /* We perform double precision arithmetic to find the
         nearest multiple of pi/2 to x */
      int reg;
      double z, w, c, ctest;

      /* Multiply x by 2/pi in double precision, result in z */
      z = x * twobypi;

#ifdef DEBUGGING_PRINT
      printf("z = %30.20e = %s\n", z, double2hex(&z));
#endif

      /* Find reg, the nearest integer to z */
      reg = (int)(z + 0.5);

#ifdef DEBUGGING_PRINT
      printf("reg = %d\n", reg);
#endif

      /* Subtract reg from z, result in w */
      w = z - reg;

#ifdef DEBUGGING_PRINT
      printf("w = %30.20e = %s\n", w, double2hex(&w));
#endif

     /* Check for massive cancellation which may happen very close
        to multiples of pi/2 */
      if (w < 0.0)
        ctest = -w;
      else
        ctest = w;

      /* If cancellation is not too severe, continue with this method.
         Otherwise we fall through to the expensive, accurate method */
      if (ctest > cancellationThresh)
        {
          /* Multiply w by pi/2 */
          c = w * piby2;
          *r = c;
          *region = reg & 3;

#ifdef DEBUGGING_PRINT
          printf("r = %30.20e = %s\n", *r, double2hex(r));
#endif
          done = 1;
        }
    }

  if (!done)
    {
      /* This method simulates multi-precision floating-point
         arithmetic and is accurate for all 1 <= x < infinity */

      /* Number of bits of 2/pi stored in each element of pibits[].
         This is a block-scoped enumeration constant rather than a
         #define so that no macro escapes from this header into the
         files that include it; it is still an integer constant
         expression, so the shifts below fold at compile time. */
      enum { chunk_bits = 36 };
      unsigned long res[10];
      unsigned long u, carry, mask, mant, nextbits;
      int first, last, i, rexp, xexp, resexp, ltb, determ, bc;
      double dx;
      /* Successive 36-bit chunks of the binary expansion of 2/pi */
      static const unsigned long pibits[] =
      {
        0L,
        5215L, 13000023176L, 11362338026L, 67174558139L,
        34819822259L, 10612056195L, 67816420731L, 57840157550L,
        19558516809L, 50025467026L, 25186875954L, 18152700886L
      };

#ifdef DEBUGGING_PRINT
      printf("On entry, x = %25.20e = %s\n", x, double2hex(&x));
#endif

      /* Unbiased exponent of x, and the top 24 bits of its mantissa
         (implicit bit included) as an integer */
      xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
      ux = ((ux & MANTBITS_DP64) | IMPBIT_DP64) >> 29;

#ifdef DEBUGGING_PRINT
      printf("ux = %s\n", d2b(ux, 64, -1));
#endif

      /* Now ux is the mantissa bit pattern of x as a long integer */
      mask = (1L << chunk_bits) - 1;

      /* Set first and last to the positions of the first
         and last chunks of 2/pi that we need */
      first = xexp / chunk_bits;
      resexp = xexp - first * chunk_bits;
      /* 120 is the theoretical maximum number of bits (actually
         115 for IEEE single precision) that we need to extract
         from the middle of 2/pi to compute the reduced argument
         accurately enough for our purposes */
      last = first + 120 / chunk_bits;

#ifdef DEBUGGING_PRINT
      printf("first = %d, last = %d\n", first, last);
#endif

      /* Do a long multiplication of the bits of 2/pi by the
         integer mantissa, least significant chunk first, carrying
         the overflow of each partial product into the next one.
         The loop over the four chunks last .. first is written out
         by hand; this is only correct because the chunk width is
         fixed at 36, which makes last - first == 3. */
      res[4] = 0;
      u = pibits[last] * ux;
      res[3] = u & mask;
      carry = u >> chunk_bits;
      u = pibits[last - 1] * ux + carry;
      res[2] = u & mask;
      carry = u >> chunk_bits;
      u = pibits[last - 2] * ux + carry;
      res[1] = u & mask;
      carry = u >> chunk_bits;
      u = pibits[first] * ux + carry;
      res[0] = u & mask;

#ifdef DEBUGGING_PRINT
      printf("resexp = %d\n", resexp);
      printf("Significant part of x * 2/pi with binary"
             " point in correct place:\n");
      for (i = 0; i <= last - first; i++)
        {
          if (i > 0 && i % 5 == 0)
            printf("\n ");
          if (i == 1)
            printf("%s ", d2b(res[i], chunk_bits, resexp));
          else
            printf("%s ", d2b(res[i], chunk_bits, -1));
        }
      printf("\n");
#endif

      /* Reconstruct the result */
      /* ltb = the last two bits before the binary point of x * 2/pi
         followed by the first bit after it */
      ltb = (int)((((res[0] << chunk_bits) | res[1])
                   >> (chunk_bits - 1 - resexp)) & 7);

      /* determ says whether the fractional part is >= 0.5 */
      determ = ltb & 1;

#ifdef DEBUGGING_PRINT
      printf("ltb = %d (last two bits before binary point"
             " and first bit after)\n", ltb);
      printf("determ = %d (1 means need to negate because the fractional\n"
             "            part of x * 2/pi is greater than 0.5)\n", determ);
#endif

      i = 1;
      if (determ)
        {
          /* The mantissa is >= 0.5. We want to subtract it
             from 1.0 by negating all the bits */
          *region = ((ltb >> 1) + 1) & 3;
          mant = ~(res[1]) & ((1L << (chunk_bits - resexp)) - 1);
          /* Pull in further chunks until some significant bits appear */
          while (mant < 0x0000000000010000)
            {
              i++;
              mant = (mant << chunk_bits) | (~(res[i]) & mask);
            }
          nextbits = (~(res[i+1]) & mask);
        }
      else
        {
          *region = (ltb >> 1);
          mant = res[1] & ((1L << (chunk_bits - resexp)) - 1);
          /* Pull in further chunks until some significant bits appear */
          while (mant < 0x0000000000010000)
            {
              i++;
              mant = (mant << chunk_bits) | res[i];
            }
          nextbits = res[i+1];
        }

#ifdef DEBUGGING_PRINT
      printf("First bits of mant = %s\n", d2b(mant, chunk_bits, -1));
#endif

      /* Normalize the mantissa. The shift value 6 here, determined by
         trial and error, seems to give optimal speed. */
      bc = 0;
      while (mant < 0x0000400000000000)
        {
          bc += 6;
          mant <<= 6;
        }
      while (mant < 0x0010000000000000)
        {
          bc++;
          mant <<= 1;
        }
      /* Fill the bc vacated low bits from the following chunk */
      mant |= nextbits >> (chunk_bits - bc);

      rexp = 52 + resexp - bc - i * chunk_bits;

#ifdef DEBUGGING_PRINT
      printf("Normalised mantissa = 0x%016lx\n", mant);
      printf("Exponent to be inserted on mantissa = rexp = %d\n", rexp);
#endif

      /* Put the result exponent rexp onto the mantissa pattern */
      u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64;
      ux = (mant & MANTBITS_DP64) | u;
      if (determ)
        /* If we negated the mantissa we negate x too */
        ux |= SIGNBIT_DP64;
      PUT_BITS_DP64(ux, dx);

#ifdef DEBUGGING_PRINT
      printf("(x*2/pi) = %25.20e = %s\n", dx, double2hex(&dx));
#endif

      /* dx is a double precision version of the fractional part of
         x * 2 / pi. Multiply dx by pi/2 in double precision
         to get the reduced argument r. */
      *r = dx * piby2;

#ifdef DEBUGGING_PRINT
      printf(" r = frac(x*2/pi) * pi/2:\n");
      printf(" r = %25.20e = %s\n", *r, double2hex(r));
      printf("region = (number of pi/2 subtracted from x) mod 4 = %d\n",
             *region);
#endif
    }
}
#endif /* USE_REMAINDER_PIBY2F_INLINE */

#endif /* LIBM_INLINES_AMD_H_INCLUDED */




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#ifndef LIBM_UTIL_AMD_H_INCLUDED
#define LIBM_UTIL_AMD_H_INCLUDED 1

/* Compile-time verification that type long is the same size
   as type double (i.e. we are really on a 64-bit machine).
   Only the array bound of the parameter matters: it evaluates to 1
   when the sizes agree and to -1 otherwise, and a negative array size
   is rejected by the compiler. The union-based GET_BITS_DP64 and
   PUT_BITS_DP64 macros in this header overlay a double with an
   unsigned long and so depend on this property. */
void check_long_against_double_size(int machine_is_64_bit[(sizeof(long) == sizeof(double))?1:-1]); 


/* Definitions for double functions on 64 bit machines.
   The *BITS* and *BITPATT* values are masks/patterns over the 64-bit
   image of a double: 1 sign bit, 11 exponent bits, 52 mantissa bits. */
#define SIGNBIT_DP64      0x8000000000000000
#define EXPBITS_DP64      0x7ff0000000000000
#define MANTBITS_DP64     0x000fffffffffffff
#define ONEEXPBITS_DP64   0x3ff0000000000000  /* exponent field of 1.0 */
#define TWOEXPBITS_DP64   0x4000000000000000  /* exponent field of 2.0 */
#define HALFEXPBITS_DP64  0x3fe0000000000000  /* exponent field of 0.5 */
#define IMPBIT_DP64       0x0010000000000000  /* implicit leading mantissa bit */
#define QNANBITPATT_DP64  0x7ff8000000000000
#define PINFBITPATT_DP64  0x7ff0000000000000
#define NINFBITPATT_DP64  0xfff0000000000000
#define EXPBIAS_DP64      1023
#define EXPSHIFTBITS_DP64 52
#define BIASEDEMIN_DP64   1
/* Negative values are parenthesized so that the macro always expands
   to a single operand, whatever expression it is placed in */
#define EMIN_DP64         (-1022)
#define BIASEDEMAX_DP64   2046
#define EMAX_DP64         1023
#define LAMBDA_DP64       1.0e300
#define MANTLENGTH_DP64   53
#define BASEDIGITS_DP64   15


/* These definitions, used by float functions,
   are for both 32 and 64 bit machines.
   The *BITS* and *BITPATT* values are masks/patterns over the 32-bit
   image of a float: 1 sign bit, 8 exponent bits, 23 mantissa bits. */
#define SIGNBIT_SP32      0x80000000
#define EXPBITS_SP32      0x7f800000
#define MANTBITS_SP32     0x007fffff
#define ONEEXPBITS_SP32   0x3f800000  /* exponent field of 1.0F */
#define TWOEXPBITS_SP32   0x40000000  /* exponent field of 2.0F */
#define HALFEXPBITS_SP32  0x3f000000  /* exponent field of 0.5F */
#define IMPBIT_SP32       0x00800000  /* implicit leading mantissa bit */
#define QNANBITPATT_SP32  0x7fc00000
#define PINFBITPATT_SP32  0x7f800000
#define NINFBITPATT_SP32  0xff800000
#define EXPBIAS_SP32      127
#define EXPSHIFTBITS_SP32 23
#define BIASEDEMIN_SP32   1
#define EMIN_SP32         (-126)
#define BIASEDEMAX_SP32   254
#define EMAX_SP32         127
#define LAMBDA_SP32       1.0e30
#define MANTLENGTH_SP32   24
#define BASEDIGITS_SP32   7

/* Codes identifying the class of a floating-point value */
#define CLASS_SIGNALLING_NAN 1
#define CLASS_QUIET_NAN 2
#define CLASS_NEGATIVE_INFINITY 3
#define CLASS_NEGATIVE_NORMAL_NONZERO 4
#define CLASS_NEGATIVE_DENORMAL 5
#define CLASS_NEGATIVE_ZERO 6
#define CLASS_POSITIVE_ZERO 7
#define CLASS_POSITIVE_DENORMAL 8
#define CLASS_POSITIVE_NORMAL_NONZERO 9
#define CLASS_POSITIVE_INFINITY 10

/* Older accessors that reinterpret the storage of x through a pointer
   cast. The argument must be an lvalue. They are superseded by the
   union-based macros below. */
#define OLD_BITS_SP32(x) (*((unsigned int *)&(x)))
#define OLD_BITS_DP64(x) (*((unsigned long *)&(x)))

/* Alternatives to the above functions which don't have
   problems when using high optimization levels on gcc.

   GET_BITS_*(x, ux) stores the bit pattern of the floating-point
   value x in the integer lvalue ux; PUT_BITS_*(ux, x) stores the
   value whose bit pattern is ux in the floating-point lvalue x.

   Each macro is wrapped in do { ... } while (0) so that, followed by
   its semicolon, it forms exactly one statement and can be used as the
   unbraced body of an if/else. Every use must therefore be terminated
   with a semicolon. */
#define GET_BITS_SP32(x, ux) do {union {float f; unsigned int i;} _bitsy; _bitsy.f = (x); (ux) = _bitsy.i;} while (0)
#define PUT_BITS_SP32(ux, x) do {union {float f; unsigned int i;} _bitsy; _bitsy.i = (ux); (x) = _bitsy.f;} while (0)
#define GET_BITS_DP64(x, ux) do {union {double d; unsigned long i;} _bitsy; _bitsy.d = (x); (ux) = _bitsy.i;} while (0)
#define PUT_BITS_DP64(ux, x) do {union {double d; unsigned long i;} _bitsy; _bitsy.i = (ux); (x) = _bitsy.d;} while (0)


/* Processor-dependent floating-point status flags.
   Each value is a single bit so that several flags can be OR-ed
   together.
   NOTE(review): the bit positions look like the exception-flag bits of
   the x86 MXCSR register (IE=0x01, ZE=0x04, OE=0x08, UE=0x10, PE=0x20) -
   confirm against the code that reads/writes the status word. */
#define AMD_F_INEXACT 0x00000020
#define AMD_F_UNDERFLOW 0x00000010
#define AMD_F_OVERFLOW 0x00000008
#define AMD_F_DIVBYZERO 0x00000004
#define AMD_F_INVALID 0x00000001

/* Processor-dependent floating-point precision-control flags.
   NOTE(review): these values look like the precision-control field
   (bits 8-9) of the x87 control word - confirm. */
#define AMD_F_EXTENDED 0x00000300
#define AMD_F_DOUBLE   0x00000200
#define AMD_F_SINGLE   0x00000000

/* Processor-dependent floating-point rounding-control flags.
   NOTE(review): these values look like the rounding-control field
   (bits 13-14) of the MXCSR register - confirm. */
#define AMD_F_RC_NEAREST 0x00000000
#define AMD_F_RC_DOWN    0x00002000
#define AMD_F_RC_UP      0x00004000
#define AMD_F_RC_ZERO    0x00006000

#endif /* LIBM_UTIL_AMD_H_INCLUDED */




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_VAL_WITH_FLAGS
#define USE_SCALEDOUBLE_1
#define USE_SCALEUPDOUBLE1024
#include "libm_inlines_amd.h"
#undef USE_SCALEUPDOUBLE1024
#undef USE_SCALEDOUBLE_1
#undef USE_VAL_WITH_FLAGS

/* Return the arctangent of y, in radians.

   With v = |y|, the argument is reduced to [-7/16, 7/16] using
     arctan(v) = arctan(c) + arctan((v - c)/(1 + c*v)),
   with c = 0.5, 1, 1.5 or infinity (for which the reduced argument
   is -1/v) chosen according to the size of v; arctan(c) is held as
   the two-part sum chi + clo.  The arctangent of the reduced argument
   is evaluated with a rational approximation, and the sign of y is
   restored at the end.

   NaN arguments return y + y.  For |y| > 2^56 (infinities included)
   the result is +-pi/2, returned through val_with_flags together
   with AMD_F_INEXACT.  */
double __atan(double y)
{

  /* Some constants and split constants. */

  static const double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
  double chi, clo, v, s, q, z;

  /* Find properties of argument y. */

  unsigned long uy, auy, yneg;
  GET_BITS_DP64(y, uy);
  auy = uy & ~SIGNBIT_DP64;   /* bit pattern of abs(y) */
  yneg = (uy != auy);         /* nonzero iff the sign bit of y is set */

  if (yneg) v = -y;
  else v = y;

  /* Argument reduction to range [-7/16,7/16].  The range tests are
     made on the bit pattern auy, which orders the same way as the
     non-negative value v. */

  if (auy > 0x4003800000000000) /* v > 39./16. */
    {

      if (auy > PINFBITPATT_DP64) return y + y; /* y is NaN */
      else if (auy > 0x4370000000000000)
        { /* abs(y) > 2^56 => arctan(1/y) is
             insignificant compared to piby2.
             The test uses the bit pattern auy because the constant
             is the IEEE encoding of 2^56; comparing the double v
             with it would convert the integer to about 4.86e18
             instead. */
          if (yneg) return val_with_flags(-piby2, AMD_F_INEXACT);
          else return val_with_flags(piby2, AMD_F_INEXACT);
        }

      y = -1.0/v;
      /* (chi + clo) = arctan(infinity) */
      chi = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
      clo = 6.12323399573676480327e-17; /* 0x3c91a62633145c06 */
    }
  else if (auy > 0x3ff3000000000000) /* 39./16. > v > 19./16. */
    {
      y = (v-1.5)/(1.0+1.5*v);
      /* (chi + clo) = arctan(1.5) */
      chi = 9.82793723247329054082e-01; /* 0x3fef730bd281f69b */
      clo = 1.39033110312309953701e-17; /* 0x3c7007887af0cbbc */
    }
  else if (auy > 0x3fe6000000000000) /* 19./16. > v > 11./16. */
    {
      y = (v-1.)/(1.0+v);
      /* (chi + clo) = arctan(1.) */
      chi = 7.85398163397448278999e-01; /* 0x3fe921fb54442d18 */
      clo = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */
    }
  else if (auy > 0x3fdc000000000000) /* 11./16. > v > 7./16. */
    {
      /* (v-0.5)/(1.0+0.5*v) with numerator and denominator doubled */
      y = (2*v-1.0)/(2.0+v);
      /* (chi + clo) = arctan(0.5) */
      chi = 4.63647609000806093515e-01; /* 0x3fddac670561bb4f */
      clo = 2.26987774529616809294e-17; /* 0x3c7a2b7f222f65e0 */
    }
  else  /* v < 7./16. */
    {
      y = v;
      chi = 0.0;
      clo = 0.0;
    }

  /* Core approximation: Remez(4,4) on [-7/16,7/16].
     q approximates y - arctan(y); its leading term is y*s/3. */

  s = y*y;
  q = y*s*
       (0.268297920532545909e0 +
        (0.447677206805497472e0 +
         (0.220638780716667420e0 +
          (0.304455919504853031e-1 +
            0.142316903342317766e-3*s)*s)*s)*s)/
       (0.804893761597637733e0 +
        (0.182596787737507063e1 +
         (0.141254259931958921e1 +
          (0.424602594203847109e0 +
            0.389525873944742195e-1*s)*s)*s)*s);

  /* chi + clo + (y - q), with the small terms combined first and
     the large term chi applied last. */
  z = chi - ((q - clo) - y);

  if (yneg) z = -z;
  return z;
}

weak_alias (__atan, atan)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_atan2.c.x86_64-new-libm (+746 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_VAL_WITH_FLAGS
		13	#define USE_SCALEDOUBLE_1
		14	#define USE_SCALEDOUBLE_2
		15	#define USE_SCALEUPDOUBLE1024
		16	#define USE_SCALEDOWNDOUBLE
		17	#include "libm_inlines_amd.h"
		18	#undef USE_SCALEDOWNDOUBLE
		19	#undef USE_SCALEUPDOUBLE1024
		20	#undef USE_SCALEDOUBLE_1
		21	#undef USE_SCALEDOUBLE_2
		22	#undef USE_VAL_WITH_FLAGS
		23
		24	/* Deal with errno for out-of-range arguments
		25	(only used when _LIB_VERSION is _SVID_) */
		26	#include "libm_errno_amd.h"
		/* Report a DOMAIN error for "atan2" through matherr().
		   Fills a struct exception with the two arguments, the name
		   "atan2" and a default return value of HUGE, then calls
		   matherr().  If matherr() returns zero, a message is written
		   to stderr and errno is set to EDOM.  Returns exc.retval as it
		   stands after the matherr() call.
		   NOTE(review): the visible call site passes atan2's (x, y), so
		   arg1 receives atan2's second parameter -- confirm that this
		   argument order is intended. */
		27	static inline double retval_errno_edom(double x, double y)
		28	{
		29	struct exception exc;
		30	exc.arg1 = x;
		31	exc.arg2 = y;
		32	exc.type = DOMAIN;
		33	exc.name = (char *)"atan2"; /* cast drops const from the string literal */
		34	exc.retval = HUGE; /* default result; presumably matherr() may replace it -- confirm */
		35	if (!matherr(&exc)) /* zero return: fall back to message + errno */
		36	{
		37	(void)fputs("atan2: DOMAIN error\n", stderr);
		38	__set_errno(EDOM);
		39	}
		40	return exc.retval;
		41	}
		42
		43	double __atan2(double y, double x)
		44	{
		45	/* Arrays atan_jby256_lead and atan_jby256_tail contain
		46	leading and trailing parts respectively of precomputed
		47	values of atan(j/256), for j = 16, 17, ..., 256.
		48	atan_jby256_lead contains the first 21 bits of precision,
		49	and atan_jby256_tail contains a further 53 bits precision. */
		50
		51	static const double atan_jby256_lead[ 241] = {
		52	6.24187886714935302734e-02, /* 0x3faff55b00000000 */
		53	6.63088560104370117188e-02, /* 0x3fb0f99e00000000 */
		54	7.01969265937805175781e-02, /* 0x3fb1f86d00000000 */
		55	7.40829110145568847656e-02, /* 0x3fb2f71900000000 */
		56	7.79666304588317871094e-02, /* 0x3fb3f59f00000000 */
		57	8.18479657173156738281e-02, /* 0x3fb4f3fd00000000 */
		58	8.57268571853637695312e-02, /* 0x3fb5f23200000000 */
		59	8.96031260490417480469e-02, /* 0x3fb6f03b00000000 */
		60	9.34767723083496093750e-02, /* 0x3fb7ee1800000000 */
		61	9.73475575447082519531e-02, /* 0x3fb8ebc500000000 */
		62	1.01215422153472900391e-01, /* 0x3fb9e94100000000 */
		63	1.05080246925354003906e-01, /* 0x3fbae68a00000000 */
		64	1.08941912651062011719e-01, /* 0x3fbbe39e00000000 */
65	1.12800359725952148438e-01, /* 0x3fbce07c00000000 */
66	1.16655409336090087891e-01, /* 0x3fbddd2100000000 */
67	1.20507001876831054688e-01, /* 0x3fbed98c00000000 */
68	1.24354958534240722656e-01, /* 0x3fbfd5ba00000000 */
69	1.28199219703674316406e-01, /* 0x3fc068d500000000 */
70	1.32039666175842285156e-01, /* 0x3fc0e6ad00000000 */
71	1.35876297950744628906e-01, /* 0x3fc1646500000000 */
72	1.39708757400512695312e-01, /* 0x3fc1e1fa00000000 */
73	1.43537282943725585938e-01, /* 0x3fc25f6e00000000 */
74	1.47361397743225097656e-01, /* 0x3fc2dcbd00000000 */
75	1.51181221008300781250e-01, /* 0x3fc359e800000000 */
76	1.54996633529663085938e-01, /* 0x3fc3d6ee00000000 */
77	1.58807516098022460938e-01, /* 0x3fc453ce00000000 */
78	1.62613749504089355469e-01, /* 0x3fc4d08700000000 */
79	1.66415214538574218750e-01, /* 0x3fc54d1800000000 */
80	1.70211911201477050781e-01, /* 0x3fc5c98100000000 */
81	1.74003481864929199219e-01, /* 0x3fc645bf00000000 */
82	1.77790164947509765625e-01, /* 0x3fc6c1d400000000 */
83	1.81571602821350097656e-01, /* 0x3fc73dbd00000000 */
84	1.85347914695739746094e-01, /* 0x3fc7b97b00000000 */
85	1.89118742942810058594e-01, /* 0x3fc8350b00000000 */
86	1.92884206771850585938e-01, /* 0x3fc8b06e00000000 */
87	1.96644186973571777344e-01, /* 0x3fc92ba300000000 */
88	2.00398445129394531250e-01, /* 0x3fc9a6a800000000 */
89	2.04147100448608398438e-01, /* 0x3fca217e00000000 */
90	2.07889914512634277344e-01, /* 0x3fca9c2300000000 */
91	2.11626768112182617188e-01, /* 0x3fcb169600000000 */
92	2.15357661247253417969e-01, /* 0x3fcb90d700000000 */
93	2.19082474708557128906e-01, /* 0x3fcc0ae500000000 */
94	2.22801089286804199219e-01, /* 0x3fcc84bf00000000 */
95	2.26513504981994628906e-01, /* 0x3fccfe6500000000 */
96	2.30219483375549316406e-01, /* 0x3fcd77d500000000 */
97	2.33919143676757812500e-01, /* 0x3fcdf11000000000 */
98	2.37612247467041015625e-01, /* 0x3fce6a1400000000 */
99	2.41298794746398925781e-01, /* 0x3fcee2e100000000 */
100	2.44978547096252441406e-01, /* 0x3fcf5b7500000000 */
101	2.48651623725891113281e-01, /* 0x3fcfd3d100000000 */
102	2.52317905426025390625e-01, /* 0x3fd025fa00000000 */
103	2.55977153778076171875e-01, /* 0x3fd061ee00000000 */
104	2.59629487991333007812e-01, /* 0x3fd09dc500000000 */
105	2.63274669647216796875e-01, /* 0x3fd0d97e00000000 */
106	2.66912937164306640625e-01, /* 0x3fd1151a00000000 */
107	2.70543813705444335938e-01, /* 0x3fd1509700000000 */
108	2.74167299270629882812e-01, /* 0x3fd18bf500000000 */
109	2.77783632278442382812e-01, /* 0x3fd1c73500000000 */
110	2.81392335891723632812e-01, /* 0x3fd2025500000000 */
111	2.84993648529052734375e-01, /* 0x3fd23d5600000000 */
112	2.88587331771850585938e-01, /* 0x3fd2783700000000 */
113	2.92173147201538085938e-01, /* 0x3fd2b2f700000000 */
114	2.95751571655273437500e-01, /* 0x3fd2ed9800000000 */
115	2.99322128295898437500e-01, /* 0x3fd3281800000000 */
116	3.02884817123413085938e-01, /* 0x3fd3627700000000 */
117	3.06439399719238281250e-01, /* 0x3fd39cb400000000 */
118	3.09986352920532226562e-01, /* 0x3fd3d6d100000000 */
119	3.13524961471557617188e-01, /* 0x3fd410cb00000000 */
120	3.17055702209472656250e-01, /* 0x3fd44aa400000000 */
121	3.20578098297119140625e-01, /* 0x3fd4845a00000000 */
122	3.24092388153076171875e-01, /* 0x3fd4bdee00000000 */
123	3.27598333358764648438e-01, /* 0x3fd4f75f00000000 */
124	3.31095933914184570312e-01, /* 0x3fd530ad00000000 */
125	3.34585189819335937500e-01, /* 0x3fd569d800000000 */
126	3.38066101074218750000e-01, /* 0x3fd5a2e000000000 */
127	3.41538190841674804688e-01, /* 0x3fd5dbc300000000 */
128	3.45002174377441406250e-01, /* 0x3fd6148400000000 */
129	3.48457098007202148438e-01, /* 0x3fd64d1f00000000 */
130	3.51903676986694335938e-01, /* 0x3fd6859700000000 */
131	3.55341434478759765625e-01, /* 0x3fd6bdea00000000 */
132	3.58770608901977539062e-01, /* 0x3fd6f61900000000 */
133	3.62190723419189453125e-01, /* 0x3fd72e2200000000 */
134	3.65602254867553710938e-01, /* 0x3fd7660700000000 */
135	3.69004726409912109375e-01, /* 0x3fd79dc600000000 */
136	3.72398376464843750000e-01, /* 0x3fd7d56000000000 */
137	3.75782966613769531250e-01, /* 0x3fd80cd400000000 */
138	3.79158496856689453125e-01, /* 0x3fd8442200000000 */
139	3.82525205612182617188e-01, /* 0x3fd87b4b00000000 */
140	3.85882616043090820312e-01, /* 0x3fd8b24d00000000 */
141	3.89230966567993164062e-01, /* 0x3fd8e92900000000 */
142	3.92570018768310546875e-01, /* 0x3fd91fde00000000 */
143	3.95900011062622070312e-01, /* 0x3fd9566d00000000 */
144	3.99220705032348632812e-01, /* 0x3fd98cd500000000 */
145	4.02532100677490234375e-01, /* 0x3fd9c31600000000 */
146	4.05834197998046875000e-01, /* 0x3fd9f93000000000 */
147	4.09126996994018554688e-01, /* 0x3fda2f2300000000 */
148	4.12410259246826171875e-01, /* 0x3fda64ee00000000 */
149	4.15684223175048828125e-01, /* 0x3fda9a9200000000 */
150	4.18948888778686523438e-01, /* 0x3fdad00f00000000 */
151	4.22204017639160156250e-01, /* 0x3fdb056400000000 */
152	4.25449609756469726562e-01, /* 0x3fdb3a9100000000 */
153	4.28685665130615234375e-01, /* 0x3fdb6f9600000000 */
154	4.31912183761596679688e-01, /* 0x3fdba47300000000 */
155	4.35129165649414062500e-01, /* 0x3fdbd92800000000 */
156	4.38336372375488281250e-01, /* 0x3fdc0db400000000 */
157	4.41534280776977539062e-01, /* 0x3fdc421900000000 */
158	4.44722414016723632812e-01, /* 0x3fdc765500000000 */
159	4.47900772094726562500e-01, /* 0x3fdcaa6800000000 */
160	4.51069593429565429688e-01, /* 0x3fdcde5300000000 */
161	4.54228639602661132812e-01, /* 0x3fdd121500000000 */
162	4.57377910614013671875e-01, /* 0x3fdd45ae00000000 */
163	4.60517644882202148438e-01, /* 0x3fdd791f00000000 */
164	4.63647603988647460938e-01, /* 0x3fddac6700000000 */
165	4.66767549514770507812e-01, /* 0x3fdddf8500000000 */
166	4.69877958297729492188e-01, /* 0x3fde127b00000000 */
167	4.72978591918945312500e-01, /* 0x3fde454800000000 */
168	4.76069211959838867188e-01, /* 0x3fde77eb00000000 */
169	4.79150056838989257812e-01, /* 0x3fdeaa6500000000 */
170	4.82221126556396484375e-01, /* 0x3fdedcb600000000 */
171	4.85282421112060546875e-01, /* 0x3fdf0ede00000000 */
172	4.88333940505981445312e-01, /* 0x3fdf40dd00000000 */
173	4.91375446319580078125e-01, /* 0x3fdf72b200000000 */
174	4.94406938552856445312e-01, /* 0x3fdfa45d00000000 */
175	4.97428894042968750000e-01, /* 0x3fdfd5e000000000 */
176	5.00440597534179687500e-01, /* 0x3fe0039c00000000 */
177	5.03442764282226562500e-01, /* 0x3fe01c3400000000 */
178	5.06434917449951171875e-01, /* 0x3fe034b700000000 */
179	5.09417057037353515625e-01, /* 0x3fe04d2500000000 */
180	5.12389183044433593750e-01, /* 0x3fe0657e00000000 */
181	5.15351772308349609375e-01, /* 0x3fe07dc300000000 */
182	5.18304347991943359375e-01, /* 0x3fe095f300000000 */
183	5.21246910095214843750e-01, /* 0x3fe0ae0e00000000 */
184	5.24179458618164062500e-01, /* 0x3fe0c61400000000 */
185	5.27101993560791015625e-01, /* 0x3fe0de0500000000 */
186	5.30014991760253906250e-01, /* 0x3fe0f5e200000000 */
187	5.32917976379394531250e-01, /* 0x3fe10daa00000000 */
188	5.35810947418212890625e-01, /* 0x3fe1255d00000000 */
189	5.38693904876708984375e-01, /* 0x3fe13cfb00000000 */
190	5.41567325592041015625e-01, /* 0x3fe1548500000000 */
191	5.44430732727050781250e-01, /* 0x3fe16bfa00000000 */
192	5.47284126281738281250e-01, /* 0x3fe1835a00000000 */
193	5.50127506256103515625e-01, /* 0x3fe19aa500000000 */
194	5.52961349487304687500e-01, /* 0x3fe1b1dc00000000 */
195	5.55785179138183593750e-01, /* 0x3fe1c8fe00000000 */
196	5.58598995208740234375e-01, /* 0x3fe1e00b00000000 */
197	5.61403274536132812500e-01, /* 0x3fe1f70400000000 */
198	5.64197540283203125000e-01, /* 0x3fe20de800000000 */
199	5.66981792449951171875e-01, /* 0x3fe224b700000000 */
200	5.69756031036376953125e-01, /* 0x3fe23b7100000000 */
201	5.72520732879638671875e-01, /* 0x3fe2521700000000 */
202	5.75275897979736328125e-01, /* 0x3fe268a900000000 */
203	5.78021049499511718750e-01, /* 0x3fe27f2600000000 */
204	5.80756187438964843750e-01, /* 0x3fe2958e00000000 */
205	5.83481788635253906250e-01, /* 0x3fe2abe200000000 */
206	5.86197376251220703125e-01, /* 0x3fe2c22100000000 */
207	5.88903427124023437500e-01, /* 0x3fe2d84c00000000 */
208	5.91599464416503906250e-01, /* 0x3fe2ee6200000000 */
209	5.94285964965820312500e-01, /* 0x3fe3046400000000 */
210	5.96962928771972656250e-01, /* 0x3fe31a5200000000 */
211	5.99629878997802734375e-01, /* 0x3fe3302b00000000 */
212	6.02287292480468750000e-01, /* 0x3fe345f000000000 */
213	6.04934692382812500000e-01, /* 0x3fe35ba000000000 */
214	6.07573032379150390625e-01, /* 0x3fe3713d00000000 */
215	6.10201358795166015625e-01, /* 0x3fe386c500000000 */
216	6.12820148468017578125e-01, /* 0x3fe39c3900000000 */
217	6.15428924560546875000e-01, /* 0x3fe3b19800000000 */
218	6.18028640747070312500e-01, /* 0x3fe3c6e400000000 */
219	6.20618820190429687500e-01, /* 0x3fe3dc1c00000000 */
220	6.23198986053466796875e-01, /* 0x3fe3f13f00000000 */
221	6.25770092010498046875e-01, /* 0x3fe4064f00000000 */
222	6.28331184387207031250e-01, /* 0x3fe41b4a00000000 */
223	6.30883216857910156250e-01, /* 0x3fe4303200000000 */
224	6.33425712585449218750e-01, /* 0x3fe4450600000000 */
225	6.35958671569824218750e-01, /* 0x3fe459c600000000 */
226	6.38482093811035156250e-01, /* 0x3fe46e7200000000 */
227	6.40995979309082031250e-01, /* 0x3fe4830a00000000 */
228	6.43500804901123046875e-01, /* 0x3fe4978f00000000 */
229	6.45996093750000000000e-01, /* 0x3fe4ac0000000000 */
230	6.48482322692871093750e-01, /* 0x3fe4c05e00000000 */
231	6.50959014892578125000e-01, /* 0x3fe4d4a800000000 */
232	6.53426170349121093750e-01, /* 0x3fe4e8de00000000 */
233	6.55884265899658203125e-01, /* 0x3fe4fd0100000000 */
234	6.58332824707031250000e-01, /* 0x3fe5111000000000 */
235	6.60772323608398437500e-01, /* 0x3fe5250c00000000 */
236	6.63202762603759765625e-01, /* 0x3fe538f500000000 */
237	6.65623664855957031250e-01, /* 0x3fe54cca00000000 */
238	6.68035984039306640625e-01, /* 0x3fe5608d00000000 */
239	6.70438766479492187500e-01, /* 0x3fe5743c00000000 */
240	6.72832489013671875000e-01, /* 0x3fe587d800000000 */
241	6.75216674804687500000e-01, /* 0x3fe59b6000000000 */
242	6.77592277526855468750e-01, /* 0x3fe5aed600000000 */
243	6.79958820343017578125e-01, /* 0x3fe5c23900000000 */
244	6.82316303253173828125e-01, /* 0x3fe5d58900000000 */
245	6.84664726257324218750e-01, /* 0x3fe5e8c600000000 */
246	6.87004089355468750000e-01, /* 0x3fe5fbf000000000 */
247	6.89334869384765625000e-01, /* 0x3fe60f0800000000 */
248	6.91656589508056640625e-01, /* 0x3fe6220d00000000 */
249	6.93969249725341796875e-01, /* 0x3fe634ff00000000 */
250	6.96272850036621093750e-01, /* 0x3fe647de00000000 */
251	6.98567867279052734375e-01, /* 0x3fe65aab00000000 */
252	7.00854301452636718750e-01, /* 0x3fe66d6600000000 */
253	7.03131675720214843750e-01, /* 0x3fe6800e00000000 */
254	7.05400466918945312500e-01, /* 0x3fe692a400000000 */
255	7.07660198211669921875e-01, /* 0x3fe6a52700000000 */
256	7.09911346435546875000e-01, /* 0x3fe6b79800000000 */
257	7.12153911590576171875e-01, /* 0x3fe6c9f700000000 */
258	7.14387893676757812500e-01, /* 0x3fe6dc4400000000 */
259	7.16613292694091796875e-01, /* 0x3fe6ee7f00000000 */
260	7.18829631805419921875e-01, /* 0x3fe700a700000000 */
261	7.21037864685058593750e-01, /* 0x3fe712be00000000 */
262	7.23237514495849609375e-01, /* 0x3fe724c300000000 */
263	7.25428581237792968750e-01, /* 0x3fe736b600000000 */
264	7.27611064910888671875e-01, /* 0x3fe7489700000000 */
265	7.29785442352294921875e-01, /* 0x3fe75a6700000000 */
266	7.31950759887695312500e-01, /* 0x3fe76c2400000000 */
267	7.34108448028564453125e-01, /* 0x3fe77dd100000000 */
268	7.36257076263427734375e-01, /* 0x3fe78f6b00000000 */
269	7.38397598266601562500e-01, /* 0x3fe7a0f400000000 */
270	7.40530014038085937500e-01, /* 0x3fe7b26c00000000 */
271	7.42654323577880859375e-01, /* 0x3fe7c3d300000000 */
272	7.44770050048828125000e-01, /* 0x3fe7d52800000000 */
273	7.46877670288085937500e-01, /* 0x3fe7e66c00000000 */
274	7.48976707458496093750e-01, /* 0x3fe7f79e00000000 */
275	7.51068115234375000000e-01, /* 0x3fe808c000000000 */
276	7.53150939941406250000e-01, /* 0x3fe819d000000000 */
277	7.55226135253906250000e-01, /* 0x3fe82ad000000000 */
278	7.57292747497558593750e-01, /* 0x3fe83bbe00000000 */
279	7.59351730346679687500e-01, /* 0x3fe84c9c00000000 */
280	7.61402606964111328125e-01, /* 0x3fe85d6900000000 */
281	7.63445377349853515625e-01, /* 0x3fe86e2500000000 */
282	7.65480041503906250000e-01, /* 0x3fe87ed000000000 */
283	7.67507076263427734375e-01, /* 0x3fe88f6b00000000 */
284	7.69526004791259765625e-01, /* 0x3fe89ff500000000 */
285	7.71537303924560546875e-01, /* 0x3fe8b06f00000000 */
286	7.73540973663330078125e-01, /* 0x3fe8c0d900000000 */
287	7.75536537170410156250e-01, /* 0x3fe8d13200000000 */
288	7.77523994445800781250e-01, /* 0x3fe8e17a00000000 */
289	7.79504299163818359375e-01, /* 0x3fe8f1b300000000 */
290	7.81476497650146484375e-01, /* 0x3fe901db00000000 */
291	7.83441066741943359375e-01, /* 0x3fe911f300000000 */
292	7.85398006439208984375e-01}; /* 0x3fe921fb00000000 */
293
294	static const double atan_jby256_tail[ 241] = {
295	2.13244638182005395671e-08, /* 0x3e56e59fbd38db2c */
296	3.89093864761712760656e-08, /* 0x3e64e3aa54dedf96 */
297	4.44780900009437454576e-08, /* 0x3e67e105ab1bda88 */
298	1.15344768460112754160e-08, /* 0x3e48c5254d013fd0 */
299	3.37271051945395312705e-09, /* 0x3e2cf8ab3ad62670 */
300	2.40857608736109859459e-08, /* 0x3e59dca4bec80468 */
301	1.85853810450623807768e-08, /* 0x3e53f4b5ec98a8da */
302	5.14358299969225078306e-08, /* 0x3e6b9d49619d81fe */
303	8.85023985412952486748e-09, /* 0x3e43017887460934 */
304	1.59425154214358432060e-08, /* 0x3e511e3eca0b9944 */
305	1.95139937737755753164e-08, /* 0x3e54f3f73c5a332e */
306	2.64909755273544319715e-08, /* 0x3e5c71c8ae0e00a6 */
307	4.43388037881231070144e-08, /* 0x3e67cde0f86fbdc7 */
308	2.14757072421821274557e-08, /* 0x3e570f328c889c72 */
309	2.61049792670754218852e-08, /* 0x3e5c07ae9b994efe */
310	7.81439350674466302231e-09, /* 0x3e40c8021d7b1698 */
311	3.60125207123751024094e-08, /* 0x3e635585edb8cb22 */
312	6.15276238179343767917e-08, /* 0x3e70842567b30e96 */
313	9.54387964641184285058e-08, /* 0x3e799e811031472e */
314	3.02789566851502754129e-08, /* 0x3e6041821416bcee */
315	1.16888650949870856331e-07, /* 0x3e7f6086e4dc96f4 */
316	1.07580956468653338863e-08, /* 0x3e471a535c5f1b58 */
317	8.33454265379535427653e-08, /* 0x3e765f743fe63ca1 */
318	1.10790279272629526068e-07, /* 0x3e7dbd733472d014 */
319	1.08394277896366207424e-07, /* 0x3e7d18cc4d8b0d1d */
320	9.22176086126841098800e-08, /* 0x3e78c12553c8fb29 */
321	7.90938592199048786990e-08, /* 0x3e753b49e2e8f991 */
322	8.66445407164293125637e-08, /* 0x3e77422ae148c141 */
323	1.40839973537092438671e-08, /* 0x3e4e3ec269df56a8 */
324	1.19070438507307600689e-07, /* 0x3e7ff6754e7e0ac9 */
325	6.40451663051716197071e-08, /* 0x3e7131267b1b5aad */
326	1.08338682076343674522e-07, /* 0x3e7d14fa403a94bc */
327	3.52999550187922736222e-08, /* 0x3e62f396c089a3d8 */
328	1.05983273930043077202e-07, /* 0x3e7c731d78fa95bb */
329	1.05486124078259553339e-07, /* 0x3e7c50f385177399 */
330	5.82167732281776477773e-08, /* 0x3e6f41409c6f2c20 */
331	1.08696483983403942633e-07, /* 0x3e7d2d90c4c39ec0 */
332	4.47335086122377542835e-08, /* 0x3e680420696f2106 */
333	1.26896287162615723528e-08, /* 0x3e4b40327943a2e8 */
334	4.06534471589151404531e-08, /* 0x3e65d35e02f3d2a2 */
335	3.84504846300557026690e-08, /* 0x3e64a498288117b0 */
336	3.60715006404807269080e-08, /* 0x3e635da119afb324 */
337	6.44725903165522722801e-08, /* 0x3e714e85cdb9a908 */
338	3.63749249976409461305e-08, /* 0x3e638754e5547b9a */
339	1.03901294413833913794e-07, /* 0x3e7be40ae6ce3246 */
340	6.25379756302167880580e-08, /* 0x3e70c993b3bea7e7 */
341	6.63984302368488828029e-08, /* 0x3e71d2dd89ac3359 */
342	3.21844598971548278059e-08, /* 0x3e61476603332c46 */
343	1.16030611712765830905e-07, /* 0x3e7f25901bac55b7 */
344	1.17464622142347730134e-07, /* 0x3e7f881b7c826e28 */
345	7.54604017965808996596e-08, /* 0x3e7441996d698d20 */
346	1.49234929356206556899e-07, /* 0x3e8407ac521ea089 */
347	1.41416924523217430259e-07, /* 0x3e82fb0c6c4b1723 */
348	2.13308065617483489011e-07, /* 0x3e8ca135966a3e18 */
349	5.04230937933302320146e-08, /* 0x3e6b1218e4d646e4 */
350	5.45874922281655519035e-08, /* 0x3e6d4e72a350d288 */
351	1.51849028914786868886e-07, /* 0x3e84617e2f04c329 */
352	3.09004308703769273010e-08, /* 0x3e6096ec41e82650 */
353	9.67574548184738317664e-08, /* 0x3e79f91f25773e6e */
354	4.02508285529322212824e-08, /* 0x3e659c0820f1d674 */
355	3.01222268096861091157e-08, /* 0x3e602bf7a2df1064 */
356	2.36189860670079288680e-07, /* 0x3e8fb36bfc40508f */
357	1.14095158111080887695e-07, /* 0x3e7ea08f3f8dc892 */
358	7.42349089746573467487e-08, /* 0x3e73ed6254656a0e */
359	5.12515583196230380184e-08, /* 0x3e6b83f5e5e69c58 */
360	2.19290391828763918102e-07, /* 0x3e8d6ec2af768592 */
361	3.83263512187553886471e-08, /* 0x3e6493889a226f94 */
362	1.61513486284090523855e-07, /* 0x3e85ad8fa65279ba */
363	5.09996743535589922261e-08, /* 0x3e6b615784d45434 */
364	1.23694037861246766534e-07, /* 0x3e809a184368f145 */
365	8.23367955351123783984e-08, /* 0x3e761a2439b0d91c */
366	1.07591766213053694014e-07, /* 0x3e7ce1a65e39a978 */
367	1.42789947524631815640e-07, /* 0x3e832a39a93b6a66 */
368	1.32347123024711878538e-07, /* 0x3e81c3699af804e7 */
369	2.17626067316598149229e-08, /* 0x3e575e0f4e44ede8 */
370	2.34454866923044288656e-07, /* 0x3e8f77ced1a7a83b */
371	2.82966370261766916053e-09, /* 0x3e284e7f0cb1b500 */
372	2.29300919890907632975e-07, /* 0x3e8ec6b838b02dfe */
373	1.48428270450261284915e-07, /* 0x3e83ebf4dfbeda87 */
374	1.87937408574313982512e-07, /* 0x3e89397aed9cb475 */
375	6.13685946813334055347e-08, /* 0x3e707937bc239c54 */
376	1.98585022733583817493e-07, /* 0x3e8aa754553131b6 */
377	7.68394131623752961662e-08, /* 0x3e74a05d407c45dc */
378	1.28119052312436745644e-07, /* 0x3e8132231a206dd0 */
379	7.02119104719236502733e-08, /* 0x3e72d8ecfdd69c88 */
380	9.87954793820636301943e-08, /* 0x3e7a852c74218606 */
381	1.72176752381034986217e-07, /* 0x3e871bf2baeebb50 */
382	1.12877225146169704119e-08, /* 0x3e483d7db7491820 */
383	5.33549829555851737993e-08, /* 0x3e6ca50d92b6da14 */
384	2.13833275710816521345e-08, /* 0x3e56f5cde8530298 */
385	1.16243518048290556393e-07, /* 0x3e7f343198910740 */
386	6.29926408369055877943e-08, /* 0x3e70e8d241ccd80a */
387	6.45429039328021963791e-08, /* 0x3e71535ac619e6c8 */
388	8.64001922814281933403e-08, /* 0x3e77316041c36cd2 */
389	9.50767572202325800240e-08, /* 0x3e7985a000637d8e */
390	5.80851497508121135975e-08, /* 0x3e6f2f29858c0a68 */
391	1.82350561135024766232e-07, /* 0x3e8879847f96d909 */
392	1.98948680587390608655e-07, /* 0x3e8ab3d319e12e42 */
393	7.83548663450197659846e-08, /* 0x3e75088162dfc4c2 */
394	3.04374234486798594427e-08, /* 0x3e605749a1cd9d8c */
395	2.76135725629797411787e-08, /* 0x3e5da65c6c6b8618 */
396	4.32610105454203065470e-08, /* 0x3e6739bf7df1ad64 */
397	5.17107515324127256994e-08, /* 0x3e6bc31252aa3340 */
398	2.82398327875841444660e-08, /* 0x3e5e528191ad3aa8 */
399	1.87482469524195595399e-07, /* 0x3e8929d93df19f18 */
400	2.97481891662714096139e-08, /* 0x3e5ff11eb693a080 */
401	9.94421570843584316402e-09, /* 0x3e455ae3f145a3a0 */
402	1.07056210730391848428e-07, /* 0x3e7cbcd8c6c0ca82 */
403	6.25589580466881163081e-08, /* 0x3e70cb04d425d304 */
404	9.56641013869464593803e-08, /* 0x3e79adfcab5be678 */
405	1.88056307148355440276e-07, /* 0x3e893d90c5662508 */
406	8.38850689379557880950e-08, /* 0x3e768489bd35ff40 */
407	5.01215865527674122924e-09, /* 0x3e3586ed3da2b7e0 */
408	1.74166095998522089762e-07, /* 0x3e87604d2e850eee */
409	9.96779574395363585849e-08, /* 0x3e7ac1d12bfb53d8 */
410	5.98432026368321460686e-09, /* 0x3e39b3d468274740 */
411	1.18362922366887577169e-07, /* 0x3e7fc5d68d10e53c */
412	1.86086833284154215946e-07, /* 0x3e88f9e51884becb */
413	1.97671457251348941011e-07, /* 0x3e8a87f0869c06d1 */
414	1.42447160717199237159e-07, /* 0x3e831e7279f685fa */
415	1.05504240785546574184e-08, /* 0x3e46a8282f9719b0 */
416	3.13335218371639189324e-08, /* 0x3e60d2724a8a44e0 */
417	1.96518418901914535399e-07, /* 0x3e8a60524b11ad4e */
418	2.17692035039173536059e-08, /* 0x3e575fdf832750f0 */
419	2.15613114426529981675e-07, /* 0x3e8cf06902e4cd36 */
420	5.68271098300441214948e-08, /* 0x3e6e82422d4f6d10 */
421	1.70331455823369124256e-08, /* 0x3e524a091063e6c0 */
422	9.17590028095709583247e-08, /* 0x3e78a1a172dc6f38 */
423	2.77266304112916566247e-07, /* 0x3e929b6619f8a92d */
424	9.37041937614656939690e-08, /* 0x3e79274d9c1b70c8 */
425	1.56116346368316796511e-08, /* 0x3e50c34b1fbb7930 */
426	4.13967433808382727413e-08, /* 0x3e6639866c20eb50 */
427	1.70164749185821616276e-07, /* 0x3e86d6d0f6832e9e */
428	4.01708788545600086008e-07, /* 0x3e9af54def99f25e */
429	2.59663539226050551563e-07, /* 0x3e916cfc52a00262 */
430	2.22007487655027469542e-07, /* 0x3e8dcc1e83569c32 */
431	2.90542250809644081369e-07, /* 0x3e937f7a551ed425 */
432	4.67720537666628903341e-07, /* 0x3e9f6360adc98887 */
433	2.79799803956772554802e-07, /* 0x3e92c6ec8d35a2c1 */
434	2.07344552327432547723e-07, /* 0x3e8bd44df84cb036 */
435	2.54705698692735196368e-07, /* 0x3e9117cf826e310e */
436	4.26848589539548450728e-07, /* 0x3e9ca533f332cfc9 */
437	2.52506723633552216197e-07, /* 0x3e90f208509dbc2e */
438	2.14684129933849704964e-07, /* 0x3e8cd07d93c945de */
439	3.20134822201596505431e-07, /* 0x3e957bdfd67e6d72 */
440	9.93537565749855712134e-08, /* 0x3e7aab89c516c658 */
441	3.70792944827917252327e-08, /* 0x3e63e823b1a1b8a0 */
442	1.41772749369083698972e-07, /* 0x3e8307464a9d6d3c */
443	4.22446601490198804306e-07, /* 0x3e9c5993cd438843 */
444	4.11818433724801511540e-07, /* 0x3e9ba2fca02ab554 */
445	1.19976381502605310519e-07, /* 0x3e801a5b6983a268 */
446	3.43703078571520905265e-08, /* 0x3e6273d1b350efc8 */
447	1.66128705555453270379e-07, /* 0x3e864c238c37b0c6 */
448	5.00499610023283006540e-08, /* 0x3e6aded07370a300 */
449	1.75105139941208062123e-07, /* 0x3e878091197eb47e */
450	7.70807146729030327334e-08, /* 0x3e74b0f245e0dabc */
451	2.45918607526895836121e-07, /* 0x3e9080d9794e2eaf */
452	2.18359020958626199345e-07, /* 0x3e8d4ec242b60c76 */
453	8.44342887976445333569e-09, /* 0x3e4221d2f940caa0 */
454	1.07506148687888629299e-07, /* 0x3e7cdbc42b2bba5c */
455	5.36544954316820904572e-08, /* 0x3e6cce37bb440840 */
456	3.39109101518396596341e-07, /* 0x3e96c1d999cf1dd0 */
457	2.60098720293920613340e-08, /* 0x3e5bed8a07eb0870 */
458	8.42678991664621455827e-08, /* 0x3e769ed88f490e3c */
459	5.36972237470183633197e-08, /* 0x3e6cd41719b73ef0 */
460	4.28192558171921681288e-07, /* 0x3e9cbc4ac95b41b7 */
461	2.71535491483955143294e-07, /* 0x3e9238f1b890f5d7 */
462	7.84094998145075780203e-08, /* 0x3e750c4282259cc4 */
463	3.43880599134117431863e-07, /* 0x3e9713d2de87b3e2 */
464	1.32878065060366481043e-07, /* 0x3e81d5a7d2255276 */
465	4.18046802627967629428e-07, /* 0x3e9c0dfd48227ac1 */
466	2.65042411765766019424e-07, /* 0x3e91c964dab76753 */
467	1.70383695347518643694e-07, /* 0x3e86de56d5704496 */
468	1.54096497259613515678e-07, /* 0x3e84aeb71fd19968 */
469	2.36543402412459813461e-07, /* 0x3e8fbf91c57b1918 */
470	4.38416350106876736790e-07, /* 0x3e9d6bef7fbe5d9a */
471	3.03892161339927775731e-07, /* 0x3e9464d3dc249066 */
472	3.31136771605664899240e-07, /* 0x3e9638e2ec4d9073 */
473	6.49494294526590682218e-08, /* 0x3e716f4a7247ea7c */
474	4.10423429887181345747e-09, /* 0x3e31a0a740f1d440 */
475	1.70831640869113847224e-07, /* 0x3e86edbb0114a33c */
476	1.10811512657909180966e-07, /* 0x3e7dbee8bf1d513c */
477	3.23677724749783611964e-07, /* 0x3e95b8bdb0248f73 */
478	3.55662734259192678528e-07, /* 0x3e97de3d3f5eac64 */
479	2.30102333489738219140e-07, /* 0x3e8ee24187ae448a */
480	4.47429004000738629714e-07, /* 0x3e9e06c591ec5192 */
481	7.78167135617329598659e-08, /* 0x3e74e3861a332738 */
482	9.90345291908535415737e-08, /* 0x3e7a9599dcc2bfe4 */
483	5.85800913143113728314e-08, /* 0x3e6f732fbad43468 */
484	4.57859062410871843857e-07, /* 0x3e9eb9f573b727d9 */
485	3.67993069723390929794e-07, /* 0x3e98b212a2eb9897 */
486	2.90836464322977276043e-07, /* 0x3e9384884c167215 */
487	2.51621574250131388318e-07, /* 0x3e90e2d363020051 */
488	2.75789824740652815545e-07, /* 0x3e92820879fbd022 */
489	3.88985776250314403593e-07, /* 0x3e9a1ab9893e4b30 */
490	1.40214080183768019611e-07, /* 0x3e82d1b817a24478 */
491	3.23451432223550478373e-08, /* 0x3e615d7b8ded4878 */
492	9.15979180730608444470e-08, /* 0x3e78968f9db3a5e4 */
493	3.44371402498640470421e-07, /* 0x3e971c4171fe135f */
494	3.40401897215059498077e-07, /* 0x3e96d80f605d0d8c */
495	1.06431813453707950243e-07, /* 0x3e7c91f043691590 */
496	1.46204238932338846248e-07, /* 0x3e839f8a15fce2b2 */
497	9.94610376972039046878e-09, /* 0x3e455beda9d94b80 */
498	2.01711528092681771039e-07, /* 0x3e8b12c15d60949a */
499	2.72027977986191568296e-07, /* 0x3e924167b312bfe3 */
500	2.48402602511693757964e-07, /* 0x3e90ab8633070277 */
501	1.58480011219249621715e-07, /* 0x3e854554ebbc80ee */
502	3.00372828113368713281e-08, /* 0x3e60204aef5a4bb8 */
503	3.67816204583541976394e-07, /* 0x3e98af08c679cf2c */
504	2.46169793032343824291e-07, /* 0x3e90852a330ae6c8 */
505	1.70080468270204253247e-07, /* 0x3e86d3eb9ec32916 */
506	1.67806717763872914315e-07, /* 0x3e8685cb7fcbbafe */
507	2.67715622006907942620e-07, /* 0x3e91f751c1e0bd95 */
508	2.14411342550299170574e-08, /* 0x3e5705b1b0f72560 */
509	4.11228221283669073277e-07, /* 0x3e9b98d8d808ca92 */
510	3.52311752396749662260e-08, /* 0x3e62ea22c75cc980 */
511	3.52718000397367821054e-07, /* 0x3e97aba62bca0350 */
512	4.38857387992911129814e-07, /* 0x3e9d73833442278c */
513	3.22574606753482540743e-07, /* 0x3e95a5ca1fb18bf9 */
514	3.28730371182804296828e-08, /* 0x3e61a6092b6ecf28 */
515	7.56672470607639279700e-08, /* 0x3e744fd049aac104 */
516	3.26750155316369681821e-09, /* 0x3e2c114fd8df5180 */
517	3.21724445362095284743e-07, /* 0x3e95972f130feae5 */
518	1.06639427371776571151e-07, /* 0x3e7ca034a55fe198 */
519	3.41020788139524715063e-07, /* 0x3e96e2b149990227 */
520	1.00582838631232552824e-07, /* 0x3e7b00000294592c */
521	3.68439433859276640065e-07, /* 0x3e98b9bdc442620e */
522	2.20403078342388012027e-07, /* 0x3e8d94fdfabf3e4e */
523	1.62841467098298142534e-07, /* 0x3e85db30b145ad9a */
524	2.25325348296680733838e-07, /* 0x3e8e3e1eb95022b0 */
525	4.37462238226421614339e-07, /* 0x3e9d5b8b45442bd6 */
526	3.52055880555040706500e-07, /* 0x3e97a046231ecd2e */
527	4.75614398494781776825e-07, /* 0x3e9feafe3ef55232 */
528	3.60998399033215317516e-07, /* 0x3e9839e7bfd78267 */
529	3.79292434611513945954e-08, /* 0x3e645cf49d6fa900 */
530	1.29859015528549300061e-08, /* 0x3e4be3132b27f380 */
531	3.15927546985474913188e-07, /* 0x3e9533980bb84f9f */
532	2.28533679887379668031e-08, /* 0x3e5889e2ce3ba390 */
533	1.17222541823553133877e-07, /* 0x3e7f7778c3ad0cc8 */
534	1.51991208405464415857e-07, /* 0x3e846660cec4eba2 */
535	1.56958239325240655564e-07}; /* 0x3e85110b4611a626 */
536
537	/* Some constants and split constants. */
538
539	static double pi = 3.1415926535897932e+00, /* 0x400921fb54442d18 */
540	piby2 = 1.5707963267948966e+00, /* 0x3ff921fb54442d18 */
541	piby4 = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */
542	three_piby4 = 2.3561944901923449e+00, /* 0x4002d97c7f3321d2 */
543	pi_head = 3.1415926218032836e+00, /* 0x400921fb50000000 */
544	pi_tail = 3.1786509547056392e-08, /* 0x3e6110b4611a6263 */
545	piby2_head = 1.5707963267948965e+00, /* 0x3ff921fb54442d18 */
546	piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */
547
548	double u, v, vbyu, q1, q2, s, u1, vu1, u2, vu2, uu, c, r;
549	unsigned int swap_vu, index, xzero, yzero, xnan, ynan, xinf, yinf;
550	int m, xexp, yexp, diffexp;
551
552	/* Find properties of arguments x and y. */
553
554	unsigned long ux, ui, aux, xneg, uy, auy, yneg;
555
556	GET_BITS_DP64(x, ux);
557	GET_BITS_DP64(y, uy);
558	aux = ux & ~SIGNBIT_DP64;
559	auy = uy & ~SIGNBIT_DP64;
560	xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
561	yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
562	xneg = ux & SIGNBIT_DP64;
563	yneg = uy & SIGNBIT_DP64;
564	xzero = (aux == 0);
565	yzero = (auy == 0);
566	xnan = (aux > PINFBITPATT_DP64);
567	ynan = (auy > PINFBITPATT_DP64);
568	xinf = (aux == PINFBITPATT_DP64);
569	yinf = (auy == PINFBITPATT_DP64);
570
571	diffexp = yexp - xexp;
572
573	/* Special cases */
574
575	if (xnan)
576	return x + x;
577	else if (ynan)
578	return y + y;
579	else if (yzero)
580	{ /* Zero y gives +-0 for positive x
581	and +-pi for negative x */
582	if ((_LIB_VERSION == _SVID_) && xzero)
583	/* Sigh - _SVID_ defines atan2(0,0) as a domain error */
584	return retval_errno_edom(x, y);
585	else if (xneg)
586	{
587	if (yneg) return val_with_flags(-pi,AMD_F_INEXACT);
588	else return val_with_flags(pi,AMD_F_INEXACT);
589	}
590	else return y;
591	}
592	else if (xzero)
593	{ /* Zero x gives +- pi/2
594	depending on sign of y */
595	if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT);
596	else val_with_flags(piby2,AMD_F_INEXACT);
597	}
598
599	/* Scale up both x and y if they are both below 1/4.
600	This avoids any possible later denormalised arithmetic. */
601
602	if ((xexp < 1021 && yexp < 1021))
603	{
604	scaleUpDouble1024(ux, &ux);
605	scaleUpDouble1024(uy, &uy);
606	PUT_BITS_DP64(ux, x);
607	PUT_BITS_DP64(uy, y);
608	xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
609	yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
610	diffexp = yexp - xexp;
611	}
612
613	if (diffexp > 56)
614	{ /* abs(y)/abs(x) > 2^56 => arctan(x/y)
615	is insignificant compared to piby2 */
616	if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT);
617	else return val_with_flags(piby2,AMD_F_INEXACT);
618	}
619	else if (diffexp < -28 && (!xneg))
620	{ /* x positive and dominant over y by a factor of 2^28.
621	In this case atan(y/x) is y/x to machine accuracy. */
622
623	if (diffexp < -1074) /* Result underflows */
624	{
625	if (yneg)
626	return val_with_flags(-0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW);
627	else
628	return val_with_flags(0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW);
629	}
630	else
631	{
632	if (diffexp < -1022)
633	{
634	/* Result will likely be denormalized */
635	y = scaleDouble_1(y, 100);
636	y /= x;
637	/* Now y is 2^100 times the true result. Scale it back down. */
638	GET_BITS_DP64(y, uy);
639	scaleDownDouble(uy, 100, &uy);
640	PUT_BITS_DP64(uy, y);
641	if ((uy & EXPBITS_DP64) == 0)
642	return val_with_flags(y, AMD_F_INEXACT | AMD_F_UNDERFLOW);
643	else
644	return y;
645	}
646	else
647	return y / x;
648	}
649	}
650	else if (diffexp < -56 && xneg)
651	{ /* abs(x)/abs(y) > 2^56 and x < 0 => arctan(y/x)
652	is insignificant compared to pi */
653	if (yneg) return val_with_flags(-pi,AMD_F_INEXACT);
654	else return val_with_flags(pi,AMD_F_INEXACT);
655	}
656	else if (yinf && xinf)
657	{ /* If abs(x) and abs(y) are both infinity
658	return +-pi/4 or +- 3pi/4 according to
659	signs. */
660	if (xneg)
661	{
662	if (yneg) return val_with_flags(-three_piby4,AMD_F_INEXACT);
663	else return val_with_flags(three_piby4,AMD_F_INEXACT);
664	}
665	else
666	{
667	if (yneg) return val_with_flags(-piby4,AMD_F_INEXACT);
668	else return val_with_flags(piby4,AMD_F_INEXACT);
669	}
670	}
671
672	/* General case: take absolute values of arguments */
673
674	u = x; v = y;
675	if (xneg) u = -x;
676	if (yneg) v = -y;
677
678	/* Swap u and v if necessary to obtain 0 < v < u. Compute v/u. */
679
680	swap_vu = (u < v);
681	if (swap_vu) { uu = u; u = v; v = uu; }
682	vbyu = v/u;
683
684	if (vbyu > 0.0625)
685	{ /* General values of v/u. Use a look-up
686	table and series expansion. */
687
688	index = (int)(256*vbyu + 0.5);
689	q1 = atan_jby256_lead[index-16];
690	q2 = atan_jby256_tail[index-16];
691	c = index*1./256;
692	GET_BITS_DP64(u, ui);
693	m = (int)((ui & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
694	u = scaleDouble_2(u,-m);
695	v = scaleDouble_2(v,-m);
696	GET_BITS_DP64(u, ui);
697	PUT_BITS_DP64(0xfffffffff8000000 & ui, u1); /* 26 leading bits of u */
698	u2 = u - u1;
699
700	r = ((v-c*u1)-c*u2)/(u+c*v);
701
702	/* Polynomial approximation to atan(r) */
703
704	s = r*r;
705	q2 = q2 + r - r*(s*(0.33333333333224095522 - s*(0.19999918038989143496)));
706	}
707	else if (vbyu < 1.e-8)
708	{ /* v/u is small enough that atan(v/u) = v/u */
709	q1 = 0.0;
710	q2 = vbyu;
711	}
712	else /* vbyu <= 0.0625 */
713	{
714	/* Small values of v/u. Use a series expansion
715	computed carefully to minimise cancellation */
716
717	GET_BITS_DP64(u, ui);
718	PUT_BITS_DP64(0xffffffff00000000 & ui, u1);
719	GET_BITS_DP64(vbyu, ui);
720	PUT_BITS_DP64(0xffffffff00000000 & ui, vu1);
721	u2 = u - u1;
722	vu2 = vbyu - vu1;
723
724	q1 = 0.0;
725	s = vbyu*vbyu;
726	q2 = vbyu +
727	((((v - u1*vu1) - u2*vu1) - u*vu2)/u -
728	(vbyu*s*(0.33333333333333170500 -
729	s*(0.19999999999393223405 -
730	s*(0.14285713561807169030 -
731	s*(0.11110736283514525407 -
732	s*(0.90029810285449784439E-01)))))));
733	}
734
735	/* Tidy-up according to which quadrant the arguments lie in */
736
737	if (swap_vu) {q1 = piby2_head - q1; q2 = piby2_tail - q2;}
738	if (xneg) {q1 = pi_head - q1; q2 = pi_tail - q2;}
739	q1 = q1 + q2;
740
741	if (yneg) q1 = - q1;
742
743	return q1;
744	}
745
746	weak_alias (__atan2, atan2)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_atan2f.c.x86_64-new-libm (+459 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9
		10	#include "libm_amd.h"
		11	#include "libm_util_amd.h"
		12
		13	#define USE_VAL_WITH_FLAGS
		14	#define USE_SCALEDOUBLE_1
		15	#define USE_SCALEDOUBLE_2
		16	#define USE_SCALEUPDOUBLE1024
		17	#define USE_SCALEDOWNDOUBLE
		18	#include "libm_inlines_amd.h"
		19	#undef USE_SCALEDOWNDOUBLE
		20	#undef USE_SCALEUPDOUBLE1024
		21	#undef USE_SCALEDOUBLE_1
		22	#undef USE_SCALEDOUBLE_2
		23	#undef USE_VAL_WITH_FLAGS
		24
		25	/* Deal with errno for out-of-range arguments
		26	(only used when _LIB_VERSION is _SVID_) */
		27	#include "libm_errno_amd.h"
		28	static inline float retval_errno_edom(float x, float y)
		29	{
		30	struct exception exc;
		31	exc.arg1 = (double)x;
		32	exc.arg2 = (double)y;
		33	exc.type = DOMAIN;
		34	exc.name = (char *)"atan2f";
		35	exc.retval = HUGE;
		36	if (!matherr(&exc))
		37	{
		38	(void)fputs("atan2f: DOMAIN error\n", stderr);
		39	__set_errno(EDOM);
		40	}
		41	return exc.retval;
		42	}
		43
		44	float __atan2f(float fy, float fx)
		45	{
		46	/* Array atan_jby256 contains precomputed values of atan(j/256),
		47	for j = 16, 17, ..., 256. */
		48
		49	static const double atan_jby256[ 241] = {
		50	6.24188099959573430842e-02, /* 0x3faff55bb72cfde9 */
		51	6.63088949198234745008e-02, /* 0x3fb0f99ea71d52a6 */
		52	7.01969710718705064423e-02, /* 0x3fb1f86dbf082d58 */
		53	7.40829225490337306415e-02, /* 0x3fb2f719318a4a9a */
		54	7.79666338315423007588e-02, /* 0x3fb3f59f0e7c559d */
		55	8.18479898030765457007e-02, /* 0x3fb4f3fd677292fb */
		56	8.57268757707448092464e-02, /* 0x3fb5f2324fd2d7b2 */
		57	8.96031774848717321724e-02, /* 0x3fb6f03bdcea4b0c */
		58	9.34767811585894559112e-02, /* 0x3fb7ee182602f10e */
		59	9.73475734872236708739e-02, /* 0x3fb8ebc54478fb28 */
		60	1.01215441667466668485e-01, /* 0x3fb9e94153cfdcf1 */
		61	1.05080273416329528224e-01, /* 0x3fbae68a71c722b8 */
		62	1.08941956989865793015e-01, /* 0x3fbbe39ebe6f07c3 */
		63	1.12800381201659388752e-01, /* 0x3fbce07c5c3cca32 */
		64	1.16655435441069349478e-01, /* 0x3fbddd21701eba6e */
65	1.20507009691224548087e-01, /* 0x3fbed98c2190043a */
66	1.24354994546761424279e-01, /* 0x3fbfd5ba9aac2f6d */
67	1.28199281231298117811e-01, /* 0x3fc068d584212b3d */
68	1.32039761614638734288e-01, /* 0x3fc0e6adccf40881 */
69	1.35876328229701304195e-01, /* 0x3fc1646541060850 */
70	1.39708874289163620386e-01, /* 0x3fc1e1fafb043726 */
71	1.43537293701821222491e-01, /* 0x3fc25f6e171a535c */
72	1.47361481088651630200e-01, /* 0x3fc2dcbdb2fba1ff */
73	1.51181331798580037562e-01, /* 0x3fc359e8edeb99a3 */
74	1.54996741923940972718e-01, /* 0x3fc3d6eee8c6626c */
75	1.58807608315631065832e-01, /* 0x3fc453cec6092a9e */
76	1.62613828597948567589e-01, /* 0x3fc4d087a9da4f17 */
77	1.66415301183114927586e-01, /* 0x3fc54d18ba11570a */
78	1.70211925285474380276e-01, /* 0x3fc5c9811e3ec269 */
79	1.74003600935367680469e-01, /* 0x3fc645bfffb3aa73 */
80	1.77790228992676047071e-01, /* 0x3fc6c1d4898933d8 */
81	1.81571711160032150945e-01, /* 0x3fc73dbde8a7d201 */
82	1.85347949995694760705e-01, /* 0x3fc7b97b4bce5b02 */
83	1.89118848926083965578e-01, /* 0x3fc8350be398ebc7 */
84	1.92884312257974643856e-01, /* 0x3fc8b06ee2879c28 */
85	1.96644245190344985064e-01, /* 0x3fc92ba37d050271 */
86	2.00398553825878511514e-01, /* 0x3fc9a6a8e96c8626 */
87	2.04147145182116990236e-01, /* 0x3fca217e601081a5 */
88	2.07889927202262986272e-01, /* 0x3fca9c231b403279 */
89	2.11626808765629753628e-01, /* 0x3fcb1696574d780b */
90	2.15357699697738047551e-01, /* 0x3fcb90d7529260a2 */
91	2.19082510780057748701e-01, /* 0x3fcc0ae54d768466 */
92	2.22801153759394493514e-01, /* 0x3fcc84bf8a742e6d */
93	2.26513541356919617664e-01, /* 0x3fccfe654e1d5395 */
94	2.30219587276843717927e-01, /* 0x3fcd77d5df205736 */
95	2.33919206214733416127e-01, /* 0x3fcdf110864c9d9d */
96	2.37612313865471241892e-01, /* 0x3fce6a148e96ec4d */
97	2.41298826930858800743e-01, /* 0x3fcee2e1451d980c */
98	2.44978663126864143473e-01, /* 0x3fcf5b75f92c80dd */
99	2.48651741190513253521e-01, /* 0x3fcfd3d1fc40dbe4 */
100	2.52317980886427151166e-01, /* 0x3fd025fa510665b5 */
101	2.55977303013005474952e-01, /* 0x3fd061eea03d6290 */
102	2.59629629408257511791e-01, /* 0x3fd09dc597d86362 */
103	2.63274882955282396590e-01, /* 0x3fd0d97ee509acb3 */
104	2.66912987587400396539e-01, /* 0x3fd1151a362431c9 */
105	2.70543868292936529052e-01, /* 0x3fd150973a9ce546 */
106	2.74167451119658789338e-01, /* 0x3fd18bf5a30bf178 */
107	2.77783663178873208022e-01, /* 0x3fd1c735212dd883 */
108	2.81392432649178403370e-01, /* 0x3fd2025567e47c95 */
109	2.84993688779881237938e-01, /* 0x3fd23d562b381041 */
110	2.88587361894077354396e-01, /* 0x3fd278372057ef45 */
111	2.92173383391398755471e-01, /* 0x3fd2b2f7fd9b5fe2 */
112	2.95751685750431536626e-01, /* 0x3fd2ed987a823cfe */
113	2.99322202530807379706e-01, /* 0x3fd328184fb58951 */
114	3.02884868374971361060e-01, /* 0x3fd362773707ebcb */
115	3.06439619009630070945e-01, /* 0x3fd39cb4eb76157b */
116	3.09986391246883430384e-01, /* 0x3fd3d6d129271134 */
117	3.13525122985043869228e-01, /* 0x3fd410cbad6c7d32 */
118	3.17055753209146973237e-01, /* 0x3fd44aa436c2af09 */
119	3.20578221991156986359e-01, /* 0x3fd4845a84d0c21b */
120	3.24092470489871664618e-01, /* 0x3fd4bdee586890e6 */
121	3.27598440950530811477e-01, /* 0x3fd4f75f73869978 */
122	3.31096076704132047386e-01, /* 0x3fd530ad9951cd49 */
123	3.34585322166458920545e-01, /* 0x3fd569d88e1b4cd7 */
124	3.38066122836825466713e-01, /* 0x3fd5a2e0175e0f4e */
125	3.41538425296541714449e-01, /* 0x3fd5dbc3fbbe768d */
126	3.45002177207105076295e-01, /* 0x3fd614840309cfe1 */
127	3.48457327308122011278e-01, /* 0x3fd64d1ff635c1c5 */
128	3.51903825414964732676e-01, /* 0x3fd685979f5fa6fd */
129	3.55341622416168290144e-01, /* 0x3fd6bdeac9cbd76c */
130	3.58770670270572189509e-01, /* 0x3fd6f61941e4def0 */
131	3.62190922004212156882e-01, /* 0x3fd72e22d53aa2a9 */
132	3.65602331706966821034e-01, /* 0x3fd7660752817501 */
133	3.69004854528964421068e-01, /* 0x3fd79dc6899118d1 */
134	3.72398446676754202311e-01, /* 0x3fd7d5604b63b3f7 */
135	3.75783065409248884237e-01, /* 0x3fd80cd46a14b1d0 */
136	3.79158669033441808605e-01, /* 0x3fd84422b8df95d7 */
137	3.82525216899905096124e-01, /* 0x3fd87b4b0c1ebedb */
138	3.85882669398073752109e-01, /* 0x3fd8b24d394a1b25 */
139	3.89230987951320717144e-01, /* 0x3fd8e92916f5cde8 */
140	3.92570135011828580396e-01, /* 0x3fd91fde7cd0c662 */
141	3.95900074055262896078e-01, /* 0x3fd9566d43a34907 */
142	3.99220769575252543149e-01, /* 0x3fd98cd5454d6b18 */
143	4.02532187077682512832e-01, /* 0x3fd9c3165cc58107 */
144	4.05834293074804064450e-01, /* 0x3fd9f93066168001 */
145	4.09127055079168300278e-01, /* 0x3fda2f233e5e530b */
146	4.12410441597387267265e-01, /* 0x3fda64eec3cc23fc */
147	4.15684422123729413467e-01, /* 0x3fda9a92d59e98cf */
148	4.18948967133552840902e-01, /* 0x3fdad00f5422058b */
149	4.22204048076583571270e-01, /* 0x3fdb056420ae9343 */
150	4.25449637370042266227e-01, /* 0x3fdb3a911da65c6c */
151	4.28685708391625730496e-01, /* 0x3fdb6f962e737efb */
152	4.31912235472348193799e-01, /* 0x3fdba473378624a5 */
153	4.35129193889246812521e-01, /* 0x3fdbd9281e528191 */
154	4.38336559857957774877e-01, /* 0x3fdc0db4c94ec9ef */
155	4.41534310525166673322e-01, /* 0x3fdc42191ff11eb6 */
156	4.44722423960939305942e-01, /* 0x3fdc76550aad71f8 */
157	4.47900879150937292206e-01, /* 0x3fdcaa6872f3631b */
158	4.51069655988523443568e-01, /* 0x3fdcde53432c1350 */
159	4.54228735266762495559e-01, /* 0x3fdd121566b7f2ad */
160	4.57378098670320809571e-01, /* 0x3fdd45aec9ec862b */
161	4.60517728767271039558e-01, /* 0x3fdd791f5a1226f4 */
162	4.63647609000806093515e-01, /* 0x3fddac670561bb4f */
163	4.66767723680866497560e-01, /* 0x3fdddf85bb026974 */
164	4.69878057975686880265e-01, /* 0x3fde127b6b0744af */
165	4.72978597903265574054e-01, /* 0x3fde4548066cf51a */
166	4.76069330322761219421e-01, /* 0x3fde77eb7f175a34 */
167	4.79150242925822533735e-01, /* 0x3fdeaa65c7cf28c4 */
168	4.82221324227853687105e-01, /* 0x3fdedcb6d43f8434 */
169	4.85282563559221225002e-01, /* 0x3fdf0ede98f393cf */
170	4.88333951056405479729e-01, /* 0x3fdf40dd0b541417 */
171	4.91375477653101910835e-01, /* 0x3fdf72b221a4e495 */
172	4.94407135071275316562e-01, /* 0x3fdfa45dd3029258 */
173	4.97428915812172245392e-01, /* 0x3fdfd5e0175fdf83 */
174	5.00440813147294050189e-01, /* 0x3fe0039c73c1a40b */
175	5.03442821109336358099e-01, /* 0x3fe01c341e82422d */
176	5.06434934483096732549e-01, /* 0x3fe034b709250488 */
177	5.09417148796356245022e-01, /* 0x3fe04d25314342e5 */
178	5.12389460310737621107e-01, /* 0x3fe0657e94db30cf */
179	5.15351866012543347040e-01, /* 0x3fe07dc3324e9b38 */
180	5.18304363603577900044e-01, /* 0x3fe095f30861a58f */
181	5.21246951491958210312e-01, /* 0x3fe0ae0e1639866c */
182	5.24179628782913242802e-01, /* 0x3fe0c6145b5b43da */
183	5.27102395269579471204e-01, /* 0x3fe0de05d7aa6f7c */
184	5.30015251423793132268e-01, /* 0x3fe0f5e28b67e295 */
185	5.32918198386882147055e-01, /* 0x3fe10daa77307a0d */
186	5.35811237960463593311e-01, /* 0x3fe1255d9bfbd2a8 */
187	5.38694372597246617929e-01, /* 0x3fe13cfbfb1b056e */
188	5.41567605391844897333e-01, /* 0x3fe1548596376469 */
189	5.44430940071603086672e-01, /* 0x3fe16bfa6f5137e1 */
190	5.47284380987436924748e-01, /* 0x3fe1835a88be7c13 */
191	5.50127933104692989907e-01, /* 0x3fe19aa5e5299f99 */
192	5.52961601994028217888e-01, /* 0x3fe1b1dc87904284 */
193	5.55785393822313511514e-01, /* 0x3fe1c8fe7341f64f */
194	5.58599315343562330405e-01, /* 0x3fe1e00babdefeb3 */
195	5.61403373889889367732e-01, /* 0x3fe1f7043557138a */
196	5.64197577362497537656e-01, /* 0x3fe20de813e823b1 */
197	5.66981934222700489912e-01, /* 0x3fe224b74c1d192a */
198	5.69756453482978431069e-01, /* 0x3fe23b71e2cc9e6a */
199	5.72521144698072359525e-01, /* 0x3fe25217dd17e501 */
200	5.75276017956117824426e-01, /* 0x3fe268a940696da6 */
201	5.78021083869819540801e-01, /* 0x3fe27f261273d1b3 */
202	5.80756353567670302596e-01, /* 0x3fe2958e59308e30 */
203	5.83481838685214859730e-01, /* 0x3fe2abe21aded073 */
204	5.86197551356360535557e-01, /* 0x3fe2c2215e024465 */
205	5.88903504204738026395e-01, /* 0x3fe2d84c2961e48b */
206	5.91599710335111383941e-01, /* 0x3fe2ee628406cbca */
207	5.94286183324841177367e-01, /* 0x3fe30464753b090a */
208	5.96962937215401501234e-01, /* 0x3fe31a52048874be */
209	5.99629986503951384336e-01, /* 0x3fe3302b39b78856 */
210	6.02287346134964152178e-01, /* 0x3fe345f01cce37bb */
211	6.04935031491913965951e-01, /* 0x3fe35ba0b60eccce */
212	6.07573058389022313541e-01, /* 0x3fe3713d0df6c503 */
213	6.10201443063065118722e-01, /* 0x3fe386c52d3db11e */
214	6.12820202165241245673e-01, /* 0x3fe39c391cd41719 */
215	6.15429352753104952356e-01, /* 0x3fe3b198e5e2564a */
216	6.18028912282561737612e-01, /* 0x3fe3c6e491c78dc4 */
217	6.20618898599929469384e-01, /* 0x3fe3dc1c2a188504 */
218	6.23199329934065904268e-01, /* 0x3fe3f13fb89e96f4 */
219	6.25770224888563042498e-01, /* 0x3fe4064f47569f48 */
220	6.28331602434009650615e-01, /* 0x3fe41b4ae06fea41 */
221	6.30883481900321840818e-01, /* 0x3fe430328e4b26d5 */
222	6.33425882969144482537e-01, /* 0x3fe445065b795b55 */
223	6.35958825666321447834e-01, /* 0x3fe459c652badc7f */
224	6.38482330354437466191e-01, /* 0x3fe46e727efe4715 */
225	6.40996417725432032775e-01, /* 0x3fe4830aeb5f7bfd */
226	6.43501108793284370968e-01, /* 0x3fe4978fa3269ee1 */
227	6.45996424886771558604e-01, /* 0x3fe4ac00b1c71762 */
228	6.48482387642300484032e-01, /* 0x3fe4c05e22de94e4 */
229	6.50959018996812410762e-01, /* 0x3fe4d4a8023414e8 */
230	6.53426341180761927063e-01, /* 0x3fe4e8de5bb6ec04 */
231	6.55884376711170835605e-01, /* 0x3fe4fd013b7dd17e */
232	6.58333148384755983962e-01, /* 0x3fe51110adc5ed81 */
233	6.60772679271132590273e-01, /* 0x3fe5250cbef1e9fa */
234	6.63202992706093175102e-01, /* 0x3fe538f57b89061e */
235	6.65624112284960989250e-01, /* 0x3fe54ccaf0362c8f */
236	6.68036061856020157990e-01, /* 0x3fe5608d29c70c34 */
237	6.70438865514021320458e-01, /* 0x3fe5743c352b33b9 */
238	6.72832547593763097282e-01, /* 0x3fe587d81f732fba */
239	6.75217132663749830535e-01, /* 0x3fe59b60f5cfab9d */
240	6.77592645519925151909e-01, /* 0x3fe5aed6c5909517 */
241	6.79959111179481823228e-01, /* 0x3fe5c2399c244260 */
242	6.82316554874748071313e-01, /* 0x3fe5d58987169b18 */
243	6.84665002047148862907e-01, /* 0x3fe5e8c6941043cf */
244	6.87004478341244895212e-01, /* 0x3fe5fbf0d0d5cc49 */
245	6.89335009598845749323e-01, /* 0x3fe60f084b46e05e */
246	6.91656621853199760075e-01, /* 0x3fe6220d115d7b8d */
247	6.93969341323259825138e-01, /* 0x3fe634ff312d1f3b */
248	6.96273194408023488045e-01, /* 0x3fe647deb8e20b8f */
249	6.98568207680949848637e-01, /* 0x3fe65aabb6c07b02 */
250	7.00854407884450081312e-01, /* 0x3fe66d663923e086 */
251	7.03131821924453670469e-01, /* 0x3fe6800e4e7e2857 */
252	7.05400476865049030906e-01, /* 0x3fe692a40556fb6a */
253	7.07660399923197958039e-01, /* 0x3fe6a5276c4b0575 */
254	7.09911618463524796141e-01, /* 0x3fe6b798920b3d98 */
255	7.12154159993178659249e-01, /* 0x3fe6c9f7855c3198 */
256	7.14388052156768926793e-01, /* 0x3fe6dc44551553ae */
257	7.16613322731374569052e-01, /* 0x3fe6ee7f10204aef */
258	7.18829999621624415873e-01, /* 0x3fe700a7c5784633 */
259	7.21038110854851588272e-01, /* 0x3fe712be84295198 */
260	7.23237684576317874097e-01, /* 0x3fe724c35b4fae7b */
261	7.25428749044510712274e-01, /* 0x3fe736b65a172dff */
262	7.27611332626510676214e-01, /* 0x3fe748978fba8e0f */
263	7.29785463793429123314e-01, /* 0x3fe75a670b82d8d8 */
264	7.31951171115916565668e-01, /* 0x3fe76c24dcc6c6c0 */
265	7.34108483259739652560e-01, /* 0x3fe77dd112ea22c7 */
266	7.36257428981428097003e-01, /* 0x3fe78f6bbd5d315e */
267	7.38398037123989547936e-01, /* 0x3fe7a0f4eb9c19a2 */
268	7.40530336612692630105e-01, /* 0x3fe7b26cad2e50fd */
269	7.42654356450917929600e-01, /* 0x3fe7c3d311a6092b */
270	7.44770125716075148681e-01, /* 0x3fe7d528289fa093 */
271	7.46877673555587429099e-01, /* 0x3fe7e66c01c114fd */
272	7.48977029182941400620e-01, /* 0x3fe7f79eacb97898 */
273	7.51068221873802288613e-01, /* 0x3fe808c03940694a */
274	7.53151280962194302759e-01, /* 0x3fe819d0b7158a4c */
275	7.55226235836744863583e-01, /* 0x3fe82ad036000005 */
276	7.57293115936992444759e-01, /* 0x3fe83bbec5cdee22 */
277	7.59351950749757920178e-01, /* 0x3fe84c9c7653f7ea */
278	7.61402769805578416573e-01, /* 0x3fe85d69576cc2c5 */
279	7.63445602675201784315e-01, /* 0x3fe86e2578f87ae5 */
280	7.65480478966144461950e-01, /* 0x3fe87ed0eadc5a2a */
281	7.67507428319308182552e-01, /* 0x3fe88f6bbd023118 */
282	7.69526480405658186434e-01, /* 0x3fe89ff5ff57f1f7 */
283	7.71537664922959498526e-01, /* 0x3fe8b06fc1cf3dfe */
284	7.73541011592573490852e-01, /* 0x3fe8c0d9145cf49d */
285	7.75536550156311621507e-01, /* 0x3fe8d13206f8c4ca */
286	7.77524310373347682379e-01, /* 0x3fe8e17aa99cc05d */
287	7.79504322017186335181e-01, /* 0x3fe8f1b30c44f167 */
288	7.81476614872688268854e-01, /* 0x3fe901db3eeef187 */
289	7.83441218733151756304e-01, /* 0x3fe911f35199833b */
290	7.85398163397448278999e-01}; /* 0x3fe921fb54442d18 */
291
292	/* Some constants. */
293
294	static double pi = 3.1415926535897932e+00, /* 0x400921fb54442d18 */
295	piby2 = 1.5707963267948966e+00, /* 0x3ff921fb54442d18 */
296	piby4 = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */
297	three_piby4 = 2.3561944901923449e+00; /* 0x4002d97c7f3321d2 */
298
299	double u, v, vbyu, q, s, uu, r;
300	unsigned int swap_vu, index, xzero, yzero, xnan, ynan, xinf, yinf;
301	int xexp, yexp, diffexp;
302
303	double x = fx;
304	double y = fy;
305
306	/* Find properties of arguments x and y. */
307
308	unsigned long ux, aux, xneg, uy, auy, yneg;
309
310	GET_BITS_DP64(x, ux);
311	GET_BITS_DP64(y, uy);
312	aux = ux & ~SIGNBIT_DP64;
313	auy = uy & ~SIGNBIT_DP64;
314	xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
315	yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
316	xneg = ux & SIGNBIT_DP64;
317	yneg = uy & SIGNBIT_DP64;
318	xzero = (aux == 0);
319	yzero = (auy == 0);
320	xnan = (aux > PINFBITPATT_DP64);
321	ynan = (auy > PINFBITPATT_DP64);
322	xinf = (aux == PINFBITPATT_DP64);
323	yinf = (auy == PINFBITPATT_DP64);
324
325	diffexp = yexp - xexp;
326
327	/* Special cases */
328
329	if (xnan)
330	return x + x;
331	else if (ynan)
332	return y + y;
333	else if (yzero)
334	{ /* Zero y gives +-0 for positive x
335	and +-pi for negative x */
336	if ((_LIB_VERSION == _SVID_) && xzero)
337	/* Sigh - _SVID_ defines atan2(0,0) as a domain error */
338	return retval_errno_edom(x, y);
339	else if (xneg)
340	{
341	if (yneg) return val_with_flags(-pi,AMD_F_INEXACT);
342	else return val_with_flags(pi,AMD_F_INEXACT);
343	}
344	else return y;
345	}
346	else if (xzero)
347	{ /* Zero x gives +- pi/2
348	depending on sign of y */
349	if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT);
350	else val_with_flags(piby2,AMD_F_INEXACT);
351	}
352
353	if (diffexp > 26)
354	{ /* abs(y)/abs(x) > 2^26 => arctan(x/y)
355	is insignificant compared to piby2 */
356	if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT);
357	else return val_with_flags(piby2,AMD_F_INEXACT);
358	}
359	else if (diffexp < -13 && (!xneg))
360	{ /* x positive and dominant over y by a factor of 2^13.
361	In this case atan(y/x) is y/x to machine accuracy. */
362
363	if (diffexp < -150) /* Result underflows */
364	{
365	if (yneg)
366	return val_with_flags(-0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW);
367	else
368	return val_with_flags(0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW);
369	}
370	else
371	{
372	if (diffexp < -126)
373	{
374	/* Result will likely be denormalized */
375	y = scaleDouble_1(y, 100);
376	y /= x;
377	/* Now y is 2^100 times the true result. Scale it back down. */
378	GET_BITS_DP64(y, uy);
379	scaleDownDouble(uy, 100, &uy);
380	PUT_BITS_DP64(uy, y);
381	if ((uy & EXPBITS_DP64) == 0)
382	return val_with_flags(y, AMD_F_INEXACT | AMD_F_UNDERFLOW);
383	else
384	return y;
385	}
386	else
387	return y / x;
388	}
389	}
390	else if (diffexp < -26 && xneg)
391	{ /* abs(x)/abs(y) > 2^26 and x < 0 => arctan(y/x)
392	is insignificant compared to pi */
393	if (yneg) return val_with_flags(-pi,AMD_F_INEXACT);
394	else return val_with_flags(pi,AMD_F_INEXACT);
395	}
396	else if (yinf && xinf)
397	{ /* If abs(x) and abs(y) are both infinity
398	return +-pi/4 or +- 3pi/4 according to
399	signs. */
400	if (xneg)
401	{
402	if (yneg) return val_with_flags(-three_piby4,AMD_F_INEXACT);
403	else return val_with_flags(three_piby4,AMD_F_INEXACT);
404	}
405	else
406	{
407	if (yneg) return val_with_flags(-piby4,AMD_F_INEXACT);
408	else return val_with_flags(piby4,AMD_F_INEXACT);
409	}
410	}
411
412	/* General case: take absolute values of arguments */
413
414	u = x; v = y;
415	if (xneg) u = -x;
416	if (yneg) v = -y;
417
418	/* Swap u and v if necessary to obtain 0 < v < u. Compute v/u. */
419
420	swap_vu = (u < v);
421	if (swap_vu) { uu = u; u = v; v = uu; }
422	vbyu = v/u;
423
424	if (vbyu > 0.0625)
425	{ /* General values of v/u. Use a look-up
426	table and series expansion. */
427
428	index = (int)(256*vbyu + 0.5);
429	r = (256*v-index*u)/(256*u+index*v);
430
431	/* Polynomial approximation to atan(vbyu) */
432
433	s = r*r;
434	q = atan_jby256[index-16] + r - r*s*0.33333333333224095522;
435	}
436	else if (vbyu < 1.e-4)
437	{ /* v/u is small enough that atan(v/u) = v/u */
438	q = vbyu;
439	}
440	else /* vbyu <= 0.0625 */
441	{
442	/* Small values of v/u. Use a series expansion */
443
444	s = vbyu*vbyu;
445	q = vbyu -
446	vbyu*s*(0.33333333333333170500 -
447	s*(0.19999999999393223405 -
448	s*0.14285713561807169030));
449	}
450
451	/* Tidy-up according to which quadrant the arguments lie in */
452
453	if (swap_vu) {q = piby2 - q;}
454	if (xneg) {q = pi - q;}
455	if (yneg) q = - q;
456	return q;
457	}
458
459	weak_alias (__atan2f, atan2f)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_VAL_WITH_FLAGS
#define USE_SCALEFLOAT_1
#define USE_SCALEUPFLOAT128
#include "libm_inlines_amd.h"
#undef USE_SCALEUPFLOAT128
#undef USE_SCALEFLOAT_1
#undef USE_VAL_WITH_FLAGS

/* Single-precision arc tangent, evaluated in double precision.

   fy is widened to double and its magnitude v is reduced to a small
   remainder y using one of
     atan(v) = c + atan((v - t)/(1 + t*v)),   t = 0, 0.5, 1, 1.5
     atan(v) = pi/2 + atan(-1/v)
   with c = atan(t).  atan(y) is then evaluated as y - q, where q is a
   rational function of y, and the sign of the argument is restored.

   A NaN argument is returned as y + y.  Once abs(y) exceeds 2^26 the
   result is +-pi/2, returned through val_with_flags with
   AMD_F_INEXACT. */
float __atanf(float fy)
{

  /* Some constants and split constants. */

  static const double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */

  double c, v, s, q, z;
  unsigned int ynan;

  double y = fy;

  /* Find properties of argument y. */

  unsigned long uy, auy, yneg;

  GET_BITS_DP64(y, uy);
  auy = uy & ~SIGNBIT_DP64;
  yneg = uy & SIGNBIT_DP64;

  v = y;
  if (yneg) v = -y;

  /* Argument reduction to range [-7/16,7/16] */

  if (auy < 0x3fdc000000000000) /* v < 7./16. */
    {
      y = v;
      c = 0.0;
    }
  else if (auy < 0x3fe6000000000000) /* v < 11./16. */
    {
      y = (2*v-1.0)/(2.0+v);
      /* c = arctan(0.5) */
      c = 4.63647609000806093515e-01; /* 0x3fddac670561bb4f */
    }
  else if (auy < 0x3ff3000000000000) /* v < 19./16. */
    {
      y = (v-1.)/(1.0+v);
      /* c = arctan(1.) */
      c = 7.85398163397448278999e-01; /* 0x3fe921fb54442d18 */
    }
  else if (auy < 0x4003800000000000) /* v < 39./16. */
    {
      y = (v-1.5)/(1.0+1.5*v);
      /* c = arctan(1.5) */
      c = 9.82793723247329054082e-01; /* 0x3fef730bd281f69b */
    }
  else
    {

      ynan = (auy > PINFBITPATT_DP64);

      /* y still holds the original argument here, so a NaN is
         propagated (and a signalling NaN raises invalid). */
      if (ynan) return y + y;
      /* Compare bit patterns, like the range tests above:
         0x4190000000000000 is 2^26 as a double.  Comparing the value v
         with the integer 0x4c80000000000000 (the float encoding of 2^26
         placed in the high word, about 5.5e18 as a number) would only
         take this shortcut far above the intended threshold. */
      else if (auy > 0x4190000000000000)
	{ /* abs(y) > 2^26 => arctan(1/y) is
	     insignificant compared to piby2 */
	  if (yneg) return val_with_flags(-piby2, AMD_F_INEXACT);
	  else return val_with_flags(piby2, AMD_F_INEXACT);
	}

      y = -1.0/v;
      /* c = arctan(infinity) */
      c = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
    }

  /* Core approximation: Remez(2,2) on [-7/16,7/16] */

  s = y*y;
  q = y*s*
    (0.296528598819239217902158651186e0 +
     (0.192324546402108583211697690500e0 +
       0.470677934286149214138357545549e-2*s)*s)/
    (0.889585796862432286486651434570e0 +
     (0.111072499995399550138837673349e1 +
       0.299309699959659728404442796915e0*s)*s);

  /* atan(v) = c + atan(y), with atan(y) approximated by y - q. */
  z = c - (q - y);

  if (yneg) z = -z;
  return z;
}

weak_alias (__atanf, atanf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

/* Ceiling of a double, computed on the bit pattern of x: fraction bits
   are cleared, and 1.0 is added when x is positive and some bits were
   actually dropped. */
double __ceil(double x)
{
  unsigned long bits, magnitude, kept, fraction_mask;
  long unbiased_exp, negative;
  double truncated;

  GET_BITS_DP64(x, bits);
  magnitude = bits & (~SIGNBIT_DP64);
  negative = (bits != magnitude);

  if (magnitude >= 0x4340000000000000)
    {
      /* abs(x) is NaN, infinity, or >= 2^53.  A NaN is returned as
         x + x so that a signalling NaN raises invalid; anything else
         is returned unchanged. */
      return (magnitude > 0x7ff0000000000000) ? x + x : x;
    }

  if (magnitude < 0x3ff0000000000000)
    {
      /* abs(x) < 1.0: a zero keeps its sign, other negative values
         give -0.0 and positive values give 1.0. */
      if (magnitude == 0x0000000000000000)
        return x;
      return negative ? -0.0 : 1.0;
    }

  /* 1.0 <= abs(x) < 2^53: clear the bits below the binary point. */
  unbiased_exp = ((bits & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
  fraction_mask = (1L << (EXPSHIFTBITS_DP64 - unbiased_exp)) - 1;
  kept = (bits & ~fraction_mask);
  PUT_BITS_DP64(kept, truncated);

  /* Bits were thrown away and x was positive: step up to the next
     integer. */
  if (!negative && (kept != bits))
    return truncated + 1.0;

  return truncated;
}

weak_alias (__ceil, ceil)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

/* Ceiling of a float, computed on the bit pattern of x: fraction bits
   are cleared, and 1.0F is added when x is positive and some bits were
   actually dropped. */
float __ceilf(float x)
{
  unsigned int bits, magnitude, kept, fraction_mask;
  int unbiased_exp, negative;
  float truncated;

  GET_BITS_SP32(x, bits);
  magnitude = bits & (~SIGNBIT_SP32);
  negative = (bits != magnitude);

  if (magnitude >= 0x4b800000)
    {
      /* abs(x) is NaN, infinity, or >= 2^24.  A NaN is returned as
         x + x so that a signalling NaN raises invalid; anything else
         is returned unchanged. */
      return (magnitude > 0x7f800000) ? x + x : x;
    }

  if (magnitude < 0x3f800000)
    {
      /* abs(x) < 1.0: a zero keeps its sign, other negative values
         give -0.0F and positive values give 1.0F. */
      if (magnitude == 0x00000000)
        return x;
      return negative ? -0.0F : 1.0F;
    }

  /* 1.0 <= abs(x) < 2^24: clear the bits below the binary point. */
  unbiased_exp = ((bits & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
  fraction_mask = (1 << (EXPSHIFTBITS_SP32 - unbiased_exp)) - 1;
  kept = (bits & ~fraction_mask);
  PUT_BITS_SP32(kept, truncated);

  /* Bits were thrown away and x was positive: step up to the next
     integer. */
  if (!negative && (kept != bits))
    return truncated + 1.0F;

  return truncated;
}

weak_alias (__ceilf, ceilf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

/* Returns the absolute value of x with the sign of y.
   NaNs are not considered special; their sign bits are handled
   the same as for any other number. */

double __copysign(double x, double y)
{
  /* Operate on the raw bit patterns with ordinary integer arithmetic.
     Separate asm statements that write to their input-only operands
     give the compiler no reason to keep the values in the same
     registers from one statement to the next, nor to return the
     modified value, so plain C is used instead.  unsigned long long is
     assumed to be the same size as double. */
  union { double value; unsigned long long bits; } magnitude, sign;
  const unsigned long long signbit = 0x8000000000000000ULL;

  magnitude.value = x;
  sign.value = y;

  /* Everything except the sign comes from x; only the sign comes
     from y.  NaNs get no special treatment. */
  magnitude.bits = (magnitude.bits & ~signbit) | (sign.bits & signbit);
  return magnitude.value;
}


weak_alias (__copysign, copysign)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

  /* Returns the absolute value of x with the sign of y.
     NaNs are not considered special; their sign bits are handled
     the same as for any other number. */

float __copysignf(float x, float y)
{
  /* Operate on the raw IEEE-754 bit patterns with ordinary integer
     arithmetic.  Inline asm that writes to input-only operands (and
     declares no outputs) gives the compiler no reason to believe that
     y changed, so the result would depend on register allocation. */
  union { float f; unsigned int u; } mag, sgn;
  const unsigned int signbit = 0x80000000U;

  mag.f = x;
  sgn.f = y;
  /* Magnitude bits come from x, the sign bit comes from y. */
  mag.u = (mag.u & ~signbit) | (sgn.u & signbit);
  return mag.f;
}


weak_alias (__copysignf, copysignf)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_cos.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_cosf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

/* Returns 0 if x is infinite or NaN, otherwise returns 1 */

int __finite(double x)
{
  /* x is infinite or NaN exactly when every bit of its exponent field
     is set.  Testing the bit pattern directly avoids inline asm that
     modified an input-only operand and relied on the condition flags
     surviving between separate asm statements, and it raises no
     floating-point exceptions, even for signalling NaNs. */
  union { double d; unsigned long long u; } bits;
  const unsigned long long expmask = 0x7ff0000000000000ULL;

  bits.d = x;
  return (bits.u & expmask) != expmask;
}

hidden_def (__finite)
weak_alias (__finite, finite)





/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

/* Returns 0 if x is infinite or NaN, otherwise returns 1 */

int __finitef(float x)
{
  /* x is infinite or NaN exactly when every bit of its exponent field
     is set.  Testing the bit pattern directly avoids inline asm that
     modified an input-only operand and relied on the condition flags
     surviving between separate asm statements, and it raises no
     floating-point exceptions, even for signalling NaNs. */
  union { float f; unsigned int u; } bits;
  const unsigned int expmask = 0x7f800000U;

  bits.f = x;
  return (bits.u & expmask) != expmask;
}

hidden_def (__finitef)
weak_alias (__finitef, finitef)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

double __floor(double x)
{
  /* Rounds x towards minus infinity by clearing the fraction bits of
     its bit pattern and, for negative non-integers, stepping one
     further down. */
  unsigned long bits, absbits, truncbits, fracmask;
  long e, negative;
  double result;

  GET_BITS_DP64(x, bits);
  absbits = bits & (~SIGNBIT_DP64);
  negative = (bits != absbits);

  /* x is NaN: x + x raises invalid if it is a signalling NaN */
  if (absbits > 0x7ff0000000000000)
    return x + x;

  /* x is infinity or abs(x) >= 2^53: already integral */
  if (absbits >= 0x4340000000000000)
    return x;

  /* x is +zero or -zero; return the same zero */
  if (absbits == 0x0000000000000000)
    return x;

  /* 0.0 < abs(x) < 1.0 */
  if (absbits < 0x3ff0000000000000)
    return negative ? -1.0 : 0.0;

  result = x;
  e = ((bits & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
  /* Bits below the binary point for this exponent */
  fracmask = (1L << (EXPSHIFTBITS_DP64 - e)) - 1;
  truncbits = (bits & ~fracmask);
  PUT_BITS_DP64(truncbits, result);

  /* Truncation moved a negative value towards zero; step down by one */
  if (negative && (truncbits != bits))
    return result - 1.0;

  return result;
}

weak_alias (__floor, floor)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

float __floorf(float x)
{
  /* Rounds x towards minus infinity by clearing the fraction bits of
     its bit pattern and, for negative non-integers, stepping one
     further down. */
  unsigned int bits, absbits, truncbits, fracmask;
  int e, negative;
  float result;

  GET_BITS_SP32(x, bits);
  absbits = bits & (~SIGNBIT_SP32);
  negative = (bits != absbits);

  /* x is NaN: x + x raises invalid if it is a signalling NaN */
  if (absbits > 0x7f800000)
    return x + x;

  /* x is infinity or abs(x) >= 2^24: already integral */
  if (absbits >= 0x4b800000)
    return x;

  /* x is +zero or -zero; return the same zero */
  if (absbits == 0x00000000)
    return x;

  /* 0.0 < abs(x) < 1.0 */
  if (absbits < 0x3f800000)
    return negative ? -1.0F : 0.0F;

  e = ((bits & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
  /* Bits below the binary point for this exponent */
  fracmask = (1 << (EXPSHIFTBITS_SP32 - e)) - 1;
  truncbits = (bits & ~fracmask);
  PUT_BITS_SP32(truncbits, result);

  /* Truncation moved a negative value towards zero; step down by one */
  if (negative && (bits != truncbits))
    return result - 1.0F;

  return result;
}

weak_alias (__floorf, floorf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_SCALEDOUBLE_1
#define USE_SCALEDOUBLE_2
#include "libm_inlines_amd.h"
#undef USE_SCALEDOUBLE_1
#undef USE_SCALEDOUBLE_2

/* Software fused multiply-add for double: computes a * b + sum while
   carrying the low-order part of the product a * b through the addition.
   Returns the combined result as a double.  The order of the floating-point
   operations below is significant; do not reassociate them. */
double __fma(double a, double b, double sum)
{
  /* Returns a * b + sum with no intermediate loss of precision */

  double ha, ta, hb, tb, z, zz, r, s, az, asum;
  int ua, ub, usum;
  int scaled, expover, expunder, scaleexp;
  unsigned long u;

  /* ua, ub and usum are the unbiased exponent fields of a, b and sum. */
  GET_BITS_DP64(a, u);
  ua = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
  GET_BITS_DP64(b, u);
  ub = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
  GET_BITS_DP64(sum, u);
  usum = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;

  if (ua == EMAX_DP64 + 1 || ub == EMAX_DP64 + 1 || usum == EMAX_DP64 + 1)
    {
      /* One or more of the arguments is NaN or infinity. The
         result will also be NaN or infinity. */
      return a * b + sum;
    }
  else if (ua + ub > usum + 2 * MANTLENGTH_DP64)
    {
      /* sum is negligible compared with the extra-length product a*b */
      return a*b;
    }
  else if (usum > ua + ub + MANTLENGTH_DP64)
    {
      /* The product a*b is negligible compared with sum */
      return sum;
    }

  /* Exponent thresholds beyond which the operands are rescaled before
     the extended-precision arithmetic below. */
  expover = EMAX_DP64 - 2;
  expunder = EMIN_DP64 + MANTLENGTH_DP64;
  scaleexp = 0;


  if (ua + ub > expover || usum > expover)
    {
      /* The result is likely to overflow. Scale down in an attempt
         to avoid unnecessary overflow. The true result may still overflow. */
      scaled = 1;
      scaleexp = expover / 2;
      /* a and b each get half of the scaling, sum gets all of it, so
         a*b and sum stay on a common scale. */
      a = scaleDouble_1(a, -scaleexp);
      b = scaleDouble_1(b, -scaleexp);
      sum = scaleDouble_2(sum, -2*scaleexp);
    }
  else if (ua + ub < expunder)
    {
      /* The product a*b is near underflow; scale up */
      scaled = 1;
      /* NOTE(review): presumably expunder is negative here, making
         -scaleexp a positive (upward) scale; confirm against the
         definition of EMIN_DP64. */
      scaleexp = expunder / 2;
      a = scaleDouble_1(a, -scaleexp);
      b = scaleDouble_1(b, -scaleexp);
      sum = scaleDouble_2(sum, -2*scaleexp);
    }
  else
    scaled = 0;

  /* Split a into ha (head) and ta (tail). Do the same for b. */
  /* The head is a with the low 27 bits of its bit pattern cleared;
     the tail is what remains. */
  ha = a;
  GET_BITS_DP64(ha, u);
  u &= 0xfffffffff8000000;
  PUT_BITS_DP64(u, ha);
  ta = a - ha;
  hb = b;
  GET_BITS_DP64(hb, u);
  u &= 0xfffffffff8000000;
  PUT_BITS_DP64(u, hb);
  tb = b - hb;

  /* Carefully multiply the parts together. z is the most significant
     part of the result, and zz the least significant part */
  z = a * b;
  zz = (((ha * hb - z) + ha * tb) + ta * hb) + ta * tb;

  /* Set az = abs(z), asum = abs(sum) */
  GET_BITS_DP64(z, u);
  u &= ~SIGNBIT_DP64;
  PUT_BITS_DP64(u, az);
  GET_BITS_DP64(sum, u);
  u &= ~SIGNBIT_DP64;
  PUT_BITS_DP64(u, asum);

  /* Carefully add (z,zz) to sum */
  r = z + sum;

  /* s collects what r lost: the operand of larger magnitude is
     subtracted from first, then the product's low part zz is added. */
  if (az > asum)
    s = ((z - r) + sum) + zz;
  else
    s = ((sum - r) + z) + zz;

  /* Undo the earlier scaling (a*b was scaled by 2*scaleexp in total). */
  if (scaled)
    return scaleDouble_1(r + s, 2*scaleexp);
  else
    return r + s;
}

weak_alias (__fma, fma)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_SCALEFLOAT_1
#define USE_SCALEFLOAT_2
#include "libm_inlines_amd.h"
#undef USE_SCALEFLOAT_1
#undef USE_SCALEFLOAT_2

/* Software fused multiply-add for float: computes a * b + sum while
   carrying the low-order part of the product a * b through the addition.
   Returns the combined result as a float.  The order of the floating-point
   operations below is significant; do not reassociate them. */
float __fmaf(float a, float b, float sum)
{
  /* Returns a * b + sum with no intermediate loss of precision */

  float ha, ta, hb, tb, z, zz, r, s, az, asum;
  int ua, ub, usum;
  int scaled, expover, expunder, scaleexp;
  unsigned int u;

  /* ua, ub and usum are the unbiased exponent fields of a, b and sum. */
  GET_BITS_SP32(a, u);
  ua = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
  GET_BITS_SP32(b, u);
  ub = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
  GET_BITS_SP32(sum, u);
  usum = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;

  if (ua == EMAX_SP32 + 1 || ub == EMAX_SP32 + 1 || usum == EMAX_SP32 + 1)
    {
      /* One or more of the arguments is NaN or infinity. The
         result will also be NaN or infinity. */
      return a * b + sum;
    }
  else if (ua + ub > usum + 2 * MANTLENGTH_SP32)
    {
      /* sum is negligible compared with the extra-length product a*b */
      return a*b;
    }
  else if (usum > ua + ub + MANTLENGTH_SP32)
    {
      /* The product a*b is negligible compared with sum */
      return sum;
    }

  /* Exponent thresholds beyond which the operands are rescaled before
     the extended-precision arithmetic below. */
  expover = EMAX_SP32 - 2;
  expunder = EMIN_SP32 + MANTLENGTH_SP32;
  scaleexp = 0;

  if (ua + ub > expover || usum > expover)
    {
      /* The result is likely to overflow. Scale down in an attempt
         to avoid unnecessary overflow. The true result may still overflow. */
      scaled = 1;
      scaleexp = expover / 2;
      /* a and b each get half of the scaling, sum gets all of it, so
         a*b and sum stay on a common scale. */
      a = scaleFloat_1(a, -scaleexp);
      b = scaleFloat_1(b, -scaleexp);
      sum = scaleFloat_2(sum, -2*scaleexp);
    }
  else if (ua + ub < expunder)
    {
      /* The product a*b is near underflow; scale up */
      scaled = 1;
      /* NOTE(review): presumably expunder is negative here, making
         -scaleexp a positive (upward) scale; confirm against the
         definition of EMIN_SP32. */
      scaleexp = expunder / 2;
      a = scaleFloat_1(a, -scaleexp);
      b = scaleFloat_1(b, -scaleexp);
      sum = scaleFloat_2(sum, -2*scaleexp);
    }
  else
    scaled = 0;

  /* Split a into ha (head) and ta (tail). Do the same for b. */
  /* The head is a with the low 12 bits of its bit pattern cleared;
     the tail is what remains. */
  ha = a;
  GET_BITS_SP32(ha, u);
  u &= 0xfffff000;
  PUT_BITS_SP32(u, ha);
  ta = a - ha;
  hb = b;
  GET_BITS_SP32(hb, u);
  u &= 0xfffff000;
  PUT_BITS_SP32(u, hb);
  tb = b - hb;

  /* Carefully multiply the parts together. z is the most significant
     part of the result, and zz the least significant part */
  z = a * b;
  zz = (((ha * hb - z) + ha * tb) + ta * hb) + ta * tb;

  /* Set az = abs(z), asum = abs(sum) */
  GET_BITS_SP32(z, u);
  u &= ~SIGNBIT_SP32;
  PUT_BITS_SP32(u, az);
  GET_BITS_SP32(sum, u);
  u &= ~SIGNBIT_SP32;
  PUT_BITS_SP32(u, asum);

  /* Carefully add (z,zz) to sum */
  r = z + sum;

  /* s collects what r lost: the operand of larger magnitude is
     subtracted from first, then the product's low part zz is added. */
  if (az > asum)
    s = ((z - r) + sum) + zz;
  else
    s = ((sum - r) + z) + zz;

  /* Undo the earlier scaling (a*b was scaled by 2*scaleexp in total). */
  if (scaled)
    return scaleFloat_1(r + s, 2*scaleexp);
  else
    return r + s;
}

weak_alias (__fmaf, fmaf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_INFINITY_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_INFINITY_WITH_FLAGS

double __logb(double x)
{
  /* Returns the unbiased binary exponent of x as a double.
     Zero gives -infinity (with div-by-zero), infinity gives +infinity,
     NaN gives NaN. */
  unsigned long bits;
  long e;

  GET_BITS_DP64(x, bits);
  e = ((bits & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;

  /* x is +/-zero. Return -infinity with div-by-zero flag. */
  if ((bits & ~SIGNBIT_DP64) == 0)
    return -infinity_with_flags(AMD_F_DIVBYZERO);

  if (e > EMAX_DP64)
    {
      /* x is infinity or NaN */
      if ((bits & MANTBITS_DP64) != 0)
        /* x is NaN; x + x raises invalid if it is a signalling NaN */
        return x + x;
      /* x is +/-infinity. Return +infinity with no flags. */
      return infinity_with_flags(0);
    }

  /* x is a normal number */
  if (e >= EMIN_DP64)
    return e;

  /* x is denormalized. */
#ifdef FOLLOW_IEEE754_LOGB
  /* Return the value of the minimum exponent to ensure that
     the relationship between logb and scalb, defined in
     IEEE 754, holds. */
  return EMIN_DP64;
#else
  /* Follow the rule set by IEEE 854 for logb: normalize the mantissa,
     lowering the exponent once per shift. */
  bits &= MANTBITS_DP64;
  for (e = EMIN_DP64; bits < IMPBIT_DP64; bits <<= 1)
    e--;
  return e;
#endif
}

weak_alias (__logb, logb)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_INFINITYF_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_INFINITYF_WITH_FLAGS

float __logbf(float x)
{
  /* Returns the unbiased binary exponent of x as a float.
     Zero gives -infinity (with div-by-zero), infinity gives +infinity,
     NaN gives NaN. */
  unsigned int bits;
  int e;

  GET_BITS_SP32(x, bits);
  e = ((bits & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;

  /* x is +/-zero. Return -infinity with div-by-zero flag. */
  if ((bits & ~SIGNBIT_SP32) == 0)
    return -infinityf_with_flags(AMD_F_DIVBYZERO);

  if (e > EMAX_SP32)
    {
      /* x is infinity or NaN */
      if ((bits & MANTBITS_SP32) != 0)
        /* x is NaN; x + x raises invalid if it is a signalling NaN */
        return x + x;
      /* x is +/-infinity. Return +infinity with no flags. */
      return infinityf_with_flags(0);
    }

  /* x is a normal number */
  if (e >= EMIN_SP32)
    return e;

  /* x is denormalized. */
#ifdef FOLLOW_IEEE754_LOGB
  /* Return the value of the minimum exponent to ensure that
     the relationship between logb and scalb, defined in
     IEEE 754, holds. */
  return EMIN_SP32;
#else
  /* Follow the rule set by IEEE 854 for logb: normalize the mantissa,
     lowering the exponent once per shift. */
  bits &= MANTBITS_SP32;
  for (e = EMIN_SP32; bits < IMPBIT_SP32; bits <<= 1)
    e--;
  return e;
#endif
}

weak_alias (__logbf, logbf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

double __modf(double x, double *iptr)
{
  /* modf splits the argument x into integer and fraction parts,
     each with the same sign as x.  The integer part is stored
     through iptr and the fraction part is returned. */


  long xexp;
  unsigned long ux, ax, mask;

  GET_BITS_DP64(x, ux);
  ax = ux & (~SIGNBIT_DP64);

  if (ax >= 0x4340000000000000)
    {
      /* abs(x) is either NaN, infinity, or >= 2^53 */
      if (ax > 0x7ff0000000000000)
        {
          /* x is NaN */
          *iptr = x;
          return x + x; /* Raise invalid if it is a signalling NaN */
        }
      else
        {
          /* x is infinity or large. Return zero with the sign of x */
          *iptr = x;
          PUT_BITS_DP64(ux & SIGNBIT_DP64, x);
          return x;
        }
    }
  else if (ax < 0x3ff0000000000000)
    {
      /* abs(x) < 1.0. Set iptr to zero with the sign of x
         and return x. */
      PUT_BITS_DP64(ux & SIGNBIT_DP64, *iptr);
      return x;
    }
  else
    {
      xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
      /* Mask out the bits of x that we don't want */
      mask = (1L << (EXPSHIFTBITS_DP64 - xexp)) - 1;
      PUT_BITS_DP64(ux & ~mask, *iptr);
      if ((ux & mask) == 0)
        {
          /* x is already an integer.  x - *iptr would give +0.0 even
             for negative x, so build the zero with the sign of x
             explicitly. */
          PUT_BITS_DP64(ux & SIGNBIT_DP64, x);
          return x;
        }
      return x - *iptr;
    }

}

weak_alias (__modf, modf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

float __modff(float x, float *iptr)
{
  /* modff splits the argument x into integer and fraction parts,
     each with the same sign as x.  The integer part is stored
     through iptr and the fraction part is returned. */

  unsigned int ux, mask;
  int xexp;

  GET_BITS_SP32(x, ux);
  xexp = ((ux & (~SIGNBIT_SP32)) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;

  if (xexp < 0)
    {
      /* abs(x) < 1.0. Set iptr to zero with the sign of x
         and return x. */
      PUT_BITS_SP32(ux & SIGNBIT_SP32, *iptr);
      return x;
    }
  else if (xexp < EXPSHIFTBITS_SP32)
    {
      /* abs(x) lies between 1.0 and 2**EXPSHIFTBITS_SP32 */
      /* Mask out the bits of x that we don't want */
      mask = (1 << (EXPSHIFTBITS_SP32 - xexp)) - 1;
      PUT_BITS_SP32(ux & ~mask, *iptr);
      if ((ux & mask) == 0)
        {
          /* x is already an integer.  x - *iptr would give +0.0 even
             for negative x, so build the zero with the sign of x
             explicitly. */
          PUT_BITS_SP32(ux & SIGNBIT_SP32, x);
          return x;
        }
      return x - *iptr;
    }
  else if ((ux & (~SIGNBIT_SP32)) > 0x7f800000)
    {
      /* x is NaN */
      *iptr = x;
      return x + x; /* Raise invalid if it is a signalling NaN */
    }
  else
    {
      /* x is infinity or large. Set iptr to x and return zero
         with the sign of x. */
      *iptr = x;
      PUT_BITS_SP32(ux & SIGNBIT_SP32, x);
      return x;
    }
}

weak_alias (__modff, modff)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_sin.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/


#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_NAN_WITH_FLAGS
#define USE_VAL_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_NAN_WITH_FLAGS
#undef USE_VAL_WITH_FLAGS

/* sin(x) approximation valid on the interval [-pi/4,pi/4]. */
static inline double sin_piby4(double x, double xx)
{
  /* Evaluates sin(x + xx) for x in [-pi/4,pi/4], where xx is the tail
     of x.

     sin(x) = x * f(w) with w = x*x and
     f(w) = 1 - w/3! + w^2/5! - w^3/7! ...
     coef[] holds a minimax approximation of (f(w) - 1) / w, which
     yields an expansion in even powers of x.

     When the tail xx is non-zero a correction term
     g(x,xx) = (1-x*x/2)*xx is folded in; it approximates
     cos(x)*sin(xx), valid because xx is tiny relative to x. */
  static const double coef[6] = {
    -0.166666666666666646259241729,
    0.833333333333095043065222816e-2,
    -0.19841269836761125688538679e-3,
    0.275573161037288022676895908448e-5,
    -0.25051132068021699772257377197e-7,
    0.159181443044859136852668200e-9
  };
  const double w = x * x;
  const double xw = w * x;
  const double tail = (coef[1] + w * (coef[2] + w * (coef[3] + w * (coef[4] + w * coef[5]))));

  if (xx != 0.0)
    return x - ((w * (0.5 * xx - xw * tail) - xx) - xw * coef[0]);

  return x + xw * (coef[0] + w * tail);
}

/* cos(x) approximation valid on the interval [-pi/4,pi/4]. */
static inline double cos_piby4(double x, double xx)
{
  /* Evaluates cos(x + xx) for x in [-pi/4,pi/4], where xx is the tail
     of x.

     cos(x) = f(w) with w = x*x and
     f(w) = 1 - w/2! + w^2/4! - w^3/6! ...
     coef[] holds a minimax approximation of (f(w) - 1 + w/2) / (w*w),
     which yields an expansion in even powers of x.

     The correction term g(x,xx) = x*xx is subtracted; it approximates
     sin(x)*sin(xx), valid because xx is tiny relative to x. */
  static const double coef[6] = {
    0.41666666666666665390037e-1,
    -0.13888888888887398280412e-2,
    0.248015872987670414957399e-4,
    -0.275573172723441909470836e-6,
    0.208761463822329611076335e-8,
    -0.113826398067944859590880e-10
  };
  const double w = x * x;
  const double half_w = 0.5 * w;
  const double lead = 1.0 - half_w;

  /* (1.0 - lead) - half_w recovers the rounding error of lead. */
  return lead + ((((1.0 - lead) - half_w) - x * xx) + w * w *
                 (coef[0] + w * (coef[1] + w * (coef[2] + w * (coef[3] + w * (coef[4] + w * coef[5]))))));
}

void __sincos(double x, double *s, double *c)
{
  /* Stores sin(x) through s and cos(x) through c.
     Small arguments are handled directly; larger ones are reduced
     into [-pi/4,pi/4] and dispatched on the reduction region. */
  double red, redtail, sinred, cosred, sinval, cosval;
  int quadrant, negative;
  unsigned long bits, absbits;

  GET_BITS_DP64(x, bits);
  absbits = (bits & ~SIGNBIT_DP64);

  if (absbits <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
    {
      if (absbits >= 0x3f20000000000000) /* abs(x) >= 2.0^(-13) */
        {
          *s = sin_piby4(x, 0.0);
          *c = cos_piby4(x, 0.0);
        }
      else if (absbits >= 0x3e40000000000000) /* abs(x) >= 2.0^(-27) */
        {
          *s = x - x*x*x*0.166666666666666666;
          *c = 1.0 - x*x*0.5;
        }
      else if (absbits == 0x0000000000000000)
        {
          *s = x;
          *c = 1.0;
        }
      else
        {
          /* Tiny but non-zero */
          *s = x;
          *c = val_with_flags(1.0, AMD_F_INEXACT);
        }
      return;
    }

  if ((bits & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (bits & MANTBITS_DP64)
        /* x is NaN */
        *s = *c = x + x; /* Raise invalid if it is a signalling NaN */
      else
        /* x is infinity. Return a NaN */
        *s = *c = nan_with_flags(AMD_F_INVALID);
      return;
    }

  negative = (absbits != bits);
  if (negative)
    x = -x;

  /* Reduce abs(x) into range [-pi/4,pi/4] */
  __remainder_piby2(x, &red, &redtail, &quadrant);

  sinred = sin_piby4(red, redtail);
  cosred = cos_piby4(red, redtail);

  /* Values for abs(x), selected by region */
  switch (quadrant)
    {
    default:
    case 0:
      sinval = sinred;
      cosval = cosred;
      break;
    case 1:
      sinval = cosred;
      cosval = -sinred;
      break;
    case 2:
      sinval = -sinred;
      cosval = -cosred;
      break;
    case 3:
      sinval = -cosred;
      cosval = sinred;
      break;
    }

  /* sin is odd, cos is even */
  if (negative)
    sinval = -sinval;

  *s = sinval;
  *c = cosval;
}

double __sin(double x)
{
  /* Returns sin(x).  Small arguments are handled directly; larger
     ones are reduced into [-pi/4,pi/4] and dispatched on the
     reduction region. */
  double red, redtail, val;
  int quadrant, negative;
  unsigned long bits, absbits;

  GET_BITS_DP64(x, bits);
  absbits = (bits & ~SIGNBIT_DP64);

  if (absbits <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
    {
      if (absbits >= 0x3f20000000000000) /* abs(x) >= 2.0^(-13) */
        return sin_piby4(x, 0.0);
      if (absbits >= 0x3e40000000000000) /* abs(x) >= 2.0^(-27) */
        return x - x*x*x*0.166666666666666666;
      if (absbits == 0x0000000000000000)
        return x;
      /* Tiny but non-zero */
      return val_with_flags(x, AMD_F_INEXACT);
    }

  if ((bits & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (bits & MANTBITS_DP64)
        /* x is NaN */
        return x + x; /* Raise invalid if it is a signalling NaN */
      /* x is infinity. Return a NaN */
      return nan_with_flags(AMD_F_INVALID);
    }

  negative = (absbits != bits);
  if (negative)
    x = -x;

  /* Reduce abs(x) into range [-pi/4,pi/4] */
  __remainder_piby2(x, &red, &redtail, &quadrant);

  /* sin(abs(x)), selected by region */
  switch (quadrant)
    {
    default:
    case 0: val = sin_piby4(red, redtail); break;
    case 1: val = cos_piby4(red, redtail); break;
    case 2: val = -sin_piby4(red, redtail); break;
    case 3: val = -cos_piby4(red, redtail); break;
    }

  /* sin is odd */
  return negative ? -val : val;
}

double __cos(double x)
{
  /* Returns cos(x).  Small arguments are handled directly; larger
     ones are reduced into [-pi/4,pi/4] and dispatched on the
     reduction region. */
  double red, redtail;
  int quadrant;
  unsigned long bits, absbits;

  GET_BITS_DP64(x, bits);
  absbits = (bits & ~SIGNBIT_DP64);

  if (absbits <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
    {
      if (absbits >= 0x3f20000000000000) /* abs(x) >= 2.0^(-13) */
        return cos_piby4(x, 0.0);
      if (absbits >= 0x3e40000000000000) /* abs(x) >= 2.0^(-27) */
        return 1.0 - x*x*0.5;
      if (absbits == 0x0000000000000000) /* abs(x) = 0.0 */
        return 1.0;
      /* Tiny but non-zero */
      return val_with_flags(1.0, AMD_F_INEXACT);
    }

  if ((bits & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (bits & MANTBITS_DP64)
        /* x is NaN */
        return x + x; /* Raise invalid if it is a signalling NaN */
      /* x is infinity. Return a NaN */
      return nan_with_flags(AMD_F_INVALID);
    }

  /* cos is even, so only abs(x) matters from here on */
  if (absbits != bits)
    x = -x;

  /* Reduce abs(x) into range [-pi/4,pi/4] */
  __remainder_piby2(x, &red, &redtail, &quadrant);

  if (quadrant == 1)
    return -sin_piby4(red, redtail);
  if (quadrant == 2)
    return -cos_piby4(red, redtail);
  if (quadrant == 3)
    return sin_piby4(red, redtail);

  /* Region 0, and anything unexpected */
  return cos_piby4(red, redtail);
}

weak_alias (__sin, sin)
weak_alias (__cos, cos)
weak_alias (__sincos, sincos)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/


#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_REMAINDER_PIBY2F_INLINE
#define USE_VAL_WITH_FLAGS
#define USE_NAN_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_VAL_WITH_FLAGS
#undef USE_NAN_WITH_FLAGS
#undef USE_REMAINDER_PIBY2F_INLINE

/* sin(x) approximation valid on the interval [-pi/4,pi/4]. */
static inline double sinf_piby4(double x)
{
  /* Evaluates sin(x) for x in [-pi/4,pi/4] to single-precision
     accuracy, working in double.

     sin(x) = x * f(w) with w = x*x and
     f(w) = 1 - w/3! + w^2/5! - w^3/7! ...
     coef[] holds a minimax approximation of (f(w) - 1) / w, which
     yields an expansion in even powers of x. */
  static const double coef[4] = {
    -0.166666666638608441788607926e0,
    0.833333187633086262120839299e-2,
    -0.198400874359527693921333720e-3,
    0.272500015145584081596826911e-5
  };
  const double w = x * x;

  return (x + x * w * (coef[0] + w * (coef[1] + w * (coef[2] + w * coef[3]))));
}

/* cos(x) approximation valid on the interval [-pi/4,pi/4]. */
static inline double cosf_piby4(double x)
{
  /* Evaluates cos(x) for x in [-pi/4,pi/4] to single-precision
     accuracy, working in double.

     cos(x) = f(w) with w = x*x and
     f(w) = 1 - w/2! + w^2/4! - w^3/6! ...
     coef[] holds a minimax approximation of (f(w) - 1 + w/2) / (w*w),
     which yields an expansion in even powers of x. */
  static const double coef[4] = {
    0.41666666664325175238031e-1,
    -0.13888887673175665567647e-2,
    0.24800600878112441958053e-4,
    -0.27301013343179832472841e-6
  };
  const double w = x * x;

  return (1.0 - 0.5 * w + (w * w *
                      (coef[0] + w * (coef[1] + w * (coef[2] + w * coef[3])))));
}


void __sincosf(float x, float *s, float *c)
{
  /* Stores sinf(x) through s and cosf(x) through c.  All arithmetic
     is done in double on dx, the widened argument. */
  double red, dx, sinred, cosred, sinval, cosval;
  int quadrant, negative;
  unsigned long bits, absbits;

  dx = x;

  GET_BITS_DP64(dx, bits);
  absbits = (bits & ~SIGNBIT_DP64);

  if (absbits <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
    {
      if (absbits >= 0x3f80000000000000) /* abs(x) >= 2.0^(-7) */
        {
          *s = sinf_piby4(x);
          *c = cosf_piby4(x);
        }
      else if (absbits >= 0x3f20000000000000) /* abs(x) >= 2.0^(-13) */
        {
          *s = dx - dx*dx*dx*0.166666666666666666;
          *c = 1.0 - dx*dx*0.5;
        }
      else if (absbits == 0x0000000000000000)
        {
          *s = dx;
          *c = 1.0;
        }
      else
        {
          /* Tiny but non-zero */
          *s = val_with_flags(dx, AMD_F_INEXACT);
          *c = val_with_flags(1.0, AMD_F_INEXACT);
        }
      return;
    }

  if ((bits & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (bits & MANTBITS_DP64)
        /* x is NaN */
        *s = *c = dx + dx; /* Raise invalid if it is a signalling NaN */
      else
        /* x is infinity. Return a NaN */
        *s = *c = nan_with_flags(AMD_F_INVALID);
      return;
    }

  negative = (bits >> 63);
  if (negative)
    dx = -dx;

  /* Reduce abs(x) into range [-pi/4,pi/4] */
  __remainder_piby2f_inline(dx, absbits, &red, &quadrant);

  sinred = sinf_piby4(red);
  cosred = cosf_piby4(red);

  /* Values for abs(x), selected by region */
  switch (quadrant)
    {
    default:
    case 0:
      sinval = sinred;
      cosval = cosred;
      break;
    case 1:
      sinval = cosred;
      cosval = -sinred;
      break;
    case 2:
      sinval = -sinred;
      cosval = -cosred;
      break;
    case 3:
      sinval = -cosred;
      cosval = sinred;
      break;
    }

  /* sin is odd, cos is even */
  if (negative)
    sinval = -sinval;

  *s = sinval;
  *c = cosval;
}

float __sinf(float x)
{
  /* Returns sinf(x).  The argument is widened to double (dx) for the
     range tests, the reduction and the polynomial evaluation. */
  double red, dx, val;
  int quadrant, negative;
  unsigned long bits, absbits;

  dx = x;

  GET_BITS_DP64(dx, bits);
  absbits = (bits & ~SIGNBIT_DP64);

  if (absbits <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
    {
      if (absbits >= 0x3f80000000000000) /* abs(x) >= 2.0^(-7) */
        return sinf_piby4(dx);
      if (absbits >= 0x3f20000000000000) /* abs(x) >= 2.0^(-13) */
        return x - x*x*x*0.166666666666666666;
      if (absbits == 0x0000000000000000)
        return x;
      /* Tiny but non-zero */
      return val_with_flags(dx, AMD_F_INEXACT);
    }

  if ((bits & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (bits & MANTBITS_DP64)
        /* x is NaN */
        return x + x; /* Raise invalid if it is a signalling NaN */
      /* x is infinity. Return a NaN */
      return nan_with_flags(AMD_F_INVALID);
    }

  negative = (bits >> 63);
  if (negative)
    dx = -dx;

  /* Reduce abs(x) into range [-pi/4,pi/4] */
  __remainder_piby2f_inline(dx, absbits, &red, &quadrant);

  /* sin(abs(x)), selected by region */
  switch (quadrant)
    {
    default:
    case 0: val = sinf_piby4(red); break;
    case 1: val = cosf_piby4(red); break;
    case 2: val = -sinf_piby4(red); break;
    case 3: val = -cosf_piby4(red); break;
    }

  /* sin is odd */
  return negative ? -val : val;
}

/* Two interchangeable definitions of __cosf follow.  The "#if 1" below
   selects the first (a thin wrapper over __sincosf); the second, a
   direct evaluation, is kept for reference and is not compiled. */
#if 1
/* Computing cosf via sincosf was measured by the original author to be
   much faster, even though sincosf does the same work and more. */
/* Returns the cosine of x by calling __sincosf and discarding the sine. */
float __cosf(float x)
{
  float s, c;
  __sincosf(x, &s, &c);
  return c;
}

#else
/* This is the way cosf should be done, but according to the original
   author it runs half as fast as it ought to. */

/* Returns the cosine of x.  The argument is widened to double (dx) for
   the range tests, the reduction and the polynomial evaluation. */
float __cosf(float x)
{
  double r, dx;
  int region, xneg;

  unsigned long ux, ax;

  dx = x;

  GET_BITS_DP64(dx, ux);
  ax = (ux & ~SIGNBIT_DP64);

  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
    {
      if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */
        {
          if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
            {
              if (ax == 0x0000000000000000)
                return 1.0F;
              else
                return val_with_flags(1.0, AMD_F_INEXACT);
            }
          else
            return 1.0F - x*x*0.5F;
        }
      else
        return cosf_piby4(dx);
    }
  else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (ux & MANTBITS_DP64)
        /* x is NaN */
        return x + x; /* Raise invalid if it is a signalling NaN */
      else
        /* x is infinity. Return a NaN */
        return nan_with_flags(AMD_F_INVALID);
    }

  /* The sign bit of the widened argument */
  xneg = (ux >> 63);

  /* cos is even, so only abs(x) is reduced */
  if (xneg)
    dx = -dx;

  /* Reduce abs(x) into range [-pi/4,pi/4] */
  __remainder_piby2f_inline(dx, ax, &r, &region);

  /* Select the value by reduction region */
  switch (region)
    {
    default:
    case 0: return cosf_piby4(r);
    case 1: return -sinf_piby4(r);
    case 2: return -cosf_piby4(r);
    case 3: return sinf_piby4(r);
    }
}
#endif

/* Make the unprefixed names available for the __-prefixed implementations.
   weak_alias is not defined in this file; presumably it is glibc's macro
   that defines the second name as a weak alias of the first.  */
weak_alias (__sinf, sinf)
weak_alias (__cosf, cosf)
weak_alias (__sincosf, sincosf)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_sinf.c.x86_64-new-libm (+1 lines)
	1	/* Not needed. */




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/


#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_NAN_WITH_FLAGS
#define USE_VAL_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_NAN_WITH_FLAGS
#undef USE_VAL_WITH_FLAGS

/* Kernel for tan: tan(x + xx) approximation valid on the interval
   [-pi/4,pi/4].  The argument is supplied as a leading part x plus a
   trailing correction xx (callers in this file pass either 0.0 or the
   tail produced by the pi/2 argument reduction).
   If recip is true return -1/tan(x + xx) instead. */
static inline double tan_piby4(double x, double xx, int recip)
{
  double r, t1, t2, xl;
  /* 0: no transformation; +1: x was replaced by pi/4 - x;
     -1: x was replaced by pi/4 + x. */
  int transform = 0;
  /* pi/4 split into a leading double and a trailing correction. */
  static const double
     piby4_lead = 7.85398163397448278999e-01, /* 0x3fe921fb54442d18 */
     piby4_tail = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */

  /* In order to maintain relative precision transform using the identity:
     tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
     Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
     After the transformation the whole argument is held in x and the
     tail xx is zeroed. */

  if (x > 0.68)
    {
      transform = 1;
      x = piby4_lead - x;
      xl = piby4_tail - xx;
      x += xl;
      xx = 0.0;
    }
  else if (x < -0.68)
    {
      transform = -1;
      x = piby4_lead + x;
      xl = piby4_tail + xx;
      x += xl;
      xx = 0.0;
    }

  /* Core Remez [2,3] approximation to tan(x+xx) on the
     interval [0,0.68]. */

  /* r is (x+xx)^2 with the xx*xx term dropped.  The result is kept as a
     leading part t1 (= x) and a correction t2. */
  r = x*x + 2.0 * x * xx;
  t1 = x;
  t2 = xx + x*r*
    (0.372379159759792203640806338901e0 +
     (-0.229345080057565662883358588111e-1 +
      0.224044448537022097264602535574e-3*r)*r)/
    (0.111713747927937668539901657944e1 +
     (-0.515658515729031149329237816945e0 +
      (0.260656620398645407524064091208e-1 -
       0.232371494088563558304549252913e-3*r)*r)*r);

  /* Reconstruct tan(x) in the transformed case.  With t the tangent of
     the transformed argument, 1 - 2t/(1+t) equals (1-t)/(1+t) and
     2t/(t-1) - 1 equals (t+1)/(t-1), i.e. the negated reciprocal;
     multiplying by transform restores the sign for the negative side. */

  if (transform)
    {
      double t;
      t = t1 + t2;
      if (recip)
         return transform*(2*t/(t-1) - 1.0);
      else
         return transform*(1.0 - 2*t/(1+t));
    }

  if (recip)
    {
      /* Compute -1.0/(t1 + t2) accurately */
      double trec, trec_top, z1, z2, t;
      unsigned long u;
      t = t1 + t2;
      /* z1 is t with the low 32 bits of its bit pattern cleared;
         z2 is the remainder, so that z1 + z2 represents t1 + t2. */
      GET_BITS_DP64(t, u);
      u &= 0xffffffff00000000;
      PUT_BITS_DP64(u, z1);
      z2 = t2 - (z1 - t1);
      /* trec is the rounded reciprocal; trec_top is trec with the low
         32 bits cleared. */
      trec = -1.0 / t;
      GET_BITS_DP64(trec, u);
      u &= 0xffffffff00000000;
      PUT_BITS_DP64(u, trec_top);
      /* (1 + trec_top*(z1 + z2)) is the residual of trec_top as a
         reciprocal; scaling it by trec gives the correction term. */
      return trec_top + trec * ((1.0 + trec_top * z1) + trec_top * z2);

    }
  else
    return t1 + t2;
}

/* Double-precision tangent.
   Small arguments are handled directly; everything else is reduced with
   __remainder_piby2 and passed to the tan_piby4 kernel.  */
double __tan(double x)
{
  double head, tail, t;
  int quadrant, negative;
  unsigned long ux, ax;

  GET_BITS_DP64(x, ux);
  ax = (ux & ~SIGNBIT_DP64);    /* bit pattern of abs(x) */

  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
    {
      /* No argument reduction is needed in this range. */
      if (ax == 0x0000000000000000)
        return x;
      if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */
        return val_with_flags(x, AMD_F_INEXACT);
      if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
        return x + x*x*x*0.333333333333333333;
      return tan_piby4(x, 0.0, 0);
    }

  if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (ux & MANTBITS_DP64)
        return x + x; /* NaN: raise invalid if it is a signalling NaN */
      /* Infinity: the result is a NaN */
      return nan_with_flags(AMD_F_INVALID);
    }

  /* Tangent is odd: reduce abs(x) and restore the sign afterwards. */
  negative = (ax != ux);
  if (negative)
    x = -x;

  /* Reduce x into range [-pi/4,pi/4] */
  __remainder_piby2(x, &head, &tail, &quadrant);

  /* In odd regions the kernel is asked for -1/tan instead of tan. */
  t = tan_piby4(head, tail, quadrant & 1);
  return negative ? -t : t;
}

/* weak_alias is not defined in this file; presumably it is glibc's macro
   that defines tan as a weak alias of __tan.  */
weak_alias (__tan, tan)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/


#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_REMAINDER_PIBY2F_INLINE
#define USE_VAL_WITH_FLAGS
#define USE_NAN_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_VAL_WITH_FLAGS
#undef USE_NAN_WITH_FLAGS
#undef USE_REMAINDER_PIBY2F_INLINE

/* Kernel for tanf: rational approximation to tan(x), intended for x in
   [-pi/4,pi/4].  When recip is non-zero the value returned is
   -1/tan(x) rather than tan(x).  */
static inline double tanf_piby4(double x, int recip)
{
  /* Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4],
     evaluated as x + x^3 * P(x^2)/Q(x^2). */
  const double xsq = x*x;
  const double tanx = x + x*xsq*
    (0.385296071263995406715129e0 -
     0.172032480471481694693109e-1 * xsq) /
    (0.115588821434688393452299e+1 +
     (-0.51396505478854532132342e0 +
      0.1844239256901656082986661e-1 * xsq) * xsq);

  return recip ? -1.0 / tanx : tanx;
}

/* Single-precision tangent.
   The argument is widened to double, classified by the bit pattern of
   that double, and evaluated with the double-precision kernel
   tanf_piby4; the result is narrowed to float on return.  */
float __tanf(float x)
{
  double reduced, t;
  double dx = x;
  int quadrant, negative;
  unsigned long ux, ax;

  GET_BITS_DP64(dx, ux);
  ax = (ux & ~SIGNBIT_DP64);    /* bit pattern of abs(x) */

  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
    {
      /* No argument reduction is needed in this range. */
      if (ax == 0x0000000000000000)
        return dx;
      if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
        return val_with_flags(dx, AMD_F_INEXACT);
      if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */
        return dx + dx*dx*dx*0.333333333333333333;
      return tanf_piby4(dx, 0);
    }

  if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (ux & MANTBITS_DP64)
        return dx + dx; /* NaN: raise invalid if it is a signalling NaN */
      /* Infinity: the result is a NaN */
      return nan_with_flags(AMD_F_INVALID);
    }

  /* Tangent is odd: reduce abs(x) and restore the sign afterwards. */
  negative = (ux >> 63);
  if (negative)
    x = -x;

  /* Reduce x into range [-pi/4,pi/4] */
  __remainder_piby2f_inline(x, ax, &reduced, &quadrant);

  /* In odd regions the kernel is asked for -1/tan instead of tan. */
  t = tanf_piby4(reduced, quadrant & 1);
  return negative ? -t : t;
}

/* weak_alias is not defined in this file; presumably it is glibc's macro
   that defines tanf as a weak alias of __tanf.  */
weak_alias (__tanf, tanf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

/* Round x toward zero to an integral value by clearing the fraction
   bits of its bit pattern.  */
double __trunc(double x)
{
  double result;
  long e;
  unsigned long ux, ax, fracmask;

  GET_BITS_DP64(x, ux);
  ax = ux & (~SIGNBIT_DP64);    /* bit pattern of abs(x) */

  if (ax >= 0x4340000000000000)
    {
      /* abs(x) is either NaN, infinity, or >= 2^53 */
      if (ax > 0x7ff0000000000000)
        return x + x; /* NaN: raise invalid if it is a signalling NaN */
      return x;       /* already integral (or infinite) */
    }

  if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */
    {
      /* Return zero with the sign of x */
      PUT_BITS_DP64(ux & SIGNBIT_DP64, x);
      return x;
    }

  /* 1.0 <= abs(x) < 2^53: e is the unbiased exponent, so the lowest
     (EXPSHIFTBITS_DP64 - e) mantissa bits hold the fraction. */
  e = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
  fracmask = (1L << (EXPSHIFTBITS_DP64 - e)) - 1;
  result = x;
  PUT_BITS_DP64(ux & ~fracmask, result);
  return result;
}

/* weak_alias is not defined in this file; presumably it is glibc's macro
   that defines trunc as a weak alias of __trunc.  */
weak_alias (__trunc, trunc)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

/* Round x toward zero to an integral value by clearing the fraction
   bits of its bit pattern.  */
float __truncf(float x)
{
  float result;
  int e;
  unsigned int ux, ax, fracmask;

  GET_BITS_SP32(x, ux);
  ax = ux & (~SIGNBIT_SP32);    /* bit pattern of abs(x) */

  if (ax >= 0x4b800000)
    {
      /* abs(x) is either NaN, infinity, or >= 2^24 */
      if (ax > 0x7f800000)
        return x + x; /* NaN: raise invalid if it is a signalling NaN */
      return x;       /* already integral (or infinite) */
    }

  if (ax < 0x3f800000) /* abs(x) < 1.0 */
    {
      /* Return zero with the sign of x */
      PUT_BITS_SP32(ux & SIGNBIT_SP32, x);
      return x;
    }

  /* 1.0 <= abs(x) < 2^24: e is the unbiased exponent, so the lowest
     (EXPSHIFTBITS_SP32 - e) mantissa bits hold the fraction. */
  e = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
  fracmask = (1 << (EXPSHIFTBITS_SP32 - e)) - 1;
  result = x;
  PUT_BITS_SP32(ux & ~fracmask, result);
  return result;
}

/* weak_alias is not defined in this file; presumably it is glibc's macro
   that defines truncf as a weak alias of __truncf.  */
weak_alias (__truncf, truncf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_VAL_WITH_FLAGS
#define USE_NAN_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_NAN_WITH_FLAGS
#undef USE_VAL_WITH_FLAGS

/* Deal with errno for out-of-range argument */
#include "libm_errno_amd.h"
/* Domain-error path for acos: build the exception record, choose the
   return value for the current _LIB_VERSION, report the error and set
   errno to EDOM.  Returns the value acos should hand back.  */
static inline double retval_errno_edom(double x)
{
  struct exception exc;

  exc.type = DOMAIN;
  exc.name = (char *)"acos";
  exc.arg1 = x;
  exc.arg2 = x;

  /* SVID mode returns HUGE; every other mode returns a NaN. */
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = nan_with_flags(AMD_F_INVALID);

  /* POSIX mode sets errno without consulting matherr. */
  if (_LIB_VERSION == _POSIX_)
    {
      __set_errno(EDOM);
      return exc.retval;
    }

  /* Otherwise matherr decides; when it returns zero the error is
     reported here (with a message on stderr in SVID mode). */
  if (!matherr(&exc))
    {
      if (_LIB_VERSION == _SVID_)
        (void)fputs("acos: DOMAIN error\n", stderr);
      __set_errno(EDOM);
    }

  /* matherr may have replaced retval. */
  return exc.retval;
}

double __acos(double x)
{
  /* Computes arccos(x).
     The argument is first reduced by noting that arccos(x) 
     is invalid for abs(x) > 1. For denormal and small 
     arguments arccos(x) = pi/2 to machine accuracy. 
     Remaining argument ranges are handled as follows.
     For abs(x) < 0.5 use 
     arccos(x) = pi/2 - arcsin(x)
     = pi/2 - (x + x^3*R(x^2))
     where R(x^2) is a rational minimax approximation to 
     (arcsin(x) - x)/x^3.
     For abs(x) >= 0.5 let s = sqrt((1-abs(x))/2) and exploit the
     identities:
     arccos(x) = 2*arcsin(s)        for x > 0
     arccos(x) = pi - 2*arcsin(s)   for x < 0
     together with the above rational approximation, and 
     reconstruct the terms carefully.
  */

  /* Some constants and split constants. */

  static const double
    pi         = 3.1415926535897933e+00, /* 0x400921fb54442d18 */ 
    piby2      = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
    piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
    piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */

  double u, y, s=0.0, r;
  int xexp, xnan, transform=0;

  /* Decompose x: aux is the bit pattern of abs(x), xneg is non-zero for
     a negative sign bit, xexp is the unbiased exponent. */
  unsigned long ux, aux, xneg;
  GET_BITS_DP64(x, ux);
  aux = ux & ~SIGNBIT_DP64;
  xneg = (ux & SIGNBIT_DP64);
  xnan = (aux > PINFBITPATT_DP64);
  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;

  /* Special cases */

  if (xexp < -56)
    { /* y small enough that arccos(x) = pi/2 */
      return val_with_flags(piby2, AMD_F_INEXACT);
    }
  else if (xnan) return x + x;
  else if (xexp >= 0) 
    { /* abs(x) >= 1.0 (this also catches infinities) */
      if (x == 1.0) return 0.0;
      else if (x == -1.0) return val_with_flags(pi, AMD_F_INEXACT);
      else return retval_errno_edom(x);
    }

  /* y = abs(x) */
  if (xneg) y = -x;
  else y = x;

  transform = (xexp >= -1); /* abs(x) >= 0.5 */

  if (transform)
    { /* Transform y into the range [0,0.5) */
      transform = 1;
      r = 0.5*(1-y);
      /* Hammer sqrt instruction */
      asm volatile ("sqrtsd %1, %0" : "=x" (s) : "x" (r));
      y = s;
    }
  else
    r = y*y;

  /* Use a rational approximation for [0.0, 0.5] */

  /* In both cases r = y*y here, and u = r*R(r), so that
     arcsin(y) = y + y*u. */
  u = r*(0.227485835556935010735943483075 + 
         (-0.445017216867635649900123110649 +
          (0.275558175256937652532686256258 + 
           (-0.0549989809235685841612020091328 +
            (0.00109242697235074662306043804220 + 
             0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/
    (1.36491501334161032038194214209 +
     (-3.28431505720958658909889444194 + 
      (2.76568859157270989520376345954 + 
       (-0.943639137032492685763471240072 +
	0.105869422087204370341222318533*r)*r)*r)*r);

  if (transform) 
    { /* Reconstruct acos carefully in transformed region */
      /* Negative x: pi - 2*arcsin(s); piby2_tail (doubled by the factor
         of 2) supplies the low-order part of pi. */
      if (xneg) return pi - 2*(s+(y*u - piby2_tail));
      else
	{
	  /* Positive x: 2*arcsin(s).  s is split into s1 (s with the low
	     32 bits of its bit pattern cleared) plus a correction c
	     computed from r - s1*s1, so that s1 + c stands in for
	     sqrt(r). */
	  double c, s1;
	  unsigned long us;
	  GET_BITS_DP64(s, us);
	  PUT_BITS_DP64(0xffffffff00000000 & us, s1);
	  c = (r-s1*s1)/(s+s1);
          return 2*s1 + (2*c+2*y*u);
	}
    }
  else
    /* abs(x) < 0.5: pi/2 - (x + x*u), with pi/2 as head + tail. */
    return piby2_head - (x - (piby2_tail - x*u));
}

/* weak_alias is not defined in this file; presumably it is glibc's macro
   that defines acos as a weak alias of __acos.  */
weak_alias (__acos, acos)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_VALF_WITH_FLAGS
#define USE_NANF_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_NANF_WITH_FLAGS
#undef USE_VALF_WITH_FLAGS

/* Deal with errno for out-of-range argument */
#include "libm_errno_amd.h"
/* Domain-error path for acosf: build the exception record, choose the
   return value for the current _LIB_VERSION, report the error and set
   errno to EDOM.  Returns the value acosf should hand back.  */
static inline float retval_errno_edom(float x)
{
  struct exception exc;

  exc.type = DOMAIN;
  exc.name = (char *)"acosf";
  exc.arg1 = (double)x;
  exc.arg2 = (double)x;

  /* SVID mode returns HUGE; every other mode returns a NaN. */
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = nanf_with_flags(AMD_F_INVALID);

  /* POSIX mode sets errno without consulting matherr. */
  if (_LIB_VERSION == _POSIX_)
    {
      __set_errno(EDOM);
      return exc.retval;
    }

  /* Otherwise matherr decides; when it returns zero the error is
     reported here (with a message on stderr in SVID mode). */
  if (!matherr(&exc))
    {
      if (_LIB_VERSION == _SVID_)
        (void)fputs("acosf: DOMAIN error\n", stderr);
      __set_errno(EDOM);
    }

  /* matherr may have replaced retval. */
  return exc.retval;
}

float __acosf(float x)
{
  /* Computes arccos(x).
     The argument is first reduced by noting that arccos(x)
     is invalid for abs(x) > 1. For denormal and small
     arguments arccos(x) = pi/2 to machine accuracy.
     Remaining argument ranges are handled as follows.
     For abs(x) < 0.5 use
     arccos(x) = pi/2 - arcsin(x)
     = pi/2 - (x + x^3*R(x^2))
     where R(x^2) is a rational minimax approximation to
     (arcsin(x) - x)/x^3.
     For abs(x) >= 0.5 let s = sqrt((1-abs(x))/2) and exploit the
     identities:
     arccos(x) = 2*arcsin(s)        for x > 0
     arccos(x) = pi - 2*arcsin(s)   for x < 0
     together with the above rational approximation, and
     reconstruct the terms carefully.
  */

  /* Some constants and split constants. */

  static const float
    piby2      = 1.5707963705e+00F; /* 0x3fc90fdb */
  /* The reconstruction constants are kept in double precision. */
  static const double
    pi         = 3.1415926535897933e+00, /* 0x400921fb54442d18 */
    piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
    piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */

  float u, y, s = 0.0F, r;
  int xexp, xnan, transform = 0;

  unsigned int ux, aux, xneg;

  /* For some reason using this:
        GET_BITS_SP32(x, ux);
     instead of the following line makes acosf run like a snail on a Hammer.
     NOTE(review): reading a float through an unsigned int pointer is not
     permitted by the C aliasing rules; confirm the build options make
     this safe before relying on it. */
  ux = (*((unsigned int *)&x));

  /* Decompose x: aux is the bit pattern of abs(x), xneg is non-zero for
     a negative sign bit, xexp is the unbiased exponent. */
  aux = ux & ~SIGNBIT_SP32;
  xneg = (ux & SIGNBIT_SP32);
  xnan = (aux > PINFBITPATT_SP32);
  xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;

  /* Special cases */

  if (xexp < -26)
    /* y small enough that arccos(x) = pi/2 */
    return valf_with_flags(piby2, AMD_F_INEXACT);
  else if (xnan) return x + x;
  else if (xexp >= 0)
    { /* abs(x) >= 1.0 (this also catches infinities) */
      if (x == 1.0F) return 0.0F;
      else if (x == -1.0F) return valf_with_flags(pi, AMD_F_INEXACT);
      else return retval_errno_edom(x);
    }

  /* y = abs(x) */
  if (xneg) y = -x;
  else y = x;

  transform = (xexp >= -1); /* abs(x) >= 0.5 */

  if (transform)
    { /* Transform y into the range [0,0.5) */
      transform = 1;
      r = 0.5F*(1-y);
      /* Hammer sqrt instruction */
      asm volatile ("sqrtss %1, %0" : "=x" (s) : "x" (r));
      y = s;
    }
  else
    r = y*y;

  /* Use a rational approximation for [0.0, 0.5] */

  /* In both cases r = y*y here, and u = r*R(r), so that
     arcsin(y) = y + y*u. */
  u=r*(0.184161606965100694821398249421F +
       (-0.0565298683201845211985026327361F +
	(-0.0133819288943925804214011424456F -
	 0.00396137437848476485201154797087F*r)*r)*r)/
    (1.10496961524520294485512696706F -
     0.836411276854206731913362287293F*r);

  if (transform)
    {
      /* Reconstruct acos carefully in transformed region */
      if (xneg)
        /* Negative x: pi - 2*arcsin(s), evaluated in double because pi
           and piby2_tail are doubles. */
        return pi - 2.0F*(s+(y*u - piby2_tail));
      else
	{
	  /* Positive x: 2*arcsin(s).  s is split into s1 (s with the low
	     16 bits of its bit pattern cleared) plus a correction c
	     computed from r - s1*s1. */
	  float c, s1;
	  unsigned int us;
	  GET_BITS_SP32(s, us);
	  PUT_BITS_SP32(0xffff0000 & us, s1);
	  c = (r-s1*s1)/(s+s1);
          return 2.0F*s1 + (2.0F*c+2.0F*y*u);
	}
    }
  else
    /* abs(x) < 0.5: pi/2 - (x + x*u), with pi/2 as head + tail. */
    return piby2_head - (x - (piby2_tail - x*u));
}

/* weak_alias is not defined in this file; presumably it is glibc's macro
   that defines acosf as a weak alias of __acosf.  */
weak_alias (__acosf, acosf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_VAL_WITH_FLAGS
#define USE_NAN_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_NAN_WITH_FLAGS
#undef USE_VAL_WITH_FLAGS

/* Deal with errno for out-of-range argument */
#include "libm_errno_amd.h"
/* Domain-error path for asin: build the exception record, choose the
   return value for the current _LIB_VERSION, report the error and set
   errno to EDOM.  Returns the value asin should hand back.  */
static inline double retval_errno_edom(double x)
{
  struct exception exc;

  exc.type = DOMAIN;
  exc.name = (char *)"asin";
  exc.arg1 = x;
  exc.arg2 = x;

  /* SVID mode returns HUGE; every other mode returns a NaN. */
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = nan_with_flags(AMD_F_INVALID);

  /* POSIX mode sets errno without consulting matherr. */
  if (_LIB_VERSION == _POSIX_)
    {
      __set_errno(EDOM);
      return exc.retval;
    }

  /* Otherwise matherr decides; when it returns zero the error is
     reported here (with a message on stderr in SVID mode). */
  if (!matherr(&exc))
    {
      if (_LIB_VERSION == _SVID_)
        (void)fputs("asin: DOMAIN error\n", stderr);
      __set_errno(EDOM);
    }

  /* matherr may have replaced retval. */
  return exc.retval;
}

double __asin(double x)
{
  /* Computes arcsin(x).
     The argument is first reduced by noting that arcsin(x)
     is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x).
     A zero argument is returned unchanged.  For denormal and small
     non-zero arguments arcsin(x) = x to machine accuracy.
     Remaining argument ranges are handled as follows.
     For abs(x) < 0.5 use
     arcsin(x) = x + x^3*R(x^2)
     where R(x^2) is a rational minimax approximation to
     (arcsin(x) - x)/x^3.
     For abs(x) >= 0.5 exploit the identity:
      arcsin(x) = pi/2 - 2*arcsin(sqrt((1-x)/2))
     together with the above rational approximation, and
     reconstruct the terms carefully.
    */

  /* Some constants and split constants.  hpiby2_head is the leading
     part of pi/4; it is used twice in the reconstruction so that the
     two uses add up to the leading part of pi/2.  */

  static const double
    piby2_tail  = 6.1232339957367660e-17, /* 0x3c91a62633145c07 */
    hpiby2_head = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */
    piby2       = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */
  double u, v, y, s=0.0, r;
  int xexp, xnan, transform=0;

  /* Decompose x: aux is the bit pattern of abs(x), xneg is non-zero for
     a negative sign bit, xexp is the unbiased exponent. */
  unsigned long ux, aux, xneg;
  GET_BITS_DP64(x, ux);
  aux = ux & ~SIGNBIT_DP64;
  xneg = (ux & SIGNBIT_DP64);
  xnan = (aux > PINFBITPATT_DP64);
  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;

  /* Special cases */

  if (aux == 0)
    {
      /* arcsin(+/-0) is exactly +/-0: return it directly rather than
         through the inexact path below, as __sinf and __tan do. */
      return x;
    }
  else if (xexp < -28)
    { /* y small enough that arcsin(x) = x */
      return val_with_flags(x, AMD_F_INEXACT);
    }
  else if (xnan) return x + x;
  else if (xexp >= 0)
    { /* abs(x) >= 1.0 (this also catches infinities) */
      if (x == 1.0) return val_with_flags(piby2, AMD_F_INEXACT);
      else if (x == -1.0) return val_with_flags(-piby2, AMD_F_INEXACT);
      else return retval_errno_edom(x);
    }

  /* y = abs(x); the sign is restored at the end. */
  if (xneg) y = -x;
  else y = x;

  transform = (xexp >= -1); /* abs(x) >= 0.5 */

  if (transform)
    { /* Transform y into the range [0,0.5) */
      r = 0.5*(1-y);
      /* Hammer sqrt instruction */
      asm volatile ("sqrtsd %1, %0" : "=x" (s) : "x" (r));
      y = s;
    }
  else
    {
      r = y*y;
    }

  /* Use a rational approximation for [0.0, 0.5] */

  /* In both cases r = y*y here, and u = r*R(r), so that
     arcsin(y) = y + y*u. */
  u = r*(0.227485835556935010735943483075 +
         (-0.445017216867635649900123110649 +
          (0.275558175256937652532686256258 +
           (-0.0549989809235685841612020091328 +
            (0.00109242697235074662306043804220 +
             0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/
    (1.36491501334161032038194214209 +
     (-3.28431505720958658909889444194 +
      (2.76568859157270989520376345954 +
       (-0.943639137032492685763471240072 +
        0.105869422087204370341222318533*r)*r)*r)*r);

  if (transform)
    { /* Reconstruct asin carefully in transformed region */
      /* s is split into s1 (s with the low 32 bits of its bit pattern
         cleared) plus a correction c computed from r - s1*s1.  The sum
         below is pi/2 - 2*(s1 + c + s*u), arranged so that the large
         terms cancel first. */
      double c, s1, p, q;
      unsigned long us;
      GET_BITS_DP64(s, us);
      PUT_BITS_DP64(0xffffffff00000000 & us, s1);
      c = (r-s1*s1)/(s+s1);
      p = 2*s*u-(piby2_tail-2*c);
      q = hpiby2_head-2*s1;
      v = hpiby2_head-(p-q);
    }
  else
    {
      v = y + y*u;
    }

  if (xneg) return -v;
  else return v;
}

/* weak_alias is not defined in this file; presumably it is glibc's macro
   that defines asin as a weak alias of __asin.  */
weak_alias (__asin, asin)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_VALF_WITH_FLAGS
#define USE_NANF_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_NANF_WITH_FLAGS
#undef USE_VALF_WITH_FLAGS

/* Deal with errno for out-of-range argument */
#include "libm_errno_amd.h"
/* Domain-error path for asinf: build the exception record, choose the
   return value for the current _LIB_VERSION, report the error and set
   errno to EDOM.  Returns the value asinf should hand back.  */
static inline float retval_errno_edom(float x)
{
  struct exception exc;

  exc.type = DOMAIN;
  exc.name = (char *)"asinf";
  exc.arg1 = (double)x;
  exc.arg2 = (double)x;

  /* SVID mode returns HUGE; every other mode returns a NaN. */
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = nanf_with_flags(AMD_F_INVALID);

  /* POSIX mode sets errno without consulting matherr. */
  if (_LIB_VERSION == _POSIX_)
    {
      __set_errno(EDOM);
      return exc.retval;
    }

  /* Otherwise matherr decides; when it returns zero the error is
     reported here (with a message on stderr in SVID mode). */
  if (!matherr(&exc))
    {
      if (_LIB_VERSION == _SVID_)
        (void)fputs("asinf: DOMAIN error\n", stderr);
      __set_errno(EDOM);
    }

  /* matherr may have replaced retval. */
  return exc.retval;
}

float __asinf(float x)
{
  /* Computes arcsin(x).
     The argument is first reduced by noting that arcsin(x)
     is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x).
     A zero argument is returned unchanged.  For denormal and small
     non-zero arguments arcsin(x) = x to machine accuracy.
     Remaining argument ranges are handled as follows.
     For abs(x) < 0.5 use
     arcsin(x) = x + x^3*R(x^2)
     where R(x^2) is a rational minimax approximation to
     (arcsin(x) - x)/x^3.
     For abs(x) >= 0.5 exploit the identity:
      arcsin(x) = pi/2 - 2*arcsin(sqrt((1-x)/2))
     together with the above rational approximation, and
     reconstruct the terms carefully.
    */

  /* Some constants and split constants.  hpiby2_head is the leading
     part of pi/4; it is used twice in the reconstruction so that the
     two uses add up to the leading part of pi/2.  */

  static const float
    piby2_tail  = 7.5497894159e-08F, /* 0x33a22168 */
    hpiby2_head = 7.8539812565e-01F, /* 0x3f490fda */
    piby2       = 1.5707963705e+00F; /* 0x3fc90fdb */
  float u, v, y, s = 0.0F, r;
  int xexp, xnan, transform = 0;

  /* Decompose x: aux is the bit pattern of abs(x), xneg is non-zero for
     a negative sign bit, xexp is the unbiased exponent. */
  unsigned int ux, aux, xneg;
  GET_BITS_SP32(x, ux);
  aux = ux & ~SIGNBIT_SP32;
  xneg = (ux & SIGNBIT_SP32);
  xnan = (aux > PINFBITPATT_SP32);
  xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;

  /* Special cases */

  if (aux == 0)
    /* arcsin(+/-0) is exactly +/-0: return it directly rather than
       through the inexact path below, as __sinf and __tanf do. */
    return x;
  else if (xexp < -14)
    /* y small enough that arcsin(x) = x */
    return valf_with_flags(x, AMD_F_INEXACT);
  else if (xnan) return x + x;
  else if (xexp >= 0)
    {
      /* abs(x) >= 1.0 (this also catches infinities) */
      if (x == 1.0F) return valf_with_flags(piby2, AMD_F_INEXACT);
      else if (x == -1.0F) return valf_with_flags(-piby2, AMD_F_INEXACT);
      else return retval_errno_edom(x);
    }

  /* y = abs(x); the sign is restored at the end. */
  if (xneg) y = -x;
  else y = x;

  transform = (xexp >= -1); /* abs(x) >= 0.5 */

  if (transform)
    { /* Transform y into the range [0,0.5) */
      r = 0.5F*(1-y);
      /* Hammer sqrt instruction */
      asm volatile ("sqrtss %1, %0" : "=x" (s) : "x" (r));
      y = s;
    }
  else
    r = y*y;

  /* Use a rational approximation for [0.0, 0.5] */

  /* In both cases r = y*y here, and u = r*R(r), so that
     arcsin(y) = y + y*u. */
  u=r*(0.184161606965100694821398249421F +
       (-0.0565298683201845211985026327361F +
        (-0.0133819288943925804214011424456F -
         0.00396137437848476485201154797087F*r)*r)*r)/
    (1.10496961524520294485512696706F -
     0.836411276854206731913362287293F*r);

  if (transform)
    {
      /* Reconstruct asin carefully in transformed region.
         s is split into s1 (s with the low 16 bits of its bit pattern
         cleared) plus a correction c computed from r - s1*s1.  The sum
         below is pi/2 - 2*(s1 + c + s*u), arranged so that the large
         terms cancel first. */
      float c, s1, p, q;
      unsigned int us;
      GET_BITS_SP32(s, us);
      PUT_BITS_SP32(0xffff0000 & us, s1);
      c = (r-s1*s1)/(s+s1);
      p = 2.0F*s*u-(piby2_tail-2.0F*c);
      q = hpiby2_head-2*s1;
      v = hpiby2_head-(p-q);
    }
  else
    v = y + y*u;

  if (xneg) return -v;
  else return v;
}

/* weak_alias is not defined in this file; presumably it is glibc's macro
   that defines asinf as a weak alias of __asinf.  */
weak_alias (__asinf, asinf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_SPLITEXP
#define USE_SCALEDOUBLE_1
#define USE_SCALEDOUBLE_2
#define USE_ZERO_WITH_FLAGS
#define USE_INFINITY_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_ZERO_WITH_FLAGS
#undef USE_SPLITEXP
#undef USE_SCALEDOUBLE_1
#undef USE_SCALEDOUBLE_2
#undef USE_INFINITY_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
/* Overflow path for exp: build the exception record, choose the return
   value for the current _LIB_VERSION and set errno to ERANGE unless
   matherr handles the error.  Returns the value exp should hand back.  */
static inline double retval_errno_erange_overflow(double x)
{
  struct exception exc;

  exc.type = OVERFLOW;
  exc.name = (char *)"exp";
  exc.arg1 = x;
  exc.arg2 = x;

  /* SVID mode returns HUGE; every other mode returns infinity. */
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);

  /* POSIX mode sets errno without consulting matherr; otherwise errno
     is set only when matherr returns zero. */
  if (_LIB_VERSION == _POSIX_ || !matherr(&exc))
    __set_errno(ERANGE);

  /* matherr may have replaced retval. */
  return exc.retval;
}

/* Underflow path for exp: build the exception record with a zero
   result and set errno to ERANGE unless matherr handles the error.
   Returns the value exp should hand back.  */
static inline double retval_errno_erange_underflow(double x)
{
  struct exception exc;

  exc.type = UNDERFLOW;
  exc.name = (char *)"exp";
  exc.arg1 = x;
  exc.arg2 = x;
  exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);

  /* POSIX mode sets errno without consulting matherr; otherwise errno
     is set only when matherr returns zero. */
  if (_LIB_VERSION == _POSIX_ || !matherr(&exc))
    __set_errno(ERANGE);

  /* matherr may have replaced retval. */
  return exc.retval;
}

double __exp(double x)
{
  /*
    Computation of exp(x), the natural exponential of x.

    Outside the special cases the result is assembled as
    exp(x) = 2**m * (z1 + z2), in three steps:

    1. Reduce the argument to x = n * log2/32 + remainder, where
       n = x * 32/log2 rounded to the nearest integer and
       |remainder| <= log2/64.
    2. Approximate exp(r1 + r2) - 1, where r1 and r2 are the leading
       and trailing parts of the remainder.
    3. Reconstruct exp(x) = 2**m * (z1 + z2).

    Steps 1-3 are carried out by splitexp; the final scaling is done by
    scaleDouble_1 or scaleDouble_2 depending on m.
  */

  static const double
       max_exp_arg =  7.09782712893383973096e+02, /* 0x40862e42fefa39ef */
       min_exp_arg = -7.45133219101941108420e+02, /* 0xc0874910d52d3051 */
 thirtytwo_by_log2 = 4.61662413084468283841e+01,  /* 0x40471547652b82fe */
   log2_by_32_lead = 2.16608493356034159660e-02,  /* 0x3f962e42fe000000 */
  log2_by_32_trail = 5.68948749532545630390e-11;  /* 0x3dcf473de6af278e */

  double head, tail;
  int m;
  unsigned long ux, ax;

  GET_BITS_DP64(x, ux);
  ax = ux & (~SIGNBIT_DP64);    /* bit pattern of abs(x) */

  if (ax >= 0x7ff0000000000000)
    {
      /* x is either NaN or infinity */
      if (ux & MANTBITS_DP64)
        return x + x; /* NaN: raise invalid if it is a signalling NaN */
      if (ux & SIGNBIT_DP64)
        return 0.0;   /* negative infinity; 0.0 with no flags */
      return x;       /* positive infinity */
    }

  if (ax >= 0x40862e42fefa39ef) /* abs(x) >= 709.78... */
    {
      if (x > max_exp_arg)
        /* Return +infinity with overflow flag */
        return retval_errno_erange_overflow(x);
      if (x < min_exp_arg)
        /* x is negative. Return +zero with underflow and inexact flags */
        return retval_errno_erange_underflow(x);
      /* Otherwise the result is still representable; carry on. */
    }

  /* Handle small arguments separately */
  if (ax < 0x3c00000000000000)   /* abs(x) < 2^(-63) */
    return 1.0 + x; /* Raises inexact if x is non-zero */

  if (ax < 0x3fb0000000000000)   /* abs(x) < 1/16 */
    {
      /* Taylor series through x^10/10!, in Horner form. */
      return ((((((((((
                       1.0/3628800)*x+
                      1.0/362880)*x+
                     1.0/40320)*x+
                    1.0/5040)*x+
                   1.0/720)*x+
                  1.0/120)*x+
                 1.0/24)*x+
                1.0/6)*x+
               1.0/2)*x+
              1.0)*x + 1.0;
    }

  /* Find m, head and tail such that exp(x) = 2**m * (head + tail) */
  splitexp(x, 1.0, thirtytwo_by_log2, log2_by_32_lead, log2_by_32_trail,
           &m, &head, &tail);

  /* Scale (head + tail) by 2.0**m */
  if (m >= EMIN_DP64 && m <= EMAX_DP64)
    return scaleDouble_1((head+tail),m);
  return scaleDouble_2((head+tail),m);
}


/* weak_alias is not defined in this file; presumably it is glibc's macro
   that defines the second name as a weak alias of the first, so both
   __ieee754_exp and exp refer to __exp.  */
weak_alias (__exp, __ieee754_exp)
weak_alias (__exp, exp)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_SPLITEXP
#define USE_SCALEDOUBLE_1
#define USE_SCALEDOUBLE_2
#define USE_ZERO_WITH_FLAGS
#define USE_INFINITY_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_SPLITEXP
#undef USE_SCALEDOUBLE_1
#undef USE_SCALEDOUBLE_2
#undef USE_ZERO_WITH_FLAGS
#undef USE_INFINITY_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
/* Overflow path for exp10: build the exception record, choose the
   return value for the current _LIB_VERSION and set errno to ERANGE
   unless matherr handles the error.  Returns the value exp10 should
   hand back.  */
static inline double retval_errno_erange_overflow(double x)
{
  struct exception exc;

  exc.type = OVERFLOW;
  exc.name = (char *)"exp10";
  exc.arg1 = x;
  exc.arg2 = x;

  /* SVID mode returns HUGE; every other mode returns infinity. */
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);

  /* POSIX mode sets errno without consulting matherr; otherwise errno
     is set only when matherr returns zero. */
  if (_LIB_VERSION == _POSIX_ || !matherr(&exc))
    __set_errno(ERANGE);

  /* matherr may have replaced retval. */
  return exc.retval;
}

/* Underflow path for exp10: build the exception record with a zero
   result and set errno to ERANGE unless matherr handles the error.
   Returns the value exp10 should hand back.  */
static inline double retval_errno_erange_underflow(double x)
{
  struct exception exc;

  exc.type = UNDERFLOW;
  exc.name = (char *)"exp10";
  exc.arg1 = x;
  exc.arg2 = x;
  exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);

  /* POSIX mode sets errno without consulting matherr; otherwise errno
     is set only when matherr returns zero. */
  if (_LIB_VERSION == _POSIX_ || !matherr(&exc))
    __set_errno(ERANGE);

  /* matherr may have replaced retval. */
  return exc.retval;
}

/* Double precision base-10 exponential, 10**x.

   Special cases are decided from the bit pattern of x:
     - NaN returns x + x (raises invalid for a signalling NaN);
     - -infinity returns 0.0 and +infinity returns x;
     - x > max_exp10_arg goes to the overflow error path;
     - x < min_exp10_arg goes to the underflow error path.

   For abs(x) < 2^(-63) the result is 1.0 + x, and for
   abs(x) < 1/(16*log(10)) a degree-10 Taylor polynomial in x*log(10)
   is evaluated.

   All remaining arguments are computed as exp10(x) = 2**m * (z1 + z2):
     1. x is reduced to x = n * log10of2/32 + remainder, where n is
        x * 32/log10of2 rounded to the nearest integer and
        |remainder| <= log10of2/64;
     2. exp10(r1 + r2) - 1 is approximated, r1 and r2 being the leading
        and trailing parts of the remainder;
     3. exp10(x) is reconstructed as 2**m * (z1 + z2).
   splitexp() delivers m, z1 and z2; scaleDouble_1() or scaleDouble_2()
   then applies the factor 2**m. */
double __exp10(double x)
{
  static const double
         max_exp10_arg =  3.0825471555991674677e+02, /* 0x40734413509f79ff */
         min_exp10_arg = -3.2330621534311580944e+02, /* 0xc07434e6420f4374 */
                  ln10 = 2.30258509299404568401e+00, /* 0x40026bb1bbb55516 */
 thirtytwo_by_log10of2 = 1.06301699036395595131e+02, /* 0x405a934f0979a371 */
   log10of2_by_32_lead = 9.40718688070774078369e-03, /* 0x3F83441340000000 */
  log10of2_by_32_trail = 4.83791671566737916758e-10; /* 0x3E009F79FEF311F1 */

  double t, head, tail;
  int pow2;
  unsigned long xbits, absbits;

  GET_BITS_DP64(x, xbits);
  absbits = xbits & (~SIGNBIT_DP64);

  if (absbits >= 0x40734413509f79ff) /* abs(x) >= 308.25... */
    {
      if (absbits >= 0x7ff0000000000000)
        {
          /* NaN or infinity */
          if (xbits & MANTBITS_DP64)
            return x + x; /* NaN; raises invalid if it is signalling */
          if (xbits & SIGNBIT_DP64)
            return 0.0;   /* -infinity: 0.0 with no flags */
          return x;       /* +infinity */
        }
      if (x > max_exp10_arg)
        return retval_errno_erange_overflow(x);
      if (x < min_exp10_arg)
        return retval_errno_erange_underflow(x);
      /* Otherwise x is negative but still in range; carry on below. */
    }

  if (absbits >= 0x3f9bcb7b131bbb9d)   /* abs(x) >= 1/(16*log10) */
    {
      /* General case: exp10(x) = 2**pow2 * (head + tail) */
      splitexp(x, ln10, thirtytwo_by_log10of2, log10of2_by_32_lead,
               log10of2_by_32_trail, &pow2, &head, &tail);

      if (pow2 > EMIN_DP64 && pow2 < EMAX_DP64)
        return scaleDouble_1((head+tail),pow2);
      return scaleDouble_2((head+tail),pow2);
    }

  if (absbits < 0x3c00000000000000)   /* abs(x) < 2^(-63) */
    return 1.0 + x; /* Raises inexact if x is non-zero */

  /* Small argument: Taylor series of exp in t = x*log(10). */
  t = ln10*x;
  return ((((((((((
                   1.0/3628800)*t+
                  1.0/362880)*t+
                 1.0/40320)*t+
                1.0/5040)*t+
               1.0/720)*t+
              1.0/120)*t+
             1.0/24)*t+
            1.0/6)*t+
           1.0/2)*t+
          1.0)*t + 1.0;
}

weak_alias (__exp10, exp10)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_SPLITEXPF
#define USE_SCALEFLOAT_1
#define USE_SCALEFLOAT_2
#define USE_ZEROF_WITH_FLAGS
#define USE_INFINITYF_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_SPLITEXPF
#undef USE_SCALEFLOAT_1
#undef USE_SCALEFLOAT_2
#undef USE_ZEROF_WITH_FLAGS
#undef USE_INFINITYF_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
static inline float retval_errno_erange_overflow(float x)
{
  struct exception exc;
  exc.arg1 = (double)x;
  exc.arg2 = (double)x;
  exc.type = OVERFLOW;
  exc.name = (char *)"exp10f";
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

static inline float retval_errno_erange_underflow(float x)
{
  struct exception exc;
  exc.arg1 = (double)x;
  exc.arg2 = (double)x;
  exc.type = UNDERFLOW;
  exc.name = (char *)"exp10f";
  exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

/* Single precision base-10 exponential, 10**x.

   Special cases are decided from the bit pattern of x:
     - NaN returns x + x (raises invalid for a signalling NaN);
     - -infinity returns 0.0F and +infinity returns x;
     - x > max_exp10_arg goes to the overflow error path;
     - x < min_exp10_arg goes to the underflow error path.

   For abs(x) < 2^(-26) the result is 1.0F + x, and for
   abs(x) < 1/(64*log(10)) a degree-8 Taylor polynomial in
   y = x*log(10) is evaluated.

   All remaining arguments are computed as exp10f(x) = 2**m * (z1 + z2):
     1. x is reduced to x = n * log10of2/32 + remainder, where n is
        x * 32/log10of2 rounded to the nearest integer and
        |remainder| <= log10of2/64;
     2. exp10f(r1 + r2) - 1 is approximated, r1 and r2 being the leading
        and trailing parts of the remainder;
     3. exp10f(x) is reconstructed as 2**m * (z1 + z2).
   splitexpf() delivers m, z1 and z2; scaleFloat_1() or scaleFloat_2()
   then applies the factor 2**m. */
float __exp10f(float x)
{
  static const float
         max_exp10_arg = 3.8531841278E+01F, /* 0x421A209B */
         min_exp10_arg =-4.4853469848E+01F, /* 0xC23369F4 */
                 log10 = 2.3025850929E+00F, /* 0x40135D8E */
 thirtytwo_by_log10of2 = 1.0630169677E+02F, /* 0x42D49A78 */
   log10of2_by_32_lead = 9.4070434570E-03F, /* 0x3C1A2000 */
   log10of2_by_32_tail = 1.4390730030E-07F; /* 0x341A84F0 */

  float y, z1, z2, z;
  int m;
  unsigned int ux, ax;

  GET_BITS_SP32(x, ux);
  ax = ux & (~SIGNBIT_SP32);

  if (ax >= 0x421A209B) /* abs(x) >= 38.5... */
    {
      if(ax >= 0x7f800000)
        {
          /* x is either NaN or infinity */
          if (ux & MANTBITS_SP32)
            /* x is NaN */
            return x + x; /* Raise invalid if it is a signalling NaN */
          else if (ux & SIGNBIT_SP32)
            /* x is negative infinity; return 0.0 with no flags. */
            return 0.0F;
          else
            /* x is positive infinity */
            return x;
        }
      if (x > max_exp10_arg)
        /* Return +infinity with overflow flag */
        return retval_errno_erange_overflow(x);
      else if (x < min_exp10_arg)
        /* x is negative. Return +zero with underflow and inexact flags */
        return retval_errno_erange_underflow(x);
    }

  /* Handle small arguments separately */
  if (ax < 0x3bde5bd9)   /* abs(x) < 1/(64*log10) */
    {
      if (ax < 0x32800000)   /* abs(x) < 2^(-26) */
        return 1.0F + x; /* Raises inexact if x is non-zero */

      /* Taylor series of exp evaluated in y = x*log(10).  Every Horner
         step must multiply by y; the leading step previously used x,
         which dropped one factor of log(10) from the highest-order
         term. */
      y = log10*x;
      z = ((((((((
                  1.0F/40320)*y+
                 1.0F/5040)*y+
                1.0F/720)*y+
               1.0F/120)*y+
              1.0F/24)*y+
             1.0F/6)*y+
            1.0F/2)*y+
           1.0F)*y + 1.0;
    }
  else
    {
      /* Find m, z1 and z2 such that exp10f(x) = 2**m * (z1 + z2) */

      splitexpf(x, log10, thirtytwo_by_log10of2, log10of2_by_32_lead,
                log10of2_by_32_tail, &m, &z1, &z2);

      /* Scale (z1 + z2) by 2.0**m */

      if (m >= EMIN_SP32 && m <= EMAX_SP32)
        z = scaleFloat_1((z1+z2),m);
      else
        z = scaleFloat_2((z1+z2),m);
    }
  return z;
}

weak_alias (__exp10f, exp10f)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_SPLITEXP
#define USE_SCALEDOUBLE_1
#define USE_SCALEDOUBLE_2
#define USE_ZERO_WITH_FLAGS
#define USE_INFINITY_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_ZERO_WITH_FLAGS
#undef USE_SPLITEXP
#undef USE_SCALEDOUBLE_1
#undef USE_SCALEDOUBLE_2
#undef USE_INFINITY_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
static inline double retval_errno_erange_overflow(double x)
{
  struct exception exc;
  exc.arg1 = x;
  exc.arg2 = x;
  exc.type = OVERFLOW;
  exc.name = (char *)"exp2";
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

static inline double retval_errno_erange_underflow(double x)
{
  struct exception exc;
  exc.arg1 = x;
  exc.arg2 = x;
  exc.type = UNDERFLOW;
  exc.name = (char *)"exp2";
  exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

/* Double precision base-2 exponential, 2**x.

   Special cases are decided from the bit pattern of x:
     - NaN returns x + x (raises invalid for a signalling NaN);
     - -infinity returns 0.0 and +infinity returns x;
     - x >= max_exp2_arg goes to the overflow error path;
     - x < min_exp2_arg goes to the underflow error path.

   For abs(x) < 2^(-63) the result is 1.0 + x.  For
   abs(x) < 1/(16*log(2)) the product x*log(2) is formed in two pieces
   (y1 + y2) and a rational approximation in y = y1 + y2 is used.

   All remaining arguments are computed as exp2(x) = 2**m * (z1 + z2):
     1. x is reduced to x = n/32 + remainder, where n is x * 32 rounded
        to the nearest integer and |remainder| <= 1/64;
     2. exp2(r1 + r2) - 1 is approximated, r1 and r2 being the leading
        and trailing parts of the remainder;
     3. exp2(x) is reconstructed as 2**m * (z1 + z2).
   splitexp() delivers m, z1 and z2; scaleDouble_1() or scaleDouble_2()
   then applies the factor 2**m. */
double __exp2(double x)
{
  static const double
    max_exp2_arg = 1024.0,  /* 0x4090000000000000 */
    min_exp2_arg = -1074.0, /* 0xc090c80000000000 */
    log2 = 6.931471805599453094178e-01, /* 0x3fe62e42fefa39ef */
    log2_lead = 6.93147167563438415527E-01, /* 0x3fe62e42f8000000 */
    log2_tail = 1.29965068938898869640E-08, /* 0x3e4be8e7bcd5e4f1 */
    one_by_32_lead = 0.03125;

  double y, z1, z2, z, hx, tx, y1, y2;
  int m;
  unsigned long ux, ax;

  GET_BITS_DP64(x, ux);
  ax = ux & (~SIGNBIT_DP64);

  if (ax >= 0x4090000000000000) /* abs(x) >= 1024.0 */
    {
      if(ax >= 0x7ff0000000000000)
        {
          /* x is either NaN or infinity */
          if (ux & MANTBITS_DP64)
            /* x is NaN */
            return x + x; /* Raise invalid if it is a signalling NaN */
          else if (ux & SIGNBIT_DP64)
            /* x is negative infinity; return 0.0 with no flags. */
            return 0.0;
          else
            /* x is positive infinity */
            return x;
        }
      /* 2**1024 is already beyond the largest finite double, so the
         boundary value itself must take the overflow path; with a strict
         comparison x == 1024.0 fell through to the general case and the
         errno/matherr handling was skipped. */
      if (x >= max_exp2_arg)
        /* Return +infinity with overflow flag */
        return retval_errno_erange_overflow(x);
      else if (x < min_exp2_arg)
        /* x is negative. Return +zero with underflow and inexact flags */
        return retval_errno_erange_underflow(x);
    }


  /* Handle small arguments separately */
  if (ax < 0x3fb7154764ee6c2f)   /* abs(x) < 1/(16*log2) */
    {
      if (ax < 0x3c00000000000000)   /* abs(x) < 2^(-63) */
        return 1.0 + x; /* Raises inexact if x is non-zero */
      else
        {
          /* Split x into hx (head) and tx (tail).  The mask clears the
             low 27 bits of the significand. */
          unsigned long u;
          hx = x;
          GET_BITS_DP64(hx, u);
          u &= 0xfffffffff8000000;
          PUT_BITS_DP64(u, hx);
          tx = x - hx;
          /* Carefully multiply x by log2. y1 is the most significant
             part of the result, and y2 the least significant part */
          y1 = x * log2_lead;
          y2 = (((hx * log2_lead - y1) + hx * log2_tail) +
                  tx * log2_lead) + tx * log2_tail;

          /* Rational approximation in y; the result is then applied to
             y1 and y2 separately before adding 1.0. */
          y = y1 + y2;
          z = (9.99564649780173690e-1 +
               (1.61251249355268050e-5 +
                (2.37986978239838493e-2 +
                  2.68724774856111190e-7*y)*y)*y)/
              (9.99564649780173692e-1 +
               (-4.99766199765151309e-1 +
                (1.070876894098586184e-1 +
                 (-1.189773642681502232e-2 +
                   5.9480622371960190616e-4*y)*y)*y)*y);
          z = ((z * y1) + (z * y2)) + 1.0;
        }
    }
  else
    {
      /* Find m, z1 and z2 such that exp2(x) = 2**m * (z1 + z2) */

      splitexp(x, log2, 32.0, one_by_32_lead, 0.0, &m, &z1, &z2);

      /* Scale (z1 + z2) by 2.0**m */
      if (m > EMIN_DP64 && m < EMAX_DP64)
        z = scaleDouble_1((z1+z2),m);
      else
        z = scaleDouble_2((z1+z2),m);
    }
  return z;
}

weak_alias (__exp2, exp2)
weak_alias (__exp2, __libm_ieee754_exp2)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_SPLITEXPF
#define USE_SCALEFLOAT_1
#define USE_SCALEFLOAT_2
#define USE_INFINITYF_WITH_FLAGS
#define USE_ZEROF_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_SPLITEXPF
#undef USE_SCALEFLOAT_1
#undef USE_SCALEFLOAT_2
#undef USE_INFINITYF_WITH_FLAGS
#undef USE_ZEROF_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
static inline float retval_errno_erange_overflow(float x)
{
  struct exception exc;
  exc.arg1 = (double)x;
  exc.arg2 = (double)x;
  exc.type = OVERFLOW;
  exc.name = (char *)"exp2f";
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

static inline float retval_errno_erange_underflow(float x)
{
  struct exception exc;
  exc.arg1 = (double)x;
  exc.arg2 = (double)x;
  exc.type = UNDERFLOW;
  exc.name = (char *)"exp2f";
  exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

/* Single precision base-2 exponential, 2**x.

   Special cases are decided from the bit pattern of x:
     - NaN returns x + x (raises invalid for a signalling NaN);
     - -infinity returns 0.0F and +infinity returns x;
     - x >= max_exp2_arg goes to the overflow error path;
     - x < min_exp2_arg goes to the underflow error path.

   For abs(x) < 2^(-26) the result is 1.0F + x, and for
   abs(x) < 1/(64*log(2)) a degree-8 Taylor polynomial in
   y = x*log(2) is evaluated.

   All remaining arguments are computed as exp2f(x) = 2**m * (z1 + z2):
     1. x is reduced to x = n/32 + remainder, where n is x * 32 rounded
        to the nearest integer and |remainder| <= 1/64;
     2. exp2f(r1 + r2) - 1 is approximated, r1 and r2 being the leading
        and trailing parts of the remainder;
     3. exp2f(x) is reconstructed as 2**m * (z1 + z2).
   splitexpf() delivers m, z1 and z2; scaleFloat_1() or scaleFloat_2()
   then applies the factor 2**m. */
float __exp2f(float x)
{
  static const float
    max_exp2_arg = 128.0F,  /* 0x43000000 */
    min_exp2_arg = -149.0F, /* 0xc3150000 */
    log2 = 6.931471824645996e-01F, /* 0x3f317218 */
    one_by_32_lead = 0.03125F;

  float y, z1, z2, z;
  int m;
  unsigned int ux, ax;

  GET_BITS_SP32(x, ux);
  ax = ux & (~SIGNBIT_SP32);

  if (ax >= 0x43000000) /* abs(x) >= 128.0 */
    {
      if(ax >= 0x7f800000)
        {
          /* x is either NaN or infinity */
          if (ux & MANTBITS_SP32)
            /* x is NaN */
            return x + x; /* Raise invalid if it is a signalling NaN */
          else if (ux & SIGNBIT_SP32)
            /* x is negative infinity; return 0.0 with no flags. */
            return 0.0F;
          else
            /* x is positive infinity */
            return x;
        }
      /* 2**128 is already beyond the largest finite float, so the
         boundary value itself must take the overflow path; with a strict
         comparison x == 128.0F fell through to the general case and the
         errno/matherr handling was skipped. */
      if (x >= max_exp2_arg)
        /* Return +infinity with overflow flag */
        return retval_errno_erange_overflow(x);
      else if (x < min_exp2_arg)
        /* x is negative. Return +zero with underflow and inexact flags */
        return retval_errno_erange_underflow(x);
    }

  /* Handle small arguments separately */
  if (ax < 0x3cb8aa3b)   /* abs(x) < 1/(64*log2) */
    {
      if (ax < 0x32800000)   /* abs(x) < 2^(-26) */
        return 1.0F + x; /* Raises inexact if x is non-zero */
      else
        {
          /* Taylor series of exp evaluated in y = x*log(2). */
          y = log2*x;
          z = ((((((((
                      1.0F/40320)*y+
                     1.0F/5040)*y+
                    1.0F/720)*y+
                   1.0F/120)*y+
                  1.0F/24)*y+
                 1.0F/6)*y+
                1.0F/2)*y+
               1.0F)*y + 1.0;
        }
    }
  else
    {
      /* Find m, z1 and z2 such that exp2f(x) = 2**m * (z1 + z2) */

      splitexpf(x, log2, 32.0F, one_by_32_lead, 0.0F, &m, &z1, &z2);

      /* Scale (z1 + z2) by 2.0**m */

      if (m >= EMIN_SP32 && m <= EMAX_SP32)
        z = scaleFloat_1((z1+z2),m);
      else
        z = scaleFloat_2((z1+z2),m);
    }
  return z;
}

weak_alias (__exp2f, exp2f)
weak_alias (__exp2f, __libm_ieee754_exp2f)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_SPLITEXPF
#define USE_SCALEFLOAT_1
#define USE_SCALEFLOAT_2
#define USE_ZEROF_WITH_FLAGS
#define USE_INFINITYF_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_SPLITEXPF
#undef USE_SCALEFLOAT_1
#undef USE_SCALEFLOAT_2
#undef USE_ZEROF_WITH_FLAGS
#undef USE_INFINITYF_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
static inline float retval_errno_erange_overflow(float x)
{
  struct exception exc;
  exc.arg1 = (double)x;
  exc.arg2 = (double)x;
  exc.type = OVERFLOW;
  exc.name = (char *)"expf";
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

static inline float retval_errno_erange_underflow(float x)
{
  struct exception exc;
  exc.arg1 = (double)x;
  exc.arg2 = (double)x;
  exc.type = UNDERFLOW;
  exc.name = (char *)"expf";
  exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

/* Single precision natural exponential, e**x.

   Special cases are decided from the bit pattern of x:
     - NaN returns x + x (raises invalid for a signalling NaN);
     - -infinity returns 0.0 and +infinity returns x;
     - x > max_exp_arg goes to the overflow error path;
     - x < min_exp_arg goes to the underflow error path.

   For abs(x) < 2^(-26) the result is 1.0 + x, and for abs(x) < 1/64 a
   degree-7 Taylor polynomial in x is evaluated (in double arithmetic,
   since its literals are doubles).

   All remaining arguments are computed as exp(x) = 2**m * (z1 + z2):
     1. x is reduced to x = n * log2/32 + remainder, where n is
        x * 32/log2 rounded to the nearest integer and
        |remainder| <= log2/64;
     2. exp(r1 + r2) - 1 is approximated, r1 and r2 being the leading
        and trailing parts of the remainder;
     3. exp(x) is reconstructed as 2**m * (z1 + z2).
   splitexpf() delivers m, z1 and z2; scaleFloat_1() or scaleFloat_2()
   then applies the factor 2**m. */
float __expf(float x)
{
  static const float
       max_exp_arg =  8.8722839355E+01, /* 0x42B17218 */
       min_exp_arg = -1.0327893066E+02, /* 0xC2CE8ED0 */
 thirtytwo_by_log2 =  4.6166240692E+01, /* 0x4238AA3B */
   log2_by_32_lead =  2.1659851074E-02, /* 0x3CB17000 */
   log2_by_32_tail =  9.9831822808E-07; /* 0x3585FDF4 */

  float head, tail, result;
  int pow2;
  unsigned int xbits, absbits;

  GET_BITS_SP32(x, xbits);
  absbits = xbits & (~SIGNBIT_SP32);

  if (absbits >= 0x42B17218) /* abs(x) >= 88.7... */
    {
      if (absbits >= 0x7f800000)
        {
          /* NaN or infinity */
          if (xbits & MANTBITS_SP32)
            return x + x; /* NaN; raises invalid if it is signalling */
          if (xbits & SIGNBIT_SP32)
            return 0.0;   /* -infinity: 0.0 with no flags */
          return x;       /* +infinity */
        }
      if (x > max_exp_arg)
        return retval_errno_erange_overflow(x);
      if (x < min_exp_arg)
        return retval_errno_erange_underflow(x);
      /* Otherwise x is negative but still in range; carry on below. */
    }

  if (absbits >= 0x3c800000)   /* abs(x) >= 1/64 */
    {
      /* General case: exp(x) = 2**pow2 * (head + tail) */
      splitexpf(x, 1.0, thirtytwo_by_log2, log2_by_32_lead,
                log2_by_32_tail, &pow2, &head, &tail);

      if (pow2 >= EMIN_SP32 && pow2 <= EMAX_SP32)
        result = scaleFloat_1((head+tail),pow2);
      else
        result = scaleFloat_2((head+tail),pow2);
      return result;
    }

  if (absbits < 0x32800000)   /* abs(x) < 2^(-26) */
    return 1.0 + x; /* Raises inexact if x is non-zero */

  /* Small argument: Taylor series of exp in x. */
  result = (((((((
                  1.0/5040)*x+
                 1.0/720)*x+
                1.0/120)*x+
               1.0/24)*x+
              1.0/6)*x+
             1.0/2)*x+
            1.0)*x + 1.0;
  return result;
}

weak_alias (__expf, expf)
weak_alias (__expf, __ieee754_expf)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_fmod.c.x86_64-new-libm (+2 lines)
	1	#define COMPILING_FMOD
	2	#include <w_remainder.c>

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_fmodf.c.x86_64-new-libm (+2 lines)
	1	#define COMPILING_FMOD
	2	#include <w_remainderf.c>




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_SCALEDOUBLE_1
#define USE_INFINITY_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_SCALEDOUBLE_1
#undef USE_INFINITY_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
static inline double retval_errno_erange_overflow(double x, double y)
{
  struct exception exc;
  exc.arg1 = x;
  exc.arg2 = y;
  exc.type = OVERFLOW;
  exc.name = (char *)"hypot";
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

/* Returns sqrt(x*x + y*y) with no overflow or underflow unless
   the result warrants it.

   Outline:
     - if either argument is infinity the result is +infinity, even when
       the other is NaN; otherwise a NaN argument yields NaN;
     - x and y are replaced by their absolute values;
     - a zero argument returns the other one, and an argument whose
       exponent is more than MANTLENGTH_DP64 + 1 below the other's is
       absorbed by returning x + y;
     - operands with large or small exponents are rescaled by 2**600 in
       the safe direction, and the result is scaled back at the end;
     - x*x + y*y is accumulated as a head/tail pair for extra precision
       (unless FAST_BUT_GREATER_THAN_ONE_ULP is defined) and the square
       root is taken with the sqrtsd instruction;
     - a result above the largest finite double is reported through
       retval_errno_erange_overflow(). */
double __hypot(double x, double y)
{
  const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */

  /* The caller's arguments, untouched.  x and y are overwritten below
     (absolute value, rescaling, sorting), so the overflow report must
     use these copies to describe the actual call. */
  const double xarg = x, yarg = y;

  double u, r, retval, hx, tx, x2, hy, ty, y2, hs, ts;
  unsigned long xexp, yexp, ux, uy, ut;
  int dexp, expadjust;

  GET_BITS_DP64(x, ux);
  ux &= ~SIGNBIT_DP64;
  GET_BITS_DP64(y, uy);
  uy &= ~SIGNBIT_DP64;
  xexp = (ux >> EXPSHIFTBITS_DP64);
  yexp = (uy >> EXPSHIFTBITS_DP64);

  if (xexp == BIASEDEMAX_DP64 + 1 || yexp == BIASEDEMAX_DP64 + 1)
    {
      /* One or both of the arguments are NaN or infinity. The
         result will also be NaN or infinity. */
      retval = x*x + y*y;
      if (((xexp == BIASEDEMAX_DP64 + 1) && !(ux & MANTBITS_DP64)) ||
          ((yexp == BIASEDEMAX_DP64 + 1) && !(uy & MANTBITS_DP64)))
        /* x or y is infinity. ISO C99 defines that we must
           return +infinity, even if the other argument is NaN.
           Note that the computation of x*x + y*y above will already
           have raised invalid if either x or y is a signalling NaN. */
        return infinity_with_flags(0);
      else
        /* One or both of x or y is NaN, and neither is infinity.
           Raise invalid if it's a signalling NaN */
        return retval;
    }

  /* Set x = abs(x) and y = abs(y) */
  PUT_BITS_DP64(ux, x);
  PUT_BITS_DP64(uy, y);

  /* The difference in exponents between x and y */
  dexp = xexp - yexp;
  expadjust = 0;

  if (ux == 0)
    /* x is zero */
    return y;
  else if (uy == 0)
    /* y is zero */
    return x;
  else if (dexp > MANTLENGTH_DP64 + 1 || dexp < -MANTLENGTH_DP64 - 1)
    /* One of x and y is insignificant compared to the other */
    return x + y; /* Raise inexact */
  else if (xexp > EXPBIAS_DP64 + 500 || yexp > EXPBIAS_DP64 + 500)
    {
      /* Danger of overflow; scale down by 2**600. */
      expadjust = 600;
      ux -= 0x2580000000000000;
      PUT_BITS_DP64(ux, x);
      uy -= 0x2580000000000000;
      PUT_BITS_DP64(uy, y);
    }
  else if (xexp < EXPBIAS_DP64 - 500 || yexp < EXPBIAS_DP64 - 500)
    {
      /* Danger of underflow; scale up by 2**600. */
      expadjust = -600;
      if (xexp == 0)
        {
          /* x is denormal - handle by adding 601 to the exponent
           and then subtracting a correction for the implicit bit */
          PUT_BITS_DP64(ux + 0x2590000000000000, x);
          x -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
          GET_BITS_DP64(x, ux);
        }
      else
        {
          /* x is normal - just increase the exponent by 600 */
          ux += 0x2580000000000000;
          PUT_BITS_DP64(ux, x);
        }
      if (yexp == 0)
        {
          /* y is denormal - same treatment as for x above */
          PUT_BITS_DP64(uy + 0x2590000000000000, y);
          y -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
          GET_BITS_DP64(y, uy);
        }
      else
        {
          /* y is normal - just increase the exponent by 600 */
          uy += 0x2580000000000000;
          PUT_BITS_DP64(uy, y);
        }
    }


#ifdef FAST_BUT_GREATER_THAN_ONE_ULP
  /* Not awful, but results in accuracy loss larger than 1 ulp */
  r = x*x + y*y;
#else
  /* Slower but more accurate */

  /* Sort so that x is greater than y */
  if (x < y)
    {
      u = y;
      y = x;
      x = u;
      ut = ux;
      ux = uy;
      uy = ut;
    }

  /* Split x into hx and tx, head and tail */
  PUT_BITS_DP64(ux & 0xfffffffff8000000, hx);
  tx = x - hx;

  /* Likewise split y into hy and ty */
  PUT_BITS_DP64(uy & 0xfffffffff8000000, hy);
  ty = y - hy;

  /* Compute r = x*x + y*y with extra precision */
  x2 = x*x;
  y2 = y*y;
  hs = x2 + y2;

  if (dexp == 0)
    /* We take most care when x and y have equal exponents,
       i.e. are almost the same size */
    ts = (((x2 - hs) + y2) +
          ((hx * hx - x2) + 2 * hx * tx) + tx * tx) +
      ((hy * hy - y2) + 2 * hy * ty) + ty * ty;
  else
    ts = (((x2 - hs) + y2) +
          ((hx * hx - x2) + 2 * hx * tx) + tx * tx);

  r = hs + ts;
#endif

  /* The sqrt can introduce another half ulp error. */
  /* Hammer sqrt instruction */
  asm volatile ("sqrtsd %1, %0" : "=x" (retval) : "x" (r));

  /* If necessary scale the result back. This may lead to
     overflow but if so that's the correct result. */
  retval = scaleDouble_1(retval, expadjust);

  if (retval > large)
    /* The result overflowed. Deal with errno, reporting the arguments
       exactly as the caller passed them. */
    return retval_errno_erange_overflow(xarg, yarg);

  return retval;
}

weak_alias (__hypot, hypot)
weak_alias (__hypot, __ieee754_hypot)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#if USE_SOFTWARE_SQRT
#define USE_SQRTF_AMD_INLINE
#endif
#define USE_INFINITYF_WITH_FLAGS
#include "libm_inlines_amd.h"
#if USE_SOFTWARE_SQRT
#undef USE_SQRTF_AMD_INLINE
#endif
#undef USE_INFINITYF_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
static inline float retval_errno_erange_overflow(float x, float y)
{
  struct exception exc;
  exc.arg1 = (double)x;
  exc.arg2 = (double)y;
  exc.type = OVERFLOW;
  exc.name = (char *)"hypotf";
  if (_LIB_VERSION == _SVID_)
    exc.retval = HUGE;
  else
    exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

/* Returns sqrt(x*x + y*y) with no overflow or underflow unless
   the result warrants it.

   The arguments are widened to double and the sum of squares and the
   square root are computed in double precision.  If either argument is
   infinity the result is +infinity, even when the other is NaN;
   otherwise a NaN argument yields NaN.  A result above the largest
   finite float is reported through retval_errno_erange_overflow(). */
float __hypotf(float x, float y)
{
  /* Do intermediate computations in double precision
     and use sqrt instruction from chip if available. */
  double dx = x, dy = y, dr, retval;

  /* The largest finite float, stored as a double */
  const double large = 3.40282346638528859812e+38; /* 0x47efffffe0000000 */

  unsigned long ux, uy, avx, avy;

  /* Classify using the double copies: the DP64 bit-access macro and the
     DP64 masks below describe a 64-bit double, not the 32-bit float
     arguments. */
  GET_BITS_DP64(dx, avx);
  avx &= ~SIGNBIT_DP64;
  GET_BITS_DP64(dy, avy);
  avy &= ~SIGNBIT_DP64;
  ux = (avx >> EXPSHIFTBITS_DP64);
  uy = (avy >> EXPSHIFTBITS_DP64);

  if (ux == BIASEDEMAX_DP64 + 1 || uy == BIASEDEMAX_DP64 + 1)
    {
      retval = x*x + y*y;
      /* One or both of the arguments are NaN or infinity. The
         result will also be NaN or infinity. */
      if (((ux == BIASEDEMAX_DP64 + 1) && !(avx & MANTBITS_DP64)) ||
          ((uy == BIASEDEMAX_DP64 + 1) && !(avy & MANTBITS_DP64)))
        /* x or y is infinity. ISO C99 defines that we must
           return +infinity, even if the other argument is NaN.
           Note that the computation of x*x + y*y above will already
           have raised invalid if either x or y is a signalling NaN. */
        return infinityf_with_flags(0);
      else
        /* One or both of x or y is NaN, and neither is infinity.
           Raise invalid if it's a signalling NaN */
        return retval;
    }

  dr = (dx*dx + dy*dy);

#if USE_SOFTWARE_SQRT
  /* The sum of squares lives in dr; the name r used here before was
     never declared, so this configuration could not compile.
     NOTE(review): sqrtf_amd_inline presumably takes a float, while dr
     can exceed the float range -- confirm this helper is the intended
     one for a double-precision intermediate. */
  retval = sqrtf_amd_inline(dr);
#else
  /* Hammer sqrt instruction */
  asm volatile ("sqrtsd %1, %0" : "=x" (retval) : "x" (dr));
#endif

  if (retval > large)
    /* The result does not fit in a float. Deal with errno. */
    return retval_errno_erange_overflow(x, y);
  else
    return retval;
}

weak_alias (__hypotf, hypotf)
weak_alias (__hypotf, __ieee754_hypotf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_NAN_WITH_FLAGS
#define USE_INFINITY_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_NAN_WITH_FLAGS
#undef USE_INFINITY_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
static inline double retval_errno_erange_overflow(double x)
{
  struct exception exc;
  exc.arg1 = x;
  exc.arg2 = x;
  exc.type = SING;
#if defined(COMPILING_LOG10)
  exc.name = (char *)"log10";
#elif defined(COMPILING_LOG2)
  exc.name = (char *)"log2";
#else
  exc.name = (char *)"log";
#endif
  if (_LIB_VERSION == _SVID_)
    exc.retval = -HUGE;
  else
    exc.retval = -infinity_with_flags(AMD_F_DIVBYZERO);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(ERANGE);
  else if (!matherr(&exc))
    __set_errno(ERANGE);
  return exc.retval;
}

/* Deal with errno for out-of-range argument */
static inline double retval_errno_edom(double x)
{
  struct exception exc;
  exc.arg1 = x;
  exc.arg2 = x;
  exc.type = DOMAIN;
#if defined(COMPILING_LOG10)
  exc.name = (char *)"log10";
#elif defined(COMPILING_LOG2)
  exc.name = (char *)"log2";
#else
  exc.name = (char *)"log";
#endif
  if (_LIB_VERSION == _SVID_)
    exc.retval = -HUGE;
  else
    exc.retval = nan_with_flags(AMD_F_INVALID);
  if (_LIB_VERSION == _POSIX_)
    __set_errno(EDOM);
  else if (!matherr(&exc))
    {
      if(_LIB_VERSION == _SVID_)
#if defined(COMPILING_LOG10)
        (void)fputs("log10: DOMAIN error\n", stderr);
#elif defined(COMPILING_LOG2)
        (void)fputs("log2: DOMAIN error\n", stderr);
#else
        (void)fputs("log: DOMAIN error\n", stderr);
#endif
    __set_errno(EDOM);
    }
  return exc.retval;
}

/* Table-driven logarithm of a double.  Depending on which of
   COMPILING_LOG10 / COMPILING_LOG2 is defined, this one body is built as
   __log10, __log2 or (by default) the natural-log __log.

   Special arguments, as handled below:
     NaN                 -> x + x
     +infinity           -> x
     -infinity, x < 0    -> retval_errno_edom(x)
     +/-0                -> retval_errno_erange_overflow(x)
   Every other argument goes through either the near-1.0 polynomial path
   (bit pattern between log_thresh1 and log_thresh2) or the table-lookup
   path. */
#if defined(COMPILING_LOG10)
double __log10(double x)
#elif defined(COMPILING_LOG2)
double __log2(double x)
#else
double __log(double x)
#endif
{

  int expadjust, xexp;
  double r, r1, r2, correction, f, f1, f2, q, u, v, z1, z2, poly;
  int index;
  unsigned long ux;
#if defined(COMPILING_LOG10) || defined (COMPILING_LOG2)
  /* Scratch bit pattern used when splitting r1 in the near-1.0 path. */
  unsigned long ut;
#endif

  /*
    Computes natural log(x). Algorithm based on:
    Ping-Tak Peter Tang
    "Table-driven implementation of the logarithm function in IEEE
    floating-point arithmetic"
    ACM Transactions on Mathematical Software (TOMS)
    Volume 16, Issue 4 (December 1990)
  */

/* Arrays ln_lead_table and ln_tail_table contain
   leading and trailing parts respectively of precomputed
   values of natural log(1+i/64), for i = 0, 1, ..., 64.
   ln_lead_table contains the first 24 bits of precision,
   and ln_tail_table contains a further 53 bits precision. */

  static const double ln_lead_table[65] = {
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    1.55041813850402832031e-02,   /* 0x3f8fc0a800000000 */
    3.07716131210327148438e-02,   /* 0x3f9f829800000000 */
    4.58095073699951171875e-02,   /* 0x3fa7745800000000 */
    6.06245994567871093750e-02,   /* 0x3faf0a3000000000 */
    7.52233862876892089844e-02,   /* 0x3fb341d700000000 */
    8.96121263504028320312e-02,   /* 0x3fb6f0d200000000 */
    1.03796780109405517578e-01,   /* 0x3fba926d00000000 */
    1.17783010005950927734e-01,   /* 0x3fbe270700000000 */
    1.31576299667358398438e-01,   /* 0x3fc0d77e00000000 */
    1.45181953907012939453e-01,   /* 0x3fc2955280000000 */
    1.58604979515075683594e-01,   /* 0x3fc44d2b00000000 */
    1.71850204467773437500e-01,   /* 0x3fc5ff3000000000 */
    1.84922337532043457031e-01,   /* 0x3fc7ab8900000000 */
    1.97825729846954345703e-01,   /* 0x3fc9525a80000000 */
    2.10564732551574707031e-01,   /* 0x3fcaf3c900000000 */
    2.23143517971038818359e-01,   /* 0x3fcc8ff780000000 */
    2.35566020011901855469e-01,   /* 0x3fce270700000000 */
    2.47836112976074218750e-01,   /* 0x3fcfb91800000000 */
    2.59957492351531982422e-01,   /* 0x3fd0a324c0000000 */
    2.71933674812316894531e-01,   /* 0x3fd1675c80000000 */
    2.83768117427825927734e-01,   /* 0x3fd22941c0000000 */
    2.95464158058166503906e-01,   /* 0x3fd2e8e280000000 */
    3.07025015354156494141e-01,   /* 0x3fd3a64c40000000 */
    3.18453729152679443359e-01,   /* 0x3fd4618bc0000000 */
    3.29753279685974121094e-01,   /* 0x3fd51aad80000000 */
    3.40926527976989746094e-01,   /* 0x3fd5d1bd80000000 */
    3.51976394653320312500e-01,   /* 0x3fd686c800000000 */
    3.62905442714691162109e-01,   /* 0x3fd739d7c0000000 */
    3.73716354370117187500e-01,   /* 0x3fd7eaf800000000 */
    3.84411692619323730469e-01,   /* 0x3fd89a3380000000 */
    3.94993782043457031250e-01,   /* 0x3fd9479400000000 */
    4.05465066432952880859e-01,   /* 0x3fd9f323c0000000 */
    4.15827870368957519531e-01,   /* 0x3fda9cec80000000 */
    4.26084339618682861328e-01,   /* 0x3fdb44f740000000 */
    4.36236739158630371094e-01,   /* 0x3fdbeb4d80000000 */
    4.46287095546722412109e-01,   /* 0x3fdc8ff7c0000000 */
    4.56237375736236572266e-01,   /* 0x3fdd32fe40000000 */
    4.66089725494384765625e-01,   /* 0x3fddd46a00000000 */
    4.75845873355865478516e-01,   /* 0x3fde744240000000 */
    4.85507786273956298828e-01,   /* 0x3fdf128f40000000 */
    4.95077252388000488281e-01,   /* 0x3fdfaf5880000000 */
    5.04556000232696533203e-01,   /* 0x3fe02552a0000000 */
    5.13945698738098144531e-01,   /* 0x3fe0723e40000000 */
    5.23248136043548583984e-01,   /* 0x3fe0be72e0000000 */
    5.32464742660522460938e-01,   /* 0x3fe109f380000000 */
    5.41597247123718261719e-01,   /* 0x3fe154c3c0000000 */
    5.50647079944610595703e-01,   /* 0x3fe19ee6a0000000 */
    5.59615731239318847656e-01,   /* 0x3fe1e85f40000000 */
    5.68504691123962402344e-01,   /* 0x3fe23130c0000000 */
    5.77315330505371093750e-01,   /* 0x3fe2795e00000000 */
    5.86049020290374755859e-01,   /* 0x3fe2c0e9e0000000 */
    5.94707071781158447266e-01,   /* 0x3fe307d720000000 */
    6.03290796279907226562e-01,   /* 0x3fe34e2880000000 */
    6.11801505088806152344e-01,   /* 0x3fe393e0c0000000 */
    6.20240390300750732422e-01,   /* 0x3fe3d90260000000 */
    6.28608644008636474609e-01,   /* 0x3fe41d8fe0000000 */
    6.36907458305358886719e-01,   /* 0x3fe4618bc0000000 */
    6.45137906074523925781e-01,   /* 0x3fe4a4f840000000 */
    6.53301239013671875000e-01,   /* 0x3fe4e7d800000000 */
    6.61398470401763916016e-01,   /* 0x3fe52a2d20000000 */
    6.69430613517761230469e-01,   /* 0x3fe56bf9c0000000 */
    6.77398800849914550781e-01,   /* 0x3fe5ad4040000000 */
    6.85303986072540283203e-01,   /* 0x3fe5ee02a0000000 */
    6.93147122859954833984e-01};  /* 0x3fe62e42e0000000 */

  static const double ln_tail_table[65] = {
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    5.15092497094772879206e-09,   /* 0x3e361f807c79f3db */
    4.55457209735272790188e-08,   /* 0x3e6873c1980267c8 */
    2.86612990859791781788e-08,   /* 0x3e5ec65b9f88c69e */
    2.23596477332056055352e-08,   /* 0x3e58022c54cc2f99 */
    3.49498983167142274770e-08,   /* 0x3e62c37a3a125330 */
    3.23392843005887000414e-08,   /* 0x3e615cad69737c93 */
    1.35722380472479366661e-08,   /* 0x3e4d256ab1b285e9 */
    2.56504325268044191098e-08,   /* 0x3e5b8abcb97a7aa2 */
    5.81213608741512136843e-08,   /* 0x3e6f34239659a5dc */
    5.59374849578288093334e-08,   /* 0x3e6e07fd48d30177 */
    5.06615629004996189970e-08,   /* 0x3e6b32df4799f4f6 */
    5.24588857848400955725e-08,   /* 0x3e6c29e4f4f21cf8 */
    9.61968535632653505972e-10,   /* 0x3e1086c848df1b59 */
    1.34829655346594463137e-08,   /* 0x3e4cf456b4764130 */
    3.65557749306383026498e-08,   /* 0x3e63a02ffcb63398 */
    3.33431709374069198903e-08,   /* 0x3e61e6a6886b0976 */
    5.13008650536088382197e-08,   /* 0x3e6b8abcb97a7aa2 */
    5.09285070380306053751e-08,   /* 0x3e6b578f8aa35552 */
    3.20853940845502057341e-08,   /* 0x3e6139c871afb9fc */
    4.06713248643004200446e-08,   /* 0x3e65d5d30701ce64 */
    5.57028186706125221168e-08,   /* 0x3e6de7bcb2d12142 */
    5.48356693724804282546e-08,   /* 0x3e6d708e984e1664 */
    1.99407553679345001938e-08,   /* 0x3e556945e9c72f36 */
    1.96585517245087232086e-09,   /* 0x3e20e2f613e85bda */
    6.68649386072067321503e-09,   /* 0x3e3cb7e0b42724f6 */
    5.89936034642113390002e-08,   /* 0x3e6fac04e52846c7 */
    2.85038578721554472484e-08,   /* 0x3e5e9b14aec442be */
    5.09746772910284482606e-08,   /* 0x3e6b5de8034e7126 */
    5.54234668933210171467e-08,   /* 0x3e6dc157e1b259d3 */
    6.29100830926604004874e-09,   /* 0x3e3b05096ad69c62 */
    2.61974119468563937716e-08,   /* 0x3e5c2116faba4cdd */
    4.16752115011186398935e-08,   /* 0x3e665fcc25f95b47 */
    2.47747534460820790327e-08,   /* 0x3e5a9a08498d4850 */
    5.56922172017964209793e-08,   /* 0x3e6de647b1465f77 */
    2.76162876992552906035e-08,   /* 0x3e5da71b7bf7861d */
    7.08169709942321478061e-09,   /* 0x3e3e6a6886b09760 */
    5.77453510221151779025e-08,   /* 0x3e6f0075eab0ef64 */
    4.43021445893361960146e-09,   /* 0x3e33071282fb989b */
    3.15140984357495864573e-08,   /* 0x3e60eb43c3f1bed2 */
    2.95077445089736670973e-08,   /* 0x3e5faf06ecb35c84 */
    1.44098510263167149349e-08,   /* 0x3e4ef1e63db35f68 */
    1.05196987538551827693e-08,   /* 0x3e469743fb1a71a5 */
    5.23641361722697546261e-08,   /* 0x3e6c1cdf404e5796 */
    7.72099925253243069458e-09,   /* 0x3e4094aa0ada625e */
    5.62089493829364197156e-08,   /* 0x3e6e2d4c96fde3ec */
    3.53090261098577946927e-08,   /* 0x3e62f4d5e9a98f34 */
    3.80080516835568242269e-08,   /* 0x3e6467c96ecc5cbe */
    5.66961038386146408282e-08,   /* 0x3e6e7040d03dec5a */
    4.42287063097349852717e-08,   /* 0x3e67bebf4282de36 */
    3.45294525105681104660e-08,   /* 0x3e6289b11aeb783f */
    2.47132034530447431509e-08,   /* 0x3e5a891d1772f538 */
    3.59655343422487209774e-08,   /* 0x3e634f10be1fb591 */
    5.51581770357780862071e-08,   /* 0x3e6d9ce1d316eb93 */
    3.60171867511861372793e-08,   /* 0x3e63562a19a9c442 */
    1.94511067964296180547e-08,   /* 0x3e54e2adf548084c */
    1.54137376631349347838e-08,   /* 0x3e508ce55cc8c97a */
    3.93171034490174464173e-09,   /* 0x3e30e2f613e85bda */
    5.52990607758839766440e-08,   /* 0x3e6db03ebb0227bf */
    3.29990737637586136511e-08,   /* 0x3e61b75bb09cb098 */
    1.18436010922446096216e-08,   /* 0x3e496f16abb9df22 */
    4.04248680368301346709e-08,   /* 0x3e65b3f399411c62 */
    2.27418915900284316293e-08,   /* 0x3e586b3e59f65355 */
    1.70263791333409206020e-08,   /* 0x3e52482ceae1ac12 */
    5.76999904754328540596e-08};  /* 0x3e6efa39ef35793c */

#ifndef COMPILING_LOG2
  /* log2_lead and log2_tail sum to an extra-precise version
     of log(2).  They are not needed for the log2 build, which adds
     xexp directly instead of xexp * log(2). */
  static const double
    log2_lead = 6.93147122859954833984e-01,  /* 0x3fe62e42e0000000 */
    log2_tail = 5.76999904754328540596e-08;  /* 0x3e6efa39ef35793c */
#endif

  static const double
  /* Approximating polynomial coefficients for x near 1.0 */
    ca_1 = 8.33333333333317923934e-02,  /* 0x3fb55555555554e6 */
    ca_2 = 1.25000000037717509602e-02,  /* 0x3f89999999bac6d4 */
    ca_3 = 2.23213998791944806202e-03,  /* 0x3f62492307f1519f */
    ca_4 = 4.34887777707614552256e-04,  /* 0x3f3c8034c85dfff0 */

  /* Approximating polynomial coefficients for other x */
    cb_1 = 8.33333333333333593622e-02,  /* 0x3fb5555555555557 */
    cb_2 = 1.24999999978138668903e-02,  /* 0x3f89999999865ede */
    cb_3 = 2.23219810758559851206e-03;  /* 0x3f6249423bd94741 */

#if defined(COMPILING_LOG10)
  /* log10e_lead and log10e_tail sum to an extra-precision
     version of log10(e) (19 bits in lead) */
  static const double
    log10e_lead = 4.34293746948242187500e-01, /* 0x3fdbcb7800000000 */
    log10e_tail = 7.3495500964015109100644e-7; /* 0x3ea8a93728719535 */
#elif defined(COMPILING_LOG2)
  /* log2e_lead and log2e_tail sum to an extra-precision
     version of log2(e) (19 bits in lead) */
   static const double
     log2e_lead = 1.44269180297851562500E+00, /* 0x3FF7154400000000 */
     log2e_tail = 3.23791044778235969970E-06; /* 0x3ECB295C17F0BBBE */
#endif

  /* Bit patterns bounding the "close to 1.0" interval; the comparison
     below is done on the raw bits of x. */
  static const unsigned long
    log_thresh1 = 0x3fee0faa00000000,
    log_thresh2 = 0x3ff1082c00000000;


  /* ux receives the bit pattern of x. */
  GET_BITS_DP64(x, ux);

  if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (ux & MANTBITS_DP64)
        /* x is NaN */
        return x + x; /* Raise invalid if it is a signalling NaN */
      else
        {
          /* x is infinity */
          if (ux & SIGNBIT_DP64)
            /* x is negative infinity. Return a NaN. */
            return retval_errno_edom(x);
          else
            return x;
        }
    }
  else if (!(ux & ~SIGNBIT_DP64))
    /* x is +/-zero. Return -infinity with div-by-zero flag. */
    return retval_errno_erange_overflow(x);
  else if (ux & SIGNBIT_DP64)
    /* x is negative. Return a NaN. */
    return retval_errno_edom(x);


  /* From here on x is finite and strictly positive. */

  /* log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000
     log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000 */
  if (ux >= log_thresh1 && ux <= log_thresh2)
    {
      /* Arguments close to 1.0 are handled separately to maintain
         accuracy.

         The approximation in this region exploits the identity
             log( 1 + r ) = log( 1 + u/2 )  -  log( 1 - u/2 ), where
             u  = 2r / (2+r).
         Note that the right hand side has an odd Taylor series expansion
         which converges much faster than the Taylor series expansion of
         log( 1 + r ) in r. Thus, we approximate log( 1 + r ) by
             u + A1 * u^3 + A2 * u^5 + ... + An * u^(2n+1).

         One subtlety is that since u cannot be calculated from
         r exactly, the rounding error in the first u should be
         avoided if possible. To accomplish this, we observe that
                       u  =  r  -  r*r/(2+r).
         Since x (=1+r) is the input argument, and thus presumed exact,
         the formula above approximates u accurately because
                       u  =  r  -  correction,
         and the magnitude of "correction" (of the order of r*r)
         is small.
         With these observations, we will approximate log( 1 + r ) by
            r + (  (A1*u^3 + ... + An*u^(2n+1)) - correction ).

         We approximate log(1+r) by an odd polynomial in u, where
                  u = 2r/(2+r) = r - r*r/(2+r).
      */
      r = x - 1.0;
      /* u is first r/(2+r); it is doubled to 2r/(2+r) only after
         correction = r*r/(2+r) has been formed from it. */
      u          = r / (2.0 + r);
      correction = r * u;
      u          = u + u;
      v          = u * u;
      r1 = r;
      r2 = (u * v * (ca_1 + v * (ca_2 + v * (ca_3 + v * ca_4))) - correction);
#if defined(COMPILING_LOG10)
      /* At this point r1,r2 is an extra-precise approximation to
         natural log(x). Convert it to log10(x) by multiplying
         carefully by log10(e).
         Shift some bits from r1 to r2 so that log10e_lead*r1
         can be computed without rounding error */
      r = r1;
      GET_BITS_DP64(r1, ut);
      /* Keep only the upper 32 bits of r1's bit pattern. */
      PUT_BITS_DP64(ut & 0xffffffff00000000, r1);
      r2 = r2 + (r - r1);
      return (((log10e_tail*r2) + log10e_tail*r1) + log10e_lead*r2) +
        log10e_lead*r1;
#elif defined(COMPILING_LOG2)
      /* Similarly handle log2(x) by multiplying carefully by log2(e). */
      r = r1;
      GET_BITS_DP64(r1, ut);
      /* Keep only the upper 32 bits of r1's bit pattern. */
      PUT_BITS_DP64(ut & 0xffffffff00000000, r1);
      r2 = r2 + (r - r1);
      return (((log2e_tail*r2) + log2e_tail*r1) + log2e_lead*r2) +
        log2e_lead*r1;
#else
      return r1 + r2;
#endif
    }
  else
    {
      /*
        First, we decompose the argument x to the form
        x  =  2**M  *  (F1  +  F2),
        where  1 <= F1+F2 < 2, M has the value of an integer,
        F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128.

        Second, we approximate log( 1 + F2/F1 ) by an odd polynomial
        in U, where U  =  2 F2 / (2 F1 + F2).
        Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ).
        The core approximation calculates
        Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U   -   1.
        Note that  log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ),
        thus, Poly =  2 arctanh( U/2 ) / U  -  1.

        It is not hard to see that
          log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ).
        Hence, we return Z1 = log(F1), and  Z2 = log( 1 + F2/F1).
        The values of log(F1) are calculated beforehand and stored
        in the program.
      */

      if (ux < IMPBIT_DP64)
        {
          /* The input argument x is denormalized */
          /* Normalize f by increasing the exponent by 60
             and subtracting a correction to account for the implicit
             bit. This replaces a slow denormalized
             multiplication by a fast normal subtraction. */
          static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */
          PUT_BITS_DP64(ux | 0x03d0000000000000, f);
          f -= corr;
          /* Refresh ux from the rescaled value; expadjust undoes the
             scaling when xexp is computed below. */
          GET_BITS_DP64(f, ux);
          expadjust = 60;
        }
      else
        {
          f = x;
          expadjust = 0;
        }

      /* Store the exponent of x in xexp and put
         f into the range [0.5,1) */
      xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust;
      PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f);

      /* Now  x = 2**(xexp+1) * f,  1/2 <= f < 1,
         i.e.  x = 2**xexp * (2f) with 1 <= 2f < 2. */

      /* Set index to be the nearest integer to 128*f */
      /*
        r = 128.0 * f;
        index = (int)(r + 0.5);
      */
      /* This code instead of the above can save several cycles.
         It only works because 64 <= r < 128, so
         the nearest integer is always contained in exactly
         7 bits, and the right shift is always the same. */
      index = (((ux & 0x000fc00000000000) | 0x0010000000000000) >> 46)
        + ((ux & 0x0000200000000000) >> 45);

      /* index runs from 64 to 128, so index-64 selects log(1+i/64),
         i = 0..64, which is log(2*f1) for f1 = index/128. */
      z1 = ln_lead_table[index-64];
      q = ln_tail_table[index-64];
      f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
      f2 = f - f1;
      /* At this point, x = 2**(xexp+1) * ( f1  +  f2 ) where
         f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */

      /* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5*f2 ) */
      u = f2 / (f1 + 0.5 * f2);

      /* Here, |u| <= 2(exp(1/16)-1) / (exp(1/16)+1).
         The core approximation calculates
         poly = [log(1 + u/2) - log(1 - u/2)]/u  -  1  */
      v = u * u;
      poly = (v * (cb_1 + v * (cb_2 + v * cb_3)));
      z2 = q + (u + u * poly);

      /* Now z1,z2 is an extra-precise approximation of log(2f). */

#if defined (COMPILING_LOG10)
      /* Add xexp * log(2) to z1,z2 to get log(x). */
      r1 = (xexp * log2_lead + z1);
      r2 = (xexp * log2_tail + z2);
      /* At this point r1,r2 is an extra-precise approximation to
         natural log(x). Convert it to log10(x) by multiplying
         carefully by log10(e). */
      return (((log10e_tail*r2) + log10e_tail*r1) + log10e_lead*r2) +
        log10e_lead*r1;
#elif defined(COMPILING_LOG2)
      /* Convert to log2(x) by multiplying carefully by log2(e)
         and adding xexp. */
      r1 = xexp + log2e_lead*z1;
      r2 = (((log2e_tail*z2) + log2e_tail*z1) + log2e_lead*z2);
      return r1 + r2;
#else
      /* Add xexp * log(2) to z1,z2 to get the result log(x).
         The computed r1 is not subject to rounding error because
         xexp has at most 10 significant bits, log(2) has 24 significant
         bits, and z1 has up to 24 bits; and the exponents of z1
         and z2 differ by at most 6. */
      r1 = (xexp * log2_lead + z1);
      r2 = (xexp * log2_tail + z2);
      /* Natural log(x) */
      return r1 + r2;
#endif
    }
}

/* Give the implementation above its public name and its __ieee754_
   name, for whichever variant is being built.
   NOTE(review): weak_alias is not defined in this file; presumably it is
   glibc's macro that makes the second name a weak alias of the first. */
#if defined(COMPILING_LOG10)
weak_alias (__log10, log10)
weak_alias (__log10, __ieee754_log10)
#elif defined(COMPILING_LOG2)
weak_alias (__log2, log2)
weak_alias (__log2, __ieee754_log2)
#else
weak_alias (__log, log)
weak_alias (__log, __ieee754_log)
#endif

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_log10.c.x86_64-new-libm (+2 lines)
	1	#define COMPILING_LOG10 1
	2	#include <w_log.c>

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_log10f.c.x86_64-new-libm (+2 lines)
	1	#define COMPILING_LOG10 1
	2	#include <w_logf.c>

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_log2.c.x86_64-new-libm (+2 lines)
	1	#define COMPILING_LOG2 1
	2	#include <w_log.c>

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_log2f.c.x86_64-new-libm (+2 lines)
	1	#define COMPILING_LOG2 1
	2	#include <w_logf.c>




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_NANF_WITH_FLAGS
#define USE_INFINITYF_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_NANF_WITH_FLAGS
#undef USE_INFINITYF_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
/* Build the matherr() record for a zero argument (type SING) and return
   the value the caller should hand back: -HUGE in SVID mode, otherwise
   the negated result of infinityf_with_flags(AMD_F_DIVBYZERO).
   errno is set to ERANGE in POSIX mode, or in the other modes when
   matherr() returns zero.  x is widened to double and recorded as both
   arg1 and arg2; the double retval is converted back to float on
   return. */
static inline float retval_errno_erange_overflow(float x)
{
  /* Function name reported to matherr() for the variant being built. */
#if defined(COMPILING_LOG10)
  char *fname = (char *)"log10f";
#elif defined(COMPILING_LOG2)
  char *fname = (char *)"log2f";
#else
  char *fname = (char *)"logf";
#endif
  struct exception exc;

  exc.type = SING;
  exc.name = fname;
  exc.arg1 = (double)x;
  exc.arg2 = (double)x;

  if (_LIB_VERSION != _SVID_)
    {
      exc.retval = -infinityf_with_flags(AMD_F_DIVBYZERO);
    }
  else
    {
      exc.retval = -HUGE;
    }

  /* matherr() is only consulted outside POSIX mode. */
  if (_LIB_VERSION == _POSIX_ || !matherr(&exc))
    {
      __set_errno(ERANGE);
    }

  return exc.retval;
}

/* Deal with errno for an out-of-domain argument (type DOMAIN).
   Returns -HUGE in SVID mode, otherwise nanf_with_flags(AMD_F_INVALID).
   In POSIX mode errno is set to EDOM without calling matherr().  In the
   other modes matherr() is called first; if it returns zero, SVID mode
   additionally writes a "DOMAIN error" line to stderr, and errno is set
   to EDOM.  x is widened to double and recorded as both arg1 and arg2;
   the double retval is converted back to float on return. */
static inline float retval_errno_edom(float x)
{
  /* Reported name and SVID diagnostic for the variant being built. */
#if defined(COMPILING_LOG10)
  char *fname = (char *)"log10f";
  const char *svid_msg = "log10f: DOMAIN error\n";
#elif defined(COMPILING_LOG2)
  char *fname = (char *)"log2f";
  const char *svid_msg = "log2f: DOMAIN error\n";
#else
  char *fname = (char *)"logf";
  const char *svid_msg = "logf: DOMAIN error\n";
#endif
  struct exception exc;

  exc.type = DOMAIN;
  exc.name = fname;
  exc.arg1 = (double)x;
  exc.arg2 = (double)x;

  if (_LIB_VERSION != _SVID_)
    {
      exc.retval = nanf_with_flags(AMD_F_INVALID);
    }
  else
    {
      exc.retval = -HUGE;
    }

  if (_LIB_VERSION != _POSIX_)
    {
      /* A non-zero return from matherr() leaves errno untouched. */
      if (matherr(&exc))
        {
          return exc.retval;
        }
      if (_LIB_VERSION == _SVID_)
        {
          (void)fputs(svid_msg, stderr);
        }
    }

  __set_errno(EDOM);
  return exc.retval;
}

/* Table-driven logarithm of a float, evaluated in double precision.
   Depending on which of COMPILING_LOG10 / COMPILING_LOG2 is defined,
   this one body is built as __log10f, __log2f or (by default) the
   natural-log __logf.

   Special arguments, as handled below:
     NaN                 -> fx + fx
     +infinity           -> fx
     -infinity, fx < 0   -> retval_errno_edom(fx)
     +/-0                -> retval_errno_erange_overflow(fx)
   In the natural-log build only, the float nearest to e returns
   exactly 1.0F.  The double result is converted to float on return. */
#if defined(COMPILING_LOG10)
float __log10f(float fx)
#elif defined(COMPILING_LOG2)
float __log2f(float fx)
#else
float __logf(float fx)
#endif
{

  /* All arithmetic below is carried out on the double promotion of fx. */
  double x = fx;

  int xexp;
  double r, f, f1, f2, q, u, v, z1, z2, poly;
  int index;
  unsigned long ux;

  /*
    Computes natural log(x) for float arguments. Algorithm is
    basically a promotion of the arguments to double followed
    by an inlined version of the double algorithm, simplified
    for efficiency (see log_amd.c). Simplifications include:
    * Special algorithm for arguments near 1.0 not required
    * Scaling of denormalised arguments not required
    * Shorter core series approximations used
  */

/* Arrays ln_lead_table and ln_tail_table contain
   leading and trailing parts respectively of precomputed
   values of natural log(1+i/64), for i = 0, 1, ..., 64.
   ln_lead_table contains the first 24 bits of precision,
   and ln_tail_table contains a further 53 bits precision. */

  static const double ln_lead_table[65] = {
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    1.55041813850402832031e-02,   /* 0x3f8fc0a800000000 */
    3.07716131210327148438e-02,   /* 0x3f9f829800000000 */
    4.58095073699951171875e-02,   /* 0x3fa7745800000000 */
    6.06245994567871093750e-02,   /* 0x3faf0a3000000000 */
    7.52233862876892089844e-02,   /* 0x3fb341d700000000 */
    8.96121263504028320312e-02,   /* 0x3fb6f0d200000000 */
    1.03796780109405517578e-01,   /* 0x3fba926d00000000 */
    1.17783010005950927734e-01,   /* 0x3fbe270700000000 */
    1.31576299667358398438e-01,   /* 0x3fc0d77e00000000 */
    1.45181953907012939453e-01,   /* 0x3fc2955280000000 */
    1.58604979515075683594e-01,   /* 0x3fc44d2b00000000 */
    1.71850204467773437500e-01,   /* 0x3fc5ff3000000000 */
    1.84922337532043457031e-01,   /* 0x3fc7ab8900000000 */
    1.97825729846954345703e-01,   /* 0x3fc9525a80000000 */
    2.10564732551574707031e-01,   /* 0x3fcaf3c900000000 */
    2.23143517971038818359e-01,   /* 0x3fcc8ff780000000 */
    2.35566020011901855469e-01,   /* 0x3fce270700000000 */
    2.47836112976074218750e-01,   /* 0x3fcfb91800000000 */
    2.59957492351531982422e-01,   /* 0x3fd0a324c0000000 */
    2.71933674812316894531e-01,   /* 0x3fd1675c80000000 */
    2.83768117427825927734e-01,   /* 0x3fd22941c0000000 */
    2.95464158058166503906e-01,   /* 0x3fd2e8e280000000 */
    3.07025015354156494141e-01,   /* 0x3fd3a64c40000000 */
    3.18453729152679443359e-01,   /* 0x3fd4618bc0000000 */
    3.29753279685974121094e-01,   /* 0x3fd51aad80000000 */
    3.40926527976989746094e-01,   /* 0x3fd5d1bd80000000 */
    3.51976394653320312500e-01,   /* 0x3fd686c800000000 */
    3.62905442714691162109e-01,   /* 0x3fd739d7c0000000 */
    3.73716354370117187500e-01,   /* 0x3fd7eaf800000000 */
    3.84411692619323730469e-01,   /* 0x3fd89a3380000000 */
    3.94993782043457031250e-01,   /* 0x3fd9479400000000 */
    4.05465066432952880859e-01,   /* 0x3fd9f323c0000000 */
    4.15827870368957519531e-01,   /* 0x3fda9cec80000000 */
    4.26084339618682861328e-01,   /* 0x3fdb44f740000000 */
    4.36236739158630371094e-01,   /* 0x3fdbeb4d80000000 */
    4.46287095546722412109e-01,   /* 0x3fdc8ff7c0000000 */
    4.56237375736236572266e-01,   /* 0x3fdd32fe40000000 */
    4.66089725494384765625e-01,   /* 0x3fddd46a00000000 */
    4.75845873355865478516e-01,   /* 0x3fde744240000000 */
    4.85507786273956298828e-01,   /* 0x3fdf128f40000000 */
    4.95077252388000488281e-01,   /* 0x3fdfaf5880000000 */
    5.04556000232696533203e-01,   /* 0x3fe02552a0000000 */
    5.13945698738098144531e-01,   /* 0x3fe0723e40000000 */
    5.23248136043548583984e-01,   /* 0x3fe0be72e0000000 */
    5.32464742660522460938e-01,   /* 0x3fe109f380000000 */
    5.41597247123718261719e-01,   /* 0x3fe154c3c0000000 */
    5.50647079944610595703e-01,   /* 0x3fe19ee6a0000000 */
    5.59615731239318847656e-01,   /* 0x3fe1e85f40000000 */
    5.68504691123962402344e-01,   /* 0x3fe23130c0000000 */
    5.77315330505371093750e-01,   /* 0x3fe2795e00000000 */
    5.86049020290374755859e-01,   /* 0x3fe2c0e9e0000000 */
    5.94707071781158447266e-01,   /* 0x3fe307d720000000 */
    6.03290796279907226562e-01,   /* 0x3fe34e2880000000 */
    6.11801505088806152344e-01,   /* 0x3fe393e0c0000000 */
    6.20240390300750732422e-01,   /* 0x3fe3d90260000000 */
    6.28608644008636474609e-01,   /* 0x3fe41d8fe0000000 */
    6.36907458305358886719e-01,   /* 0x3fe4618bc0000000 */
    6.45137906074523925781e-01,   /* 0x3fe4a4f840000000 */
    6.53301239013671875000e-01,   /* 0x3fe4e7d800000000 */
    6.61398470401763916016e-01,   /* 0x3fe52a2d20000000 */
    6.69430613517761230469e-01,   /* 0x3fe56bf9c0000000 */
    6.77398800849914550781e-01,   /* 0x3fe5ad4040000000 */
    6.85303986072540283203e-01,   /* 0x3fe5ee02a0000000 */
    6.93147122859954833984e-01};  /* 0x3fe62e42e0000000 */

  static const double ln_tail_table[65] = {
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    5.15092497094772879206e-09,   /* 0x3e361f807c79f3db */
    4.55457209735272790188e-08,   /* 0x3e6873c1980267c8 */
    2.86612990859791781788e-08,   /* 0x3e5ec65b9f88c69e */
    2.23596477332056055352e-08,   /* 0x3e58022c54cc2f99 */
    3.49498983167142274770e-08,   /* 0x3e62c37a3a125330 */
    3.23392843005887000414e-08,   /* 0x3e615cad69737c93 */
    1.35722380472479366661e-08,   /* 0x3e4d256ab1b285e9 */
    2.56504325268044191098e-08,   /* 0x3e5b8abcb97a7aa2 */
    5.81213608741512136843e-08,   /* 0x3e6f34239659a5dc */
    5.59374849578288093334e-08,   /* 0x3e6e07fd48d30177 */
    5.06615629004996189970e-08,   /* 0x3e6b32df4799f4f6 */
    5.24588857848400955725e-08,   /* 0x3e6c29e4f4f21cf8 */
    9.61968535632653505972e-10,   /* 0x3e1086c848df1b59 */
    1.34829655346594463137e-08,   /* 0x3e4cf456b4764130 */
    3.65557749306383026498e-08,   /* 0x3e63a02ffcb63398 */
    3.33431709374069198903e-08,   /* 0x3e61e6a6886b0976 */
    5.13008650536088382197e-08,   /* 0x3e6b8abcb97a7aa2 */
    5.09285070380306053751e-08,   /* 0x3e6b578f8aa35552 */
    3.20853940845502057341e-08,   /* 0x3e6139c871afb9fc */
    4.06713248643004200446e-08,   /* 0x3e65d5d30701ce64 */
    5.57028186706125221168e-08,   /* 0x3e6de7bcb2d12142 */
    5.48356693724804282546e-08,   /* 0x3e6d708e984e1664 */
    1.99407553679345001938e-08,   /* 0x3e556945e9c72f36 */
    1.96585517245087232086e-09,   /* 0x3e20e2f613e85bda */
    6.68649386072067321503e-09,   /* 0x3e3cb7e0b42724f6 */
    5.89936034642113390002e-08,   /* 0x3e6fac04e52846c7 */
    2.85038578721554472484e-08,   /* 0x3e5e9b14aec442be */
    5.09746772910284482606e-08,   /* 0x3e6b5de8034e7126 */
    5.54234668933210171467e-08,   /* 0x3e6dc157e1b259d3 */
    6.29100830926604004874e-09,   /* 0x3e3b05096ad69c62 */
    2.61974119468563937716e-08,   /* 0x3e5c2116faba4cdd */
    4.16752115011186398935e-08,   /* 0x3e665fcc25f95b47 */
    2.47747534460820790327e-08,   /* 0x3e5a9a08498d4850 */
    5.56922172017964209793e-08,   /* 0x3e6de647b1465f77 */
    2.76162876992552906035e-08,   /* 0x3e5da71b7bf7861d */
    7.08169709942321478061e-09,   /* 0x3e3e6a6886b09760 */
    5.77453510221151779025e-08,   /* 0x3e6f0075eab0ef64 */
    4.43021445893361960146e-09,   /* 0x3e33071282fb989b */
    3.15140984357495864573e-08,   /* 0x3e60eb43c3f1bed2 */
    2.95077445089736670973e-08,   /* 0x3e5faf06ecb35c84 */
    1.44098510263167149349e-08,   /* 0x3e4ef1e63db35f68 */
    1.05196987538551827693e-08,   /* 0x3e469743fb1a71a5 */
    5.23641361722697546261e-08,   /* 0x3e6c1cdf404e5796 */
    7.72099925253243069458e-09,   /* 0x3e4094aa0ada625e */
    5.62089493829364197156e-08,   /* 0x3e6e2d4c96fde3ec */
    3.53090261098577946927e-08,   /* 0x3e62f4d5e9a98f34 */
    3.80080516835568242269e-08,   /* 0x3e6467c96ecc5cbe */
    5.66961038386146408282e-08,   /* 0x3e6e7040d03dec5a */
    4.42287063097349852717e-08,   /* 0x3e67bebf4282de36 */
    3.45294525105681104660e-08,   /* 0x3e6289b11aeb783f */
    2.47132034530447431509e-08,   /* 0x3e5a891d1772f538 */
    3.59655343422487209774e-08,   /* 0x3e634f10be1fb591 */
    5.51581770357780862071e-08,   /* 0x3e6d9ce1d316eb93 */
    3.60171867511861372793e-08,   /* 0x3e63562a19a9c442 */
    1.94511067964296180547e-08,   /* 0x3e54e2adf548084c */
    1.54137376631349347838e-08,   /* 0x3e508ce55cc8c97a */
    3.93171034490174464173e-09,   /* 0x3e30e2f613e85bda */
    5.52990607758839766440e-08,   /* 0x3e6db03ebb0227bf */
    3.29990737637586136511e-08,   /* 0x3e61b75bb09cb098 */
    1.18436010922446096216e-08,   /* 0x3e496f16abb9df22 */
    4.04248680368301346709e-08,   /* 0x3e65b3f399411c62 */
    2.27418915900284316293e-08,   /* 0x3e586b3e59f65355 */
    1.70263791333409206020e-08,   /* 0x3e52482ceae1ac12 */
    5.76999904754328540596e-08};  /* 0x3e6efa39ef35793c */

  static const double
    /* Natural log of 2 as a single double (this local constant is a
       value, not the log2 function). */
    log2 = 6.931471805599453e-01,       /* 0x3fe62e42fefa39ef */

  /* Approximating polynomial coefficients */
    cb_1 = 8.33333333333333593622e-02,  /* 0x3fb5555555555557 */
    cb_2 = 1.24999999978138668903e-02;  /* 0x3f89999999865ede */

#if defined(COMPILING_LOG10)
  /* log10(e), used to convert the natural log result to base 10. */
  static const double
    log10e      = 4.34294481903251827651e-01; /* 0x3fdbcb7b1526e50e */
#elif defined(COMPILING_LOG2)
  /* log2(e), used to convert the natural log result to base 2. */
   static const double
     log2e      = 1.44269504088896340735e+00; /* 0x3ff71547652b82fe */
#endif


  /* ux receives the bit pattern of the promoted argument. */
  GET_BITS_DP64(x, ux);

#if !defined(COMPILING_LOG10) && !defined(COMPILING_LOG2)
  if (ux == 0x4005bf0a80000000)
    /* Treat this, the number closest to e in float arithmetic,
       as a special case and return 1.0 */
    return 1.0F;
#endif

  if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (ux & MANTBITS_DP64)
        /* x is NaN */
        return fx + fx; /* Raise invalid if it is a signalling NaN */
      else
        {
          /* x is infinity */
          if (ux & SIGNBIT_DP64)
            /* x is negative infinity. Return a NaN. */
            return retval_errno_edom(fx);
          else
            return fx;
        }
    }
  else if (!(ux & ~SIGNBIT_DP64))
    /* x is +/-zero. Return -infinity with div-by-zero flag. */
    return retval_errno_erange_overflow(fx);
  else if (ux & SIGNBIT_DP64)
    /* x is negative. Return a NaN. */
    return retval_errno_edom(fx);


  /*
    First, we decompose the argument x to the form
    x  =  2**M  *  (F1  +  F2),
    where  1 <= F1+F2 < 2, M has the value of an integer,
    F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128.

    Second, we approximate log( 1 + F2/F1 ) by an odd polynomial
    in U, where U  =  2 F2 / (2 F1 + F2).
    Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ).
    The core approximation calculates
    Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U   -   1.
    Note that  log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ),
    thus, Poly =  2 arctanh( U/2 ) / U  -  1.

    It is not hard to see that
    log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ).
    Hence, we return Z1 = log(F1), and  Z2 = log( 1 + F2/F1).
    The values of log(F1) are calculated beforehand and stored
    in the program.
  */

  f = x;

  /* Store the exponent of x in xexp and put
     f into the range [0.5,1) */
  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
  PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f);

  /* Now  x = 2**(xexp+1) * f,  1/2 <= f < 1,
     i.e.  x = 2**xexp * (2f) with 1 <= 2f < 2. */

  /* Set index to be the nearest integer to 128*f */
  /*
    r = 128.0 * f;
    index = (int)(r + 0.5);
  */
  /* This code instead of the above can save several cycles.
     It only works because 64 <= r < 128, so
     the nearest integer is always contained in exactly
     7 bits, and the right shift is always the same. */
  index = (((ux & 0x000fc00000000000) | 0x0010000000000000) >> 46)
    + ((ux & 0x0000200000000000) >> 45);
  /* index runs from 64 to 128, so index-64 selects log(1+i/64),
     i = 0..64, which is log(2*f1) for f1 = index/128. */
  z1 = ln_lead_table[index-64];
  q = ln_tail_table[index-64];
  f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
  f2 = f - f1;
  /* At this point, x = 2**(xexp+1) * ( f1  +  f2 ) where
     f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */

  /* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5*f2 ) */
  u = f2 / (f1 + 0.5 * f2);

  /* Here, |u| <= 2(exp(1/16)-1) / (exp(1/16)+1).
     The core approximation calculates
     poly = [log(1 + u/2) - log(1 - u/2)]/u  -  1  */
  v = u * u;
  poly = (v * (cb_1 + v * cb_2));
  z2 = q + (u + u * poly);

  /* Now z1,z2 is an extra-precise approximation of log(2f).
     Add xexp * log(2) to z1, z2 to get the result log(x). */

  r = xexp*log2 + z1 + z2;
  /* The double result is converted to float by the return. */
#if defined (COMPILING_LOG10)
  return log10e*r;
#elif defined(COMPILING_LOG2)
  return log2e*r;
#else
  return r;
#endif
}

/* Publish the routine defined above under its public name and its
   __ieee754_ name through weak_alias.  Exactly one pair is emitted:
   the log10f pair when COMPILING_LOG10 is defined, the log2f pair when
   COMPILING_LOG2 is defined, and the logf pair otherwise.  These are the
   same macros that select the final scaling of the result above. */
#if defined(COMPILING_LOG10)
weak_alias (__log10f, log10f)
weak_alias (__log10f, __ieee754_log10f)
#elif defined(COMPILING_LOG2)
weak_alias (__log2f, log2f)
weak_alias (__log2f, __ieee754_log2f)
#else
weak_alias (__logf, logf)
weak_alias (__logf, __ieee754_logf)
#endif




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_SPLITEXP
#define USE_SCALEDOUBLE_2
#define USE_SCALEDOUBLE_3
#define USE_SCALEDOWNDOUBLE
#define USE_INFINITY_WITH_FLAGS
#define USE_ZERO_WITH_FLAGS
#define USE_NAN_WITH_FLAGS
#define USE_VAL_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_SPLITEXP
#undef USE_SCALEDOUBLE_2
#undef USE_SCALEDOUBLE_3
#undef USE_SCALEDOWNDOUBLE
#undef USE_INFINITY_WITH_FLAGS
#undef USE_ZERO_WITH_FLAGS
#undef USE_NAN_WITH_FLAGS
#undef USE_VAL_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
/* Produce the value returned by pow() when the result overflows, and
   report the range error.

   x, y  - the arguments pow() was called with (stored in the exception
           record handed to matherr).
   sign  - 1 for a positive result, anything else for a negative one.

   In _SVID_ mode the value is +/-HUGE; otherwise it is +/-infinity
   obtained from infinity_with_flags(AMD_F_OVERFLOW).  errno is set to
   ERANGE in _POSIX_ mode (without consulting matherr), or in the other
   modes whenever matherr() returns zero.  The value returned is
   exc.retval as it stands after any matherr() call. */
static inline double retval_errno_erange_overflow(double x, double y, int sign)
{
  struct exception exc;
  const int want_positive = (sign == 1);

  exc.type = OVERFLOW;
  exc.name = (char *)"pow";
  exc.arg1 = x;
  exc.arg2 = y;

  if (_LIB_VERSION != _SVID_)
    exc.retval = want_positive ? infinity_with_flags(AMD_F_OVERFLOW)
                               : -infinity_with_flags(AMD_F_OVERFLOW);
  else
    exc.retval = want_positive ? HUGE : -HUGE;

  /* Short-circuit keeps matherr() from being called in _POSIX_ mode. */
  if (_LIB_VERSION == _POSIX_ || !matherr(&exc))
    __set_errno(ERANGE);

  return exc.retval;
}

/* Produce the value returned by pow() when the result underflows, and
   report the range error.

   x, y  - the arguments pow() was called with (stored in the exception
           record handed to matherr).
   sign  - 1 for a +0.0 result, anything else for -0.0.

   The zero comes from zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT).
   errno is set to ERANGE in _POSIX_ mode (without consulting matherr),
   or in the other modes whenever matherr() returns zero. */
static inline double retval_errno_erange_underflow(double x, double y, int sign)
{
  struct exception exc;

  exc.type = UNDERFLOW;
  exc.name = (char *)"pow";
  exc.arg1 = x;
  exc.arg2 = y;
  exc.retval = (sign == 1)
    ? zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT)
    : -zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);

  /* Short-circuit keeps matherr() from being called in _POSIX_ mode. */
  if (_LIB_VERSION == _POSIX_ || !matherr(&exc))
    __set_errno(ERANGE);

  return exc.retval;
}

/* Deal with errno for out-of-range arguments */
/* Produce the value returned by pow() for an argument outside its domain,
   and report the domain error.

   x, y  - the arguments pow() was called with (stored in the exception
           record handed to matherr).
   type  - selects the non-SVID return value:
             1  +infinity, raising divide-by-zero
             2  -infinity, raising divide-by-zero
             3  NaN, raising invalid
           In _SVID_ mode the value is 0.0 regardless of type.

   errno is set to EDOM.  In _POSIX_ mode this happens directly and
   matherr() is not consulted, matching the overflow/underflow helpers in
   this file; in the other modes EDOM is set only when matherr() returns
   zero, and _SVID_ mode additionally prints a diagnostic on stderr. */
static inline double retval_errno_edom(double x, double y, int type)
{
  struct exception exc;
  exc.arg1 = x;
  exc.arg2 = y;
  exc.type = DOMAIN;
  exc.name = (char *)"pow";
  if (_LIB_VERSION == _SVID_)
    exc.retval = 0.0;
  else if (type == 1)
    exc.retval = infinity_with_flags(AMD_F_DIVBYZERO);
  else if (type == 2)
    exc.retval = -infinity_with_flags(AMD_F_DIVBYZERO);
  else /* type == 3 */
    exc.retval = nan_with_flags(AMD_F_INVALID);
  if (_LIB_VERSION == _POSIX_)
    __set_errno (EDOM);
  else if (!matherr(&exc))
    {
      if (_LIB_VERSION == _SVID_)
        (void)fputs("pow: DOMAIN error\n", stderr);
      __set_errno(EDOM);
    }
  return exc.retval;
}

/* Computes the exact product of x and y, the result being the
   nearly doublelength number (z,zz) */
/* Multiply two doubles and return the product as a head/tail pair.

   x, y  - the factors.
   z     - receives the ordinary rounded product x * y.
   zz    - receives a correction term such that (*z, *zz) is a nearly
           double-length representation of the product.

   Each factor is split by clearing the low 27 bits of its bit pattern
   (mask 0xfffffffff8000000); the remainder is recovered by subtraction.
   The order of the additions in the correction term is significant and
   must not be rearranged. */
static void mul12(double x, double y,
                  double *z, double *zz)
{
  unsigned long bits;
  double x_hi, x_lo, y_hi, y_lo;

  /* Head and tail of x. */
  GET_BITS_DP64(x, bits);
  bits &= 0xfffffffff8000000;
  PUT_BITS_DP64(bits, x_hi);
  x_lo = x - x_hi;

  /* Head and tail of y. */
  GET_BITS_DP64(y, bits);
  bits &= 0xfffffffff8000000;
  PUT_BITS_DP64(bits, y_hi);
  y_lo = y - y_hi;

  /* Rounded product, then what the rounding discarded. */
  *z = x * y;
  *zz = (((x_hi * y_hi - *z) + x_hi * y_lo) + x_lo * y_hi) + x_lo * y_lo;
}

/* Computes the doublelength product of (x,xx) and (y,yy), the result
   being the doublelength number (z,zz) */
/* Multiply the double-length numbers (x,xx) and (y,yy).

   x, xx  - head and tail of the first factor.
   y, yy  - head and tail of the second factor.
   z, zz  - receive the head and tail of the double-length product.

   The product of the heads is formed with mul12; the two cross terms are
   added to its tail (no xx * yy term is computed), and the pair is then
   renormalised so that *z carries the leading part. */
static void mul2(double x, double xx, double y, double yy,
                 double *z, double *zz)
{
  double head, tail;

  mul12(x, y, &head, &tail);

  /* Cross terms first, then the tail of the head product. */
  tail = x * yy + xx * y + tail;

  *z = head + tail;
  *zz = (head - *z) + tail;
}

/* Compute x raised to the power y in double precision.

   Special cases are resolved first by inspecting the bit patterns of x and
   y: x == +1, y == 0, NaN arguments, y == 1, huge or infinite y, infinite
   or zero x, and negative x.  Otherwise log2(abs(x)) is formed as a
   double-length value (u1,u2), multiplied by y, and split into an integer
   part iw and a remainder w; the result is exp2(w) scaled by 2**iw,
   negated when x is negative and y is an odd integer.

   Range and domain errors are reported through the retval_errno_* helpers.
   argx/argy preserve the caller's arguments for those reports because x is
   replaced by abs(x) when x is negative and y is an integer.

   When y is huge but finite and x is infinite, the result (+infinity or
   +0) is exact, so it is returned directly without raising overflow or
   underflow and without setting errno. */
double __pow(double x, double y)
{

  /* Arrays log2_lead_table and log2_tail_table contain
     leading and trailing parts respectively of precomputed
     values of log2(1+i/64), for i = 0, 1, ..., 64.
     log2_lead_table contains the first 24 bits of precision,
     and log2_tail_table contains a further 53 bits precision. */

  static const double log2_lead_table[65] = {
    0.00000000000000000000E+00,  /* 0x0000000000000000 */
    2.23678126931190490723E-02,  /* 0x3F96E79680000000 */
    4.43941168487071990967E-02,  /* 0x3FA6BAD360000000 */
    6.60891830921173095703E-02,  /* 0x3FB0EB3880000000 */
    8.74628350138664245605E-02,  /* 0x3FB663F6E0000000 */
    1.08524456620216369629E-01,  /* 0x3FBBC84240000000 */
    1.29283010959625244141E-01,  /* 0x3FC08C5880000000 */
    1.49747118353843688965E-01,  /* 0x3FC32AE9E0000000 */
    1.69924989342689514160E-01,  /* 0x3FC5C01A20000000 */
    1.89824551343917846680E-01,  /* 0x3FC84C2BC0000000 */
    2.09453359246253967285E-01,  /* 0x3FCACF5E20000000 */
    2.28818684816360473633E-01,  /* 0x3FCD49EE40000000 */
    2.47927501797676086426E-01,  /* 0x3FCFBC16A0000000 */
    2.66786515712738037109E-01,  /* 0x3FD11307C0000000 */
    2.85402208566665649414E-01,  /* 0x3FD24407A0000000 */
    3.03780734539031982422E-01,  /* 0x3FD37124C0000000 */
    3.21928083896636962891E-01,  /* 0x3FD49A7840000000 */
    3.39849978685379028320E-01,  /* 0x3FD5C01A20000000 */
    3.57551991939544677734E-01,  /* 0x3FD6E221C0000000 */
    3.75039428472518920898E-01,  /* 0x3FD800A560000000 */
    3.92317414283752441406E-01,  /* 0x3FD91BBA80000000 */
    4.09390926361083984375E-01,  /* 0x3FDA337600000000 */
    4.26264733076095581055E-01,  /* 0x3FDB47EBE0000000 */
    4.42943483591079711914E-01,  /* 0x3FDC592FA0000000 */
    4.59431618452072143555E-01,  /* 0x3FDD6753E0000000 */
    4.75733429193496704102E-01,  /* 0x3FDE726AA0000000 */
    4.91853088140487670898E-01,  /* 0x3FDF7A8560000000 */
    5.07794618606567382812E-01,  /* 0x3FE03FDA80000000 */
    5.23561954498291015625E-01,  /* 0x3FE0C10500000000 */
    5.39158761501312255859E-01,  /* 0x3FE140C9E0000000 */
    5.54588794708251953125E-01,  /* 0x3FE1BF3100000000 */
    5.69855570793151855469E-01,  /* 0x3FE23C41C0000000 */
    5.84962487220764160156E-01,  /* 0x3FE2B80340000000 */
    5.99912822246551513672E-01,  /* 0x3FE3327C60000000 */
    6.14709794521331787109E-01,  /* 0x3FE3ABB3E0000000 */
    6.29356563091278076172E-01,  /* 0x3FE423B060000000 */
    6.43856167793273925781E-01,  /* 0x3FE49A7840000000 */
    6.58211469650268554688E-01,  /* 0x3FE5101180000000 */
    6.72425329685211181641E-01,  /* 0x3FE5848220000000 */
    6.86500489711761474609E-01,  /* 0x3FE5F7CFE0000000 */
    7.00439691543579101562E-01,  /* 0x3FE66A0080000000 */
    7.14245498180389404297E-01,  /* 0x3FE6DB1960000000 */
    7.27920413017272949219E-01,  /* 0x3FE74B1FC0000000 */
    7.41466939449310302734E-01,  /* 0x3FE7BA18E0000000 */
    7.54887461662292480469E-01,  /* 0x3FE82809C0000000 */
    7.68184304237365722656E-01,  /* 0x3FE894F740000000 */
    7.81359672546386718750E-01,  /* 0x3FE900E600000000 */
    7.94415831565856933594E-01,  /* 0x3FE96BDAC0000000 */
    8.07354867458343505859E-01,  /* 0x3FE9D5D9E0000000 */
    8.20178925991058349609E-01,  /* 0x3FEA3EE7E0000000 */
    8.32889974117279052734E-01,  /* 0x3FEAA708E0000000 */
    8.45490038394927978516E-01,  /* 0x3FEB0E4120000000 */
    8.57980966567993164062E-01,  /* 0x3FEB749480000000 */
    8.70364665985107421875E-01,  /* 0x3FEBDA0700000000 */
    8.82643043994903564453E-01,  /* 0x3FEC3E9CA0000000 */
    8.94817709922790527344E-01,  /* 0x3FECA258C0000000 */
    9.06890571117401123047E-01,  /* 0x3FED053F60000000 */
    9.18863236904144287109E-01,  /* 0x3FED6753E0000000 */
    9.30737316608428955078E-01,  /* 0x3FEDC899A0000000 */
    9.42514479160308837891E-01,  /* 0x3FEE291420000000 */
    9.54196274280548095703E-01,  /* 0x3FEE88C6A0000000 */
    9.65784251689910888672E-01,  /* 0x3FEEE7B460000000 */
    9.77279901504516601562E-01,  /* 0x3FEF45E080000000 */
    9.88684654235839843750E-01,  /* 0x3FEFA34E00000000 */
    1.00000000000000000000E+00}; /* 0x3FF0000000000000 */

  static const double log2_tail_table[65] = {
    0.00000000000000000000E+00,  /* 0x0000000000000000 */
    3.35335459194866276130E-10,  /* 0x3DF70B48A629B89C */
    2.50974623855642191448E-09,  /* 0x3E258EFD87313606 */
    7.36565512335896390543E-09,  /* 0x3E3FA29F9AB3CF74 */
    6.23647298369351871453E-09,  /* 0x3E3AC913167CCC53 */
    1.57952684118980398844E-10,  /* 0x3DE5B5774C7658A0 */
    5.98534121117166302413E-09,  /* 0x3E39B4F3C72C4F78 */
    1.15083837244190179789E-09,  /* 0x3E13C570D0FA8F90 */
    1.20996228487473215213E-08,  /* 0x3E49FBD6879FA00B */
    7.53609938318432874467E-09,  /* 0x3E402F03B2FDD224 */
    6.38269581457264661091E-09,  /* 0x3E3B69D927DFC23C */
    5.67952040356156465017E-09,  /* 0x3E3864B2DF91E96A */
    1.16459094073677371864E-08,  /* 0x3E4902680A23A8D9 */
    2.49821633265319561946E-08,  /* 0x3E5AD30B75CB0970 */
    1.02955826924364881206E-08,  /* 0x3E461C0E73048B72 */
    1.36380709420054099385E-08,  /* 0x3E4D499BD9B32266 */
    1.09907253849796912371E-08,  /* 0x3E479A3715FC9256 */
    2.41992456974946430426E-08,  /* 0x3E59FBD6879FA00B */
    1.26785390154315961619E-08,  /* 0x3E4B3A19BCAF1AA4 */
    2.87440583546118995874E-09,  /* 0x3E28B0E2A19575B0 */
    8.49500784748945819113E-09,  /* 0x3E423E2E1169656A */
    9.77661777174938265384E-09,  /* 0x3E44FEC0A13AF880 */
    2.16260023578294509223E-08,  /* 0x3E573882A0A4146E */
    1.22576485902594488001E-08,  /* 0x3E4A52B6ACFCFDCA */
    1.85225112644675216321E-10,  /* 0x3DE975077F1F5F00 */
    1.77290105086271740075E-09,  /* 0x3E1E754D20C519E0 */
    8.18918703987935816281E-09,  /* 0x3E41960D9D9C3262 */
    2.15921288850262793860E-08,  /* 0x3E572F32FE672868 */
    1.55872185666914610882E-09,  /* 0x3E1AC754CB104AE0 */
    4.96067191344004864525E-08,  /* 0x3E6AA1E5439E15A5 */
    5.69693854190458063634E-08,  /* 0x3E6E95D00DE3B513 */
    3.75377959861950863279E-08,  /* 0x3E642727C8080ECC */
    1.35003920212974864041E-08,  /* 0x3E4CFDEB43CFD004 */
    1.99405761661543437744E-08,  /* 0x3E5569394D90D724 */
    4.95938764277745619566E-08,  /* 0x3E6AA02166CCCAB2 */
    5.69883315429349605246E-08,  /* 0x3E6E986AA9670761 */
    2.19814507699593824742E-08,  /* 0x3E579A3715FC9256 */
    1.31015261824841576777E-08,  /* 0x3E4C22A3E377A524 */
    1.22862844080671745121E-08,  /* 0x3E4A6274CF0E362C */
    3.74714569064514928410E-08,  /* 0x3E641E09AEB8CB1A */
    2.65975130588343109077E-08,  /* 0x3E5C8F11979A5DB6 */
    1.94857332324691494283E-08,  /* 0x3E54EC3293B2FBE0 */
    4.15459262300620263689E-08,  /* 0x3E664E0753C6E578 */
    4.69518366451302198484E-08,  /* 0x3E693502E409EAB7 */
    4.05011760638924658298E-08,  /* 0x3E65BE7072DBDC04 */
    2.05395606358225316367E-08,  /* 0x3E560DDF1680DD44 */
    4.09782728853196822622E-08,  /* 0x3E660002CCFE43F5 */
    3.47842490297177925737E-08,  /* 0x3E62ACB5F5EFEC49 */
    5.45992606015825934783E-08,  /* 0x3E6D5010B3666559 */
    3.64241293587091694274E-08,  /* 0x3E638E181ED0798D */
    4.00474626225128781862E-08,  /* 0x3E658014D37CDE37 */
    1.25494472416488406547E-08,  /* 0x3E4AF321AF5E9BB4 */
    2.85595789566572715872E-08,  /* 0x3E5EAA65B49696E2 */
    5.35982971014292903334E-08,  /* 0x3E6CC67E6DB516DE */
    5.36693769435427990824E-09,  /* 0x3E370D02A99B4C58 */
    5.33851529883522815863E-08,  /* 0x3E6CA9331635FEE3 */
    2.44911174062771809500E-08,  /* 0x3E5A4C112CE6312E */
    3.70450225289350432643E-10,  /* 0x3DF975077F1F5F00 */
    2.09544573213940723936E-08,  /* 0x3E567FEAD8BCCE74 */
    2.61789310367290825660E-08,  /* 0x3E5C1C02803F7554 */
    3.61063271131029934309E-08,  /* 0x3E63626A72AA21A3 */
    3.29721761549390770201E-08,  /* 0x3E61B3A9507D6DC1 */
    2.19953998687869412865E-08,  /* 0x3E579E0CAA9C9AB6 */
    3.25363260095299997864E-08,  /* 0x3E6177C23362928B */
    0.00000000000000000000E+00}; /* 0x0000000000000000 */

  static const double
    /* Reciprocal of log(2.0) */
    reclog2_lead =     1.44269504088896338700E+00, /* 0x3FF71547652B82FE */
    reclog2_tail =     2.03552737409310207851E-17; /* 0x3C7777D0FFDA0D20 */

  const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */

  unsigned long ux, ax, uy, ay, mask;
  int yexp, inty, xpos, ypos, negateres;
  double r, u1, u2, w, w1, w2;
  /* Written only so that the comparisons used to probe for signalling
     NaNs below have an observable effect. */
  volatile int dummy;

  double u, r1, r2, f, z1, z2, q, f1, f2, poly;
  int xexp, expadjust, index, iw;

  /* Original arguments, kept for error reporting; x may be replaced by
     abs(x) further down. */
  double argx = x, argy = y;

  GET_BITS_DP64(x, ux);
  ax = ux & (~SIGNBIT_DP64);
  xpos = ax == ux;
  GET_BITS_DP64(y, uy);
  ay = uy & (~SIGNBIT_DP64);
  ypos = ay == uy;

  if (ux == 0x3ff0000000000000)
    {
      /* x = +1.0. Return +1.0 for all y, even NaN,
         raising invalid only if y is a signalling NaN */
      if (y + 1.0 == 2.0) dummy = 1;
      return 1.0;
    }
  else if (ay == 0)
    {
      /* y is zero. Return 1.0, even if x is infinity or NaN,
         raising invalid only if x is a signalling NaN */
      if (x + 1.0 == 2.0) dummy = 1;
      return 1.0;
    }
  else if (((ax & EXPBITS_DP64) == EXPBITS_DP64) &&
           (ax & MANTBITS_DP64))
    /* x is NaN. Return NaN, with invalid exception if it's
       a signalling NaN. */
    return x + x;
  else if (((ay & EXPBITS_DP64) == EXPBITS_DP64) &&
             (ay & MANTBITS_DP64))
   /* y is NaN. Return NaN, with invalid exception if y
      is a signalling NaN. */
   return y + y;
  else if (uy == 0x3ff0000000000000)
    /* y is 1.0; return x */
    return x;
  else if ((ay & EXPBITS_DP64) > 0x43e0000000000000)
    {
      /* y is infinite or so large that the result would
         overflow or underflow. Flags should be raised
         unless the result is exact, which is the case when
         y is an exact infinity or x is an infinity (NaNs were
         ruled out above, so an all-ones exponent means infinity).
         A finite y this large is an even integer, so the result
         is never negative. */
      int yinf = (ay == EXPBITS_DP64);
      int xinf = (ax == EXPBITS_DP64);
      if (ypos)
        {
          /* y is +ve */
          if (ax == 0)
            /* abs(x) = 0.0. */
            return 0.0;
          else if (ax < 0x3ff0000000000000)
            {
              /* abs(x) < 1.0 */
              if (yinf)
                return 0.0;
              else
                return retval_errno_erange_underflow(argx, argy, 1);
            }
          else if (ax == 0x3ff0000000000000)
            /* abs(x) = 1.0. */
            return 1.0;
          else
            {
              /* abs(x) > 1.0 */
              if (yinf || xinf)
                /* Exact +infinity: no flags, no errno. */
                return infinity_with_flags(0);
              else
                return retval_errno_erange_overflow(argx, argy, 1);
            }
        }
      else
        {
          /* y is -ve */
          if (ax == 0)
            /* abs(x) = 0.0. Return +infinity. */
            return retval_errno_edom(argx, argy, 1);
          else if (ax < 0x3ff0000000000000)
            {
            /* abs(x) < 1.0; return +infinity. */
              if (yinf)
                return infinity_with_flags(0);
              else
                return retval_errno_erange_overflow(argx, argy, 1);
            }
          else if (ax == 0x3ff0000000000000)
            /* abs(x) = 1.0. */
            return 1.0;
          else
            {
              /* abs(x) > 1.0 */
              if (yinf || xinf)
                /* Exact +0.0: no flags, no errno. */
                return 0.0;
              else
                return retval_errno_erange_underflow(argx, argy, 1);
            }
        }
    }

  /* See whether y is an integer.
     inty = 0 means not an integer.
     inty = 1 means odd integer.
     inty = 2 means even integer.
  */
  yexp = ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 + 1;
  if (yexp < 1)
    inty = 0;
  else if (yexp > 53)
    inty = 2;
  else /* 1 <= yexp <= 53 */
    {
      /* Mask selecting the bits of y that lie below the units place */
      mask = (1L << (53 - yexp)) - 1;
      if ((uy & mask) != 0)
        inty = 0;
      else if (((uy & ~mask) >> (53 - yexp)) & 0x0000000000000001)
        inty = 1;
      else
        inty = 2;
    }

  if ((ax & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is infinity (NaN was already ruled out). */
      if (xpos)
        {
          /* x is +infinity */
          if (ypos)
            /* y > 0.0 */
            return x;
          else
            return 0.0;
        }
      else
        {
          /* x is -infinity */
          if (inty == 1)
            {
              /* y is an odd integer */
              if (ypos)
                /* Result is -infinity */
                return x;
              else
                return -zero_with_flags(0);
            }
          else
            {
              if (ypos)
                /* Result is +infinity */
                return -x;
              else
                return 0.0;
            }
        }
    }
  else if (ax == 0)
    {
      /* x is zero */
      if (xpos)
        {
          /* x is +0.0 */
          if (ypos)
            /* y is positive; return +0.0 for all cases */
            return x;
          else
            /* y is negative; return +infinity with div-by-zero
               for all cases */
            return retval_errno_edom(argx, argy, 1);
        }
      else
        {
          /* x is -0.0 */
          if (ypos)
            {
              /* y is positive */
              if (inty == 1)
                /* -0.0 raised to a positive odd integer returns -0.0 */
                return x;
              else
                /* Return +0.0 */
                return -x;
            }
          else
            {
              /* y is negative */
              if (inty == 1)
                /* -0.0 raised to a negative odd integer returns -infinity
                   with div-by-zero */
                return retval_errno_edom(argx, argy, 2);
              else
                /* Return +infinity with div-by-zero */
                return retval_errno_edom(argx, argy, 1);
            }
        }
    }

  negateres = 0;
  if (!xpos)
    {
      /* x is negative */
      if (inty)
        {
          /* It's OK because y is an integer. */
          ux = ax;
          PUT_BITS_DP64(ux, x); /* x = abs(x) */
          /* If y is odd, the result will be negative */
          negateres = (inty == 1);
        }
      else
        /* y is not an integer. Return a NaN. */
        return retval_errno_edom(argx, argy, 3);
    }

  if (ay < 0x3c00000000000000)   /* abs(y) < 2^(-63) */
      {
        /* y is close enough to zero for the result to be 1.0
           no matter what the size of x */
        return 1.0 + y;
      }

  /*
    Calculate log2(x)

    First, we decompose the argument x to the form
    x  =  2**M  *  (F1  +  F2),
    where  1 <= F1+F2 < 2, M has the value of an integer,
    F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128.
    (The code below works with f = (F1 + F2)/2 in [0.5,1), so its
    f1 and f2 are half of F1 and F2; the ratio F2/F1 is unaffected.)

    Second, we evaluate log( 1 + F2/F1 ) from its Taylor series in
    U = F2/F1.  U is carried as a double-length value, the terms of
    order U**2 to U**8 are added to it, and the sum is multiplied by
    the double-length constant 1/log(2) to give log2( 1 + F2/F1 ).

    It is not hard to see that
    log2(x) = M + log2(F1) + log2( 1 + F2/F1 ).
    The values of log2(F1) are calculated beforehand and stored
    in the program as a leading part and a trailing part.
  */

  if (ux < IMPBIT_DP64)
      {
        /* The input argument x is denormalized */
        /* Normalize f by increasing the exponent by 60
           and subtracting a correction to account for the implicit
           bit. This replaces a slow denormalized
           multiplication by a fast normal subtraction. */
        static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */
        PUT_BITS_DP64(ux | 0x03d0000000000000, f);
        f -= corr;
        GET_BITS_DP64(f, ux);
        expadjust = 60;
      }
    else
      {
        f = x;
        expadjust = 0;
      }

  /* Store the exponent of x in xexp and put
     f into the range [0.5,1) */
  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust;
  PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f);

  /* Now  x = 2**xexp  * (2f),  1/2 <= f < 1.  The factor of two is
     absorbed by the tables, which hold log2(2*f1). */

  /* Set index to be the nearest integer to 128*f */
  /*
    r = 128.0 * f;
    index = (int)(r + 0.5);
  */
  /* This code instead of the above can save several cycles.
     It only works because 64 <= r < 128, so
     the nearest integer is always contained in exactly
     7 bits, and the right shift is always the same. */
      index = (((ux & 0x000fc00000000000) | 0x0010000000000000) >> 46)
        + ((ux & 0x0000200000000000) >> 45);

  z1 = log2_lead_table[index-64];
  q = log2_tail_table[index-64];
  f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
  f2 = f - f1;
  /* At this point, f = f1  +  f2 where
     f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */

  /* Compute log(1+f2/f1) from its Taylor series in u = f2/f1.
     First form u as the double-length quotient (r1,r2). */

  u = f2/f1;
#if 0
  div2(f2,0.0,f1,0.0,&r1,&r2);
#else
  {
    double cc, h, hh;
    mul12(u, f1, &h, &hh);
    cc = ((f2 - h) - hh) / f1;
    r1 = u + cc;
    r2 = (u - r1) + cc;
  }
#endif
  /* poly = -u^2/2 + u^3/3 - ... - u^8/8, i.e. the series for
     log(1+u) without its leading term u. */
  poly = -u*(u*(1./2.-u*(1./3.-u*
                         (1./4.-u*(1./5.-u*(1./6.-u*
                                            (1./7.-u*(1./8.))))))));
#if 0
  add2(r1,r2,poly,0.0,&r1,&r2);
#else
  {
    double r, s;
    r = r1 + poly;
    s = ((r1 - r) + poly) + r2;
    r1 = r + s;
    r2 = (r - r1) + s;
  }
#endif

  /* Convert the natural logarithm to base 2. */
  mul2(reclog2_lead,reclog2_tail,r1,r2,&r1,&r2);

  /* Add the trailing part of the table value. */
#if 0
  add2(r1,r2,q,0.0,&r1,&r2);
#else
  {
    if (r1 == 0.0)
      r1 = q;
    else if (q != 0.0)
      {
        double r, s;
        r = r1 + q;
        s = ((r1 - r) + q) + r2;
        r1 = r + s;
        r2 = (r - r1) + s;
      }
  }
#endif

  /* Add the leading part of the table value. */
#if 0
  add2(z1,0.0,r1,r2,&z1,&z2);
#else
  {
    double r, s;
    r = z1 + r1;
    s = ((z1 - r) + r1) + r2;
    z1 = r + s;
    z2 = (r - z1) + s;
  }
#endif

  /* Now z1,z2 is an extra-precise approximation of log2(2f).
     Add xexp to z1,z2 to get the result log2(x) as (u1,u2). */

#if 0
  add2(z1,z2,xexp,0.0,&u1,&u2);
#else
  {
    double r, s;
    r = z1 + xexp;
    s = ((xexp - r) + z1) + z2;
    u1 = r + s;
    u2 = (r - u1) + s;
  }
#endif

  /* end of log2(x) calculation*/

  /* Test for overflow and underflow due to y*log2(x)
     being too large or small. */

  if ((u1+u2)*y > 1025)
    {
      if (negateres)
        return retval_errno_erange_overflow(argx, argy, -1);
      else
        return retval_errno_erange_overflow(argx, argy, 1);
    }
  else if ((u1+u2)*y < -1074)
    {
      if (negateres)
        return retval_errno_erange_underflow(argx, argy, -1);
      else
        return retval_errno_erange_underflow(argx, argy, 1);
    }

  /* Carefully compute log2(x) * y */
#if 0
  mul2(u1, u2, y, 0.0, &w1, &w2);
#else
  {
    double c, cc;
    mul12(u1, y, &c, &cc);
    cc = u2 * y + cc;
    w1 = c + cc;
    w2 = (c - w1) + cc;
  }
#endif

  /* Split y*log2(x) into its integer part iw (truncated towards zero)
     and the remainder (w1,w2). */
  w = w1 + w2;
  iw = (int)(w);

#if 0
  sub2(w1, w2, (double)iw, 0.0, &w1, &w2);
#else
  {
    double a, b;
    a = w1 - iw;
    b = ((w1 - a) - iw) + w2;
    w1 = a + b;
    w2 = (a - w1) + b;
  }
#endif

  w = w1 + w2;

  /* The following code computes r = exp2(w) scaled by 2**iw.
     From here on ux and ax describe w, not x. */

  {
    static const double
      log2 = 6.931471805599453094178e-01, /* 0x3fe62e42fefa39ef */
      log2_lead = 6.93147167563438415527E-01, /* 0x3fe62e42f8000000 */
      log2_tail = 1.29965068938898869640E-08, /* 0x3e4be8e7bcd5e4f1 */
      one_by_32_lead = 0.03125;

    double p, z1, z2, z, hx, tx, y1, y2;
    int m, n;

    GET_BITS_DP64(w, ux);
    ax = ux & (~SIGNBIT_DP64);

    /* Handle small arguments separately */
    if (ax < 0x3fb7154764ee6c2f)   /* abs(w) < 1/(16*log2) */
      {
        if (ax < 0x3c00000000000000)   /* abs(w) < 2^(-63) */
          z = 1.0 + w; /* Raises inexact if w is non-zero */
        else
          {
            /* Split w into hx (head) and tx (tail). */
            unsigned long u;
            hx = w;
            GET_BITS_DP64(hx, u);
            u &= 0xfffffffff8000000;
            PUT_BITS_DP64(u, hx);
            tx = w - hx;
            /* Carefully multiply w by log2. y1 is the most significant
               part of the result, and y2 the least significant part */
            y1 = w * log2_lead;
            y2 = (((hx * log2_lead - y1) + hx * log2_tail) +
                  tx * log2_lead) + tx * log2_tail;

            p = y1 + y2;
            z = (9.99564649780173690e-1 +
                 (1.61251249355268050e-5 +
                  (2.37986978239838493e-2 +
                   2.68724774856111190e-7*p)*p)*p)/
              (9.99564649780173692e-1 +
               (-4.99766199765151309e-1 +
                (1.070876894098586184e-1 +
                 (-1.189773642681502232e-2 +
                  5.9480622371960190616e-4*p)*p)*p)*p);

            z = ((z * y1) + (z * y2)) + 1.0;
          }
        r = scaleDouble_2(z, iw);
      }
    else
      {
        /* Find m, z1 and z2 such that exp2(w) = 2**m * (z1 + z2) */
        splitexp(w, log2, 32.0, one_by_32_lead, 0.0, &m, &z1, &z2);

        /* Scale (z1 + z2) by 2.0**(m + iw) */

        n = m+iw;
        z = z1+z2;

        if (n < -1022)
          { /* Result will be denormalised after scaling
               down by 2**n. Using scaleDownDouble instead
               of scaleDouble_3 is faster in this case. */
            GET_BITS_DP64(z, ux);
            scaleDownDouble(ux, -n, &ux);
            PUT_BITS_DP64(ux, r);
          }
        else
          r = scaleDouble_3(z, n);
      }
  }

  /* If r overflowed or underflowed we need to deal with errno */
  if (r > large)
    {
      /* Result has overflowed. */
      if (negateres)
        return retval_errno_erange_overflow(argx, argy, -1);
      else
        return retval_errno_erange_overflow(argx, argy, 1);
    }
  else if (r == 0.0)
    {
      /* Result has underflowed. */
      if (negateres)
        return retval_errno_erange_underflow(argx, argy, -1);
      else
        return retval_errno_erange_underflow(argx, argy, 1);
    }
  else
    {
      if (negateres)
        return -r;
      else
        return r;
    }
}

/* Make __pow reachable under the public name pow and under the
   __ieee754_pow name through weak_alias. */
weak_alias (__pow, pow)
weak_alias (__pow, __ieee754_pow)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_ZEROF_WITH_FLAGS
#define USE_INFINITYF_WITH_FLAGS
#define USE_NANF_WITH_FLAGS
#include "libm_inlines_amd.h"
#undef USE_ZEROF_WITH_FLAGS
#undef USE_INFINITYF_WITH_FLAGS
#undef USE_NANF_WITH_FLAGS

/* Deal with errno for out-of-range result */
#include "libm_errno_amd.h"
/* Produce the value returned by powf() when the result overflows, and
   report the range error.

   x, y  - the arguments powf() was called with; they are widened to
           double for the exception record handed to matherr.
   sign  - 1 for a positive result, anything else for a negative one.

   In _SVID_ mode the value is +/-HUGE; otherwise it is +/-infinity
   obtained from infinityf_with_flags(AMD_F_OVERFLOW).  errno is set to
   ERANGE in _POSIX_ mode (without consulting matherr), or in the other
   modes whenever matherr() returns zero.  exc.retval is converted to
   float on return. */
static inline float retval_errno_erange_overflow(float x, float y, int sign)
{
  struct exception exc;
  const int want_positive = (sign == 1);

  exc.type = OVERFLOW;
  exc.name = (char *)"powf";
  exc.arg1 = (double)x;
  exc.arg2 = (double)y;

  if (_LIB_VERSION != _SVID_)
    exc.retval = want_positive ? infinityf_with_flags(AMD_F_OVERFLOW)
                               : -infinityf_with_flags(AMD_F_OVERFLOW);
  else
    exc.retval = want_positive ? HUGE : -HUGE;

  /* Short-circuit keeps matherr() from being called in _POSIX_ mode. */
  if (_LIB_VERSION == _POSIX_ || !matherr(&exc))
    __set_errno(ERANGE);

  return exc.retval;
}

/* Produce the value returned by powf() when the result underflows, and
   report the range error.

   x, y  - the arguments powf() was called with; they are widened to
           double for the exception record handed to matherr.
   sign  - 1 for a +0.0 result, anything else for -0.0.

   The zero comes from zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT).
   errno is set to ERANGE in _POSIX_ mode (without consulting matherr),
   or in the other modes whenever matherr() returns zero. */
static inline float retval_errno_erange_underflow(float x, float y, int sign)
{
  struct exception exc;

  exc.type = UNDERFLOW;
  exc.name = (char *)"powf";
  exc.arg1 = (double)x;
  exc.arg2 = (double)y;
  exc.retval = (sign == 1)
    ? zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT)
    : -zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);

  /* Short-circuit keeps matherr() from being called in _POSIX_ mode. */
  if (_LIB_VERSION == _POSIX_ || !matherr(&exc))
    __set_errno(ERANGE);

  return exc.retval;
}

/* Deal with errno for out-of-range arguments */
/* Produce the value returned by powf() for an argument outside its
   domain, and report the domain error.

   x, y  - the arguments powf() was called with; they are widened to
           double for the exception record handed to matherr.
   type  - selects the non-SVID return value:
             1  +infinity, raising divide-by-zero
             2  -infinity, raising divide-by-zero
             3  NaN, raising invalid
           In _SVID_ mode the value is 0.0 regardless of type.

   errno is set to EDOM.  In _POSIX_ mode this happens directly and
   matherr() is not consulted, matching the overflow/underflow helpers in
   this file; in the other modes EDOM is set only when matherr() returns
   zero, and _SVID_ mode additionally prints a diagnostic on stderr.  The
   diagnostic names powf, in agreement with exc.name. */
static inline float retval_errno_edom(float x, float y, int type)
{
  struct exception exc;
  exc.arg1 = (double)x;
  exc.arg2 = (double)y;
  exc.type = DOMAIN;
  exc.name = (char *)"powf";
  if (_LIB_VERSION == _SVID_)
    exc.retval = 0.0;
  else if (type == 1)
    exc.retval = infinityf_with_flags(AMD_F_DIVBYZERO);
  else if (type == 2)
    exc.retval = -infinityf_with_flags(AMD_F_DIVBYZERO);
  else /* type == 3 */
    exc.retval = nanf_with_flags(AMD_F_INVALID);
  if (_LIB_VERSION == _POSIX_)
    __set_errno (EDOM);
  else if (!matherr(&exc))
    {
      if (_LIB_VERSION == _SVID_)
        (void)fputs("powf: DOMAIN error\n", stderr);
      __set_errno(EDOM);
    }
  return exc.retval;
}

/* Single-precision power function: returns x raised to the power y.

   The special cases (x == 1, y == 0, NaN arguments, y == 1, huge or
   infinite y, infinite x, zero x, negative x) are resolved from the
   bit patterns of the arguments.  Everything else is computed in
   double precision as exp2(y * log2(x)) and then checked against the
   float range.  Range and domain errors are reported through the
   retval_errno_* helpers, which also choose the value to return. */
float __powf(float x, float y)
{
  unsigned int ux, ax, uy, ay, mask;
  int yexp, inty, xpos, ypos, negateres;
  double dx, dy, dw, dlog2, dr;
  /* Only written, never read: gives the comparisons below, whose
     purpose is to evaluate x + 1.0F or y + 1.0F, something to do. */
  volatile int dummy;

  /* Largest float, stored as a double */
  const double large = 3.40282346638528859812e+38; /* 0x47efffffe0000000 */

  /* Smallest float, stored as a double */
  const double tiny = 1.40129846432481707092e-45; /* 0x36a0000000000000 */

  /* ux/uy are the raw bit patterns, ax/ay the same with the sign bit
     cleared, xpos/ypos are nonzero when the sign bit is clear. */
  GET_BITS_SP32(x, ux);
  ax = ux & (~SIGNBIT_SP32);
  xpos = ax == ux;
  GET_BITS_SP32(y, uy);
  ay = uy & (~SIGNBIT_SP32);
  ypos = ay == uy;

  if (ux == 0x3f800000)
    {
      /* x = +1.0. Return +1.0 for all y, even NaN,
         raising invalid only if y is a signalling NaN */
      if (y + 1.0F == 2.0F) dummy = 1;
      return 1.0F;
    }
  else if (ay == 0)
    {
      /* y is zero. Return 1.0, even if x is infinity or NaN,
         raising invalid only if x is a signalling NaN */
      if (x + 1.0F == 2.0F) dummy = 1;
      return 1.0F;
    }
  else if (((ax & EXPBITS_SP32) == EXPBITS_SP32) &&
           (ax & MANTBITS_SP32))
    /* x is NaN. Return NaN, with invalid exception if it's
       a signalling NaN. */
    return x + x;
  else if (((ay & EXPBITS_SP32) == EXPBITS_SP32) &&
           (ay & MANTBITS_SP32))
    /* y is NaN. Return NaN, with invalid exception if y
       is a signalling NaN. */
    return y + y;
  else if (uy == 0x3f800000)
    /* y is 1.0; return x */
    return x;
  else if ((ay & EXPBITS_SP32) > 0x4f000000)
    {
      /* y is infinite or so large that the result would
         overflow or underflow. Flags should be raised
         unless the result is exact, i.e. unless y is an exact
         infinity or x is itself an infinity (NaNs were ruled out
         above, so an all-ones exponent with ax == EXPBITS_SP32
         means infinity). */
      int yinf = (ay == EXPBITS_SP32);
      int xinf = (ax == EXPBITS_SP32);
      int exact = yinf || xinf;
      if (ypos)
        {
          /* y is +ve */
          if (ax == 0)
            /* abs(x) = 0.0. */
            return 0.0F;
          else if (ax < 0x3f800000)
            {
              /* abs(x) < 1.0 */
              if (yinf)
                return 0.0F;
              else
                return retval_errno_erange_underflow(x, y, 1);
            }
          else if (ax == 0x3f800000)
            /* abs(x) = 1.0. */
            return 1.0F;
          else
            {
              /* abs(x) > 1.0.  An infinite x gives an exact
                 +infinity; it is not an overflow. */
              if (exact)
                return infinityf_with_flags(0);
              else
                return retval_errno_erange_overflow(x, y, 1);
            }
        }
      else
        {
          /* y is -ve */
          if (ax == 0)
            /* abs(x) = 0.0. Return +infinity. */
            return retval_errno_edom(x, y, 1);
          else if (ax < 0x3f800000)
            {
              /* abs(x) < 1.0; return +infinity. */
              if (yinf)
                return infinityf_with_flags(0);
              else
                return retval_errno_erange_overflow(x, y, 1);
            }
          else if (ax == 0x3f800000)
            /* abs(x) = 1.0. */
            return 1.0F;
          else
            {
              /* abs(x) > 1.0.  An infinite x gives an exact zero;
                 it is not an underflow. */
              if (exact)
                return 0.0F;
              else
                return retval_errno_erange_underflow(x, y, 1);
            }
        }
    }

  /* See whether y is an integer.
     inty = 0 means not an integer.
     inty = 1 means odd integer.
     inty = 2 means even integer.
  */
  yexp = ((uy & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32 + 1;
  if (yexp < 1)
    inty = 0;
  else if (yexp > 24)
    inty = 2;
  else /* 1 <= yexp <= 24 */
    {
      /* Mask out the bits of r that we don't want */
      mask = (1 << (24 - yexp)) - 1;
      if ((uy & mask) != 0)
        inty = 0;
      else if (((uy & ~mask) >> (24 - yexp)) & 0x00000001)
        inty = 1;
      else
        inty = 2;
    }

  if ((ax & EXPBITS_SP32) == EXPBITS_SP32)
    {
      /* x is infinity (NaN was already ruled out). */
      if (xpos)
        {
          /* x is +infinity */
          if (ypos)
            /* y > 0.0 */
            return x;
          else
            return 0.0F;
        }
      else
        {
          /* x is -infinity */
          if (inty == 1)
            {
              /* y is an odd integer */
              if (ypos)
                /* Result is -infinity */
                return x;
              else
                return -0.0F;
            }
          else
            {
              if (ypos)
                /* Result is +infinity */
                return -x;
              else
                return 0.0F;
            }
        }
    }
  else if (ax == 0)
    {
      /* x is zero */
      if (xpos)
        {
          /* x is +0.0 */
          if (ypos)
            /* y is positive; return +0.0 for all cases */
            return x;
          else
            /* y is negative; return +infinity with div-by-zero
               for all cases */
            return retval_errno_edom(x, y, 1);
        }
      else
        {
          /* x is -0.0 */
          if (ypos)
            {
              /* y is positive */
              if (inty == 1)
                /* -0.0 raised to a positive odd integer returns -0.0 */
                return x;
              else
                /* Return +0.0 */
                return -x;
            }
          else
            {
              /* y is negative */
              if (inty == 1)
                /* -0.0 raised to a negative odd integer returns -infinity
                   with div-by-zero */
                return retval_errno_edom(x, y, 2);
              else
                /* Return +infinity with div-by-zero */
                return retval_errno_edom(x, y, 1);
            }
        }
    }

  negateres = 0;
  if (!xpos)
    {
      /* x is negative */
      if (inty)
        {
          /* It's OK because y is an integer. */
          ux = ax;
          PUT_BITS_SP32(ux, x); /* x = abs(x) */
          /* If y is odd, the result will be negative */
          negateres = (inty == 1);
        }
      else
        /* y is not an integer. Return a NaN. */
        return retval_errno_edom(x, y, 3);
    }

  if (ay < 0x2e800000)   /* abs(y) < 2^(-34) */
    {
      /* y is close enough to zero for the result to be 1.0
         no matter what the size of x */
      return 1.0F + y;
    }

  /* Simply use double precision for computation of log2(x),
     y*log2(x) and exp2(y*log2(x)) */
  dx = x;
  dy = y;
  dlog2 = log2(dx);
  dw = dy * dlog2;
  dr = exp2(dw);

  /* If dr overflowed or underflowed we need to deal with errno */
  if (dr > large)
    {
      /* Double dr has overflowed range of float. */
      if (negateres)
        return retval_errno_erange_overflow(x, y, -1);
      else
        return retval_errno_erange_overflow(x, y, 1);
    }
  else if (dr < tiny)
    {
      /* Double dr has underflowed range of float. */
      if (negateres)
        return retval_errno_erange_underflow(x, y, -1);
      else
        return retval_errno_erange_underflow(x, y, 1);
    }
  else
    {
      if (negateres)
        return -dr;
      else
        return dr;
    }
}

/* Make __powf also reachable under the names powf and __ieee754_powf.
   weak_alias is a macro defined outside this file (presumably the
   usual glibc weak-symbol alias macro -- confirm). */
weak_alias (__powf, powf)
weak_alias (__powf, __ieee754_powf)




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_NAN_WITH_FLAGS
#define USE_SCALEDOUBLE_3
#define USE_GET_FPSW_INLINE
#define USE_SET_FPSW_INLINE
#include "libm_inlines_amd.h"
#undef USE_NAN_WITH_FLAGS
#undef USE_SCALEDOUBLE_3
#undef USE_GET_FPSW_INLINE
#undef USE_SET_FPSW_INLINE

/* Multiply x by y and return the product as a head/tail pair:
   *z receives the rounded double product x * y and *zz receives the
   correction term, so that (z, zz) is a nearly double-length
   representation of the exact product. */
static inline void dekker_mul12(double x, double y,
				double *z, double *zz)
{
  /* Clearing the low 27 bits of the bit pattern leaves the "head"
     of a double; the "tail" is whatever that removed. */
  const unsigned long head_mask = 0xfffffffff8000000;
  unsigned long bits;
  double x_hi, x_lo, y_hi, y_lo;

  /* Split x into head and tail. */
  GET_BITS_DP64(x, bits);
  bits &= head_mask;
  PUT_BITS_DP64(bits, x_hi);
  x_lo = x - x_hi;

  /* Split y the same way. */
  GET_BITS_DP64(y, bits);
  bits &= head_mask;
  PUT_BITS_DP64(bits, y_hi);
  y_lo = y - y_hi;

  /* Rounded product, then the error of that rounding accumulated
     from the partial products, largest term first. */
  *z = x * y;
  *zz = (((x_hi * y_hi - *z) + x_hi * y_lo) + x_lo * y_hi) + x_lo * y_lo;
}


/* Exact floating-point remainder of x with respect to y.

   Built as __remainder by default: the result is x - n*y with n the
   integer nearest to x/y (ties to even), so it lies in [-|y|/2, |y|/2].
   Built as __fmod when COMPILING_FMOD is defined: n is x/y truncated
   towards zero, so the result has the sign of x and magnitude below |y|.

   NaN arguments propagate.  An infinite x, or a zero y, returns a NaN
   requested with the invalid flag.  An infinite y, or a zero x, returns
   x.  The floating-point status word saved before the main reduction is
   restored afterwards so that the inexact flag raised by the algorithm
   does not leak out. */
#if defined(COMPILING_FMOD)
double __fmod(double x, double y)
#else
double __remainder(double x, double y)
#endif
{
  double dx, dy, scale, w, t, v, c, cc;
  int i, ntimes, xexp, yexp;
  unsigned long u, ux, uy, ax, ay, todd;
  unsigned int sw;

  dx = x;
  dy = y;

  /* ux/uy are the raw bit patterns, ax/ay the same with the sign bit
     cleared, xexp/yexp the biased exponent fields. */
  GET_BITS_DP64(dx, ux);
  GET_BITS_DP64(dy, uy);
  ax = ux & ~SIGNBIT_DP64;
  ay = uy & ~SIGNBIT_DP64;
  xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
  yexp = ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);

  if (xexp < 1 || xexp > BIASEDEMAX_DP64 ||
      yexp < 1 || yexp > BIASEDEMAX_DP64)
    {
      /* x or y is zero, denormalized, NaN or infinity */
      if (xexp > BIASEDEMAX_DP64)
        {
          /* x is NaN or infinity */
          if (ux & MANTBITS_DP64)
            /* x is NaN */
            return dx + dx; /* Raise invalid if it is a signalling NaN */
          else
            /* x is infinity; result is NaN */
            return nan_with_flags(AMD_F_INVALID);
        }
      else if (yexp > BIASEDEMAX_DP64)
        {
          /* y is NaN or infinity */
          if (uy & MANTBITS_DP64)
            /* y is NaN */
            return dy + dy; /* Raise invalid if it is a signalling NaN */
          else
            /* y is infinity; result is x */
            return dx;
        }
      else if (ax == 0x0000000000000000)
        {
          /* x is zero */
          if (ay == 0x0000000000000000)
            /* y is zero */
            return nan_with_flags(AMD_F_INVALID);
          else
            return dx;
        }
      else if (ay == 0x0000000000000000)
        /* y is zero */
        return nan_with_flags(AMD_F_INVALID);

      /* We've exhausted all other possibilities. One or both of x and
         y must be denormalized */
      if (xexp < 1)
        {
          /* x is denormalized. Figure out its exponent. */
          u = ax;
          while (u < IMPBIT_DP64)
            {
              xexp--;
              u <<= 1;
            }
        }
      if (yexp < 1)
        {
          /* y is denormalized. Figure out its exponent. */
          u = ay;
          while (u < IMPBIT_DP64)
            {
              yexp--;
              u <<= 1;
            }
        }
    }
  else if (ax == ay)
    {
      /* abs(x) == abs(y); return zero with the sign of x */
      PUT_BITS_DP64(ux & SIGNBIT_DP64, dx);
      return dx;
    }

  /* Set x = abs(x), y = abs(y) */
  PUT_BITS_DP64(ax, dx);
  PUT_BITS_DP64(ay, dy);

  if (ax < ay)
    {
      /* abs(x) < abs(y) */
#if !defined(COMPILING_FMOD)
      /* Decide whether dx is more than half of dy.  0.5*dy is not
         exact when dy is tiny (it rounds once the halved value is
         denormalized), so compare dx + dx against dy instead, which
         is exact.  Only when dy is so large that dx + dx could
         overflow do we fall back to 0.5*dy, which is exact there.
         This mirrors the final adjustment at the end of the
         function. */
      if (ay < 0x7fd0000000000000)
        {
          if (dx + dx > dy)
            dx -= dy;
        }
      else if (dx > 0.5 * dy)
        dx -= dy;
#endif
      return x < 0.0? -dx : dx;
    }

  /* Save the current floating-point status word. We need
     to do this because the remainder function is always
     exact for finite arguments, but our algorithm causes
     the inexact flag to be raised. We therefore need to
     restore the entry status before exiting. */
  sw = get_fpsw_inline();

  /* Set ntimes to the number of times we need to do a
     partial remainder. If the exponent of x is an exact multiple
     of 52 larger than the exponent of y, and the mantissa of x is
     less than the mantissa of y, ntimes will be one too large
     but it doesn't matter - it just means that we'll go round
     the loop below one extra time. */
  if (xexp <= yexp)
    ntimes = 0;
  else
    ntimes = (xexp - yexp) / 52;

  if (ntimes == 0)
    {
      w = dy;
      scale = 1.0;
    }
  else
    {
      /* Set w = y * 2^(52*ntimes) */
      w = scaleDouble_3(dy, ntimes * 52);

      /* Set scale = 2^(-52) */
      PUT_BITS_DP64((unsigned long)(-52 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
                    scale);
    }

  /* Each time round the loop we compute a partial remainder.
     This is done by subtracting a large multiple of w
     from x each time, where w is a scaled up version of y.
     The subtraction must be performed exactly in quad
     precision, though the result at each stage can
     fit exactly in a double precision number. */
  for (i = 0; i < ntimes; i++)
    {
      /* t is the integer multiple of w that we will subtract.
         We use a truncated value for t.

         N.B. w has been chosen so that the integer t will have
         at most 52 significant bits. This is the amount by
         which the exponent of the partial remainder dx gets reduced
         every time around the loop. In theory we could use
         53 bits in t, but the quad precision multiplication
         routine dekker_mul12 does not allow us to do that because
         it loses the last (106th) bit of its quad precision result. */

      /* Set dx = dx - w * t, where t is equal to trunc(dx/w). */
      t = (double)(long)(dx / w);
      /* At this point, t may be one too large due to
         rounding of dx/w */

      /* Compute w * t in quad precision */
      dekker_mul12(w, t, &c, &cc);

      /* Subtract w * t from dx */
      v = dx - c;
      dx = v + (((dx - v) - c) - cc);

      /* If t was one too large, dx will be negative. Add back
         one w */
      /* It might be possible to speed up this loop by finding
         a way to compute correctly truncated t directly from dx and w.
         We would then avoid the need for this check on negative dx. */
      if (dx < 0.0)
        dx += w;

      /* Scale w down by 2^(-52) for the next iteration */
      w *= scale;
    }

  /* One more time */
  /* Variable todd says whether the integer t is odd or not */
  t = (double)(long)(dx / w);
  todd = ((long)(dx / w)) & 1;
  dekker_mul12(w, t, &c, &cc);
  v = dx - c;
  dx = v + (((dx - v) - c) - cc);
  if (dx < 0.0)
    {
      todd = !todd;
      dx += w;
    }

  /* At this point, dx lies in the range [0,dy) */
#if !defined(COMPILING_FMOD)
  /* For the fmod function, we're done apart from setting
     the correct sign. */
  /* For the remainder function, we need to adjust dx
     so that it lies in the range (-y/2, y/2] by carefully
     subtracting w (== dy == y) if necessary. The rigmarole
     with todd is to get the correct sign of the result
     when x/y lies exactly half way between two integers,
     when we need to choose the even integer. */
  if (ay < 0x7fd0000000000000)
    {
      if (dx + dx > w || (todd && (dx + dx == w)))
        dx -= w;
    }
  else if (dx > 0.5 * w || (todd && (dx == 0.5 * w)))
    dx -= w;

#endif

  /* **** N.B. for some reason this breaks the 32 bit version
     of remainder when compiling with optimization. */
  /* Restore the entry status flags */
  set_fpsw_inline(sw);

  /* Set the result sign according to input argument x */
  return x < 0.0? -dx : dx;

}

/* Make the function defined above reachable under its public name and
   its __ieee754_ name.  Which pair is emitted follows the same
   COMPILING_FMOD switch that selected the function's name.  weak_alias
   is a macro defined outside this file (presumably the usual glibc
   weak-symbol alias macro -- confirm). */
#if defined(COMPILING_FMOD)
weak_alias (__fmod, fmod)
weak_alias (__fmod,  __ieee754_fmod)
#else
weak_alias (__remainder, remainder)
weak_alias (__remainder,  __ieee754_remainder)
#endif




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

/* Define this to get debugging print statements activated */
#define DEBUGGING_PRINT
#undef DEBUGGING_PRINT


#ifdef DEBUGGING_PRINT
#include <stdio.h>
/* Debugging helper: format the low bitsper bits of d as a string of
   '0'/'1' characters, most significant bit first.

   point is the number of digits that precede an inserted '.':
   0 puts it before the first digit, bitsper puts it after the last
   digit, and any value outside [0, bitsper] inserts no '.' at all.

   The result lives in a static buffer that the next call overwrites.
   A bitsper that is negative or too large for the buffer yields an
   empty string.  Negative d is printed as its two's complement bit
   pattern. */
char *d2b(int d, int bitsper, int point)
{
  static char buff[50];
  /* Work on the unsigned bit pattern so that negative values do not
     trip up the odd/even test. */
  unsigned int bits = (unsigned int)d;
  int i, j;

  /* Need room for bitsper digits, an optional '.', and the NUL. */
  if (bitsper < 0 || bitsper > (int)sizeof(buff) - 2)
    {
      buff[0] = '\0';
      return buff;
    }

  j = bitsper;
  if (point >= 0 && point <= bitsper)
    j++;
  buff[j] = '\0';

  /* A point after the last digit is never met by the loop below
     (i stops at bitsper - 1), so place it here; otherwise the slot
     reserved for it would stay unwritten. */
  if (point == bitsper)
    {
      j--;
      buff[j] = '.';
    }

  /* Fill the buffer from the right, least significant bit first. */
  for (i = bitsper - 1; i >= 0; i--)
    {
      j--;
      buff[j] = (bits & 1u) ? '1' : '0';
      if (i == point)
        {
          j--;
          buff[j] = '.';
        }
      bits >>= 1;
    }
  return buff;
}
#endif

/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using
   extra precision, and return the result in r, rr.
   Return value "region" tells how many lots of pi/2 were subtracted
   from x to put it in the range [-pi/4,pi/4], mod 4.

   Outputs:
     *r, *rr  - head and tail of the reduced argument (r + rr).
     *region  - number of multiples of pi/2 removed, modulo 4.

   Three methods are used, cheapest first:
     1. x <= 11*pi/4: subtract the relevant multiple of pi/2, held as a
        lead/tail pair, directly.
     2. x <= 1.0e5: multiply by 2/pi in extra precision, subtract the
        nearest integer and multiply the fraction back by pi/2.
     3. Otherwise, or whenever method 1 or 2 detects that the reduced
        value is smaller in magnitude than cancellationThresh: a long
        multiplication of the mantissa of x by stored bits of 2/pi. */
void __remainder_piby2(double x, double *r, double *rr, int *region)
{

  /* eleven_piby4 is the closest machine number BELOW 11*pi/4 */
  static const double
    eleven_piby4 = 8.6393797973719301808159e+00; /* 0x4021475cc9eedf00 */

  /* Multiples of pi/2, each as a leading double plus a tail that
     carries the next bits. */
  static const double
    piby2_lead = 1.57079632679489655800e+00, /* 0x3ff921fb54442d18 */
    piby2_tail = 6.12323399573676480327e-17, /* 0x3c91a62633145c06 */
    pi_lead = 3.14159265358979311600e+00, /* 0x400921fb54442d18 */
    pi_tail = 1.22464679914735296065e-16, /* 0x3ca1a62633145c06 */
    three_piby2_lead = 4.71238898038468967400e+00, /* 0x4012d97c7f3321d2 */
    three_piby2_tail = 1.83697019872102919446e-16, /* 0x3caa79394c9e8a08 */
    two_pi_lead = 6.28318530717958623200e+00, /* 0x401921fb54442d18 */
    two_pi_tail = 2.44929359829470592131e-16, /* 0x3cb1a62633145c06 */
    five_piby2_lead = 7.85398163397448278999e+00, /* 0x401f6a7a2955385e */
    five_piby2_tail = 3.06161699786838264816e-16; /* 0x3cb60fafbfd97308 */

  /* Each of these threshold values is the closest machine
     number BELOW a multiple of pi/4, i.e. they are not
     rounded to nearest. thresh1 is 1*pi/4, thresh2 is 2*pi/4, etc.
     This ensures that we end up in precisely the correct region. */
  static const double
    thresh1 = 7.8539816339744827899949e-01, /* 0x3fe921fb54442d18 */
    thresh2 = 1.5707963267948965579989e+00, /* 0x3ff921fb54442d18 */
    thresh3 = 2.3561944901923448369984e+00, /* 0x4002d97c7f3321d2 */
    thresh4 = 3.1415926535897931159979e+00, /* 0x400921fb54442d18 */
    thresh5 = 3.9269908169872413949974e+00, /* 0x400f6a7a2955385e */
    thresh6 = 4.7123889803846896739969e+00, /* 0x4012d97c7f3321d2 */
    thresh7 = 5.4977871437821379529964e+00, /* 0x4015fdbbe9bba775 */
    thresh8 = 6.2831853071795862319959e+00, /* 0x401921fb54442d18 */
    thresh9 = 7.0685834705770345109954e+00, /* 0x401c463abeccb2bb */
    thresh10 = 7.8539816339744827899949e+00; /* 0x401f6a7a2955385e */

  /* 2/pi and pi/2, whole and split into three parts; the first two
     parts of each have their low mantissa bits zero (see the hex
     patterns), for use in the head/tail products below. */
  static const double
    twobypi = 6.36619772367581271411E-01, /* 0x3FE45F306DC9C882 */
    twobypi_part1 = 6.36619761586189270020e-01, /* 0x3fe45f3068000000 */
    twobypi_part2 = 1.07813920013910546913e-08, /* 0x3e47272208000000 */
    twobypi_part3 = 7.16649491121506946045e-17, /* 0x3c94a7f09d5f47d6 */
    piby2_part1 = 1.57079631090164184570e+00, /* 0x3ff921fb50000000 */
    piby2_part2 = 1.58932547122958567343e-08, /* 0x3e5110b460000000 */
    piby2_part3 = 6.12323399573676480327e-17; /* 0x3c91a62633145c06 */

  /* A reduced value no larger than this in magnitude is treated as
     having suffered too much cancellation; the expensive method is
     used instead. */
  static const double cancellationThresh = 1.0e-12;
  /* Set once one of the two cheap methods has produced (r, rr). */
  int done = 0;

  /* For small values of x, up to 11*pi/4, we do quad precision
     subtraction of the relevant multiple of pi/2 */
  if (x <= eleven_piby4)
    {
      /* t is the leading part of the difference and s the correction;
         the two forms of s used below differ by whether x lies below
         or above the multiple being subtracted. */
      double s, t, ctest;
      if (x <= thresh1) /* x < pi/4 */
        {
          /* Quick return if x is already less than pi/4 */
          *r = x;
          *rr = 0.0;
          *region = 0;
          return;
        }
      else if (x <= thresh2) /* x < 2*pi/4 */
        {
          t = x - piby2_lead;
          s = ((-piby2_lead - t) + x) - piby2_tail;
          *region = 1;
        }
      else if (x <= thresh3) /* x < 3*pi/4 */
        {
          t = x - piby2_lead;
          s = ((x - t) - piby2_lead) - piby2_tail;
          *region = 1;
        }
      else if (x <= thresh4) /* x < 4*pi/4 */
        {
          t = x - pi_lead;
          s = ((-pi_lead - t) + x) - pi_tail;
          *region = 2;
        }
      else if (x <= thresh5) /* x < 5*pi/4 */
        {
          t = x - pi_lead;
          s = ((x - t) - pi_lead) - pi_tail;
          *region = 2;
        }
      else if (x <= thresh6) /* x < 6*pi/4 */
        {
          t = x - three_piby2_lead;
          s = ((-three_piby2_lead - t) + x) - three_piby2_tail;
          *region = 3;
        }
      else if (x <= thresh7) /* x < 7*pi/4 */
        {
          t = x - three_piby2_lead;
          s = ((x - t) - three_piby2_lead) - three_piby2_tail;
          *region = 3;
        }
      else if (x <= thresh8) /* x < 8*pi/4 */
        {
          t = x - two_pi_lead;
          s = ((-two_pi_lead - t) + x) - two_pi_tail;
          *region = 0;
        }
      else if (x <= thresh9) /* x < 9*pi/4 */
        {
          t = x - two_pi_lead;
          s = ((x - t) - two_pi_lead) - two_pi_tail;
          *region = 0;
        }
      else if (x <= thresh10) /* x < 10*pi/4 */
        {
          t = x - five_piby2_lead;
          s = ((-five_piby2_lead - t) + x) - five_piby2_tail;
          *region = 1;
        }
      else /* x < 11*pi/4 */
        {
          t = x - five_piby2_lead;
          s = ((x - t) - five_piby2_lead) - five_piby2_tail;
          *region = 1;
        }

      /* Renormalise (t, s) into head *r and tail *rr. */
      *r = t + s;
      *rr = (t - *r) + s;

      /* Check for massive cancellation which may happen very close
         to multiples of pi/2 */
      if (*r < 0.0)
        ctest = -(*r);
      else
        ctest = *r;
#ifdef DEBUGGING_PRINT
      printf("Cancellation threshold test = (%g > %g)\n",
             ctest, cancellationThresh);
#endif

      /* Check if cancellation error was not too large */
      if (ctest > cancellationThresh)
        done = 1;
      /* Otherwise fall through to the expensive method */
    }
  else if (x <= 1.0e5)
    {
      /* This range reduction is accurate enough for x up to
         approximately 2**(20) except near multiples of pi/2 */

      /* We perform quad precision arithmetic to find the
         nearest multiple of pi/2 to x */

      int reg, it;
      double hx, tx, z, zz, w, ww, dreg, s, t, c, cc, ctest;

      /* Split x into head and tail, hx and tx */
      unsigned long u;
      GET_BITS_DP64(x, u);
      u &= 0xfffffffff8000000;
      PUT_BITS_DP64(u, hx);
      tx = x - hx;

      /* Multiply x by 2/pi in extra precision, result in (z, zz) */
      c = x * twobypi;
      cc = ((((hx * twobypi_part1 - c) + hx * twobypi_part2) +
            tx * twobypi_part1) + tx * twobypi_part2) + x * twobypi_part3;
      z = c + cc;
      zz = (c - z) + cc;

#ifdef DEBUGGING_PRINT
      printf("z = %30.20e = %s\n", z, double2hex(&z));
      printf("zz = %30.20e = %s\n", zz, double2hex(&zz));
#endif

      /* Find reg, the nearest integer to (z, zz). We need to be
         careful when (z,zz) is very near an odd multiple of 0.5.
         The simple formula
           reg = (int)((zz + 0.5) + z);
         fails in that case because the double rounding may
         lead us astray. */
      t = z + z;
      it = (int)t;
      if (it == t && it & 1)
        {
          /* z is an odd multiple of 0.5; we must use zz
             to discriminate */
          if (zz > 0.0)
            reg = (int)z + 1;
          else
            reg = (int)z;
        }
      else
        reg = (int)(z + 0.5);

#ifdef DEBUGGING_PRINT
      printf("reg = %d\n", reg);
#endif

      /* Carefully subtract reg from (z, zz), result in (w, ww) */
      dreg = reg;
      s = z - dreg;
      if (z > dreg)
        t = ((z - s) - dreg) + zz;
      else
        t = ((-dreg - s) + z) + zz;
      w = s + t;
      ww = (s - w) + t;

#ifdef DEBUGGING_PRINT
      printf("w = %30.20e = %s\n", w, double2hex(&w));
      printf("ww = %30.20e = %s\n", ww, double2hex(&ww));
#endif

     /* Check for massive cancellation which may happen very close
        to multiples of pi/2 */
      if (w < 0.0)
        ctest = -w;
      else
        ctest = w;

      /* If cancellation is not too severe, continue with this method.
         Otherwise we fall through to the expensive, accurate method */
      if (ctest > cancellationThresh)
        {
          /* Split w into (hx, tx) */
          GET_BITS_DP64(w, u);
          u &= 0xfffffffff8000000;
          PUT_BITS_DP64(u, hx);
          tx = w - hx;

          /* Carefully multiply (w, ww) by pi/2 */
          c = piby2_lead * w;
          cc = ((((piby2_part1 * hx - c) + piby2_part1 * tx) +
                 piby2_part2 * hx) + piby2_part2 * tx) +
            (piby2_lead * ww + piby2_part3 * w);
          *r = c + cc;
          *rr = (c - *r) + cc;

          /* Only the multiple of pi/2 modulo 4 is reported. */
          *region = reg & 3;

#ifdef DEBUGGING_PRINT
          printf("r = %30.20e = %s\n", *r, double2hex(r));
          printf("rr = %30.20e = %s\n", *rr, double2hex(rr));
#endif
          done = 1;
        }
    }

  if (!done)
    {
      /* This method simulates multi-precision floating-point
         arithmetic and is accurate for all 1 <= x < infinity */
      const int bitsper = 10;
      /* Chunks of the product; the unrolled multiplication below
         writes res[0] through res[19]. */
      unsigned long res[500];
      unsigned long ux, u, carry, mask, mant, highbitsrr;
      int first, last, i, rexp, xexp, resexp, ltb, determ;
      double xx, t;
      /* Bits of 2/pi, stored bitsper (10) bits per entry, preceded by
         six zero entries. */
      static unsigned long pibits[] =
      {
        0,    0,    0,    0,    0,    0,
        162,  998,   54,  915,  580,   84,  671,  777,  855,  839,
        851,  311,  448,  877,  553,  358,  316,  270,  260,  127,
        593,  398,  701,  942,  965,  390,  882,  283,  570,  265,
        221,  184,    6,  292,  750,  642,  465,  584,  463,  903,
        491,  114,  786,  617,  830,  930,   35,  381,  302,  749,
        72,  314,  412,  448,  619,  279,  894,  260,  921,  117,
        569,  525,  307,  637,  156,  529,  504,  751,  505,  160,
        945, 1022,  151, 1023,  480,  358,   15,  956,  753,   98,
        858,   41,  721,  987,  310,  507,  242,  498,  777,  733,
        244,  399,  870,  633,  510,  651,  373,  158,  940,  506,
        997,  965,  947,  833,  825,  990,  165,  164,  746,  431,
        949, 1004,  287,  565,  464,  533,  515,  193,  111,  798
      };

      GET_BITS_DP64(x, ux);

#ifdef DEBUGGING_PRINT
      printf("On entry, x = %25.20e = %s\n", x, double2hex(&x));
#endif

      /* Unbiased exponent of x, then the mantissa with its implicit
         leading bit made explicit. */
      xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
      ux = (ux & MANTBITS_DP64) | IMPBIT_DP64;

      /* Now ux is the mantissa bit pattern of x as a long integer */
      carry = 0;
      mask = (1L << bitsper) - 1;

      /* Set first and last to the positions of the first
         and last chunks of 2/pi that we need */
      first = xexp / bitsper;
      resexp = xexp - first * bitsper;
      /* 180 is the theoretical maximum number of bits (actually
         175 for IEEE double precision) that we need to extract
         from the middle of 2/pi to compute the reduced argument
         accurately enough for our purposes */
      last = first + 180 / bitsper;

      /* Do a long multiplication of the bits of 2/pi by the
         integer mantissa */
#if 0
      for (i = last; i >= first; i--)
        {
          u = pibits[i] * ux + carry;
          res[i - first] = u & mask;
          carry = u >> bitsper;
        }
      res[last - first + 1] = 0;
#else
      /* Unroll the loop. This is only correct because we know
         that bitsper is fixed as 10. */
      res[19] = 0;
      u = pibits[last] * ux;
      res[18] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-1] * ux + carry;
      res[17] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-2] * ux + carry;
      res[16] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-3] * ux + carry;
      res[15] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-4] * ux + carry;
      res[14] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-5] * ux + carry;
      res[13] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-6] * ux + carry;
      res[12] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-7] * ux + carry;
      res[11] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-8] * ux + carry;
      res[10] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-9] * ux + carry;
      res[9] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-10] * ux + carry;
      res[8] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-11] * ux + carry;
      res[7] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-12] * ux + carry;
      res[6] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-13] * ux + carry;
      res[5] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-14] * ux + carry;
      res[4] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-15] * ux + carry;
      res[3] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-16] * ux + carry;
      res[2] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-17] * ux + carry;
      res[1] = u & mask;
      carry = u >> bitsper;
      u = pibits[last-18] * ux + carry;
      res[0] = u & mask;
#endif

#ifdef DEBUGGING_PRINT
      printf("resexp = %d\n", resexp);
      printf("Significant part of x * 2/pi with binary"
             " point in correct place:\n");
      for (i = 0; i <= last - first; i++)
        {
          if (i > 0 && i % 5 == 0)
            printf("\n ");
          if (i == 1)
            printf("%s ", d2b((int)res[i], bitsper, resexp));
          else
            printf("%s ", d2b((int)res[i], bitsper, -1));
        }
      printf("\n");
#endif

      /* Reconstruct the result */
      /* ltb holds three bits taken from res[0] and res[1]: the last
         two bits before the binary point and the first bit after. */
      ltb = (int)((((res[0] << bitsper) | res[1])
                   >> (bitsper - 1 - resexp)) & 7);

      /* determ says whether the fractional part is >= 0.5 */
      determ = ltb & 1;

#ifdef DEBUGGING_PRINT
      printf("ltb = %d (last two bits before binary point"
             " and first bit after)\n", ltb);
      printf("determ = %d (1 means need to negate because the fractional\n"
             "            part of x * 2/pi is greater than 0.5)\n", determ);
#endif

      i = 1;
      if (determ)
        {
          /* The mantissa is >= 0.5. We want to subtract it
             from 1.0 by negating all the bits */
          *region = ((ltb >> 1) + 1) & 3;
          mant = ~(res[1]) & ((1L << (bitsper - resexp)) - 1);
          /* Pull in further (negated) chunks until bit 53 or above
             is occupied. */
          while (mant < 0x0020000000000000)
            {
              i++;
              mant = (mant << bitsper) | (~(res[i]) & mask);
            }
          highbitsrr = ~(res[i + 1]) << (64 - bitsper);
        }
      else
        {
          *region = (ltb >> 1);
          mant = res[1] & ((1L << (bitsper - resexp)) - 1);
          /* Pull in further chunks until bit 53 or above is
             occupied. */
          while (mant < 0x0020000000000000)
            {
              i++;
              mant = (mant << bitsper) | res[i];
            }
          highbitsrr = res[i + 1] << (64 - bitsper);
        }

      rexp = 52 + resexp - i * bitsper;

      /* Shift mant back down below bit 53, moving the bits that fall
         off into the top of highbitsrr and adjusting the exponent. */
      while (mant >= 0x0020000000000000)
        {
          rexp++;
          highbitsrr = (highbitsrr >> 1) | ((mant & 1) << 63);
          mant >>= 1;
        }

#ifdef DEBUGGING_PRINT
      printf("Normalised mantissa = 0x%016lx\n", mant);
      printf("High bits of rest of mantissa = 0x%016lx\n", highbitsrr);
      printf("Exponent to be inserted on mantissa = rexp = %d\n", rexp);
#endif

      /* Put the result exponent rexp onto the mantissa pattern */
      u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64;
      ux = (mant & MANTBITS_DP64) | u;
      if (determ)
        /* If we negated the mantissa we negate x too */
        ux |= SIGNBIT_DP64;
      PUT_BITS_DP64(ux, x);

      /* Create the bit pattern for rr */
      highbitsrr >>= 12; /* Note this is shifted one place too far */
      u = ((unsigned long)rexp + EXPBIAS_DP64 - 53) << EXPSHIFTBITS_DP64;
      PUT_BITS_DP64(u, t);
      u |= highbitsrr;
      PUT_BITS_DP64(u, xx);

      /* Subtract the implicit bit we accidentally added */
      xx -= t;
      /* Set the correct sign, and double to account for the
         "one place too far" shift */
      if (determ)
        xx *= -2.0;
      else
        xx *= 2.0;

#ifdef DEBUGGING_PRINT
      printf("(lead part of x*2/pi) = %25.20e = %s\n", x, double2hex(&x));
      printf("(tail part of x*2/pi) = %25.20e = %s\n", xx, double2hex(&xx));
#endif

      /* (x,xx) is an extra-precise version of the fractional part of
         x * 2 / pi. Multiply (x,xx) by pi/2 in extra precision
         to get the reduced argument (r,rr). */
      {
        double hx, tx, c, cc;
        /* Split x into hx (head) and tx (tail) */
        GET_BITS_DP64(x, ux);
        ux &= 0xfffffffff8000000;
        PUT_BITS_DP64(ux, hx);
        tx = x - hx;

        c = piby2_lead * x;
        cc = ((((piby2_part1 * hx - c) + piby2_part1 * tx) +
               piby2_part2 * hx) + piby2_part2 * tx) +
          (piby2_lead * xx + piby2_part3 * x);
        *r = c + cc;
        *rr = (c - *r) + cc;
      }

#ifdef DEBUGGING_PRINT
      printf(" (r,rr) = lead and tail parts of frac(x*2/pi) * pi/2:\n");
      printf(" r = %25.20e = %s\n", *r, double2hex(r));
      printf("rr = %25.20e = %s\n", *rr, double2hex(rr));
      printf("region = (number of pi/2 subtracted from x) mod 4 = %d\n",
             *region);
#endif
    }
}




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

/* Define this to get debugging print statements activated */
#define DEBUGGING_PRINT
#undef DEBUGGING_PRINT


#ifdef DEBUGGING_PRINT
#include <stdio.h>
/* Debugging helper: format the low BITSPER bits of D as a string of
   '0' and '1' characters, most significant bit first.

   If 0 <= POINT <= BITSPER a '.' is inserted so that exactly POINT
   digits appear to its left (POINT == BITSPER gives a trailing '.').
   Any other POINT value (e.g. -1) produces no '.'.

   The bits are taken from the two's complement representation of D,
   so negative values print their actual bit pattern.  BITSPER is
   clamped so that the digits, the optional '.' and the terminating
   NUL always fit in the buffer.

   Returns a pointer to a static buffer which is overwritten by the
   next call; the function is therefore not reentrant. */
char *d2b(long d, int bitsper, int point)
{
  static char buff[200];
  /* Work on the unsigned bit pattern: with a signed long, d % 2 is -1
     for odd negative values and d / 2 rounds toward zero, so the
     digits would come out wrong. */
  unsigned long bits = (unsigned long)d;
  int i, j;

  /* Leave room for one '.' and the terminating NUL */
  if (bitsper < 0)
    bitsper = 0;
  else if (bitsper > (int)sizeof(buff) - 2)
    bitsper = (int)sizeof(buff) - 2;

  j = bitsper;
  if (point >= 0 && point <= bitsper)
    j++;
  buff[j] = '\0';

  if (point == bitsper)
    {
      /* The loop below only emits a '.' in front of a digit, so a
         point placed after the last digit has to be written here. */
      j--;
      buff[j] = '.';
    }

  /* Fill the buffer from right to left, least significant bit first */
  for (i = bitsper - 1; i >= 0; i--)
    {
      j--;
      buff[j] = (bits & 1UL) ? '1' : '0';
      if (i == point)
        {
          j--;
          buff[j] = '.';
        }
      bits >>= 1;
    }
  return buff;
}
#endif

/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using
   extra precision, and return the result in r.
   Return value "region" tells how many lots of pi/2 were subtracted
   from x to put it in the range [-pi/4,pi/4], mod 4.

   Parameters:
     x       the single precision argument; it is promoted to double
             (dx) before any arithmetic is done.
     r       output: the reduced argument, as a double.
     region  output: the multiple of pi/2 that was removed, mod 4.

   Three methods are used, cheapest first:
     1. dx <= 11*pi/4: subtract a tabulated multiple of pi/2.
     2. dx <= 1.0e6:   multiply by 2/pi, subtract the nearest integer,
                       and multiply the remainder by pi/2.
     3. otherwise, or whenever method 1 or 2 produced an intermediate
        result of magnitude <= cancellationThresh: long multiplication
        of the mantissa of x by the stored bits of 2/pi.

   No sign handling is done here; the "positive argument" requirement
   above is the caller's responsibility. */
void __remainder_piby2f(float x, double *r, int *region)
{

  /* eleven_piby4 is the closest machine number BELOW 11*pi/4 */
  static const double
    eleven_piby4 = 8.6393797973719301808159e+00; /* 0x4021475cc9eedf00 */

  /* Multiples of pi/2 that method 1 subtracts, plus the 2/pi and pi/2
     factors used by method 2 */
  static const double
    piby2 = 1.57079632679489655800e+00, /* 0x3ff921fb54442d18 */
    twobypi = 6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */
    pi = 3.14159265358979311600e+00, /* 0x400921fb54442d18 */
    three_piby2 = 4.71238898038468967400e+00, /* 0x4012d97c7f3321d2 */
    two_pi = 6.28318530717958623200e+00, /* 0x401921fb54442d18 */
    five_piby2 = 7.85398163397448278999e+00; /* 0x401f6a7a2955385e */

  /* Each of these threshold values is the closest machine
     number BELOW a multiple of pi/4, i.e. they are not
     rounded to nearest. thresh1 is 1*pi/4, thresh3 is 3*pi/4, etc.
     This ensures that we end up in precisely the correct region. */
  static const double
    thresh1 = 7.8539816339744827899949e-01, /* 0x3fe921fb54442d18 */
    thresh3 = 2.3561944901923448369984e+00, /* 0x4002d97c7f3321d2 */
    thresh5 = 3.9269908169872413949974e+00, /* 0x400f6a7a2955385e */
    thresh7 = 5.4977871437821379529964e+00, /* 0x4015fdbbe9bba775 */
    thresh9 = 7.0685834705770345109954e+00; /* 0x401c463abeccb2bb */

  /* Results of methods 1 and 2 whose magnitude does not exceed this
     are rejected and recomputed with method 3 */
  static const double cancellationThresh = 1.0e-5;
  /* Set to 1 once *r has been stored by method 1 or 2 */
  int done = 0;
  double dx;

  dx = x;

  /* For small values of x, up to 11*pi/4, we do double precision
     subtraction of the relevant multiple of pi/2 */
  if (dx <= eleven_piby4) /* x <= 11*pi/4 */
    {
      double t, ctest;

     if (dx <= thresh5) /* x < 5*pi/4 */
       {
         if (dx <= thresh1) /* x < pi/4 */
           {
             /* Quick return if x is already less than pi/4 */
             *r = dx;
             *region = 0;
             return;
           }
         else if (dx <= thresh3) /* x < 3*pi/4 */
           {
             t = dx - piby2;
             *region = 1;
           }
         else /* x < 5*pi/4 */
           {
             t = dx - pi;
             *region = 2;
           }
       }
     else
       {
         if (dx <= thresh7) /* x < 7*pi/4 */
           {
             t = dx - three_piby2;
             *region = 3;
           }
         else if (dx <= thresh9) /* x < 9*pi/4 */
           {
             /* Four lots of pi/2 removed, i.e. region 0 mod 4 */
             t = dx - two_pi;
             *region = 0;
           }
         else /* x < 11*pi/4 */
           {
             /* Five lots of pi/2 removed, i.e. region 1 mod 4 */
             t = dx - five_piby2;
             *region = 1;
           }
       }

     /* Check for massive cancellation which may happen very close
        to multiples of pi/2 */
     if (t < 0.0)
       ctest = -t;
     else
       ctest = t;
#ifdef DEBUGGING_PRINT
     printf("Cancellation threshold test = (%g > %g)\n",
            ctest, cancellationThresh);
#endif

     /* Check if cancellation error was not too large */
     if (ctest > cancellationThresh)
       {
         *r = t;
         done = 1;
       }
     /* Otherwise fall through to the expensive method; *region is
        assigned again there, so the value stored above is not used */
    }
  else if (dx <= 1.0e6)
    {
      /* This range reduction is accurate enough for x up to
         approximately 2**(20) except near multiples of pi/2 */

      /* We perform double precision arithmetic to find the
         nearest multiple of pi/2 to x */
      int reg;
      double z, w, c, ctest;

      /* Multiply x by 2/pi in double precision, result in z */
      z = dx * twobypi;

#ifdef DEBUGGING_PRINT
      printf("z = %30.20e = %s\n", z, double2hex(&z));
#endif

      /* Find reg, the nearest integer to z */
      reg = (int)(z + 0.5);

#ifdef DEBUGGING_PRINT
      printf("reg = %d\n", reg);
#endif

      /* Subtract reg from z, result in w */
      w = z - reg;

#ifdef DEBUGGING_PRINT
      printf("w = %30.20e = %s\n", w, double2hex(&w));
#endif

     /* Check for massive cancellation which may happen very close
        to multiples of pi/2.  Note that the test is applied to w,
        which is measured in units of pi/2, not to the final result. */
      if (w < 0.0)
        ctest = -w;
      else
        ctest = w;

      /* If cancellation is not too severe, continue with this method.
         Otherwise we fall through to the expensive, accurate method */
      if (ctest > cancellationThresh)
        {
          /* Multiply w by pi/2 */
          c = w * piby2;
          *r = c;
          /* Only the low two bits of the multiple matter (mod 4) */
          *region = reg & 3;

#ifdef DEBUGGING_PRINT
          printf("r = %30.20e = %s\n", *r, double2hex(r));
#endif
          done = 1;
        }
    }

  if (!done)
    {
      /* This method simulates multi-precision floating-point
         arithmetic and is accurate for all 1 <= x < infinity */
      /* bitsper is the width in bits of each chunk of 2/pi.
         NOTE(review): the macro form is not #undef'd anywhere in this
         function, so it stays defined for whatever follows in the
         translation unit. */
#if 0
      const int bitsper = 36;
#else
#define bitsper 36
#endif
      /* res[] receives the chunks of the product, most significant
         chunk in res[0] */
      unsigned long res[10];
      unsigned long u, carry, mask, mant, nextbits;
      unsigned long ux;
      int first, last, i, rexp, xexp, resexp, ltb, determ, bc;
      /* Same value as the piby2 declared at function scope, which
         this declaration shadows */
      static const double
        piby2 = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
      /* Leading bits of 2/pi, bitsper bits per entry, indexed by
         chunk number; entry 0 is zero */
      static unsigned long pibits[] =
      {
        0L,
        5215L, 13000023176L, 11362338026L, 67174558139L,
        34819822259L, 10612056195L, 67816420731L, 57840157550L,
        19558516809L, 50025467026L, 25186875954L, 18152700886L
      };

#ifdef DEBUGGING_PRINT
      printf("On entry, x = %25.20e = %s\n", dx, double2hex(&dx));
#endif


  GET_BITS_DP64(dx, ux);

      /* Unbiased exponent of x */
      xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
      /* Discard the low 29 bits of the double mantissa field.
         NOTE(review): presumably these are all zero because dx was
         promoted from a float, leaving a 24 bit integer - confirm
         against the definitions of MANTBITS_DP64 and IMPBIT_DP64. */
      ux = ((ux & MANTBITS_DP64) | IMPBIT_DP64) >> 29;

#ifdef DEBUGGING_PRINT
      printf("ux = %s\n", d2b(ux, 64, -1));
#endif

      /* Now ux is the mantissa bit pattern of x as a long integer */
      mask = (1L << bitsper) - 1;

      /* Set first and last to the positions of the first
         and last chunks of 2/pi that we need */
      first = xexp / bitsper;
      /* resexp is the remainder of that division: xexp mod bitsper */
      resexp = xexp - first * bitsper;
      /* 120 is the theoretical maximum number of bits (actually
         115 for IEEE single precision) that we need to extract
         from the middle of 2/pi to compute the reduced argument
         accurately enough for our purposes */
      last = first + 120 / bitsper;

#ifdef DEBUGGING_PRINT
      printf("first = %d, last = %d\n", first, last);
#endif

      /* Do a long multiplication of the bits of 2/pi by the
         integer mantissa */
#if 0
      for (i = last; i >= first; i--)
        {
          u = pibits[i] * ux + carry;
          res[i - first] = u & mask;
          carry = u >> bitsper;
        }
      res[last - first + 1] = 0;
#else
      /* Unroll the loop. This is only correct because we know
         that bitsper is fixed as 36. */
      /* With bitsper == 36, last - first == 3, so exactly four chunks
         (last, last-1, last-2, first) are multiplied.  The carry out
         of the most significant chunk is discarded. */
      res[4] = 0;
      u = pibits[last] * ux;
      res[3] = u & mask;
      carry = u >> bitsper;
      u = pibits[last - 1] * ux + carry;
      res[2] = u & mask;
      carry = u >> bitsper;
      u = pibits[last - 2] * ux + carry;
      res[1] = u & mask;
      carry = u >> bitsper;
      u = pibits[first] * ux + carry;
      res[0] = u & mask;
#endif

#ifdef DEBUGGING_PRINT
      printf("resexp = %d\n", resexp);
      printf("Significant part of x * 2/pi with binary"
             " point in correct place:\n");
      for (i = 0; i <= last - first; i++)
        {
          if (i > 0 && i % 5 == 0)
            printf("\n ");
          if (i == 1)
            printf("%s ", d2b(res[i], bitsper, resexp));
          else
            printf("%s ", d2b(res[i], bitsper, -1));
        }
      printf("\n");
#endif

      /* Reconstruct the result */
      /* ltb holds three bits: the last two bits before the binary
         point and the first bit after it */
      ltb = (int)((((res[0] << bitsper) | res[1])
                   >> (bitsper - 1 - resexp)) & 7);

      /* determ says whether the fractional part is >= 0.5 */
      determ = ltb & 1;

#ifdef DEBUGGING_PRINT
      printf("ltb = %d (last two bits before binary point"
             " and first bit after)\n", ltb);
      printf("determ = %d (1 means need to negate because the fractional\n"
             "            part of x * 2/pi is greater than 0.5)\n", determ);
#endif

      /* i is the index of the chunk of res[] that supplies the
         leading fraction bits; it advances while the accumulated
         mantissa is still below 2^16 */
      i = 1;
      if (determ)
        {
          /* The mantissa is >= 0.5. We want to subtract it
             from 1.0 by negating all the bits */
          /* Rounding up to the next multiple of pi/2, hence the +1 */
          *region = ((ltb >> 1) + 1) & 3;
          mant = ~(res[1]) & ((1L << (bitsper - resexp)) - 1);
          while (mant < 0x0000000000010000)
            {
              i++;
              mant = (mant << bitsper) | (~(res[i]) & mask);
            }
          nextbits = (~(res[i+1]) & mask);
        }
      else
        {
          *region = (ltb >> 1);
          mant = res[1] & ((1L << (bitsper - resexp)) - 1);
          while (mant < 0x0000000000010000)
            {
              i++;
              mant = (mant << bitsper) | res[i];
            }
          nextbits = res[i+1];
        }

#ifdef DEBUGGING_PRINT
      printf("First bits of mant = %s\n", d2b(mant, bitsper, -1));
#endif

      /* Normalize the mantissa. The shift value 6 here, determined by
         trial and error, seems to give optimal speed. */
      /* bc counts the total left shift applied, so that the same
         number of bits can be pulled in from nextbits afterwards */
      bc = 0;
      while (mant < 0x0000400000000000)
        {
          bc += 6;
          mant <<= 6;
        }
      while (mant < 0x0010000000000000)
        {
          bc++;
          mant <<= 1;
        }
      /* Fill the bc vacated low bits from the top of the next chunk */
      mant |= nextbits >> (bitsper - bc);

      rexp = 52 + resexp - bc - i * bitsper;

#ifdef DEBUGGING_PRINT
      printf("Normalised mantissa = 0x%016lx\n", mant);
      printf("Exponent to be inserted on mantissa = rexp = %d\n", rexp);
#endif

      /* Put the result exponent rexp onto the mantissa pattern */
      u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64;
      ux = (mant & MANTBITS_DP64) | u;
      if (determ)
        /* If we negated the mantissa we negate x too */
        ux |= SIGNBIT_DP64;
      /* From here on dx no longer holds the input argument */
      PUT_BITS_DP64(ux, dx);

#ifdef DEBUGGING_PRINT
      printf("(x*2/pi) = %25.20e = %s\n", dx, double2hex(&dx));
#endif

      /* x is a double precision version of the fractional part of
         x * 2 / pi. Multiply x by pi/2 in double precision
         to get the reduced argument r. */
      *r = dx * piby2;

#ifdef DEBUGGING_PRINT
      printf(" r = frac(x*2/pi) * pi/2:\n");
      printf(" r = %25.20e = %s\n", *r, double2hex(r));
      printf("region = (number of pi/2 subtracted from x) mod 4 = %d\n",
             *region);
#endif
    }
}




/*
(C) 2002 Advanced Micro Devices, Inc. 
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
    THIS LIBRARY**
*/

#include "libm_amd.h"
#include "libm_util_amd.h"

#define USE_NAN_WITH_FLAGS
#define USE_SCALEDOUBLE_1
#define USE_GET_FPSW_INLINE
#define USE_SET_FPSW_INLINE
#include "libm_inlines_amd.h"
#undef USE_NAN_WITH_FLAGS
#undef USE_SCALEDOUBLE_1
#undef USE_GET_FPSW_INLINE
#undef USE_SET_FPSW_INLINE

#if defined(COMPILING_FMOD)
float __fmodf(float x, float y)
#else
float __remainderf(float x, float y)
#endif
{
  /* Single precision fmod / remainder, selected at compile time by
     COMPILING_FMOD.  Both arguments are promoted to double and the
     work is done on their magnitudes; the sign of x is put back on
     the result at the end.

     Special cases, in the order they are tested:
       x NaN                 -> x + x
       x infinite            -> NaN, invalid raised
       y NaN                 -> y + y
       y infinite            -> x
       x zero, y zero        -> NaN, invalid raised
       x zero, y non-zero    -> x
       y zero                -> NaN, invalid raised
       abs(x) == abs(y)      -> zero with the sign of x  */
  double num, den, step, shrink, quot;
  unsigned long xbits, ybits, xmag, ymag;
  int xe, ye, passes;
  unsigned int saved_sw;

  num = x;
  den = y;

  /* Pull apart the bit patterns: magnitudes and biased exponents */
  GET_BITS_DP64(num, xbits);
  GET_BITS_DP64(den, ybits);
  xmag = xbits & ~SIGNBIT_DP64;
  ymag = ybits & ~SIGNBIT_DP64;
  xe = ((xbits & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
  ye = ((ybits & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);

  /* Neither operand can be denormalized, because both were promoted
     from float; a biased exponent below 1 therefore means zero. */

  if (xe > BIASEDEMAX_DP64)
    {
      /* x is NaN or infinity */
      if (xbits & MANTBITS_DP64)
        return num + num; /* NaN: raises invalid if it is signalling */
      /* Infinity: the result is NaN */
      return nan_with_flags(AMD_F_INVALID);
    }

  if (ye > BIASEDEMAX_DP64)
    {
      /* y is NaN or infinity */
      if (ybits & MANTBITS_DP64)
        return den + den; /* NaN: raises invalid if it is signalling */
      /* Infinity: the result is x */
      return num;
    }

  if (xe < 1)
    {
      /* x is zero */
      if (ye < 1)
        /* ... and so is y */
        return nan_with_flags(AMD_F_INVALID);
      return num;
    }

  if (ye < 1)
    /* y is zero */
    return nan_with_flags(AMD_F_INVALID);

  if (xmag == ymag)
    {
      /* Equal magnitudes: the answer is a zero carrying the sign of x */
      PUT_BITS_DP64(xbits & SIGNBIT_DP64, num);
      return num;
    }

  /* From here on num = abs(x) and den = abs(y) */
  PUT_BITS_DP64(xmag, num);
  PUT_BITS_DP64(ymag, den);

  if (xmag < ymag)
    {
      /* abs(x) < abs(y): no reduction loop is needed */
#if !defined(COMPILING_FMOD)
      if (num > 0.5*den)
        num -= den;
#endif
      if (x < 0.0)
        return -num;
      return num;
    }

  /* The result is always exact for finite arguments, yet the
     arithmetic below raises the inexact flag.  Remember the status
     word now so that it can be put back before returning. */
  saved_sw = get_fpsw_inline();

  /* passes is the number of partial remainders to take before the
     final one.  When the exponent gap is an exact multiple of 24 and
     the mantissa of x is below that of y this is one more than
     strictly necessary, which merely costs an extra trip round the
     loop. */
  if (xe > ye)
    {
      passes = (xe - ye) / 24;

      /* step = y * 2^(24*passes) */
      PUT_BITS_DP64((unsigned long)(passes * 24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
                    shrink);
      step = shrink * den;
      /* shrink = 2^(-24) */
      PUT_BITS_DP64((unsigned long)(-24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
                    shrink);
    }
  else
    {
      passes = 0;
      step = den;
      shrink = 1.0;
    }

  /* Every pass removes a truncated integer multiple of step (a scaled
     up copy of y) from num.  In double precision the subtraction is
     exact and each intermediate value fits in a single precision
     number. */
  while (passes-- > 0)
    {
      quot = (double)((int)(num / step));
      num -= step * quot;
      /* Bring step down by 2^(-24) ready for the next pass */
      step *= shrink;
    }

  /* Final reduction, with step equal to abs(y) */
#if defined(COMPILING_FMOD)
  quot = (double)((int)(num / step));
  num -= step * quot;
#else
 {
  /* whole is the truncated integer quotient; odd records its parity,
     which decides the halfway case below */
  int whole = (int)(num / step);
  unsigned int odd = whole & 1;
  double half;

  quot = (double)whole;
  num -= step * quot;

  /* num now lies in [0,abs(y)).  remainder wants it in
     (-y/2, y/2], so take off one more step when num is above the
     midpoint, or exactly on it with an odd quotient. */
  half = 0.5 * step;
  if (num > half || (num == half && odd))
    num -= step;
 }
#endif

  /* **** N.B. for some reason this breaks the 32 bit version
     of remainder when compiling with optimization. */
  /* Put back the status flags saved on entry */
  set_fpsw_inline(saved_sw);

  /* The result takes its sign from the input argument x */
  if (x < 0.0)
    return -num;
  return num;

}

/* Publish the implementation under its public name and under the
   __ieee754_ name.  Which pair is emitted follows COMPILING_FMOD, the
   same switch that selects the name of the function defined above. */
#if defined(COMPILING_FMOD)
weak_alias (__fmodf, fmodf)
weak_alias (__fmodf,  __ieee754_fmodf)
#else
weak_alias (__remainderf, remainderf)
weak_alias (__remainderf,  __ieee754_remainderf)
#endif

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_sinh.c.x86_64-new-libm (+335 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_SPLITEXP
		13	#define USE_SCALEDOUBLE_1
		14	#define USE_SCALEDOUBLE_2
		15	#define USE_INFINITY_WITH_FLAGS
		16	#define USE_VAL_WITH_FLAGS
		17	#include "libm_inlines_amd.h"
		18	#undef USE_SPLITEXP
		19	#undef USE_SCALEDOUBLE_1
		20	#undef USE_SCALEDOUBLE_2
		21	#undef USE_INFINITY_WITH_FLAGS
		22	#undef USE_VAL_WITH_FLAGS
		23
		24	/* Deal with errno for out-of-range result */
		25	#include "libm_errno_amd.h"
		26	static inline double retval_errno_erange(double x, int xneg)
		27	{
		28	struct exception exc;
		29	exc.arg1 = x;
		30	exc.arg2 = x;
		31	exc.type = OVERFLOW;
		32	exc.name = (char *)"sinh";
		33	if (_LIB_VERSION == _SVID_)
		34	{
		35	if (xneg)
		36	exc.retval = -HUGE;
		37	else
		38	exc.retval = HUGE;
		39	}
		40	else
		41	{
		42	if (xneg)
		43	exc.retval = -infinity_with_flags(AMD_F_OVERFLOW);
		44	else
		45	exc.retval = infinity_with_flags(AMD_F_OVERFLOW);
		46	}
		47	if (_LIB_VERSION == _POSIX_)
		48	__set_errno(ERANGE);
		49	else if (!matherr(&exc))
		50	__set_errno(ERANGE);
		51	return exc.retval;
		52	}
		53
		54	double __sinh(double x)
		55	{
		56	/*
		57	After dealing with special cases the computation is split into
		58	regions as follows:
		59
		60	abs(x) >= max_sinh_arg:
		61	sinh(x) = sign(x)*Inf
		62
		63	abs(x) >= small_threshold:
		64	sinh(x) = sign(x)*exp(abs(x))/2 computed using the
65	splitexp and scaleDouble functions as for exp_amd().
66
67	abs(x) < small_threshold:
68	compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
69	sinh(x) is then sign(x)*z. */
70
71	static const double
72	max_sinh_arg = 7.10475860073943977113e+02, /* 0x408633ce8fb9f87e */
73	thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
74	log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
75	log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
76	small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
77	/* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */
78
79	/* Lead and tail tabulated values of sinh(i) and cosh(i)
80	for i = 0,...,36. The lead part has 26 leading bits. */
81
82	static const double sinh_lead[ 37] = {
83	0.00000000000000000000e+00, /* 0x0000000000000000 */
84	1.17520117759704589844e+00, /* 0x3ff2cd9fc0000000 */
85	3.62686038017272949219e+00, /* 0x400d03cf60000000 */
86	1.00178747177124023438e+01, /* 0x40240926e0000000 */
87	2.72899169921875000000e+01, /* 0x403b4a3800000000 */
88	7.42032089233398437500e+01, /* 0x40528d0160000000 */
89	2.01713153839111328125e+02, /* 0x406936d228000000 */
90	5.48316116333007812500e+02, /* 0x4081228768000000 */
91	1.49047882080078125000e+03, /* 0x409749ea50000000 */
92	4.05154187011718750000e+03, /* 0x40afa71570000000 */
93	1.10132326660156250000e+04, /* 0x40c5829dc8000000 */
94	2.99370708007812500000e+04, /* 0x40dd3c4488000000 */
95	8.13773945312500000000e+04, /* 0x40f3de1650000000 */
96	2.21206695312500000000e+05, /* 0x410b00b590000000 */
97	6.01302140625000000000e+05, /* 0x412259ac48000000 */
98	1.63450865625000000000e+06, /* 0x4138f0cca8000000 */
99	4.44305525000000000000e+06, /* 0x4150f2ebd0000000 */
100	1.20774762500000000000e+07, /* 0x4167093488000000 */
101	3.28299845000000000000e+07, /* 0x417f4f2208000000 */
102	8.92411500000000000000e+07, /* 0x419546d8f8000000 */
103	2.42582596000000000000e+08, /* 0x41aceb0888000000 */
104	6.59407856000000000000e+08, /* 0x41c3a6e1f8000000 */
105	1.79245641600000000000e+09, /* 0x41dab5adb8000000 */
106	4.87240166400000000000e+09, /* 0x41f226af30000000 */
107	1.32445608960000000000e+10, /* 0x4208ab7fb0000000 */
108	3.60024494080000000000e+10, /* 0x4220c3d390000000 */
109	9.78648043520000000000e+10, /* 0x4236c93268000000 */
110	2.66024116224000000000e+11, /* 0x424ef822f0000000 */
111	7.23128516608000000000e+11, /* 0x42650bba30000000 */
112	1.96566712320000000000e+12, /* 0x427c9aae40000000 */
113	5.34323724288000000000e+12, /* 0x4293704708000000 */
114	1.45244246507520000000e+13, /* 0x42aa6b7658000000 */
115	3.94814795284480000000e+13, /* 0x42c1f43fc8000000 */
116	1.07321789251584000000e+14, /* 0x42d866f348000000 */
117	2.91730863685632000000e+14, /* 0x42f0953e28000000 */
118	7.93006722514944000000e+14, /* 0x430689e220000000 */
119	2.15561576592179200000e+15}; /* 0x431ea215a0000000 */
120
121	static const double sinh_tail[ 37] = {
122	0.00000000000000000000e+00, /* 0x0000000000000000 */
123	1.60467555584448807892e-08, /* 0x3e513ae6096a0092 */
124	2.76742892754807136947e-08, /* 0x3e5db70cfb79a640 */
125	2.09697499555224576530e-07, /* 0x3e8c2526b66dc067 */
126	2.04940252448908240062e-07, /* 0x3e8b81b18647f380 */
127	1.65444891522700935932e-06, /* 0x3ebbc1cdd1e1eb08 */
128	3.53116789999998198721e-06, /* 0x3ecd9f201534fb09 */
129	6.94023870987375490695e-06, /* 0x3edd1c064a4e9954 */
130	4.98876893611587449271e-06, /* 0x3ed4eca65d06ea74 */
131	3.19656024605152215752e-05, /* 0x3f00c259bcc0ecc5 */
132	2.08687768377236501204e-04, /* 0x3f2b5a6647cf9016 */
133	4.84668088325403796299e-05, /* 0x3f09691adefb0870 */
134	1.17517985422733832468e-03, /* 0x3f53410fc29cde38 */
135	6.90830086959560562415e-04, /* 0x3f46a31a50b6fb3c */
136	1.45697262451506548420e-03, /* 0x3f57defc71805c40 */
137	2.99859023684906737806e-02, /* 0x3f9eb49fd80e0bab */
138	1.02538800507941396667e-02, /* 0x3f84fffc7bcd5920 */
139	1.26787628407699110022e-01, /* 0x3fc03a93b6c63435 */
140	6.86652479544033744752e-02, /* 0x3fb1940bb255fd1c */
141	4.81593627621056619148e-01, /* 0x3fded26e14260b50 */
142	1.70489513795397629181e+00, /* 0x3ffb47401fc9f2a2 */
143	1.12416073482258713767e+01, /* 0x40267bb3f55634f1 */
144	7.06579578070110514432e+00, /* 0x401c435ff8194ddc */
145	5.91244512999659974639e+01, /* 0x404d8fee052ba63a */
146	1.68921736147050694399e+02, /* 0x40651d7edccde3f6 */
147	2.60692936262073658327e+02, /* 0x40704b1644557d1a */
148	3.62419382134885609048e+02, /* 0x4076a6b5ca0a9dc4 */
149	4.07689930834187271103e+03, /* 0x40afd9cc72249aba */
150	1.55377375868385224749e+04, /* 0x40ce58de693edab5 */
151	2.53720210371943067003e+04, /* 0x40d8c70158ac6363 */
152	4.78822310734952334315e+04, /* 0x40e7614764f43e20 */
153	1.81871712615542812273e+05, /* 0x4106337db36fc718 */
154	5.62892347580489004031e+05, /* 0x41212d98b1f611e2 */
155	6.41374032312148716301e+05, /* 0x412392bc108b37cc */
156	7.57809544070145115256e+06, /* 0x415ce87bdc3473dc */
157	3.64177136406482197344e+06, /* 0x414bc8d5ae99ad14 */
158	7.63580561355670914054e+06}; /* 0x415d20d76744835c */
159
160	static const double cosh_lead[ 37] = {
161	1.00000000000000000000e+00, /* 0x3ff0000000000000 */
162	1.54308062791824340820e+00, /* 0x3ff8b07550000000 */
163	3.76219564676284790039e+00, /* 0x400e18fa08000000 */
164	1.00676617622375488281e+01, /* 0x402422a490000000 */
165	2.73082327842712402344e+01, /* 0x403b4ee858000000 */
166	7.42099475860595703125e+01, /* 0x40528d6fc8000000 */
167	2.01715633392333984375e+02, /* 0x406936e678000000 */
168	5.48317031860351562500e+02, /* 0x4081228948000000 */
169	1.49047915649414062500e+03, /* 0x409749eaa8000000 */
170	4.05154199218750000000e+03, /* 0x40afa71580000000 */
171	1.10132329101562500000e+04, /* 0x40c5829dd0000000 */
172	2.99370708007812500000e+04, /* 0x40dd3c4488000000 */
173	8.13773945312500000000e+04, /* 0x40f3de1650000000 */
174	2.21206695312500000000e+05, /* 0x410b00b590000000 */
175	6.01302140625000000000e+05, /* 0x412259ac48000000 */
176	1.63450865625000000000e+06, /* 0x4138f0cca8000000 */
177	4.44305525000000000000e+06, /* 0x4150f2ebd0000000 */
178	1.20774762500000000000e+07, /* 0x4167093488000000 */
179	3.28299845000000000000e+07, /* 0x417f4f2208000000 */
180	8.92411500000000000000e+07, /* 0x419546d8f8000000 */
181	2.42582596000000000000e+08, /* 0x41aceb0888000000 */
182	6.59407856000000000000e+08, /* 0x41c3a6e1f8000000 */
183	1.79245641600000000000e+09, /* 0x41dab5adb8000000 */
184	4.87240166400000000000e+09, /* 0x41f226af30000000 */
185	1.32445608960000000000e+10, /* 0x4208ab7fb0000000 */
186	3.60024494080000000000e+10, /* 0x4220c3d390000000 */
187	9.78648043520000000000e+10, /* 0x4236c93268000000 */
188	2.66024116224000000000e+11, /* 0x424ef822f0000000 */
189	7.23128516608000000000e+11, /* 0x42650bba30000000 */
190	1.96566712320000000000e+12, /* 0x427c9aae40000000 */
191	5.34323724288000000000e+12, /* 0x4293704708000000 */
192	1.45244246507520000000e+13, /* 0x42aa6b7658000000 */
193	3.94814795284480000000e+13, /* 0x42c1f43fc8000000 */
194	1.07321789251584000000e+14, /* 0x42d866f348000000 */
195	2.91730863685632000000e+14, /* 0x42f0953e28000000 */
196	7.93006722514944000000e+14, /* 0x430689e220000000 */
197	2.15561576592179200000e+15}; /* 0x431ea215a0000000 */
198
199	static const double cosh_tail[ 37] = {
200	0.00000000000000000000e+00, /* 0x0000000000000000 */
201	6.89700037027478056904e-09, /* 0x3e3d9f5504c2bd28 */
202	4.43207835591715833630e-08, /* 0x3e67cb66f0a4c9fd */
203	2.33540217013828929694e-07, /* 0x3e8f58617928e588 */
204	5.17452463948269748331e-08, /* 0x3e6bc7d000c38d48 */
205	9.38728274131605919153e-07, /* 0x3eaf7f9d4e329998 */
206	2.73012191010840495544e-06, /* 0x3ec6e6e464885269 */
207	3.29486051438996307950e-06, /* 0x3ecba3a8b946c154 */
208	4.75803746362771416375e-06, /* 0x3ed3f4e76110d5a4 */
209	3.33050940471947692369e-05, /* 0x3f017622515a3e2b */
210	9.94707313972136215365e-06, /* 0x3ee4dc4b528af3d0 */
211	6.51685096227860253398e-05, /* 0x3f11156278615e10 */
212	1.18132406658066663359e-03, /* 0x3f535ad50ed821f5 */
213	6.93090416366541877541e-04, /* 0x3f46b61055f2935c */
214	1.45780415323416845386e-03, /* 0x3f57e2794a601240 */
215	2.99862082708111758744e-02, /* 0x3f9eb4b45f6aadd3 */
216	1.02539925859688602072e-02, /* 0x3f85000b967b3698 */
217	1.26787669807076286421e-01, /* 0x3fc03a940fadc092 */
218	6.86652631843830962843e-02, /* 0x3fb1940bf3bf874c */
219	4.81593633223853068159e-01, /* 0x3fded26e1a2a2110 */
220	1.70489514001513020602e+00, /* 0x3ffb4740205796d6 */
221	1.12416073489841270572e+01, /* 0x40267bb3f55cb85d */
222	7.06579578098005001152e+00, /* 0x401c435ff81e18ac */
223	5.91244513000686140458e+01, /* 0x404d8fee052bdea4 */
224	1.68921736147088438429e+02, /* 0x40651d7edccde926 */
225	2.60692936262087528121e+02, /* 0x40704b1644557e0e */
226	3.62419382134890611269e+02, /* 0x4076a6b5ca0a9e1c */
227	4.07689930834187453002e+03, /* 0x40afd9cc72249abe */
228	1.55377375868385224749e+04, /* 0x40ce58de693edab5 */
229	2.53720210371943103382e+04, /* 0x40d8c70158ac6364 */
230	4.78822310734952334315e+04, /* 0x40e7614764f43e20 */
231	1.81871712615542812273e+05, /* 0x4106337db36fc718 */
232	5.62892347580489004031e+05, /* 0x41212d98b1f611e2 */
233	6.41374032312148716301e+05, /* 0x412392bc108b37cc */
234	7.57809544070145115256e+06, /* 0x415ce87bdc3473dc */
235	3.64177136406482197344e+06, /* 0x414bc8d5ae99ad14 */
236	7.63580561355670914054e+06}; /* 0x415d20d76744835c */
237
238	unsigned long ux, aux, xneg;
239	double y, z, z1, z2;
240	int m;
241
242	/* Special cases */
243
244	GET_BITS_DP64(x, ux);
245	aux = ux & ~SIGNBIT_DP64;
246	if (aux < 0x3e30000000000000) /* |x| small enough that sinh(x) = x */
247	{
248	if (aux == 0)
249	/* with no inexact */
250	return x;
251	else
252	return val_with_flags(x, AMD_F_INEXACT);
253	}
254	else if (aux >= 0x7ff0000000000000) /* |x| is NaN or Inf */
255	return x + x;
256
257	xneg = (aux != ux);
258
259	y = x;
260	if (xneg) y = -x;
261
262	if (y >= max_sinh_arg)
263	/* Return +/-infinity with overflow flag */
264	return retval_errno_erange(x, xneg);
265	else if (y >= small_threshold)
266	{
267	/* In this range y is large enough so that
268	the negative exponential is negligible,
269	so sinh(y) is approximated by sign(x)*exp(y)/2. The
270	code below is an inlined version of that from
271	exp() with two changes (it operates on
272	y instead of x, and the division by 2 is
273	done by reducing m by 1). */
274
275	splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
276	log2_by_32_tail, &m, &z1, &z2);
277	m -= 1;
278
279	if (m >= EMIN_DP64 && m <= EMAX_DP64)
280	z = scaleDouble_1((z1+z2),m);
281	else
282	z = scaleDouble_2((z1+z2),m);
283	}
284	else
285	{
286	/* In this range we find the integer part y0 of y
287	and the increment dy = y - y0. We then compute
288
289	z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
290
291	where sinh(y0) and cosh(y0) are tabulated above. */
292
293	int ind;
294	double dy, dy2, sdy, cdy, sdy1, sdy2;
295
296	ind = (int)y;
297	dy = y - ind;
298
299	dy2 = dy*dy;
300	sdy = dy*dy2*(0.166666666666666667013899e0 +
301	(0.833333333333329931873097e-2 +
302	(0.198412698413242405162014e-3 +
303	(0.275573191913636406057211e-5 +
304	(0.250521176994133472333666e-7 +
305	(0.160576793121939886190847e-9 +
306	0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
307
308	cdy = dy2*(0.500000000000000005911074e0 +
309	(0.416666666666660876512776e-1 +
310	(0.138888888889814854814536e-2 +
311	(0.248015872460622433115785e-4 +
312	(0.275573350756016588011357e-6 +
313	(0.208744349831471353536305e-8 +
314	0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
315
316	/* At this point sinh(dy) is approximated by dy + sdy.
317	Shift some significant bits from dy to sdy. */
318
319	GET_BITS_DP64(dy, ux);
320	ux &= 0xfffffffff8000000;
321	PUT_BITS_DP64(ux, sdy1);
322	sdy2 = sdy + (dy - sdy1);
323
324	z = ((((((cosh_tail[ind]*sdy2 + sinh_tail[ind]*cdy)
325	+ cosh_tail[ind]*sdy1) + sinh_tail[ind])
326	+ cosh_lead[ind]*sdy2) + sinh_lead[ind]*cdy)
327	+ cosh_lead[ind]*sdy1) + sinh_lead[ind];
328	}
329
330	if (xneg) z = - z;
331	return z;
332	}
333
334	weak_alias (__sinh, sinh)
335	weak_alias (__sinh, __ieee754_sinh)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_sinhf.c.x86_64-new-libm (+250 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_SPLITEXP
		13	#define USE_SCALEDOUBLE_1
		14	#define USE_SCALEDOUBLE_2
		15	#define USE_INFINITY_WITH_FLAGS
		16	#include "libm_inlines_amd.h"
		17	#undef USE_SPLITEXP
		18	#undef USE_SCALEDOUBLE_1
		19	#undef USE_SCALEDOUBLE_2
		20	#undef USE_INFINITY_WITH_FLAGS
		21
		22	/* Deal with errno for out-of-range result */
		23	#include "libm_errno_amd.h"
		24	static inline float retval_errno_erange(float x, int xneg)
		25	{
		26	struct exception exc;
		27	exc.arg1 = (double)x;
		28	exc.arg2 = (double)x;
		29	exc.type = OVERFLOW;
		30	exc.name = (char *)"sinh";
		31	if (_LIB_VERSION == _SVID_)
		32	{
		33	if (xneg)
		34	exc.retval = -HUGE;
		35	else
		36	exc.retval = HUGE;
		37	}
		38	else
		39	{
		40	if (xneg)
		41	exc.retval = -infinity_with_flags(AMD_F_OVERFLOW);
		42	else
		43	exc.retval = infinity_with_flags(AMD_F_OVERFLOW);
		44	}
		45	if (_LIB_VERSION == _POSIX_)
		46	__set_errno(ERANGE);
		47	else if (!matherr(&exc))
		48	__set_errno(ERANGE);
		49	return exc.retval;
		50	}
		51
		52	float __sinhf(float fx)
		53	{
		54	/*
		55	After dealing with special cases the computation is split into
		56	regions as follows:
		57
		58	abs(x) >= max_sinh_arg:
		59	sinh(x) = sign(x)*Inf
		60
		61	abs(x) >= small_threshold:
		62	sinh(x) = sign(x)*exp(abs(x))/2 computed using the
		63	splitexp and scaleDouble functions as for exp_amd().
		64
65	abs(x) < small_threshold:
66	compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
67	sinh(x) is then sign(x)*z. */
68
69	static const double
70	/* The max argument of sinhf, but stored as a double */
71	max_sinh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */
72	thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
73	log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
74	log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
75	small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
76	/* (8*BASEDIGITS_DP64*log10of2) => exp(-x) insignificant c.f. exp(x) */
77
78	/* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */
79
80	static const double sinh_lead[ 37] = {
81	0.00000000000000000000e+00, /* 0x0000000000000000 */
82	1.17520119364380137839e+00, /* 0x3ff2cd9fc44eb982 */
83	3.62686040784701857476e+00, /* 0x400d03cf63b6e19f */
84	1.00178749274099008204e+01, /* 0x40240926e70949ad */
85	2.72899171971277496596e+01, /* 0x403b4a3803703630 */
86	7.42032105777887522891e+01, /* 0x40528d0166f07374 */
87	2.01713157370279219549e+02, /* 0x406936d22f67c805 */
88	5.48316123273246489589e+02, /* 0x408122876ba380c9 */
89	1.49047882578955000099e+03, /* 0x409749ea514eca65 */
90	4.05154190208278987484e+03, /* 0x40afa7157430966f */
91	1.10132328747033916443e+04, /* 0x40c5829dced69991 */
92	2.99370708492480553105e+04, /* 0x40dd3c4488cb48d6 */
93	8.13773957064298447222e+04, /* 0x40f3de1654d043f0 */
94	2.21206696003330085659e+05, /* 0x410b00b5916a31a5 */
95	6.01302142081972560845e+05, /* 0x412259ac48bef7e3 */
96	1.63450868623590236530e+06, /* 0x4138f0ccafad27f6 */
97	4.44305526025387924165e+06, /* 0x4150f2ebd0a7ffe3 */
98	1.20774763767876271158e+07, /* 0x416709348c0ea4ed */
99	3.28299845686652474105e+07, /* 0x417f4f22091940bb */
100	8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */
101	2.42582597704895108938e+08, /* 0x41aceb088b68e803 */
102	6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */
103	1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */
104	4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */
105	1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */
106	3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */
107	9.78648047144193725586e+10, /* 0x4236c932696a6b5c */
108	2.66024120300899291992e+11, /* 0x424ef822f7f6731c */
109	7.23128532145737548828e+11, /* 0x42650bba3796379a */
110	1.96566714857202099609e+12, /* 0x427c9aae4631c056 */
111	5.34323729076223046875e+12, /* 0x429370470aec28ec */
112	1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */
113	3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */
114	1.07321789892958031250e+14, /* 0x42d866f34a725782 */
115	2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */
116	7.93006726156715250000e+14, /* 0x430689e221bc8d5a */
117	2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
118
119	static const double cosh_lead[ 37] = {
120	1.00000000000000000000e+00, /* 0x3ff0000000000000 */
121	1.54308063481524371241e+00, /* 0x3ff8b07551d9f550 */
122	3.76219569108363138810e+00, /* 0x400e18fa0df2d9bc */
123	1.00676619957777653269e+01, /* 0x402422a497d6185e */
124	2.73082328360164865444e+01, /* 0x403b4ee858de3e80 */
125	7.42099485247878334349e+01, /* 0x40528d6fcbeff3a9 */
126	2.01715636122455890700e+02, /* 0x406936e67db9b919 */
127	5.48317035155212010977e+02, /* 0x4081228949ba3a8b */
128	1.49047916125217807348e+03, /* 0x409749eaa93f4e76 */
129	4.05154202549259389343e+03, /* 0x40afa715845d8894 */
130	1.10132329201033226127e+04, /* 0x40c5829dd053712d */
131	2.99370708659497577173e+04, /* 0x40dd3c4489115627 */
132	8.13773957125740562333e+04, /* 0x40f3de1654d6b543 */
133	2.21206696005590405548e+05, /* 0x410b00b5916b6105 */
134	6.01302142082804115489e+05, /* 0x412259ac48bf13ca */
135	1.63450868623620807193e+06, /* 0x4138f0ccafad2d17 */
136	4.44305526025399193168e+06, /* 0x4150f2ebd0a8005c */
137	1.20774763767876680940e+07, /* 0x416709348c0ea503 */
138	3.28299845686652623117e+07, /* 0x417f4f22091940bf */
139	8.92411504815936237574e+07, /* 0x419546d8f9ed26e1 */
140	2.42582597704895138741e+08, /* 0x41aceb088b68e804 */
141	6.59407867241607308388e+08, /* 0x41c3a6e1fd9eecfd */
142	1.79245642306579566002e+09, /* 0x41dab5adb9c435ff */
143	4.87240172312445068359e+09, /* 0x41f226af33b1fdc0 */
144	1.32445610649217357635e+10, /* 0x4208ab7fb5475fb7 */
145	3.60024496686929321289e+10, /* 0x4220c3d3920962c8 */
146	9.78648047144193725586e+10, /* 0x4236c932696a6b5c */
147	2.66024120300899291992e+11, /* 0x424ef822f7f6731c */
148	7.23128532145737548828e+11, /* 0x42650bba3796379a */
149	1.96566714857202099609e+12, /* 0x427c9aae4631c056 */
150	5.34323729076223046875e+12, /* 0x429370470aec28ec */
151	1.45244248326237109375e+13, /* 0x42aa6b765d8cdf6c */
152	3.94814800913403437500e+13, /* 0x42c1f43fcc4b662c */
153	1.07321789892958031250e+14, /* 0x42d866f34a725782 */
154	2.91730871263727437500e+14, /* 0x42f0953e2f3a1ef7 */
155	7.93006726156715250000e+14, /* 0x430689e221bc8d5a */
156	2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
157
158	unsigned long ux, aux, xneg;
159	double x = fx, y, z, z1, z2;
160	int m;
161
162	/* Special cases */
163
164	GET_BITS_DP64(x, ux);
165	aux = ux & ~SIGNBIT_DP64;
166	if (aux < 0x3f10000000000000) /* |x| small enough that sinh(x) = x */
167	{
168	if (aux == 0) return x; /* with no inexact */
169	if (LAMBDA_DP64 + x > 1.0) return x; /* with inexact */
170	}
171	else if (aux >= 0x7ff0000000000000) /* |x| is NaN or Inf */
172	return x + x;
173
174	xneg = (aux != ux);
175
176	y = x;
177	if (xneg) y = -x;
178
179	if (y >= max_sinh_arg)
180	{
181	/* Return infinity with overflow flag. */
182	#if 0
183	/* This way handles non-POSIX behaviour but weirdly causes
184	sinhf to run half as fast for all arguments on Hammer */
185	return retval_errno_erange(fx, xneg);
186	#else
187	/* This handles POSIX behaviour */
188	__set_errno(ERANGE);
189	z = infinity_with_flags(AMD_F_OVERFLOW);
190	#endif
191	}
192	else if (y >= small_threshold)
193	{
194	/* In this range y is large enough so that
195	the negative exponential is negligible,
196	so sinh(y) is approximated by sign(x)*exp(y)/2. The
197	code below is an inlined version of that from
198	exp() with two changes (it operates on
199	y instead of x, and the division by 2 is
200	done by reducing m by 1). */
201
202	splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
203	log2_by_32_tail, &m, &z1, &z2);
204	m -= 1;
205	/* scaleDouble_1 is always safe because the argument x was
206	float, rather than double */
207	z = scaleDouble_1((z1+z2),m);
208	}
209	else
210	{
211	/* In this range we find the integer part y0 of y
212	and the increment dy = y - y0. We then compute
213
214	z = sinh(y) = sinh(y0)*cosh(dy) + cosh(y0)*sinh(dy)
215
216	where sinh(y0) and cosh(y0) are tabulated above. */
217
218	int ind;
219	double dy, dy2, sdy, cdy;
220
221	ind = (int)y;
222	dy = y - ind;
223
224	dy2 = dy*dy;
225
226	sdy = dy + dy*dy2*(0.166666666666666667013899e0 +
227	(0.833333333333329931873097e-2 +
228	(0.198412698413242405162014e-3 +
229	(0.275573191913636406057211e-5 +
230	(0.250521176994133472333666e-7 +
231	(0.160576793121939886190847e-9 +
232	0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
233
234	cdy = 1 + dy2*(0.500000000000000005911074e0 +
235	(0.416666666666660876512776e-1 +
236	(0.138888888889814854814536e-2 +
237	(0.248015872460622433115785e-4 +
238	(0.275573350756016588011357e-6 +
239	(0.208744349831471353536305e-8 +
240	0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
241
242	z = sinh_lead[ind]*cdy + cosh_lead[ind]*sdy;
243	}
244
245	if (xneg) z = - z;
246	return z;
247	}
248
249	weak_alias (__sinhf, sinhf)
250	weak_alias (__sinhf, __ieee754_sinhf)




/* copy sign, double version.
   Copyright (C) 2002 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Andreas Jaeger <aj@suse.de>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <machine/asm.h>

/* Constant masks used by __copysign.  Each mask is 16 bytes long
   because andpd takes a 128-bit memory operand; only the low quadword
   matters for the scalar result.  */
#ifdef __ELF__
	.section .rodata
#else
	.text
#endif

	.align ALIGNARG(4)
	/* Low quadword 0x8000000000000000 (bytes are little-endian):
	   only the sign bit of a double.  */
	ASM_TYPE_DIRECTIVE(signmask,@object)
signmask:
	.byte 0, 0, 0, 0, 0, 0, 0, 0x80
	.byte 0, 0, 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(signmask)
	/* Low quadword 0x7fffffffffffffff: every bit of a double except
	   the sign bit.  */
	ASM_TYPE_DIRECTIVE(othermask,@object)
othermask:
	.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f
	.byte 0, 0, 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(othermask)

/* MO(op): memory operand naming the symbol op.  When PIC is defined
   the symbol is addressed relative to %rip; otherwise the bare symbol
   name is used.  */
#ifdef PIC
#define MO(op) op##(%rip)
#else
#define MO(op) op
#endif

/* double __copysign (double x, double y)

   Returns the magnitude of x combined with the sign bit of y.  Under
   the x86-64 System V calling convention x arrives in %xmm0, y in
   %xmm1, and the result is returned in %xmm0.

   The masks above are emitted into .rodata when __ELF__ is defined,
   and nothing visible in this file switches back afterwards.  Select
   .text explicitly so the code cannot end up in the read-only data
   section; this is harmless if ENTRY already does so.  */
	.text
ENTRY(__copysign)
	andpd MO(othermask),%xmm0	/* xmm0 = x with its sign bit cleared */
	andpd MO(signmask),%xmm1	/* xmm1 = only the sign bit of y */
	orpd %xmm1,%xmm0		/* merge magnitude and sign */
	ret
END (__copysign)

weak_alias (__copysign, copysign)




/* copy sign, float version.
   Copyright (C) 2002 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Andreas Jaeger <aj@suse.de>, 2002.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <machine/asm.h>

/* Constant used by __copysignf: the four bytes below form the 32-bit
   pattern 0x7fffffff (little-endian), i.e. every bit of a float
   except the sign bit.  It is placed in .rodata when __ELF__ is
   defined, otherwise in .text.  */
#ifdef __ELF__
	.section .rodata
#else
	.text
#endif

	.align ALIGNARG(4)
	ASM_TYPE_DIRECTIVE(mask,@object)
mask:
	.byte 0xff, 0xff, 0xff, 0x7f
	ASM_SIZE_DIRECTIVE(mask)

/* MO(op): memory operand naming the symbol op.  When PIC is defined
   the symbol is addressed relative to %rip; otherwise the bare symbol
   name is used.  */
#ifdef PIC
#define MO(op) op##(%rip)
#else
#define MO(op) op
#endif

/* float __copysignf (float x, float y)

   Returns the magnitude of x combined with the sign bit of y.  Under
   the x86-64 System V calling convention x arrives in %xmm0, y in
   %xmm1, and the result is returned in %xmm0.

   The mask above is emitted into .rodata when __ELF__ is defined, and
   nothing visible in this file switches back afterwards.  Select
   .text explicitly so the code cannot end up in the read-only data
   section; this is harmless if ENTRY already does so.  */
	.text
ENTRY(__copysignf)
	movss	MO(mask),%xmm3		/* xmm3 = 0x7fffffff in the low lane */
	andps	%xmm3,%xmm0		/* xmm0 = x with its sign bit cleared */
	andnps	%xmm1,%xmm3		/* xmm3 = ~mask & y: the sign bit of y */
	orps	%xmm3,%xmm0		/* merge magnitude and sign */
	retq
END (__copysignf)

weak_alias (__copysignf, copysignf)

Return to bug 100289

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/libm_amd.h.x86_64-new-libm (+32 lines)
	1	/*
	2	(C) 2002 Advanced Micro Devices, Inc.
	3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
	4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
	5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
	6	THIS LIBRARY**
	7	*/
	8
	9	#ifndef LIBM_AMD_H_INCLUDED
	10	#define LIBM_AMD_H_INCLUDED 1
	11
	12	/* The following definition of weak_alias is extracted from
	13	libc-symbols.h */
	14
	15	/* Define ALIASNAME as a weak alias for NAME.
	16	If weak aliases are not available, this defines a strong alias. */
	17	# define weak_alias(name, aliasname) _weak_alias (name, aliasname)
	18	# define _weak_alias(name, aliasname) \
	19	extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));
	20
	21	#include <math.h>
	22
	23	extern double chgsign(double x);
	24	extern float chgsignf(float x);
	25
	26	extern double fma(double x, double y, double z);
	27	extern float fmaf(float x, float y, float z);
	28
	29	extern void __remainder_piby2(double x, double *r, double *rr, int *region);
	30	extern void __remainder_piby2f(float x, double *r, int *region);
	31
	32	#endif /* LIBM_AMD_H_INCLUDED */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/libm_errno_amd.h.x86_64-new-libm (+18 lines)
	1	/*
	2	(C) 2002 Advanced Micro Devices, Inc.
	3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
	4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
	5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
	6	THIS LIBRARY**
	7	*/
	8
	9	#ifndef LIBM_ERRNO_AMD_H_INCLUDED
	10	#define LIBM_ERRNO_AMD_H_INCLUDED 1
	11
	12	#include <stdio.h>
	13	#include <errno.h>
	14	#ifndef __set_errno
	15	#define __set_errno(x) errno = (x)
	16	#endif
	17
	18	#endif /* LIBM_ERRNO_AMD_H_INCLUDED */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/libm_util_amd.h.x86_64-new-libm (+101 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#ifndef LIBM_UTIL_AMD_H_INCLUDED
		10	#define LIBM_UTIL_AMD_H_INCLUDED 1
		11
		12	/* Compile-time verification that type long is the same size
		13	as type double (i.e. we are really on a 64-bit machine) */
		14	void check_long_against_double_size(int machine_is_64_bit[(sizeof(long) == sizeof(double))?1:-1]);
		15
		16
		17	/* Definitions for double functions on 64 bit machines */
		18	#define SIGNBIT_DP64 0x8000000000000000
		19	#define EXPBITS_DP64 0x7ff0000000000000
		20	#define MANTBITS_DP64 0x000fffffffffffff
		21	#define ONEEXPBITS_DP64 0x3ff0000000000000
		22	#define TWOEXPBITS_DP64 0x4000000000000000
		23	#define HALFEXPBITS_DP64 0x3fe0000000000000
		24	#define IMPBIT_DP64 0x0010000000000000
		25	#define QNANBITPATT_DP64 0x7ff8000000000000
		26	#define PINFBITPATT_DP64 0x7ff0000000000000
		27	#define NINFBITPATT_DP64 0xfff0000000000000
		28	#define EXPBIAS_DP64 1023
		29	#define EXPSHIFTBITS_DP64 52
		30	#define BIASEDEMIN_DP64 1
		31	#define EMIN_DP64 -1022
		32	#define BIASEDEMAX_DP64 2046
		33	#define EMAX_DP64 1023
		34	#define LAMBDA_DP64 1.0e300
		35	#define MANTLENGTH_DP64 53
		36	#define BASEDIGITS_DP64 15
		37
		38
		39	/* These definitions, used by float functions,
		40	are for both 32 and 64 bit machines */
		41	#define SIGNBIT_SP32 0x80000000
		42	#define EXPBITS_SP32 0x7f800000
		43	#define MANTBITS_SP32 0x007fffff
		44	#define ONEEXPBITS_SP32 0x3f800000
		45	#define TWOEXPBITS_SP32 0x40000000
		46	#define HALFEXPBITS_SP32 0x3f000000
		47	#define IMPBIT_SP32 0x00800000
		48	#define QNANBITPATT_SP32 0x7fc00000
		49	#define PINFBITPATT_SP32 0x7f800000
		50	#define NINFBITPATT_SP32 0xff800000
		51	#define EXPBIAS_SP32 127
		52	#define EXPSHIFTBITS_SP32 23
		53	#define BIASEDEMIN_SP32 1
		54	#define EMIN_SP32 -126
		55	#define BIASEDEMAX_SP32 254
		56	#define EMAX_SP32 127
		57	#define LAMBDA_SP32 1.0e30
		58	#define MANTLENGTH_SP32 24
		59	#define BASEDIGITS_SP32 7
		60
		61	#define CLASS_SIGNALLING_NAN 1
		62	#define CLASS_QUIET_NAN 2
		63	#define CLASS_NEGATIVE_INFINITY 3
		64	#define CLASS_NEGATIVE_NORMAL_NONZERO 4
65	#define CLASS_NEGATIVE_DENORMAL 5
66	#define CLASS_NEGATIVE_ZERO 6
67	#define CLASS_POSITIVE_ZERO 7
68	#define CLASS_POSITIVE_DENORMAL 8
69	#define CLASS_POSITIVE_NORMAL_NONZERO 9
70	#define CLASS_POSITIVE_INFINITY 10
71
72	#define OLD_BITS_SP32(x) (*((unsigned int *)&x))
73	#define OLD_BITS_DP64(x) (*((unsigned long *)&x))
74
75	/* Alternatives to the above functions which don't have
76	problems when using high optimization levels on gcc */
77	#define GET_BITS_SP32(x, ux) {union {float f; unsigned int i;} _bitsy; _bitsy.f = (x); ux = _bitsy.i;}
78	#define PUT_BITS_SP32(ux, x) {union {float f; unsigned int i;} _bitsy; _bitsy.i = (ux); x = _bitsy.f;}
79	#define GET_BITS_DP64(x, ux) {union {double d; unsigned long i;} _bitsy; _bitsy.d = (x); ux = _bitsy.i;}
80	#define PUT_BITS_DP64(ux, x) {union {double d; unsigned long i;} _bitsy; _bitsy.i = (ux); x = _bitsy.d;}
81
82
83	/* Processor-dependent floating-point status flags */
84	#define AMD_F_INEXACT 0x00000020
85	#define AMD_F_UNDERFLOW 0x00000010
86	#define AMD_F_OVERFLOW 0x00000008
87	#define AMD_F_DIVBYZERO 0x00000004
88	#define AMD_F_INVALID 0x00000001
89
90	/* Processor-dependent floating-point precision-control flags */
91	#define AMD_F_EXTENDED 0x00000300
92	#define AMD_F_DOUBLE 0x00000200
93	#define AMD_F_SINGLE 0x00000000
94
95	/* Processor-dependent floating-point rounding-control flags */
96	#define AMD_F_RC_NEAREST 0x00000000
97	#define AMD_F_RC_DOWN 0x00002000
98	#define AMD_F_RC_UP 0x00004000
99	#define AMD_F_RC_ZERO 0x00006000
100
101	#endif /* LIBM_UTIL_AMD_H_INCLUDED */

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_ceil.c.x86_64-new-libm (+57 lines)
	1	/*
	2	(C) 2002 Advanced Micro Devices, Inc.
	3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
	4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
	5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
	6	THIS LIBRARY**
	7	*/
	8
	9	#include "libm_amd.h"
	10	#include "libm_util_amd.h"
	11
	12	double __ceil(double x)
	13	{
	14	double r;
	15	long rexp, xneg;
	16	unsigned long ux, ax, ur, mask;
	17
	18	GET_BITS_DP64(x, ux);
	19	ax = ux & (~SIGNBIT_DP64);
	20	xneg = (ux != ax);
	21
	22	if (ax >= 0x4340000000000000)
	23	{
	24	/* abs(x) is either NaN, infinity, or >= 2^53 */
	25	if (ax > 0x7ff0000000000000)
	26	/* x is NaN */
	27	return x + x; /* Raise invalid if it is a signalling NaN */
	28	else
	29	return x;
	30	}
	31	else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */
	32	{
	33	if (ax == 0x0000000000000000)
	34	/* x is +zero or -zero; return the same zero */
	35	return x;
	36	else if (xneg) /* x < 0.0 */
	37	return -0.0;
	38	else
	39	return 1.0;
	40	}
	41	else
	42	{
	43	rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
	44	/* Mask out the bits of r that we don't want */
	45	mask = (1L << (EXPSHIFTBITS_DP64 - rexp)) - 1;
	46	ur = (ux & ~mask);
	47	PUT_BITS_DP64(ur, r);
	48	if (xneg \|\| (ur == ux))
	49	return r;
	50	else
	51	/* We threw some bits away and x was positive */
	52	return r + 1.0;
	53	}
	54
	55	}
	56
	57	weak_alias (__ceil, ceil)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_copysign.c.x86_64-new-libm (+29 lines)
	1	/*
	2	(C) 2002 Advanced Micro Devices, Inc.
	3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
	4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
	5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
	6	THIS LIBRARY**
	7	*/
	8
	9	#include "libm_amd.h"
	10	#include "libm_util_amd.h"
	11
	12	/* Returns the absolute value of x with the sign of y.
	13	NaNs are not considered special; their sign bits are handled
	14	the same as for any other number. */
	15
	16	double __copysign(double x, double y)
	17	{
	18	/* This works on Hammer */
	19	double temp = -0.0; /* 0x8000000000000000 */
	20	/* AND the bit pattern with y, result in y */
	21	asm volatile ("andpd %0, %1" : : "x" (temp), "x" (y));
	22	/* AND the ones-complement of the bit pattern with x, result in temp */
	23	asm volatile ("andnpd %0, %1" : : "x" (x), "x" (temp));
	24	asm volatile ("orpd %0, %1" : : "x" (temp), "x" (y));
	25	return y;
	26	}
	27
	28
	29	weak_alias (__copysign, copysign)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_finite.c.x86_64-new-libm (+28 lines)
	1	/*
	2	(C) 2002 Advanced Micro Devices, Inc.
	3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
	4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
	5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
	6	THIS LIBRARY**
	7	*/
	8
	9	#include "libm_amd.h"
	10	#include "libm_util_amd.h"
	11
	12	/* Returns 0 if x is infinite or NaN, otherwise returns 1 */
	13
	14	int __finite(double x)
	15	{
	16	/* This works on Hammer */
	17	double temp = 1.0e444; /* = infinity = 0x7ff0000000000000 */
	18	volatile int retval;
	19	retval = 0;
	20	asm volatile ("andpd %0, %1;" : : "x" (temp), "x" (x));
	21	asm volatile ("comisd %0, %1" : : "x" (temp), "x" (x));
	22	asm volatile ("setnz %0" : "=g" (retval));
	23	return retval;
	24	}
	25
	26	hidden_def (__finite)
	27	weak_alias (__finite, finite)
	28

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_floor.c.x86_64-new-libm (+60 lines)
	1	/*
	2	(C) 2002 Advanced Micro Devices, Inc.
	3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
	4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
	5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
	6	THIS LIBRARY**
	7	*/
	8
	9	#include "libm_amd.h"
	10	#include "libm_util_amd.h"
	11
	12	double __floor(double x)
	13	{
	14	double r;
	15	long rexp, xneg;
	16
	17
	18	unsigned long ux, ax, ur, mask;
	19
	20	GET_BITS_DP64(x, ux);
	21	ax = ux & (~SIGNBIT_DP64);
	22	xneg = (ux != ax);
	23
	24	if (ax >= 0x4340000000000000)
	25	{
	26	/* abs(x) is either NaN, infinity, or >= 2^53 */
	27	if (ax > 0x7ff0000000000000)
	28	/* x is NaN */
	29	return x + x; /* Raise invalid if it is a signalling NaN */
	30	else
	31	return x;
	32	}
	33	else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */
	34	{
	35	if (ax == 0x0000000000000000)
	36	/* x is +zero or -zero; return the same zero */
	37	return x;
	38	else if (xneg) /* x < 0.0 */
	39	return -1.0;
	40	else
	41	return 0.0;
	42	}
	43	else
	44	{
	45	r = x;
	46	rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
	47	/* Mask out the bits of r that we don't want */
	48	mask = (1L << (EXPSHIFTBITS_DP64 - rexp)) - 1;
	49	ur = (ux & ~mask);
	50	PUT_BITS_DP64(ur, r);
	51	if (xneg && (ur != ux))
	52	/* We threw some bits away and x was negative */
	53	return r - 1.0;
	54	else
	55	return r;
	56	}
	57
	58	}
	59
	60	weak_alias (__floor, floor)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_fma.c.x86_64-new-libm (+117 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_SCALEDOUBLE_1
		13	#define USE_SCALEDOUBLE_2
		14	#include "libm_inlines_amd.h"
		15	#undef USE_SCALEDOUBLE_1
		16	#undef USE_SCALEDOUBLE_2
		17
		18	double __fma(double a, double b, double sum)
		19	{
		20	/* Returns a * b + sum with no intermediate loss of precision */
		21
		22	double ha, ta, hb, tb, z, zz, r, s, az, asum;
		23	int ua, ub, usum;
		24	int scaled, expover, expunder, scaleexp;
		25	unsigned long u;
		26
		27	GET_BITS_DP64(a, u);
		28	ua = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
		29	GET_BITS_DP64(b, u);
		30	ub = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
		31	GET_BITS_DP64(sum, u);
		32	usum = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
		33
		34	if (ua == EMAX_DP64 + 1 \|\| ub == EMAX_DP64 + 1 \|\| usum == EMAX_DP64 + 1)
		35	{
		36	/* One or more of the arguments is NaN or infinity. The
		37	result will also be NaN or infinity. */
		38	return a * b + sum;
		39	}
		40	else if (ua + ub > usum + 2 * MANTLENGTH_DP64)
		41	{
		42	/* sum is negligible compared with the extra-length product ab /
		43	return a*b;
		44	}
		45	else if (usum > ua + ub + MANTLENGTH_DP64)
		46	{
		47	/* The product ab is negligible compared with sum /
		48	return sum;
		49	}
		50
		51	expover = EMAX_DP64 - 2;
		52	expunder = EMIN_DP64 + MANTLENGTH_DP64;
		53	scaleexp = 0;
		54
		55
		56	if (ua + ub > expover \|\| usum > expover)
		57	{
		58	/* The result is likely to overflow. Scale down in an attempt
		59	to avoid unnecessary overflow. The true result may still overflow. */
		60	scaled = 1;
		61	scaleexp = expover / 2;
		62	a = scaleDouble_1(a, -scaleexp);
		63	b = scaleDouble_1(b, -scaleexp);
		64	sum = scaleDouble_2(sum, -2*scaleexp);
65	}
66	else if (ua + ub < expunder)
67	{
68	/* The product ab is near underflow; scale up /
69	scaled = 1;
70	scaleexp = expunder / 2;
71	a = scaleDouble_1(a, -scaleexp);
72	b = scaleDouble_1(b, -scaleexp);
73	sum = scaleDouble_2(sum, -2*scaleexp);
74	}
75	else
76	scaled = 0;
77
78	/* Split a into ha (head) and ta (tail). Do the same for b. */
79	ha = a;
80	GET_BITS_DP64(ha, u);
81	u &= 0xfffffffff8000000;
82	PUT_BITS_DP64(u, ha);
83	ta = a - ha;
84	hb = b;
85	GET_BITS_DP64(hb, u);
86	u &= 0xfffffffff8000000;
87	PUT_BITS_DP64(u, hb);
88	tb = b - hb;
89
90	/* Carefully multiply the parts together. z is the most significant
91	part of the result, and zz the least significant part */
92	z = a * b;
93	zz = (((ha * hb - z) + ha * tb) + ta * hb) + ta * tb;
94
95	/* Set az = abs(z), asum = abs(sum) */
96	GET_BITS_DP64(z, u);
97	u &= ~SIGNBIT_DP64;
98	PUT_BITS_DP64(u, az);
99	GET_BITS_DP64(sum, u);
100	u &= ~SIGNBIT_DP64;
101	PUT_BITS_DP64(u, asum);
102
103	/* Carefully add (z,zz) to sum */
104	r = z + sum;
105
106	if (az > asum)
107	s = ((z - r) + sum) + zz;
108	else
109	s = ((sum - r) + z) + zz;
110
111	if (scaled)
112	return scaleDouble_1(r + s, 2*scaleexp);
113	else
114	return r + s;
115	}
116
117	weak_alias (__fma, fma)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_fmaf.c.x86_64-new-libm (+116 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_SCALEFLOAT_1
		13	#define USE_SCALEFLOAT_2
		14	#include "libm_inlines_amd.h"
		15	#undef USE_SCALEFLOAT_1
		16	#undef USE_SCALEFLOAT_2
		17
		18	float __fmaf(float a, float b, float sum)
		19	{
		20	/* Returns a * b + sum with no intermediate loss of precision */
		21
		22	float ha, ta, hb, tb, z, zz, r, s, az, asum;
		23	int ua, ub, usum;
		24	int scaled, expover, expunder, scaleexp;
		25	unsigned int u;
		26
		27	GET_BITS_SP32(a, u);
		28	ua = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
		29	GET_BITS_SP32(b, u);
		30	ub = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
		31	GET_BITS_SP32(sum, u);
		32	usum = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
		33
		34	if (ua == EMAX_SP32 + 1 \|\| ub == EMAX_SP32 + 1 \|\| usum == EMAX_SP32 + 1)
		35	{
		36	/* One or more of the arguments is NaN or infinity. The
		37	result will also be NaN or infinity. */
		38	return a * b + sum;
		39	}
		40	else if (ua + ub > usum + 2 * MANTLENGTH_SP32)
		41	{
		42	/* sum is negligible compared with the extra-length product ab /
		43	return a*b;
		44	}
		45	else if (usum > ua + ub + MANTLENGTH_SP32)
		46	{
		47	/* The product ab is negligible compared with sum /
		48	return sum;
		49	}
		50
		51	expover = EMAX_SP32 - 2;
		52	expunder = EMIN_SP32 + MANTLENGTH_SP32;
		53	scaleexp = 0;
		54
		55	if (ua + ub > expover \|\| usum > expover)
		56	{
		57	/* The result is likely to overflow. Scale down in an attempt
		58	to avoid unnecessary overflow. The true result may still overflow. */
		59	scaled = 1;
		60	scaleexp = expover / 2;
		61	a = scaleFloat_1(a, -scaleexp);
		62	b = scaleFloat_1(b, -scaleexp);
		63	sum = scaleFloat_2(sum, -2*scaleexp);
		64	}
65	else if (ua + ub < expunder)
66	{
67	/* The product ab is near underflow; scale up /
68	scaled = 1;
69	scaleexp = expunder / 2;
70	a = scaleFloat_1(a, -scaleexp);
71	b = scaleFloat_1(b, -scaleexp);
72	sum = scaleFloat_2(sum, -2*scaleexp);
73	}
74	else
75	scaled = 0;
76
77	/* Split a into ha (head) and ta (tail). Do the same for b. */
78	ha = a;
79	GET_BITS_SP32(ha, u);
80	u &= 0xfffff000;
81	PUT_BITS_SP32(u, ha);
82	ta = a - ha;
83	hb = b;
84	GET_BITS_SP32(hb, u);
85	u &= 0xfffff000;
86	PUT_BITS_SP32(u, hb);
87	tb = b - hb;
88
89	/* Carefully multiply the parts together. z is the most significant
90	part of the result, and zz the least significant part */
91	z = a * b;
92	zz = (((ha * hb - z) + ha * tb) + ta * hb) + ta * tb;
93
94	/* Set az = abs(z), asum = abs(sum) */
95	GET_BITS_SP32(z, u);
96	u &= ~SIGNBIT_SP32;
97	PUT_BITS_SP32(u, az);
98	GET_BITS_SP32(sum, u);
99	u &= ~SIGNBIT_SP32;
100	PUT_BITS_SP32(u, asum);
101
102	/* Carefully add (z,zz) to sum */
103	r = z + sum;
104
105	if (az > asum)
106	s = ((z - r) + sum) + zz;
107	else
108	s = ((sum - r) + z) + zz;
109
110	if (scaled)
111	return scaleFloat_1(r + s, 2*scaleexp);
112	else
113	return r + s;
114	}
115
116	weak_alias (__fmaf, fmaf)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_logb.c.x86_64-new-libm (+62 lines)
	1	/*
	2	(C) 2002 Advanced Micro Devices, Inc.
	3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
	4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
	5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
	6	THIS LIBRARY**
	7	*/
	8
	9	#include "libm_amd.h"
	10	#include "libm_util_amd.h"
	11
	12	#define USE_INFINITY_WITH_FLAGS
	13	#include "libm_inlines_amd.h"
	14	#undef USE_INFINITY_WITH_FLAGS
	15
	16	double __logb(double x)
	17	{
	18
	19	unsigned long ux;
	20	long u;
	21	GET_BITS_DP64(x, ux);
	22	u = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
	23	if ((ux & ~SIGNBIT_DP64) == 0)
	24	/* x is +/-zero. Return -infinity with div-by-zero flag. */
	25	return -infinity_with_flags(AMD_F_DIVBYZERO);
	26	else if (EMIN_DP64 <= u && u <= EMAX_DP64)
	27	/* x is a normal number */
	28	return u;
	29	else if (u > EMAX_DP64)
	30	{
	31	/* x is infinity or NaN */
	32	if ((ux & MANTBITS_DP64) == 0)
	33	/* x is +/-infinity. Return +infinity with no flags. */
	34	return infinity_with_flags(0);
	35	else
	36	/* x is NaN, result is NaN */
	37	return x + x; /* Raise invalid if it is a signalling NaN */
	38	}
	39	else
	40	{
	41	/* x is denormalized. */
	42	#ifdef FOLLOW_IEEE754_LOGB
	43	/* Return the value of the minimum exponent to ensure that
	44	the relationship between logb and scalb, defined in
	45	IEEE 754, holds. */
	46	return EMIN_DP64;
	47	#else
	48	/* Follow the rule set by IEEE 854 for logb */
	49	ux &= MANTBITS_DP64;
	50	u = EMIN_DP64;
	51	while (ux < IMPBIT_DP64)
	52	{
	53	ux <<= 1;
	54	u--;
	55	}
	56	return u;
	57	#endif
	58	}
	59
	60	}
	61
	62	weak_alias (__logb, logb)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_modf.c.x86_64-new-libm (+59 lines)
	1	/*
	2	(C) 2002 Advanced Micro Devices, Inc.
	3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
	4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
	5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
	6	THIS LIBRARY**
	7	*/
	8
	9	#include "libm_amd.h"
	10	#include "libm_util_amd.h"
	11
	12	double __modf(double x, double *iptr)
	13	{
	14	/* modf splits the argument x into integer and fraction parts,
	15	each with the same sign as x. */
	16
	17
	18	long xexp;
	19	unsigned long ux, ax, mask;
	20
	21	GET_BITS_DP64(x, ux);
	22	ax = ux & (~SIGNBIT_DP64);
	23
	24	if (ax >= 0x4340000000000000)
	25	{
	26	/* abs(x) is either NaN, infinity, or >= 2^53 */
	27	if (ax > 0x7ff0000000000000)
	28	{
	29	/* x is NaN */
	30	*iptr = x;
	31	return x + x; /* Raise invalid if it is a signalling NaN */
	32	}
	33	else
	34	{
	35	/* x is infinity or large. Return zero with the sign of x */
	36	*iptr = x;
	37	PUT_BITS_DP64(ux & SIGNBIT_DP64, x);
	38	return x;
	39	}
	40	}
	41	else if (ax < 0x3ff0000000000000)
	42	{
	43	/* abs(x) < 1.0. Set iptr to zero with the sign of x
	44	and return x. */
	45	PUT_BITS_DP64(ux & SIGNBIT_DP64, *iptr);
	46	return x;
	47	}
	48	else
	49	{
	50	xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
	51	/* Mask out the bits of x that we don't want */
	52	mask = (1L << (EXPSHIFTBITS_DP64 - xexp)) - 1;
	53	PUT_BITS_DP64(ux & ~mask, *iptr);
	54	return x - *iptr;
	55	}
	56
	57	}
	58
	59	weak_alias (__modf, modf)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_sincos.c.x86_64-new-libm (+311 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9
		10	#include "libm_amd.h"
		11	#include "libm_util_amd.h"
		12
		13	#define USE_NAN_WITH_FLAGS
		14	#define USE_VAL_WITH_FLAGS
		15	#include "libm_inlines_amd.h"
		16	#undef USE_NAN_WITH_FLAGS
		17	#undef USE_VAL_WITH_FLAGS
		18
		19	/* sin(x) approximation valid on the interval [-pi/4,pi/4]. */
		20	static inline double sin_piby4(double x, double xx)
		21	{
		22	/* Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
		23	= x * (1 - x^2/3! + x^4/5! - x^6/7! ...
		24	= x * f(w)
		25	where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
		26	We use a minimax approximation of (f(w) - 1) / w
		27	because this produces an expansion in even powers of x.
		28	If xx (the tail of x) is non-zero, we add a correction
		29	term g(x,xx) = (1-xx/2)xx to the result, where g(x,xx)
		30	is an approximation to cos(x)*sin(xx) valid because
		31	xx is tiny relative to x.
		32	*/
		33	static const double
		34	c1 = -0.166666666666666646259241729,
		35	c2 = 0.833333333333095043065222816e-2,
		36	c3 = -0.19841269836761125688538679e-3,
		37	c4 = 0.275573161037288022676895908448e-5,
		38	c5 = -0.25051132068021699772257377197e-7,
		39	c6 = 0.159181443044859136852668200e-9;
		40	double x2, x3, r;
		41	x2 = x * x;
		42	x3 = x2 * x;
		43	r = (c2 + x2 * (c3 + x2 * (c4 + x2 * (c5 + x2 * c6))));
		44	if (xx == 0.0)
		45	return x + x3 * (c1 + x2 * r);
		46	else
		47	return x - ((x2 * (0.5 * xx - x3 * r) - xx) - x3 * c1);
		48	}
		49
		50	/* cos(x) approximation valid on the interval [-pi/4,pi/4]. */
		51	static inline double cos_piby4(double x, double xx)
		52	{
		53	/* Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
		54	= f(w)
		55	where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
		56	We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
		57	because this produces an expansion in even powers of x.
		58	If xx (the tail of x) is non-zero, we subtract a correction
		59	term g(x,xx) = x*xx to the result, where g(x,xx)
		60	is an approximation to sin(x)*sin(xx) valid because
		61	xx is tiny relative to x.
		62	*/
		63	double r, x2, t;
		64	static const double
65	c1 = 0.41666666666666665390037e-1,
66	c2 = -0.13888888888887398280412e-2,
67	c3 = 0.248015872987670414957399e-4,
68	c4 = -0.275573172723441909470836e-6,
69	c5 = 0.208761463822329611076335e-8,
70	c6 = -0.113826398067944859590880e-10;
71
72	x2 = x * x;
73	r = 0.5 * x2;
74	t = 1.0 - r;
75	return t + ((((1.0 - t) - r) - x * xx) + x2 * x2 *
76	(c1 + x2 * (c2 + x2 * (c3 + x2 * (c4 + x2 * (c5 + x2 * c6))))));
77	}
78
79	void __sincos(double x, double s, double c)
80	{
81	double r, rr;
82	int region, xneg;
83
84	unsigned long ux, ax;
85	GET_BITS_DP64(x, ux);
86	ax = (ux & ~SIGNBIT_DP64);
87	if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
88	{
89	if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
90	{
91	if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */
92	{
93	if (ax == 0x0000000000000000)
94	{
95	*s = x;
96	*c = 1.0;
97	}
98	else
99	{
100	*s = x;
101	*c = val_with_flags(1.0, AMD_F_INEXACT);
102	}
103	}
104	else
105	{
106	s = x - xxx0.166666666666666666;
107	c = 1.0 - xx*0.5;
108	}
109	}
110	else
111	{
112	*s = sin_piby4(x, 0.0);
113	*c = cos_piby4(x, 0.0);
114	}
115	return;
116	}
117	else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
118	{
119	/* x is either NaN or infinity */
120	if (ux & MANTBITS_DP64)
121	/* x is NaN */
122	s = c = x + x; /* Raise invalid if it is a signalling NaN */
123	else
124	/* x is infinity. Return a NaN */
125	s = c = nan_with_flags(AMD_F_INVALID);
126	return;
127	}
128
129	xneg = (ax != ux);
130
131
132	if (xneg)
133	x = -x;
134
135	/* Reduce x into range [-pi/4,pi/4] */
136	__remainder_piby2(x, &r, &rr, &region);
137
138	if (xneg)
139	{
140	switch (region)
141	{
142	default:
143	case 0:
144	*s = -sin_piby4(r, rr);
145	*c = cos_piby4(r, rr);
146	break;
147	case 1:
148	*s = -cos_piby4(r, rr);
149	*c = -sin_piby4(r, rr);
150	break;
151	case 2:
152	*s = sin_piby4(r, rr);
153	*c = -cos_piby4(r, rr);
154	break;
155	case 3:
156	*s = cos_piby4(r, rr);
157	*c = sin_piby4(r, rr);
158	break;
159	}
160	}
161	else
162	{
163	switch (region)
164	{
165	default:
166	case 0:
167	*s = sin_piby4(r, rr);
168	*c = cos_piby4(r, rr);
169	break;
170	case 1:
171	*s = cos_piby4(r, rr);
172	*c = -sin_piby4(r, rr);
173	break;
174	case 2:
175	*s = -sin_piby4(r, rr);
176	*c = -cos_piby4(r, rr);
177	break;
178	case 3:
179	*s = -cos_piby4(r, rr);
180	*c = sin_piby4(r, rr);
181	break;
182	}
183	}
184	return;
185	}
186
187	double __sin(double x)
188	{
189	double r, rr;
190	int region, xneg;
191
192	unsigned long ux, ax;
193	GET_BITS_DP64(x, ux);
194	ax = (ux & ~SIGNBIT_DP64);
195	if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
196	{
197	if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
198	{
199	if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */
200	{
201	if (ax == 0x0000000000000000)
202	return x;
203	else
204	return val_with_flags(x, AMD_F_INEXACT);
205	}
206	else
207	return x - xxx*0.166666666666666666;
208	}
209	else
210	return sin_piby4(x, 0.0);
211	}
212	else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
213	{
214	/* x is either NaN or infinity */
215	if (ux & MANTBITS_DP64)
216	/* x is NaN */
217	return x + x; /* Raise invalid if it is a signalling NaN */
218	else
219	/* x is infinity. Return a NaN */
220	return nan_with_flags(AMD_F_INVALID);
221	}
222	xneg = (ax != ux);
223
224
225	if (xneg)
226	x = -x;
227
228	/* Reduce x into range [-pi/4,pi/4] */
229	__remainder_piby2(x, &r, &rr, &region);
230
231	if (xneg)
232	{
233	switch (region)
234	{
235	default:
236	case 0: return -sin_piby4(r, rr);
237	case 1: return -cos_piby4(r, rr);
238	case 2: return sin_piby4(r, rr);
239	case 3: return cos_piby4(r, rr);
240	}
241	}
242	else
243	{
244	switch (region)
245	{
246	default:
247	case 0: return sin_piby4(r, rr);
248	case 1: return cos_piby4(r, rr);
249	case 2: return -sin_piby4(r, rr);
250	case 3: return -cos_piby4(r, rr);
251	}
252	}
253	}
254
255	double __cos(double x)
256	{
257	double r, rr;
258	int region, xneg;
259
260	unsigned long ux, ax;
261	GET_BITS_DP64(x, ux);
262	ax = (ux & ~SIGNBIT_DP64);
263	if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
264	{
265	if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
266	{
267	if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */
268	{
269	if (ax == 0x0000000000000000) /* abs(x) = 0.0 */
270	return 1.0;
271	else
272	return val_with_flags(1.0, AMD_F_INEXACT);
273	}
274	else
275	return 1.0 - xx0.5;
276	}
277	else
278	return cos_piby4(x, 0.0);
279	}
280	else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
281	{
282	/* x is either NaN or infinity */
283	if (ux & MANTBITS_DP64)
284	/* x is NaN */
285	return x + x; /* Raise invalid if it is a signalling NaN */
286	else
287	/* x is infinity. Return a NaN */
288	return nan_with_flags(AMD_F_INVALID);
289	}
290	xneg = (ax != ux);
291
292
293	if (xneg)
294	x = -x;
295
296	/* Reduce x into range [-pi/4,pi/4] */
297	__remainder_piby2(x, &r, &rr, &region);
298
299	switch (region)
300	{
301	default:
302	case 0: return cos_piby4(r, rr);
303	case 1: return -sin_piby4(r, rr);
304	case 2: return -cos_piby4(r, rr);
305	case 3: return sin_piby4(r, rr);
306	}
307	}
308
309	weak_alias (__sin, sin)
310	weak_alias (__cos, cos)
311	weak_alias (__sincos, sincos)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_tan.c.x86_64-new-libm (+145 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9
		10	#include "libm_amd.h"
		11	#include "libm_util_amd.h"
		12
		13	#define USE_NAN_WITH_FLAGS
		14	#define USE_VAL_WITH_FLAGS
		15	#include "libm_inlines_amd.h"
		16	#undef USE_NAN_WITH_FLAGS
		17	#undef USE_VAL_WITH_FLAGS
		18
		19	/* tan(x + xx) approximation valid on the interval [-pi/4,pi/4].
		20	If recip is true return -1/tan(x + xx) instead. */
		21	static inline double tan_piby4(double x, double xx, int recip)
		22	{
		23	double r, t1, t2, xl;
		24	int transform = 0;
		25	static const double
		26	piby4_lead = 7.85398163397448278999e-01, /* 0x3fe921fb54442d18 */
		27	piby4_tail = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */
		28
		29	/* In order to maintain relative precision transform using the identity:
		30	tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
		31	Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. */
		32
		33	if (x > 0.68)
		34	{
		35	transform = 1;
		36	x = piby4_lead - x;
		37	xl = piby4_tail - xx;
		38	x += xl;
		39	xx = 0.0;
		40	}
		41	else if (x < -0.68)
		42	{
		43	transform = -1;
		44	x = piby4_lead + x;
		45	xl = piby4_tail + xx;
		46	x += xl;
		47	xx = 0.0;
		48	}
		49
		50	/* Core Remez [2,3] approximation to tan(x+xx) on the
		51	interval [0,0.68]. */
		52
		53	r = xx + 2.0 x * xx;
		54	t1 = x;
		55	t2 = xx + xr
		56	(0.372379159759792203640806338901e0 +
		57	(-0.229345080057565662883358588111e-1 +
		58	0.224044448537022097264602535574e-3r)r)/
		59	(0.111713747927937668539901657944e1 +
		60	(-0.515658515729031149329237816945e0 +
		61	(0.260656620398645407524064091208e-1 -
		62	0.232371494088563558304549252913e-3r)r)*r);
		63
		64	/* Reconstruct tan(x) in the transformed case. */
65
66	if (transform)
67	{
68	double t;
69	t = t1 + t2;
70	if (recip)
71	return transform(2t/(t-1) - 1.0);
72	else
73	return transform(1.0 - 2t/(1+t));
74	}
75
76	if (recip)
77	{
78	/* Compute -1.0/(t1 + t2) accurately */
79	double trec, trec_top, z1, z2, t;
80	unsigned long u;
81	t = t1 + t2;
82	GET_BITS_DP64(t, u);
83	u &= 0xffffffff00000000;
84	PUT_BITS_DP64(u, z1);
85	z2 = t2 - (z1 - t1);
86	trec = -1.0 / t;
87	GET_BITS_DP64(trec, u);
88	u &= 0xffffffff00000000;
89	PUT_BITS_DP64(u, trec_top);
90	return trec_top + trec * ((1.0 + trec_top * z1) + trec_top * z2);
91
92	}
93	else
94	return t1 + t2;
95	}
96
97	double __tan(double x)
98	{
99	double r, rr;
100	int region, xneg;
101
102	unsigned long ux, ax;
103	GET_BITS_DP64(x, ux);
104	ax = (ux & ~SIGNBIT_DP64);
105	if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
106	{
107	if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
108	{
109	if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */
110	{
111	if (ax == 0x0000000000000000) return x;
112	else return val_with_flags(x, AMD_F_INEXACT);
113	}
114	else
115	return x + xxx*0.333333333333333333;
116	}
117	else
118	return tan_piby4(x, 0.0, 0);
119	}
120	else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
121	{
122	/* x is either NaN or infinity */
123	if (ux & MANTBITS_DP64)
124	/* x is NaN */
125	return x + x; /* Raise invalid if it is a signalling NaN */
126	else
127	/* x is infinity. Return a NaN */
128	return nan_with_flags(AMD_F_INVALID);
129	}
130	xneg = (ax != ux);
131
132
133	if (xneg)
134	x = -x;
135
136	/* Reduce x into range [-pi/4,pi/4] */
137	__remainder_piby2(x, &r, &rr, &region);
138
139	if (xneg)
140	return -tan_piby4(r, rr, region & 1);
141	else
142	return tan_piby4(r, rr, region & 1);
143	}
144
145	weak_alias (__tan, tan)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_tanf.c.x86_64-new-libm (+97 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9
		10	#include "libm_amd.h"
		11	#include "libm_util_amd.h"
		12
		13	#define USE_REMAINDER_PIBY2F_INLINE
		14	#define USE_VAL_WITH_FLAGS
		15	#define USE_NAN_WITH_FLAGS
		16	#include "libm_inlines_amd.h"
		17	#undef USE_VAL_WITH_FLAGS
		18	#undef USE_NAN_WITH_FLAGS
		19	#undef USE_REMAINDER_PIBY2F_INLINE
		20
		21	/* tan(x) approximation valid on the interval [-pi/4,pi/4].
		22	If recip is true return -1/tan(x) instead. */
		23	static inline double tanf_piby4(double x, int recip)
		24	{
		25	double r, t;
		26
		27	/* Core Remez [1,2] approximation to tan(x) on the
		28	interval [0,pi/4]. */
		29	r = x*x;
		30	t = x + xr
		31	(0.385296071263995406715129e0 -
		32	0.172032480471481694693109e-1 * r) /
		33	(0.115588821434688393452299e+1 +
		34	(-0.51396505478854532132342e0 +
		35	0.1844239256901656082986661e-1 * r) * r);
		36
		37	if (recip)
		38	return -1.0 / t;
		39	else
		40	return t;
		41	}
		42
		43	float __tanf(float x)
		44	{
		45	double r, dx;
		46	int region, xneg;
		47
		48	unsigned long ux, ax;
		49
		50	dx = x;
		51
		52	GET_BITS_DP64(dx, ux);
		53	ax = (ux & ~SIGNBIT_DP64);
		54
		55	if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
		56	{
		57	if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */
		58	{
		59	if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
		60	{
		61	if (ax == 0x0000000000000000)
		62	return dx;
		63	else
		64	return val_with_flags(dx, AMD_F_INEXACT);
65	}
66	else
67	return dx + dxdxdx*0.333333333333333333;
68	}
69	else
70	return tanf_piby4(dx, 0);
71	}
72	else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
73	{
74	/* x is either NaN or infinity */
75	if (ux & MANTBITS_DP64)
76	/* x is NaN */
77	return dx + dx; /* Raise invalid if it is a signalling NaN */
78	else
79	/* x is infinity. Return a NaN */
80	return nan_with_flags(AMD_F_INVALID);
81	}
82
83	xneg = (ux >> 63);
84
85	if (xneg)
86	x = -x;
87
88	/* Reduce x into range [-pi/4,pi/4] */
89	__remainder_piby2f_inline(x, ax, &r, &region);
90
91	if (xneg)
92	return -tanf_piby4(r, region & 1);
93	else
94	return tanf_piby4(r, region & 1);
95	}
96
97	weak_alias (__tanf, tanf)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_trunc.c.x86_64-new-libm (+48 lines)
	1	/*
	2	(C) 2002 Advanced Micro Devices, Inc.
	3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
	4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
	5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
	6	THIS LIBRARY**
	7	*/
	8
	9	#include "libm_amd.h"
	10	#include "libm_util_amd.h"
	11
	12	double __trunc(double x)
	13	{
	14	double r;
	15	long rexp;
	16	unsigned long ux, ax, mask;
	17
	18	GET_BITS_DP64(x, ux);
	19	ax = ux & (~SIGNBIT_DP64);
	20
	21	if (ax >= 0x4340000000000000)
	22	{
	23	/* abs(x) is either NaN, infinity, or >= 2^53 */
	24	if (ax > 0x7ff0000000000000)
	25	/* x is NaN */
	26	return x + x; /* Raise invalid if it is a signalling NaN */
	27	else
	28	return x;
	29	}
	30	else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */
	31	{
	32	/* Return zero with the sign of x */
	33	PUT_BITS_DP64(ux & SIGNBIT_DP64, x);
	34	return x;
	35	}
	36	else
	37	{
	38	r = x;
	39	rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
	40	/* Mask out the bits of r that we don't want */
	41	mask = (1L << (EXPSHIFTBITS_DP64 - rexp)) - 1;
	42	PUT_BITS_DP64(ux & ~mask, r);
	43	return r;
	44	}
	45
	46	}
	47
	48	weak_alias (__trunc, trunc)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_acos.c.x86_64-new-libm (+139 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_VAL_WITH_FLAGS
		13	#define USE_NAN_WITH_FLAGS
		14	#include "libm_inlines_amd.h"
		15	#undef USE_NAN_WITH_FLAGS
		16	#undef USE_VAL_WITH_FLAGS
		17
		18	/* Deal with errno for out-of-range argument */
		19	#include "libm_errno_amd.h"
		20	static inline double retval_errno_edom(double x)
		21	{
		22	struct exception exc;
		23	exc.arg1 = x;
		24	exc.arg2 = x;
		25	exc.type = DOMAIN;
		26	exc.name = (char *)"acos";
		27	if (_LIB_VERSION == _SVID_)
		28	exc.retval = HUGE;
		29	else
		30	exc.retval = nan_with_flags(AMD_F_INVALID);
		31	if (_LIB_VERSION == _POSIX_)
		32	__set_errno(EDOM);
		33	else if (!matherr(&exc))
		34	{
		35	if(_LIB_VERSION == _SVID_)
		36	(void)fputs("acos: DOMAIN error\n", stderr);
		37	__set_errno(EDOM);
		38	}
		39	return exc.retval;
		40	}
		41
		42	double __acos(double x)
		43	{
		44	/* Computes arccos(x).
		45	The argument is first reduced by noting that arccos(x)
		46	is invalid for abs(x) > 1. For denormal and small
		47	arguments arccos(x) = pi/2 to machine accuracy.
		48	Remaining argument ranges are handled as follows.
		49	For abs(x) <= 0.5 use
		50	arccos(x) = pi/2 - arcsin(x)
		51	= pi/2 - (x + x^3*R(x^2))
		52	where R(x^2) is a rational minimax approximation to
		53	(arcsin(x) - x)/x^3.
		54	For abs(x) > 0.5 exploit the identity:
		55	arccos(x) = pi - 2*arcsin(sqrt(1-x)/2)
		56	together with the above rational approximation, and
		57	reconstruct the terms carefully.
		58	*/
		59
		60	/* Some constants and split constants. */
		61
		62	static const double
		63	pi = 3.1415926535897933e+00, /* 0x400921fb54442d18 */
		64	piby2 = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
65	piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
66	piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */
67
68	double u, y, s=0.0, r;
69	int xexp, xnan, transform=0;
70
71	unsigned long ux, aux, xneg;
72	GET_BITS_DP64(x, ux);
73	aux = ux & ~SIGNBIT_DP64;
74	xneg = (ux & SIGNBIT_DP64);
75	xnan = (aux > PINFBITPATT_DP64);
76	xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
77
78	/* Special cases */
79
80	if (xexp < -56)
81	{ /* y small enough that arccos(x) = pi/2 */
82	return val_with_flags(piby2, AMD_F_INEXACT);
83	}
84	else if (xnan) return x + x;
85	else if (xexp >= 0)
86	{ /* abs(x) >= 1.0 */
87	if (x == 1.0) return 0.0;
88	else if (x == -1.0) return val_with_flags(pi, AMD_F_INEXACT);
89	else return retval_errno_edom(x);
90	}
91
92	if (xneg) y = -x;
93	else y = x;
94
95	transform = (xexp >= -1); /* abs(x) >= 0.5 */
96
97	if (transform)
98	{ /* Transform y into the range [0,0.5) */
99	transform = 1;
100	r = 0.5*(1-y);
101	/* Hammer sqrt instruction */
102	asm volatile ("sqrtsd %1, %0" : "=x" (s) : "x" (r));
103	y = s;
104	}
105	else
106	r = y*y;
107
108	/* Use a rational approximation for [0.0, 0.5] */
109
110	u = r*(0.227485835556935010735943483075 +
111	(-0.445017216867635649900123110649 +
112	(0.275558175256937652532686256258 +
113	(-0.0549989809235685841612020091328 +
114	(0.00109242697235074662306043804220 +
115	0.0000482901920344786991880522822991r)r)r)r)*r)/
116	(1.36491501334161032038194214209 +
117	(-3.28431505720958658909889444194 +
118	(2.76568859157270989520376345954 +
119	(-0.943639137032492685763471240072 +
120	0.105869422087204370341222318533r)r)r)r);
121
122	if (transform)
123	{ /* Reconstruct acos carefully in transformed region */
124	if (xneg) return pi - 2(s+(yu - piby2_tail));
125	else
126	{
127	double c, s1;
128	unsigned long us;
129	GET_BITS_DP64(s, us);
130	PUT_BITS_DP64(0xffffffff00000000 & us, s1);
131	c = (r-s1*s1)/(s+s1);
132	return 2s1 + (2c+2yu);
133	}
134	}
135	else
136	return piby2_head - (x - (piby2_tail - x*u));
137	}
138
139	weak_alias (__acos, acos)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_asin.c.x86_64-new-libm (+144 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_VAL_WITH_FLAGS
		13	#define USE_NAN_WITH_FLAGS
		14	#include "libm_inlines_amd.h"
		15	#undef USE_NAN_WITH_FLAGS
		16	#undef USE_VAL_WITH_FLAGS
		17
		18	/* Deal with errno for out-of-range argument */
		19	#include "libm_errno_amd.h"
		20	static inline double retval_errno_edom(double x)
		21	{
		22	struct exception exc;
		23	exc.arg1 = x;
		24	exc.arg2 = x;
		25	exc.type = DOMAIN;
		26	exc.name = (char *)"asin";
		27	if (_LIB_VERSION == _SVID_)
		28	exc.retval = HUGE;
		29	else
		30	exc.retval = nan_with_flags(AMD_F_INVALID);
		31	if (_LIB_VERSION == _POSIX_)
		32	__set_errno(EDOM);
		33	else if (!matherr(&exc))
		34	{
		35	if(_LIB_VERSION == _SVID_)
		36	(void)fputs("asin: DOMAIN error\n", stderr);
		37	__set_errno(EDOM);
		38	}
		39	return exc.retval;
		40	}
		41
		42	double __asin(double x)
		43	{
		44	/* Computes arcsin(x).
		45	The argument is first reduced by noting that arcsin(x)
		46	is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x).
		47	For denormal and small arguments arcsin(x) = x to machine
		48	accuracy. Remaining argument ranges are handled as follows.
		49	For abs(x) <= 0.5 use
		50	arcsin(x) = x + x^3*R(x^2)
		51	where R(x^2) is a rational minimax approximation to
		52	(arcsin(x) - x)/x^3.
		53	For abs(x) > 0.5 exploit the identity:
		54	arcsin(x) = pi/2 - 2*arcsin(sqrt(1-x)/2)
		55	together with the above rational approximation, and
		56	reconstruct the terms carefully.
		57	*/
		58
		59	/* Some constants and split constants. */
		60
		61	static const double
		62	piby2_tail = 6.1232339957367660e-17, /* 0x3c91a62633145c07 */
		63	hpiby2_head = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */
		64	piby2 = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */
65	double u, v, y, s=0.0, r;
66	int xexp, xnan, transform=0;
67
68	unsigned long ux, aux, xneg;
69	GET_BITS_DP64(x, ux);
70	aux = ux & ~SIGNBIT_DP64;
71	xneg = (ux & SIGNBIT_DP64);
72	xnan = (aux > PINFBITPATT_DP64);
73	xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
74
75	/* Special cases */
76
77	if (xexp < -28)
78	{ /* y small enough that arcsin(x) = x */
79	return val_with_flags(x, AMD_F_INEXACT);
80	}
81	else if (xnan) return x + x;
82	else if (xexp >= 0)
83	{ /* abs(x) >= 1.0 */
84	if (x == 1.0) return val_with_flags(piby2, AMD_F_INEXACT);
85	else if (x == -1.0) return val_with_flags(-piby2, AMD_F_INEXACT);
86	else return retval_errno_edom(x);
87
88	}
89
90	if (xneg) y = -x;
91	else y = x;
92
93	transform = (xexp >= -1); /* abs(x) >= 0.5 */
94
95	if (transform)
96	{ /* Transform y into the range [0,0.5) */
97	transform = 1;
98	r = 0.5*(1-y);
99	/* Hammer sqrt instruction */
100	asm volatile ("sqrtsd %1, %0" : "=x" (s) : "x" (r));
101	y = s;
102	}
103	else
104	{
105	r = y*y;
106	}
107
108	/* Use a rational approximation for [0.0, 0.5] */
109
110	u = r*(0.227485835556935010735943483075 +
111	(-0.445017216867635649900123110649 +
112	(0.275558175256937652532686256258 +
113	(-0.0549989809235685841612020091328 +
114	(0.00109242697235074662306043804220 +
115	0.0000482901920344786991880522822991r)r)r)r)*r)/
116	(1.36491501334161032038194214209 +
117	(-3.28431505720958658909889444194 +
118	(2.76568859157270989520376345954 +
119	(-0.943639137032492685763471240072 +
120	0.105869422087204370341222318533r)r)r)r);
121
122	if (transform)
123	{ /* Reconstruct asin carefully in transformed region */
124	{
125	double c, s1, p, q;
126	unsigned long us;
127	GET_BITS_DP64(s, us);
128	PUT_BITS_DP64(0xffffffff00000000 & us, s1);
129	c = (r-s1*s1)/(s+s1);
130	p = 2su-(piby2_tail-2*c);
131	q = hpiby2_head-2*s1;
132	v = hpiby2_head-(p-q);
133	}
134	}
135	else
136	{
137	v = y + y*u;
138	}
139
140	if (xneg) return -v;
141	else return v;
142	}
143
144	weak_alias (__asin, asin)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_exp.c.x86_64-new-libm (+159 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_SPLITEXP
		13	#define USE_SCALEDOUBLE_1
		14	#define USE_SCALEDOUBLE_2
		15	#define USE_ZERO_WITH_FLAGS
		16	#define USE_INFINITY_WITH_FLAGS
		17	#include "libm_inlines_amd.h"
		18	#undef USE_ZERO_WITH_FLAGS
		19	#undef USE_SPLITEXP
		20	#undef USE_SCALEDOUBLE_1
		21	#undef USE_SCALEDOUBLE_2
		22	#undef USE_INFINITY_WITH_FLAGS
		23
		24	/* Deal with errno for out-of-range result */
		25	#include "libm_errno_amd.h"
		26	static inline double retval_errno_erange_overflow(double x)
		27	{
		28	struct exception exc;
		29	exc.arg1 = x;
		30	exc.arg2 = x;
		31	exc.type = OVERFLOW;
		32	exc.name = (char *)"exp";
		33	if (_LIB_VERSION == _SVID_)
		34	exc.retval = HUGE;
		35	else
		36	exc.retval = infinity_with_flags(AMD_F_OVERFLOW \| AMD_F_INEXACT);
		37	if (_LIB_VERSION == _POSIX_)
		38	__set_errno(ERANGE);
		39	else if (!matherr(&exc))
		40	__set_errno(ERANGE);
		41	return exc.retval;
		42	}
		43
		44	static inline double retval_errno_erange_underflow(double x)
		45	{
		46	struct exception exc;
		47	exc.arg1 = x;
		48	exc.arg2 = x;
		49	exc.type = UNDERFLOW;
		50	exc.name = (char *)"exp";
		51	exc.retval = zero_with_flags(AMD_F_UNDERFLOW \| AMD_F_INEXACT);
		52	if (_LIB_VERSION == _POSIX_)
		53	__set_errno(ERANGE);
		54	else if (!matherr(&exc))
		55	__set_errno(ERANGE);
		56	return exc.retval;
		57	}
		58
		59	double __exp(double x)
		60	{
		61	static const double
		62	max_exp_arg = 7.09782712893383973096e+02, /* 0x40862e42fefa39ef */
		63	min_exp_arg = -7.45133219101941108420e+02, /* 0xc0874910d52d3051 */
		64	thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
65	log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
66	log2_by_32_trail = 5.68948749532545630390e-11; /* 0x3dcf473de6af278e */
67
68	double z1, z2, z;
69	int m;
70	unsigned long ux, ax;
71
72	/*
73	Computation of exp(x).
74
75	We compute the values m, z1, and z2 such that
76	exp(x) = 2*m (z1 + z2), where
77	exp(x) is the natural exponential of x.
78
79	Computations needed in order to obtain m, z1, and z2
80	involve three steps.
81
82	First, we reduce the argument x to the form
83	x = n * log2/32 + remainder,
84	where n has the value of an integer and \|remainder\| <= log2/64.
85	The value of n = x * 32/log2 rounded to the nearest integer and
86	the remainder = x - n*log2/32.
87
88	Second, we approximate exp(r1 + r2) - 1 where r1 is the leading
89	part of the remainder and r2 is the trailing part of the remainder.
90
91	Third, we reconstruct the exponential of x so that
92	exp(x) = 2*m (z1 + z2).
93	*/
94
95
96	GET_BITS_DP64(x, ux);
97	ax = ux & (~SIGNBIT_DP64);
98
99	if (ax >= 0x40862e42fefa39ef) /* abs(x) >= 709.78... */
100	{
101	if(ax >= 0x7ff0000000000000)
102	{
103	/* x is either NaN or infinity */
104	if (ux & MANTBITS_DP64)
105	/* x is NaN */
106	return x + x; /* Raise invalid if it is a signalling NaN */
107	else if (ux & SIGNBIT_DP64)
108	/* x is negative infinity; return 0.0 with no flags. */
109	return 0.0;
110	else
111	/* x is positive infinity */
112	return x;
113	}
114	if (x > max_exp_arg)
115	/* Return +infinity with overflow flag */
116	return retval_errno_erange_overflow(x);
117	else if (x < min_exp_arg)
118	/* x is negative. Return +zero with underflow and inexact flags */
119	return retval_errno_erange_underflow(x);
120	}
121
122	/* Handle small arguments separately */
123	if (ax < 0x3fb0000000000000) /* abs(x) < 1/16 */
124	{
125	if (ax < 0x3c00000000000000) /* abs(x) < 2^(-63) */
126	z = 1.0 + x; /* Raises inexact if x is non-zero */
127	else
128	z = ((((((((((
129	1.0/3628800)*x+
130	1.0/362880)*x+
131	1.0/40320)*x+
132	1.0/5040)*x+
133	1.0/720)*x+
134	1.0/120)*x+
135	1.0/24)*x+
136	1.0/6)*x+
137	1.0/2)*x+
138	1.0)*x + 1.0;
139	}
140	else
141	{
142	/* Find m, z1 and z2 such that exp(x) = 2*m (z1 + z2) */
143
144	splitexp(x, 1.0, thirtytwo_by_log2, log2_by_32_lead, log2_by_32_trail,
145	&m, &z1, &z2);
146
147	/* Scale (z1 + z2) by 2.0*m /
148
149	if (m >= EMIN_DP64 && m <= EMAX_DP64)
150	z = scaleDouble_1((z1+z2),m);
151	else
152	z = scaleDouble_2((z1+z2),m);
153	}
154	return z;
155	}
156
157
158	weak_alias (__exp, __ieee754_exp)
159	weak_alias (__exp, exp)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_exp10.c.x86_64-new-libm (+158 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_SPLITEXP
		13	#define USE_SCALEDOUBLE_1
		14	#define USE_SCALEDOUBLE_2
		15	#define USE_ZERO_WITH_FLAGS
		16	#define USE_INFINITY_WITH_FLAGS
		17	#include "libm_inlines_amd.h"
		18	#undef USE_SPLITEXP
		19	#undef USE_SCALEDOUBLE_1
		20	#undef USE_SCALEDOUBLE_2
		21	#undef USE_ZERO_WITH_FLAGS
		22	#undef USE_INFINITY_WITH_FLAGS
		23
		24	/* Deal with errno for out-of-range result */
		25	#include "libm_errno_amd.h"
		26	static inline double retval_errno_erange_overflow(double x)
		27	{
		28	struct exception exc;
		29	exc.arg1 = x;
		30	exc.arg2 = x;
		31	exc.type = OVERFLOW;
		32	exc.name = (char *)"exp10";
		33	if (_LIB_VERSION == _SVID_)
		34	exc.retval = HUGE;
		35	else
		36	exc.retval = infinity_with_flags(AMD_F_OVERFLOW \| AMD_F_INEXACT);
		37	if (_LIB_VERSION == _POSIX_)
		38	__set_errno(ERANGE);
		39	else if (!matherr(&exc))
		40	__set_errno(ERANGE);
		41	return exc.retval;
		42	}
		43
		44	static inline double retval_errno_erange_underflow(double x)
		45	{
		46	struct exception exc;
		47	exc.arg1 = x;
		48	exc.arg2 = x;
		49	exc.type = UNDERFLOW;
		50	exc.name = (char *)"exp10";
		51	exc.retval = zero_with_flags(AMD_F_UNDERFLOW \| AMD_F_INEXACT);
		52	if (_LIB_VERSION == _POSIX_)
		53	__set_errno(ERANGE);
		54	else if (!matherr(&exc))
		55	__set_errno(ERANGE);
		56	return exc.retval;
		57	}
		58
		59	double __exp10(double x)
		60	{
		61	static const double
		62	max_exp10_arg = 3.0825471555991674677e+02, /* 0x40734413509f79ff */
		63	min_exp10_arg = -3.2330621534311580944e+02, /* 0xc07434e6420f4374 */
		64	log10 = 2.30258509299404568401e+00, /* 0x40026bb1bbb55516 */
65	thirtytwo_by_log10of2 = 1.06301699036395595131e+02, /* 0x405a934f0979a371 */
66	log10of2_by_32_lead = 9.40718688070774078369e-03, /* 0x3F83441340000000 */
67	log10of2_by_32_trail = 4.83791671566737916758e-10; /* 0x3E009F79FEF311F1 */
68
69	double y, z1, z2, z;
70	int m;
71	unsigned long ux, ax;
72
73	/*
74	Computation of exp10(x).
75
76	We compute the values m, z1, and z2 such that
77	exp10(x) = 2*m (z1 + z2), where exp10(x) is 10**x.
78
79	Computations needed in order to obtain m, z1, and z2
80	involve three steps.
81
82	First, we reduce the argument x to the form
83	x = n * log10of2/32 + remainder,
84	where n has the value of an integer and \|remainder\| <= log10of2/64.
85	The value of n = x * 32/log10of2 rounded to the nearest integer and
86	the remainder = x - n*log10of2/32.
87
88	Second, we approximate exp10(r1 + r2) - 1 where r1 is the leading
89	part of the remainder and r2 is the trailing part of the remainder.
90
91	Third, we reconstruct exp10(x) so that
92	exp10(x) = 2*m (z1 + z2).
93	*/
94
95
96	GET_BITS_DP64(x, ux);
97	ax = ux & (~SIGNBIT_DP64);
98
99	if (ax >= 0x40734413509f79ff) /* abs(x) >= 308.25... */
100	{
101	if(ax >= 0x7ff0000000000000)
102	{
103	/* x is either NaN or infinity */
104	if (ux & MANTBITS_DP64)
105	/* x is NaN */
106	return x + x; /* Raise invalid if it is a signalling NaN */
107	else if (ux & SIGNBIT_DP64)
108	/* x is negative infinity; return 0.0 with no flags. */
109	return 0.0;
110	else
111	/* x is positive infinity */
112	return x;
113	}
114	if (x > max_exp10_arg)
115	/* Return +infinity with overflow flag */
116	return retval_errno_erange_overflow(x);
117	else if (x < min_exp10_arg)
118	/* x is negative. Return +zero with underflow and inexact flags */
119	return retval_errno_erange_underflow(x);
120	}
121
122
123	/* Handle small arguments separately */
124	if (ax < 0x3f9bcb7b131bbb9d) /* abs(x) < 1/(16log10) /
125	{
126	if (ax < 0x3c00000000000000) /* abs(x) < 2^(-63) */
127	return 1.0 + x; /* Raises inexact if x is non-zero */
128	else
129	y = log10*x;
130	z = ((((((((((
131	1.0/3628800)*y+
132	1.0/362880)*y+
133	1.0/40320)*y+
134	1.0/5040)*y+
135	1.0/720)*y+
136	1.0/120)*y+
137	1.0/24)*y+
138	1.0/6)*y+
139	1.0/2)*y+
140	1.0)*y + 1.0;
141	}
142	else
143	{
144	/* Find m, z1 and z2 such that exp10(x) = 2*m (z1 + z2) */
145
146	splitexp(x, log10, thirtytwo_by_log10of2, log10of2_by_32_lead,
147	log10of2_by_32_trail, &m, &z1, &z2);
148
149	/* Scale (z1 + z2) by 2.0*m /
150	if (m > EMIN_DP64 && m < EMAX_DP64)
151	return scaleDouble_1((z1+z2),m);
152	else
153	return scaleDouble_2((z1+z2),m);
154	}
155	return z;
156	}
157
158	weak_alias (__exp10, exp10)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_exp2.c.x86_64-new-libm (+172 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_SPLITEXP
		13	#define USE_SCALEDOUBLE_1
		14	#define USE_SCALEDOUBLE_2
		15	#define USE_ZERO_WITH_FLAGS
		16	#define USE_INFINITY_WITH_FLAGS
		17	#include "libm_inlines_amd.h"
		18	#undef USE_ZERO_WITH_FLAGS
		19	#undef USE_SPLITEXP
		20	#undef USE_SCALEDOUBLE_1
		21	#undef USE_SCALEDOUBLE_2
		22	#undef USE_INFINITY_WITH_FLAGS
		23
		24	/* Deal with errno for out-of-range result */
		25	#include "libm_errno_amd.h"
		26	static inline double retval_errno_erange_overflow(double x)
		27	{
		28	struct exception exc;
		29	exc.arg1 = x;
		30	exc.arg2 = x;
		31	exc.type = OVERFLOW;
		32	exc.name = (char *)"exp2";
		33	if (_LIB_VERSION == _SVID_)
		34	exc.retval = HUGE;
		35	else
		36	exc.retval = infinity_with_flags(AMD_F_OVERFLOW \| AMD_F_INEXACT);
		37	if (_LIB_VERSION == _POSIX_)
		38	__set_errno(ERANGE);
		39	else if (!matherr(&exc))
		40	__set_errno(ERANGE);
		41	return exc.retval;
		42	}
		43
		44	static inline double retval_errno_erange_underflow(double x)
		45	{
		46	struct exception exc;
		47	exc.arg1 = x;
		48	exc.arg2 = x;
		49	exc.type = UNDERFLOW;
		50	exc.name = (char *)"exp2";
		51	exc.retval = zero_with_flags(AMD_F_UNDERFLOW \| AMD_F_INEXACT);
		52	if (_LIB_VERSION == _POSIX_)
		53	__set_errno(ERANGE);
		54	else if (!matherr(&exc))
		55	__set_errno(ERANGE);
		56	return exc.retval;
		57	}
		58
		59	double __exp2(double x)
		60	{
		61	static const double
		62	max_exp2_arg = 1024.0, /* 0x4090000000000000 */
		63	min_exp2_arg = -1074.0, /* 0xc090c80000000000 */
		64	log2 = 6.931471805599453094178e-01, /* 0x3fe62e42fefa39ef */
65	log2_lead = 6.93147167563438415527E-01, /* 0x3fe62e42f8000000 */
66	log2_tail = 1.29965068938898869640E-08, /* 0x3e4be8e7bcd5e4f1 */
67	one_by_32_lead = 0.03125;
68
69	double y, z1, z2, z, hx, tx, y1, y2;
70	int m;
71	unsigned long ux, ax;
72
73	/*
74	Computation of exp2(x).
75
76	We compute the values m, z1, and z2 such that
77	exp2(x) = 2*m (z1 + z2), where exp2(x) is 2**x.
78
79	Computations needed in order to obtain m, z1, and z2
80	involve three steps.
81
82	First, we reduce the argument x to the form
83	x = n/32 + remainder,
84	where n has the value of an integer and \|remainder\| <= 1/64.
85	The value of n = x * 32 rounded to the nearest integer and
86	the remainder = x - n/32.
87
88	Second, we approximate exp2(r1 + r2) - 1 where r1 is the leading
89	part of the remainder and r2 is the trailing part of the remainder.
90
91	Third, we reconstruct exp2(x) so that
92	exp2(x) = 2*m (z1 + z2).
93	*/
94
95
96	GET_BITS_DP64(x, ux);
97	ax = ux & (~SIGNBIT_DP64);
98
99	if (ax >= 0x4090000000000000) /* abs(x) >= 1024.0 */
100	{
101	if(ax >= 0x7ff0000000000000)
102	{
103	/* x is either NaN or infinity */
104	if (ux & MANTBITS_DP64)
105	/* x is NaN */
106	return x + x; /* Raise invalid if it is a signalling NaN */
107	else if (ux & SIGNBIT_DP64)
108	/* x is negative infinity; return 0.0 with no flags. */
109	return 0.0;
110	else
111	/* x is positive infinity */
112	return x;
113	}
114	if (x > max_exp2_arg)
115	/* Return +infinity with overflow flag */
116	return retval_errno_erange_overflow(x);
117	else if (x < min_exp2_arg)
118	/* x is negative. Return +zero with underflow and inexact flags */
119	return retval_errno_erange_underflow(x);
120	}
121
122
123	/* Handle small arguments separately */
124	if (ax < 0x3fb7154764ee6c2f) /* abs(x) < 1/(16log2) /
125	{
126	if (ax < 0x3c00000000000000) /* abs(x) < 2^(-63) */
127	return 1.0 + x; /* Raises inexact if x is non-zero */
128	else
129	{
130	/* Split x into hx (head) and tx (tail). */
131	unsigned long u;
132	hx = x;
133	GET_BITS_DP64(hx, u);
134	u &= 0xfffffffff8000000;
135	PUT_BITS_DP64(u, hx);
136	tx = x - hx;
137	/* Carefully multiply x by log2. y1 is the most significant
138	part of the result, and y2 the least significant part */
139	y1 = x * log2_lead;
140	y2 = (((hx * log2_lead - y1) + hx * log2_tail) +
141	tx * log2_lead) + tx * log2_tail;
142
143	y = y1 + y2;
144	z = (9.99564649780173690e-1 +
145	(1.61251249355268050e-5 +
146	(2.37986978239838493e-2 +
147	2.68724774856111190e-7y)y)*y)/
148	(9.99564649780173692e-1 +
149	(-4.99766199765151309e-1 +
150	(1.070876894098586184e-1 +
151	(-1.189773642681502232e-2 +
152	5.9480622371960190616e-4y)y)y)y);
153	z = ((z * y1) + (z * y2)) + 1.0;
154	}
155	}
156	else
157	{
158	/* Find m, z1 and z2 such that exp2(x) = 2*m (z1 + z2) */
159
160	splitexp(x, log2, 32.0, one_by_32_lead, 0.0, &m, &z1, &z2);
161
162	/* Scale (z1 + z2) by 2.0*m /
163	if (m > EMIN_DP64 && m < EMAX_DP64)
164	z = scaleDouble_1((z1+z2),m);
165	else
166	z = scaleDouble_2((z1+z2),m);
167	}
168	return z;
169	}
170
171	weak_alias (__exp2, exp2)
172	weak_alias (__exp2, __libm_ieee754_exp2)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_expf.c.x86_64-new-libm (+154 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_SPLITEXPF
		13	#define USE_SCALEFLOAT_1
		14	#define USE_SCALEFLOAT_2
		15	#define USE_ZEROF_WITH_FLAGS
		16	#define USE_INFINITYF_WITH_FLAGS
		17	#include "libm_inlines_amd.h"
		18	#undef USE_SPLITEXPF
		19	#undef USE_SCALEFLOAT_1
		20	#undef USE_SCALEFLOAT_2
		21	#undef USE_ZEROF_WITH_FLAGS
		22	#undef USE_INFINITYF_WITH_FLAGS
		23
		24	/* Deal with errno for out-of-range result */
		25	#include "libm_errno_amd.h"
		26	static inline float retval_errno_erange_overflow(float x)
		27	{
		28	struct exception exc;
		29	exc.arg1 = (double)x;
		30	exc.arg2 = (double)x;
		31	exc.type = OVERFLOW;
		32	exc.name = (char *)"expf";
		33	if (_LIB_VERSION == _SVID_)
		34	exc.retval = HUGE;
		35	else
		36	exc.retval = infinityf_with_flags(AMD_F_OVERFLOW \| AMD_F_INEXACT);
		37	if (_LIB_VERSION == _POSIX_)
		38	__set_errno(ERANGE);
		39	else if (!matherr(&exc))
		40	__set_errno(ERANGE);
		41	return exc.retval;
		42	}
		43
		44	static inline float retval_errno_erange_underflow(float x)
		45	{
		46	struct exception exc;
		47	exc.arg1 = (double)x;
		48	exc.arg2 = (double)x;
		49	exc.type = UNDERFLOW;
		50	exc.name = (char *)"expf";
		51	exc.retval = zerof_with_flags(AMD_F_UNDERFLOW \| AMD_F_INEXACT);
		52	if (_LIB_VERSION == _POSIX_)
		53	__set_errno(ERANGE);
		54	else if (!matherr(&exc))
		55	__set_errno(ERANGE);
		56	return exc.retval;
		57	}
		58
		59	float __expf(float x)
		60	{
		61	static const float
		62	max_exp_arg = 8.8722839355E+01, /* 0x42B17218 */
		63	min_exp_arg = -1.0327893066E+02, /* 0xC2CE8ED0 */
		64	thirtytwo_by_log2 = 4.6166240692E+01, /* 0x4238AA3B */
65	log2_by_32_lead = 2.1659851074E-02, /* 0x3CB17000 */
66	log2_by_32_tail = 9.9831822808E-07; /* 0x3585FDF4 */
67
68	float z1, z2, z;
69	int m;
70	unsigned int ux, ax;
71
72	/*
73	Computation of exp(x).
74
75	We compute the values m, z1, and z2 such that
76	exp(x) = 2*m (z1 + z2), where
77	exp(x) is the natural exponential of x.
78
79	Computations needed in order to obtain m, z1, and z2
80	involve three steps.
81
82	First, we reduce the argument x to the form
83	x = n * log2/32 + remainder,
84	where n has the value of an integer and \|remainder\| <= log2/64.
85	The value of n = x * 32/log2 rounded to the nearest integer and
86	the remainder = x - n*log2/32.
87
88	Second, we approximate exp(r1 + r2) - 1 where r1 is the leading
89	part of the remainder and r2 is the trailing part of the remainder.
90
91	Third, we reconstruct the exponential of x so that
92	exp(x) = 2*m (z1 + z2).
93	*/
94
95	GET_BITS_SP32(x, ux);
96	ax = ux & (~SIGNBIT_SP32);
97
98	if (ax >= 0x42B17218) /* abs(x) >= 88.7... */
99	{
100	if(ax >= 0x7f800000)
101	{
102	/* x is either NaN or infinity */
103	if (ux & MANTBITS_SP32)
104	/* x is NaN */
105	return x + x; /* Raise invalid if it is a signalling NaN */
106	else if (ux & SIGNBIT_SP32)
107	/* x is negative infinity; return 0.0 with no flags */
108	return 0.0;
109	else
110	/* x is positive infinity */
111	return x;
112	}
113	if (x > max_exp_arg)
114	/* Return +infinity with overflow flag */
115	return retval_errno_erange_overflow(x);
116	else if (x < min_exp_arg)
117	/* x is negative. Return +zero with underflow and inexact flags */
118	return retval_errno_erange_underflow(x);
119	}
120
121	/* Handle small arguments separately */
122	if (ax < 0x3c800000) /* abs(x) < 1/64 */
123	{
124	if (ax < 0x32800000) /* abs(x) < 2^(-26) */
125	return 1.0 + x; /* Raises inexact if x is non-zero */
126	else
127	z = (((((((
128	1.0/5040)*x+
129	1.0/720)*x+
130	1.0/120)*x+
131	1.0/24)*x+
132	1.0/6)*x+
133	1.0/2)*x+
134	1.0)*x + 1.0;
135	}
136	else
137	{
138	/* Find m and z such that exp(x) = 2*m (z1 + z2) */
139
140	splitexpf(x, 1.0, thirtytwo_by_log2, log2_by_32_lead,
141	log2_by_32_tail, &m, &z1, &z2);
142
143	/* Scale (z1 + z2) by 2.0*m /
144
145	if (m >= EMIN_SP32 && m <= EMAX_SP32)
146	z = scaleFloat_1((z1+z2),m);
147	else
148	z = scaleFloat_2((z1+z2),m);
149	}
150	return z;
151	}
152
153	weak_alias (__expf, expf)
154	weak_alias (__expf, __ieee754_expf)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_hypot.c.x86_64-new-libm (+190 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_SCALEDOUBLE_1
		13	#define USE_INFINITY_WITH_FLAGS
		14	#include "libm_inlines_amd.h"
		15	#undef USE_SCALEDOUBLE_1
		16	#undef USE_INFINITY_WITH_FLAGS
		17
		18	/* Deal with errno for out-of-range result */
		19	#include "libm_errno_amd.h"
		20	static inline double retval_errno_erange_overflow(double x, double y)
		21	{
		22	struct exception exc;
		23	exc.arg1 = x;
		24	exc.arg2 = y;
		25	exc.type = OVERFLOW;
		26	exc.name = (char *)"hypot";
		27	if (_LIB_VERSION == _SVID_)
		28	exc.retval = HUGE;
		29	else
		30	exc.retval = infinity_with_flags(AMD_F_OVERFLOW \| AMD_F_INEXACT);
		31	if (_LIB_VERSION == _POSIX_)
		32	__set_errno(ERANGE);
		33	else if (!matherr(&exc))
		34	__set_errno(ERANGE);
		35	return exc.retval;
		36	}
		37
		38	double __hypot(double x, double y)
		39	{
		40	/* Returns sqrt(xx + yy) with no overflow or underflow unless
		41	the result warrants it */
		42
		43	const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */
		44
		45	double u, r, retval, hx, tx, x2, hy, ty, y2, hs, ts;
		46	unsigned long xexp, yexp, ux, uy, ut;
		47	int dexp, expadjust;
		48
		49	GET_BITS_DP64(x, ux);
		50	ux &= ~SIGNBIT_DP64;
		51	GET_BITS_DP64(y, uy);
		52	uy &= ~SIGNBIT_DP64;
		53	xexp = (ux >> EXPSHIFTBITS_DP64);
		54	yexp = (uy >> EXPSHIFTBITS_DP64);
		55
		56	if (xexp == BIASEDEMAX_DP64 + 1 \|\| yexp == BIASEDEMAX_DP64 + 1)
		57	{
		58	/* One or both of the arguments are NaN or infinity. The
		59	result will also be NaN or infinity. */
		60	retval = xx + yy;
		61	if (((xexp == BIASEDEMAX_DP64 + 1) && !(ux & MANTBITS_DP64)) \|\|
		62	((yexp == BIASEDEMAX_DP64 + 1) && !(uy & MANTBITS_DP64)))
		63	/* x or y is infinity. ISO C99 defines that we must
		64	return +infinity, even if the other argument is NaN.
65	Note that the computation of xx + yy above will already
66	have raised invalid if either x or y is a signalling NaN. */
67	return infinity_with_flags(0);
68	else
69	/* One or both of x or y is NaN, and neither is infinity.
70	Raise invalid if it's a signalling NaN */
71	return retval;
72	}
73
74	/* Set x = abs(x) and y = abs(y) */
75	PUT_BITS_DP64(ux, x);
76	PUT_BITS_DP64(uy, y);
77
78	/* The difference in exponents between x and y */
79	dexp = xexp - yexp;
80	expadjust = 0;
81
82	if (ux == 0)
83	/* x is zero */
84	return y;
85	else if (uy == 0)
86	/* y is zero */
87	return x;
88	else if (dexp > MANTLENGTH_DP64 + 1 \|\| dexp < -MANTLENGTH_DP64 - 1)
89	/* One of x and y is insignificant compared to the other */
90	return x + y; /* Raise inexact */
91	else if (xexp > EXPBIAS_DP64 + 500 \|\| yexp > EXPBIAS_DP64 + 500)
92	{
93	/* Danger of overflow; scale down by 2*600. /
94	expadjust = 600;
95	ux -= 0x2580000000000000;
96	PUT_BITS_DP64(ux, x);
97	uy -= 0x2580000000000000;
98	PUT_BITS_DP64(uy, y);
99	}
100	else if (xexp < EXPBIAS_DP64 - 500 \|\| yexp < EXPBIAS_DP64 - 500)
101	{
102	/* Danger of underflow; scale up by 2*600. /
103	expadjust = -600;
104	if (xexp == 0)
105	{
106	/* x is denormal - handle by adding 601 to the exponent
107	and then subtracting a correction for the implicit bit */
108	PUT_BITS_DP64(ux + 0x2590000000000000, x);
109	x -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
110	GET_BITS_DP64(x, ux);
111	}
112	else
113	{
114	/* x is normal - just increase the exponent by 600 */
115	ux += 0x2580000000000000;
116	PUT_BITS_DP64(ux, x);
117	}
118	if (yexp == 0)
119	{
120	PUT_BITS_DP64(uy + 0x2590000000000000, y);
121	y -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
122	GET_BITS_DP64(y, uy);
123	}
124	else
125	{
126	uy += 0x2580000000000000;
127	PUT_BITS_DP64(uy, y);
128	}
129	}
130
131
132	#ifdef FAST_BUT_GREATER_THAN_ONE_ULP
133	/* Not awful, but results in accuracy loss larger than 1 ulp */
134	r = xx + yy
135	#else
136	/* Slower but more accurate */
137
138	/* Sort so that x is greater than y */
139	if (x < y)
140	{
141	u = y;
142	y = x;
143	x = u;
144	ut = ux;
145	ux = uy;
146	uy = ut;
147	}
148
149	/* Split x into hx and tx, head and tail */
150	PUT_BITS_DP64(ux & 0xfffffffff8000000, hx);
151	tx = x - hx;
152
153	PUT_BITS_DP64(uy & 0xfffffffff8000000, hy);
154	ty = y - hy;
155
156	/* Compute r = xx + yy with extra precision */
157	x2 = x*x;
158	y2 = y*y;
159	hs = x2 + y2;
160
161	if (dexp == 0)
162	/* We take most care when x and y have equal exponents,
163	i.e. are almost the same size */
164	ts = (((x2 - hs) + y2) +
165	((hx * hx - x2) + 2 * hx * tx) + tx * tx) +
166	((hy * hy - y2) + 2 * hy * ty) + ty * ty;
167	else
168	ts = (((x2 - hs) + y2) +
169	((hx * hx - x2) + 2 * hx * tx) + tx * tx);
170
171	r = hs + ts;
172	#endif
173
174	/* The sqrt can introduce another half ulp error. */
175	/* Hammer sqrt instruction */
176	asm volatile ("sqrtsd %1, %0" : "=x" (retval) : "x" (r));
177
178	/* If necessary scale the result back. This may lead to
179	overflow but if so that's the correct result. */
180	retval = scaleDouble_1(retval, expadjust);
181
182	if (retval > large)
183	/* The result overflowed. Deal with errno. */
184	return retval_errno_erange_overflow(x, y);
185
186	return retval;
187	}
188
189	weak_alias (__hypot, hypot)
190	weak_alias (__hypot, __ieee754_hypot)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_log.c.x86_64-new-libm (+489 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_NAN_WITH_FLAGS
		13	#define USE_INFINITY_WITH_FLAGS
		14	#include "libm_inlines_amd.h"
		15	#undef USE_NAN_WITH_FLAGS
		16	#undef USE_INFINITY_WITH_FLAGS
		17
		18	/* Deal with errno for out-of-range result */
		19	#include "libm_errno_amd.h"
		20	static inline double retval_errno_erange_overflow(double x)
		21	{
		22	struct exception exc;
		23	exc.arg1 = x;
		24	exc.arg2 = x;
		25	exc.type = SING;
		26	#if defined(COMPILING_LOG10)
		27	exc.name = (char *)"log10";
		28	#elif defined(COMPILING_LOG2)
		29	exc.name = (char *)"log2";
		30	#else
		31	exc.name = (char *)"log";
		32	#endif
		33	if (_LIB_VERSION == _SVID_)
		34	exc.retval = -HUGE;
		35	else
		36	exc.retval = -infinity_with_flags(AMD_F_DIVBYZERO);
		37	if (_LIB_VERSION == _POSIX_)
		38	__set_errno(ERANGE);
		39	else if (!matherr(&exc))
		40	__set_errno(ERANGE);
		41	return exc.retval;
		42	}
		43
		44	/* Deal with errno for out-of-range argument */
		45	static inline double retval_errno_edom(double x)
		46	{
		47	struct exception exc;
		48	exc.arg1 = x;
		49	exc.arg2 = x;
		50	exc.type = DOMAIN;
		51	#if defined(COMPILING_LOG10)
		52	exc.name = (char *)"log10";
		53	#elif defined(COMPILING_LOG2)
		54	exc.name = (char *)"log2";
		55	#else
		56	exc.name = (char *)"log";
		57	#endif
		58	if (_LIB_VERSION == _SVID_)
		59	exc.retval = -HUGE;
		60	else
		61	exc.retval = nan_with_flags(AMD_F_INVALID);
		62	if (_LIB_VERSION == _POSIX_)
		63	__set_errno(EDOM);
		64	else if (!matherr(&exc))
65	{
66	if(_LIB_VERSION == _SVID_)
67	#if defined(COMPILING_LOG10)
68	(void)fputs("log10: DOMAIN error\n", stderr);
69	#elif defined(COMPILING_LOG2)
70	(void)fputs("log2: DOMAIN error\n", stderr);
71	#else
72	(void)fputs("log: DOMAIN error\n", stderr);
73	#endif
74	__set_errno(EDOM);
75	}
76	return exc.retval;
77	}
78
79	#if defined(COMPILING_LOG10)
80	double __log10(double x)
81	#elif defined(COMPILING_LOG2)
82	double __log2(double x)
83	#else
84	double __log(double x)
85	#endif
86	{
87
88	int expadjust, xexp;
89	double r, r1, r2, correction, f, f1, f2, q, u, v, z1, z2, poly;
90	int index;
91	unsigned long ux;
92	#if defined(COMPILING_LOG10) \|\| defined (COMPILING_LOG2)
93	unsigned long ut;
94	#endif
95
96	/*
97	Computes natural log(x). Algorithm based on:
98	Ping-Tak Peter Tang
99	"Table-driven implementation of the logarithm function in IEEE
100	floating-point arithmetic"
101	ACM Transactions on Mathematical Software (TOMS)
102	Volume 16, Issue 4 (December 1990)
103	*/
104
105	/* Arrays ln_lead_table and ln_tail_table contain
106	leading and trailing parts respectively of precomputed
107	values of natural log(1+i/64), for i = 0, 1, ..., 64.
108	ln_lead_table contains the first 24 bits of precision,
109	and ln_tail_table contains a further 53 bits precision. */
110
111	static const double ln_lead_table[65] = {
112	0.00000000000000000000e+00, /* 0x0000000000000000 */
113	1.55041813850402832031e-02, /* 0x3f8fc0a800000000 */
114	3.07716131210327148438e-02, /* 0x3f9f829800000000 */
115	4.58095073699951171875e-02, /* 0x3fa7745800000000 */
116	6.06245994567871093750e-02, /* 0x3faf0a3000000000 */
117	7.52233862876892089844e-02, /* 0x3fb341d700000000 */
118	8.96121263504028320312e-02, /* 0x3fb6f0d200000000 */
119	1.03796780109405517578e-01, /* 0x3fba926d00000000 */
120	1.17783010005950927734e-01, /* 0x3fbe270700000000 */
121	1.31576299667358398438e-01, /* 0x3fc0d77e00000000 */
122	1.45181953907012939453e-01, /* 0x3fc2955280000000 */
123	1.58604979515075683594e-01, /* 0x3fc44d2b00000000 */
124	1.71850204467773437500e-01, /* 0x3fc5ff3000000000 */
125	1.84922337532043457031e-01, /* 0x3fc7ab8900000000 */
126	1.97825729846954345703e-01, /* 0x3fc9525a80000000 */
127	2.10564732551574707031e-01, /* 0x3fcaf3c900000000 */
128	2.23143517971038818359e-01, /* 0x3fcc8ff780000000 */
129	2.35566020011901855469e-01, /* 0x3fce270700000000 */
130	2.47836112976074218750e-01, /* 0x3fcfb91800000000 */
131	2.59957492351531982422e-01, /* 0x3fd0a324c0000000 */
132	2.71933674812316894531e-01, /* 0x3fd1675c80000000 */
133	2.83768117427825927734e-01, /* 0x3fd22941c0000000 */
134	2.95464158058166503906e-01, /* 0x3fd2e8e280000000 */
135	3.07025015354156494141e-01, /* 0x3fd3a64c40000000 */
136	3.18453729152679443359e-01, /* 0x3fd4618bc0000000 */
137	3.29753279685974121094e-01, /* 0x3fd51aad80000000 */
138	3.40926527976989746094e-01, /* 0x3fd5d1bd80000000 */
139	3.51976394653320312500e-01, /* 0x3fd686c800000000 */
140	3.62905442714691162109e-01, /* 0x3fd739d7c0000000 */
141	3.73716354370117187500e-01, /* 0x3fd7eaf800000000 */
142	3.84411692619323730469e-01, /* 0x3fd89a3380000000 */
143	3.94993782043457031250e-01, /* 0x3fd9479400000000 */
144	4.05465066432952880859e-01, /* 0x3fd9f323c0000000 */
145	4.15827870368957519531e-01, /* 0x3fda9cec80000000 */
146	4.26084339618682861328e-01, /* 0x3fdb44f740000000 */
147	4.36236739158630371094e-01, /* 0x3fdbeb4d80000000 */
148	4.46287095546722412109e-01, /* 0x3fdc8ff7c0000000 */
149	4.56237375736236572266e-01, /* 0x3fdd32fe40000000 */
150	4.66089725494384765625e-01, /* 0x3fddd46a00000000 */
151	4.75845873355865478516e-01, /* 0x3fde744240000000 */
152	4.85507786273956298828e-01, /* 0x3fdf128f40000000 */
153	4.95077252388000488281e-01, /* 0x3fdfaf5880000000 */
154	5.04556000232696533203e-01, /* 0x3fe02552a0000000 */
155	5.13945698738098144531e-01, /* 0x3fe0723e40000000 */
156	5.23248136043548583984e-01, /* 0x3fe0be72e0000000 */
157	5.32464742660522460938e-01, /* 0x3fe109f380000000 */
158	5.41597247123718261719e-01, /* 0x3fe154c3c0000000 */
159	5.50647079944610595703e-01, /* 0x3fe19ee6a0000000 */
160	5.59615731239318847656e-01, /* 0x3fe1e85f40000000 */
161	5.68504691123962402344e-01, /* 0x3fe23130c0000000 */
162	5.77315330505371093750e-01, /* 0x3fe2795e00000000 */
163	5.86049020290374755859e-01, /* 0x3fe2c0e9e0000000 */
164	5.94707071781158447266e-01, /* 0x3fe307d720000000 */
165	6.03290796279907226562e-01, /* 0x3fe34e2880000000 */
166	6.11801505088806152344e-01, /* 0x3fe393e0c0000000 */
167	6.20240390300750732422e-01, /* 0x3fe3d90260000000 */
168	6.28608644008636474609e-01, /* 0x3fe41d8fe0000000 */
169	6.36907458305358886719e-01, /* 0x3fe4618bc0000000 */
170	6.45137906074523925781e-01, /* 0x3fe4a4f840000000 */
171	6.53301239013671875000e-01, /* 0x3fe4e7d800000000 */
172	6.61398470401763916016e-01, /* 0x3fe52a2d20000000 */
173	6.69430613517761230469e-01, /* 0x3fe56bf9c0000000 */
174	6.77398800849914550781e-01, /* 0x3fe5ad4040000000 */
175	6.85303986072540283203e-01, /* 0x3fe5ee02a0000000 */
176	6.93147122859954833984e-01}; /* 0x3fe62e42e0000000 */
177
178	static const double ln_tail_table[65] = {
179	0.00000000000000000000e+00, /* 0x0000000000000000 */
180	5.15092497094772879206e-09, /* 0x3e361f807c79f3db */
181	4.55457209735272790188e-08, /* 0x3e6873c1980267c8 */
182	2.86612990859791781788e-08, /* 0x3e5ec65b9f88c69e */
183	2.23596477332056055352e-08, /* 0x3e58022c54cc2f99 */
184	3.49498983167142274770e-08, /* 0x3e62c37a3a125330 */
185	3.23392843005887000414e-08, /* 0x3e615cad69737c93 */
186	1.35722380472479366661e-08, /* 0x3e4d256ab1b285e9 */
187	2.56504325268044191098e-08, /* 0x3e5b8abcb97a7aa2 */
188	5.81213608741512136843e-08, /* 0x3e6f34239659a5dc */
189	5.59374849578288093334e-08, /* 0x3e6e07fd48d30177 */
190	5.06615629004996189970e-08, /* 0x3e6b32df4799f4f6 */
191	5.24588857848400955725e-08, /* 0x3e6c29e4f4f21cf8 */
192	9.61968535632653505972e-10, /* 0x3e1086c848df1b59 */
193	1.34829655346594463137e-08, /* 0x3e4cf456b4764130 */
194	3.65557749306383026498e-08, /* 0x3e63a02ffcb63398 */
195	3.33431709374069198903e-08, /* 0x3e61e6a6886b0976 */
196	5.13008650536088382197e-08, /* 0x3e6b8abcb97a7aa2 */
197	5.09285070380306053751e-08, /* 0x3e6b578f8aa35552 */
198	3.20853940845502057341e-08, /* 0x3e6139c871afb9fc */
199	4.06713248643004200446e-08, /* 0x3e65d5d30701ce64 */
200	5.57028186706125221168e-08, /* 0x3e6de7bcb2d12142 */
201	5.48356693724804282546e-08, /* 0x3e6d708e984e1664 */
202	1.99407553679345001938e-08, /* 0x3e556945e9c72f36 */
203	1.96585517245087232086e-09, /* 0x3e20e2f613e85bda */
204	6.68649386072067321503e-09, /* 0x3e3cb7e0b42724f6 */
205	5.89936034642113390002e-08, /* 0x3e6fac04e52846c7 */
206	2.85038578721554472484e-08, /* 0x3e5e9b14aec442be */
207	5.09746772910284482606e-08, /* 0x3e6b5de8034e7126 */
208	5.54234668933210171467e-08, /* 0x3e6dc157e1b259d3 */
209	6.29100830926604004874e-09, /* 0x3e3b05096ad69c62 */
210	2.61974119468563937716e-08, /* 0x3e5c2116faba4cdd */
211	4.16752115011186398935e-08, /* 0x3e665fcc25f95b47 */
212	2.47747534460820790327e-08, /* 0x3e5a9a08498d4850 */
213	5.56922172017964209793e-08, /* 0x3e6de647b1465f77 */
214	2.76162876992552906035e-08, /* 0x3e5da71b7bf7861d */
215	7.08169709942321478061e-09, /* 0x3e3e6a6886b09760 */
216	5.77453510221151779025e-08, /* 0x3e6f0075eab0ef64 */
217	4.43021445893361960146e-09, /* 0x3e33071282fb989b */
218	3.15140984357495864573e-08, /* 0x3e60eb43c3f1bed2 */
219	2.95077445089736670973e-08, /* 0x3e5faf06ecb35c84 */
220	1.44098510263167149349e-08, /* 0x3e4ef1e63db35f68 */
221	1.05196987538551827693e-08, /* 0x3e469743fb1a71a5 */
222	5.23641361722697546261e-08, /* 0x3e6c1cdf404e5796 */
223	7.72099925253243069458e-09, /* 0x3e4094aa0ada625e */
224	5.62089493829364197156e-08, /* 0x3e6e2d4c96fde3ec */
225	3.53090261098577946927e-08, /* 0x3e62f4d5e9a98f34 */
226	3.80080516835568242269e-08, /* 0x3e6467c96ecc5cbe */
227	5.66961038386146408282e-08, /* 0x3e6e7040d03dec5a */
228	4.42287063097349852717e-08, /* 0x3e67bebf4282de36 */
229	3.45294525105681104660e-08, /* 0x3e6289b11aeb783f */
230	2.47132034530447431509e-08, /* 0x3e5a891d1772f538 */
231	3.59655343422487209774e-08, /* 0x3e634f10be1fb591 */
232	5.51581770357780862071e-08, /* 0x3e6d9ce1d316eb93 */
233	3.60171867511861372793e-08, /* 0x3e63562a19a9c442 */
234	1.94511067964296180547e-08, /* 0x3e54e2adf548084c */
235	1.54137376631349347838e-08, /* 0x3e508ce55cc8c97a */
236	3.93171034490174464173e-09, /* 0x3e30e2f613e85bda */
237	5.52990607758839766440e-08, /* 0x3e6db03ebb0227bf */
238	3.29990737637586136511e-08, /* 0x3e61b75bb09cb098 */
239	1.18436010922446096216e-08, /* 0x3e496f16abb9df22 */
240	4.04248680368301346709e-08, /* 0x3e65b3f399411c62 */
241	2.27418915900284316293e-08, /* 0x3e586b3e59f65355 */
242	1.70263791333409206020e-08, /* 0x3e52482ceae1ac12 */
243	5.76999904754328540596e-08}; /* 0x3e6efa39ef35793c */
244
245	#ifndef COMPILING_LOG2
246	/* log2_lead and log2_tail sum to an extra-precise version
247	of log(2) */
248	static const double
249	log2_lead = 6.93147122859954833984e-01, /* 0x3fe62e42e0000000 */
250	log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */
251	#endif
252
253	static const double
254	/* Approximating polynomial coefficients for x near 1.0 */
255	ca_1 = 8.33333333333317923934e-02, /* 0x3fb55555555554e6 */
256	ca_2 = 1.25000000037717509602e-02, /* 0x3f89999999bac6d4 */
257	ca_3 = 2.23213998791944806202e-03, /* 0x3f62492307f1519f */
258	ca_4 = 4.34887777707614552256e-04, /* 0x3f3c8034c85dfff0 */
259
260	/* Approximating polynomial coefficients for other x */
261	cb_1 = 8.33333333333333593622e-02, /* 0x3fb5555555555557 */
262	cb_2 = 1.24999999978138668903e-02, /* 0x3f89999999865ede */
263	cb_3 = 2.23219810758559851206e-03; /* 0x3f6249423bd94741 */
264
265	#if defined(COMPILING_LOG10)
266	/* log10e_lead and log10e_tail sum to an extra-precision
267	version of log10(e) (19 bits in lead) */
268	static const double
269	log10e_lead = 4.34293746948242187500e-01, /* 0x3fdbcb7800000000 */
270	log10e_tail = 7.3495500964015109100644e-7; /* 0x3ea8a93728719535 */
271	#elif defined(COMPILING_LOG2)
272	/* log2e_lead and log2e_tail sum to an extra-precision
273	version of log2(e) (19 bits in lead) */
274	static const double
275	log2e_lead = 1.44269180297851562500E+00, /* 0x3FF7154400000000 */
276	log2e_tail = 3.23791044778235969970E-06; /* 0x3ECB295C17F0BBBE */
277	#endif
278
279	static const unsigned long
280	log_thresh1 = 0x3fee0faa00000000,
281	log_thresh2 = 0x3ff1082c00000000;
282
283
284	GET_BITS_DP64(x, ux);
285
286	if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
287	{
288	/* x is either NaN or infinity */
289	if (ux & MANTBITS_DP64)
290	/* x is NaN */
291	return x + x; /* Raise invalid if it is a signalling NaN */
292	else
293	{
294	/* x is infinity */
295	if (ux & SIGNBIT_DP64)
296	/* x is negative infinity. Return a NaN. */
297	return retval_errno_edom(x);
298	else
299	return x;
300	}
301	}
302	else if (!(ux & ~SIGNBIT_DP64))
303	/* x is +/-zero. Return -infinity with div-by-zero flag. */
304	return retval_errno_erange_overflow(x);
305	else if (ux & SIGNBIT_DP64)
306	/* x is negative. Return a NaN. */
307	return retval_errno_edom(x);
308
309
310	/* log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000
311	log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000 */
312	if (ux >= log_thresh1 && ux <= log_thresh2)
313	{
314	/* Arguments close to 1.0 are handled separately to maintain
315	accuracy.
316
317	The approximation in this region exploits the identity
318	log( 1 + r ) = log( 1 + u/2 ) - log( 1 - u/2 ), where
319	u = 2r / (2+r).
320	Note that the right hand side has an odd Taylor series expansion
321	which converges much faster than the Taylor series expansion of
322	log( 1 + r ) in r. Thus, we approximate log( 1 + r ) by
323	u + A1 * u^3 + A2 * u^5 + ... + An * u^(2n+1).
324
325	One subtlety is that since u cannot be calculated from
326	r exactly, the rounding error in the first u should be
327	avoided if possible. To accomplish this, we observe that
328	u = r - r*r/(2+r).
329	Since x (=1+r) is the input argument, and thus presumed exact,
330	the formula above approximates u accurately because
331	u = r - correction,
332	and the magnitude of "correction" (of the order of r*r)
333	is small.
334	With these observations, we will approximate log( 1 + r ) by
335	r + ( (A1u^3 + ... + Anu^(2n+1)) - correction ).
336
337	We approximate log(1+r) by an odd polynomial in u, where
338	u = 2r/(2+r) = r - r*r/(2+r).
339	*/
340	r = x - 1.0;
341	u = r / (2.0 + r);
342	correction = r * u;
343	u = u + u;
344	v = u * u;
345	r1 = r;
346	r2 = (u * v * (ca_1 + v * (ca_2 + v * (ca_3 + v * ca_4))) - correction);
347	#if defined(COMPILING_LOG10)
348	/* At this point r1,r2 is an extra-precise approximation to
349	natural log(x). Convert it to log10(x) by multiplying
350	carefully by log10(e).
351	Shift some bits from r1 to r2 so that log10e_lead*r1
352	can be computed without rounding error */
353	r = r1;
354	GET_BITS_DP64(r1, ut);
355	PUT_BITS_DP64(ut & 0xffffffff00000000, r1);
356	r2 = r2 + (r - r1);
357	return (((log10e_tailr2) + log10e_tailr1) + log10e_lead*r2) +
358	log10e_lead*r1;
359	#elif defined(COMPILING_LOG2)
360	/* Similarly handle log2(x) by multiplying carefully by log2(e). */
361	r = r1;
362	GET_BITS_DP64(r1, ut);
363	PUT_BITS_DP64(ut & 0xffffffff00000000, r1);
364	r2 = r2 + (r - r1);
365	return (((log2e_tailr2) + log2e_tailr1) + log2e_lead*r2) +
366	log2e_lead*r1;
367	#else
368	return r1 + r2;
369	#endif
370	}
371	else
372	{
373	/*
374	First, we decompose the argument x to the form
375	x = 2*M (F1 + F2),
376	where 1 <= F1+F2 < 2, M has the value of an integer,
377	F1 = 1 + j/64, j ranges from 0 to 64, and \|F2\| <= 1/128.
378
379	Second, we approximate log( 1 + F2/F1 ) by an odd polynomial
380	in U, where U = 2 F2 / (2 F1 + F2).
381	Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ).
382	The core approximation calculates
383	Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U - 1.
384	Note that log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ),
385	thus, Poly = 2 arctanh( U/2 ) / U - 1.
386
387	It is not hard to see that
388	log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ).
389	Hence, we return Z1 = log(F1), and Z2 = log( 1 + F2/F1).
390	The values of log(F1) are calculated beforehand and stored
391	in the program.
392	*/
393
394	if (ux < IMPBIT_DP64)
395	{
396	/* The input argument x is denormalized */
397	/* Normalize f by increasing the exponent by 60
398	and subtracting a correction to account for the implicit
399	bit. This replaces a slow denormalized
400	multiplication by a fast normal subtraction. */
401	static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */
402	PUT_BITS_DP64(ux \| 0x03d0000000000000, f);
403	f -= corr;
404	GET_BITS_DP64(f, ux);
405	expadjust = 60;
406	}
407	else
408	{
409	f = x;
410	expadjust = 0;
411	}
412
413	/* Store the exponent of x in xexp and put
414	f into the range [0.5,1) */
415	xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust;
416	PUT_BITS_DP64((ux & MANTBITS_DP64) \| HALFEXPBITS_DP64, f);
417
418	/* Now x = 2*xexp f, 1/2 <= f < 1. */
419
420	/* Set index to be the nearest integer to 128f /
421	/*
422	r = 128.0 * f;
423	index = (int)(r + 0.5);
424	*/
425	/* This code instead of the above can save several cycles.
426	It only works because 64 <= r < 128, so
427	the nearest integer is always contained in exactly
428	7 bits, and the right shift is always the same. */
429	index = (((ux & 0x000fc00000000000) \| 0x0010000000000000) >> 46)
430	+ ((ux & 0x0000200000000000) >> 45);
431
432	z1 = ln_lead_table[index-64];
433	q = ln_tail_table[index-64];
434	f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
435	f2 = f - f1;
436	/* At this point, x = 2*xexp ( f1 + f2 ) where
437	f1 = j/128, j = 64, 65, ..., 128 and \|f2\| <= 1/256. */
438
439	/* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5f2 ) /
440	u = f2 / (f1 + 0.5 * f2);
441
442	/* Here, \|u\| <= 2(exp(1/16)-1) / (exp(1/16)+1).
443	The core approximation calculates
444	poly = [log(1 + u/2) - log(1 - u/2)]/u - 1 */
445	v = u * u;
446	poly = (v * (cb_1 + v * (cb_2 + v * cb_3)));
447	z2 = q + (u + u * poly);
448
449	/* Now z1,z2 is an extra-precise approximation of log(2f). */
450
451	#if defined (COMPILING_LOG10)
452	/* Add xexp * log(2) to z1,z2 to get log(x). */
453	r1 = (xexp * log2_lead + z1);
454	r2 = (xexp * log2_tail + z2);
455	/* At this point r1,r2 is an extra-precise approximation to
456	natural log(x). Convert it to log10(x) by multiplying
457	carefully by log10(e). */
458	return (((log10e_tailr2) + log10e_tailr1) + log10e_lead*r2) +
459	log10e_lead*r1;
460	#elif defined(COMPILING_LOG2)
461	/* Convert to log2(x) by multiplying carefully by log2(e)
462	and adding xexp. */
463	r1 = xexp + log2e_lead*z1;
464	r2 = (((log2e_tailz2) + log2e_tailz1) + log2e_lead*z2);
465	return r1 + r2;
466	#else
467	/* Add xexp * log(2) to z1,z2 to get the result log(x).
468	The computed r1 is not subject to rounding error because
469	xexp has at most 10 significant bits, log(2) has 24 significant
470	bits, and z1 has up to 24 bits; and the exponents of z1
471	and z2 differ by at most 6. */
472	r1 = (xexp * log2_lead + z1);
473	r2 = (xexp * log2_tail + z2);
474	/* Natural log(x) */
475	return r1 + r2;
476	#endif
477	}
478	}
479
/* Alias the routine built from this file to its public name and to the
   matching __ieee754_ name.  Exactly one pair is emitted, selected by
   COMPILING_LOG10 / COMPILING_LOG2 (neither defined: natural log).
   NOTE(review): weak_alias is not defined in this listing; presumably it
   is glibc's macro that makes the second name a weak alias of the first
   -- confirm. */
480	#if defined(COMPILING_LOG10)
481	weak_alias (__log10, log10)
482	weak_alias (__log10, __ieee754_log10)
483	#elif defined(COMPILING_LOG2)
484	weak_alias (__log2, log2)
485	weak_alias (__log2, __ieee754_log2)
486	#else
487	weak_alias (__log, log)
488	weak_alias (__log, __ieee754_log)
489	#endif

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_logf.c.x86_64-new-libm (+375 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_NANF_WITH_FLAGS
		13	#define USE_INFINITYF_WITH_FLAGS
		14	#include "libm_inlines_amd.h"
		15	#undef USE_NANF_WITH_FLAGS
		16	#undef USE_INFINITYF_WITH_FLAGS
		17
		18	/* Deal with errno for out-of-range result */
		19	#include "libm_errno_amd.h"
		20	static inline float retval_errno_erange_overflow(float x)
		21	{
		22	struct exception exc;
		23	exc.arg1 = (double)x;
		24	exc.arg2 = (double)x;
		25	exc.type = SING;
		26	#if defined(COMPILING_LOG10)
		27	exc.name = (char *)"log10f";
		28	#elif defined(COMPILING_LOG2)
		29	exc.name = (char *)"log2f";
		30	#else
		31	exc.name = (char *)"logf";
		32	#endif
		33	if (_LIB_VERSION == _SVID_)
		34	exc.retval = -HUGE;
		35	else
		36	exc.retval = -infinityf_with_flags(AMD_F_DIVBYZERO);
		37	if (_LIB_VERSION == _POSIX_)
		38	__set_errno(ERANGE);
		39	else if (!matherr(&exc))
		40	__set_errno(ERANGE);
		41	return exc.retval;
		42	}
		43
		44	/* Deal with errno for out-of-range argument */
		45	static inline float retval_errno_edom(float x)
		46	{
		47	struct exception exc;
		48	exc.arg1 = (double)x;
		49	exc.arg2 = (double)x;
		50	exc.type = DOMAIN;
		51	#if defined(COMPILING_LOG10)
		52	exc.name = (char *)"log10f";
		53	#elif defined(COMPILING_LOG2)
		54	exc.name = (char *)"log2f";
		55	#else
		56	exc.name = (char *)"logf";
		57	#endif
		58	if (_LIB_VERSION == _SVID_)
		59	exc.retval = -HUGE;
		60	else
		61	exc.retval = nanf_with_flags(AMD_F_INVALID);
		62	if (_LIB_VERSION == _POSIX_)
		63	__set_errno(EDOM);
		64	else if (!matherr(&exc))
65	{
66	if(_LIB_VERSION == _SVID_)
67	#if defined(COMPILING_LOG10)
68	(void)fputs("log10f: DOMAIN error\n", stderr);
69	#elif defined(COMPILING_LOG2)
70	(void)fputs("log2f: DOMAIN error\n", stderr);
71	#else
72	(void)fputs("logf: DOMAIN error\n", stderr);
73	#endif
74	__set_errno(EDOM);
75	}
76	return exc.retval;
77	}
78
79	#if defined(COMPILING_LOG10)
80	float __log10f(float fx)
81	#elif defined(COMPILING_LOG2)
82	float __log2f(float fx)
83	#else
84	float __logf(float fx)
85	#endif
86	{
87
88	double x = fx;
89
90	int xexp;
91	double r, f, f1, f2, q, u, v, z1, z2, poly;
92	int index;
93	unsigned long ux;
94
95	/*
96	Computes natural log(x) for float arguments. Algorithm is
97	basically a promotion of the arguments to double followed
98	by an inlined version of the double algorithm, simplified
99	for efficiency (see log_amd.c). Simplifications include:
100	* Special algorithm for arguments near 1.0 not required
101	* Scaling of denormalised arguments not required
102	* Shorter core series approximations used
103	*/
104
105	/* Arrays ln_lead_table and ln_tail_table contain
106	leading and trailing parts respectively of precomputed
107	values of natural log(1+i/64), for i = 0, 1, ..., 64.
108	ln_lead_table contains the first 24 bits of precision,
109	and ln_tail_table contains a further 53 bits precision. */
110
111	static const double ln_lead_table[65] = {
112	0.00000000000000000000e+00, /* 0x0000000000000000 */
113	1.55041813850402832031e-02, /* 0x3f8fc0a800000000 */
114	3.07716131210327148438e-02, /* 0x3f9f829800000000 */
115	4.58095073699951171875e-02, /* 0x3fa7745800000000 */
116	6.06245994567871093750e-02, /* 0x3faf0a3000000000 */
117	7.52233862876892089844e-02, /* 0x3fb341d700000000 */
118	8.96121263504028320312e-02, /* 0x3fb6f0d200000000 */
119	1.03796780109405517578e-01, /* 0x3fba926d00000000 */
120	1.17783010005950927734e-01, /* 0x3fbe270700000000 */
121	1.31576299667358398438e-01, /* 0x3fc0d77e00000000 */
122	1.45181953907012939453e-01, /* 0x3fc2955280000000 */
123	1.58604979515075683594e-01, /* 0x3fc44d2b00000000 */
124	1.71850204467773437500e-01, /* 0x3fc5ff3000000000 */
125	1.84922337532043457031e-01, /* 0x3fc7ab8900000000 */
126	1.97825729846954345703e-01, /* 0x3fc9525a80000000 */
127	2.10564732551574707031e-01, /* 0x3fcaf3c900000000 */
128	2.23143517971038818359e-01, /* 0x3fcc8ff780000000 */
129	2.35566020011901855469e-01, /* 0x3fce270700000000 */
130	2.47836112976074218750e-01, /* 0x3fcfb91800000000 */
131	2.59957492351531982422e-01, /* 0x3fd0a324c0000000 */
132	2.71933674812316894531e-01, /* 0x3fd1675c80000000 */
133	2.83768117427825927734e-01, /* 0x3fd22941c0000000 */
134	2.95464158058166503906e-01, /* 0x3fd2e8e280000000 */
135	3.07025015354156494141e-01, /* 0x3fd3a64c40000000 */
136	3.18453729152679443359e-01, /* 0x3fd4618bc0000000 */
137	3.29753279685974121094e-01, /* 0x3fd51aad80000000 */
138	3.40926527976989746094e-01, /* 0x3fd5d1bd80000000 */
139	3.51976394653320312500e-01, /* 0x3fd686c800000000 */
140	3.62905442714691162109e-01, /* 0x3fd739d7c0000000 */
141	3.73716354370117187500e-01, /* 0x3fd7eaf800000000 */
142	3.84411692619323730469e-01, /* 0x3fd89a3380000000 */
143	3.94993782043457031250e-01, /* 0x3fd9479400000000 */
144	4.05465066432952880859e-01, /* 0x3fd9f323c0000000 */
145	4.15827870368957519531e-01, /* 0x3fda9cec80000000 */
146	4.26084339618682861328e-01, /* 0x3fdb44f740000000 */
147	4.36236739158630371094e-01, /* 0x3fdbeb4d80000000 */
148	4.46287095546722412109e-01, /* 0x3fdc8ff7c0000000 */
149	4.56237375736236572266e-01, /* 0x3fdd32fe40000000 */
150	4.66089725494384765625e-01, /* 0x3fddd46a00000000 */
151	4.75845873355865478516e-01, /* 0x3fde744240000000 */
152	4.85507786273956298828e-01, /* 0x3fdf128f40000000 */
153	4.95077252388000488281e-01, /* 0x3fdfaf5880000000 */
154	5.04556000232696533203e-01, /* 0x3fe02552a0000000 */
155	5.13945698738098144531e-01, /* 0x3fe0723e40000000 */
156	5.23248136043548583984e-01, /* 0x3fe0be72e0000000 */
157	5.32464742660522460938e-01, /* 0x3fe109f380000000 */
158	5.41597247123718261719e-01, /* 0x3fe154c3c0000000 */
159	5.50647079944610595703e-01, /* 0x3fe19ee6a0000000 */
160	5.59615731239318847656e-01, /* 0x3fe1e85f40000000 */
161	5.68504691123962402344e-01, /* 0x3fe23130c0000000 */
162	5.77315330505371093750e-01, /* 0x3fe2795e00000000 */
163	5.86049020290374755859e-01, /* 0x3fe2c0e9e0000000 */
164	5.94707071781158447266e-01, /* 0x3fe307d720000000 */
165	6.03290796279907226562e-01, /* 0x3fe34e2880000000 */
166	6.11801505088806152344e-01, /* 0x3fe393e0c0000000 */
167	6.20240390300750732422e-01, /* 0x3fe3d90260000000 */
168	6.28608644008636474609e-01, /* 0x3fe41d8fe0000000 */
169	6.36907458305358886719e-01, /* 0x3fe4618bc0000000 */
170	6.45137906074523925781e-01, /* 0x3fe4a4f840000000 */
171	6.53301239013671875000e-01, /* 0x3fe4e7d800000000 */
172	6.61398470401763916016e-01, /* 0x3fe52a2d20000000 */
173	6.69430613517761230469e-01, /* 0x3fe56bf9c0000000 */
174	6.77398800849914550781e-01, /* 0x3fe5ad4040000000 */
175	6.85303986072540283203e-01, /* 0x3fe5ee02a0000000 */
176	6.93147122859954833984e-01}; /* 0x3fe62e42e0000000 */
177
178	static const double ln_tail_table[65] = {
179	0.00000000000000000000e+00, /* 0x0000000000000000 */
180	5.15092497094772879206e-09, /* 0x3e361f807c79f3db */
181	4.55457209735272790188e-08, /* 0x3e6873c1980267c8 */
182	2.86612990859791781788e-08, /* 0x3e5ec65b9f88c69e */
183	2.23596477332056055352e-08, /* 0x3e58022c54cc2f99 */
184	3.49498983167142274770e-08, /* 0x3e62c37a3a125330 */
185	3.23392843005887000414e-08, /* 0x3e615cad69737c93 */
186	1.35722380472479366661e-08, /* 0x3e4d256ab1b285e9 */
187	2.56504325268044191098e-08, /* 0x3e5b8abcb97a7aa2 */
188	5.81213608741512136843e-08, /* 0x3e6f34239659a5dc */
189	5.59374849578288093334e-08, /* 0x3e6e07fd48d30177 */
190	5.06615629004996189970e-08, /* 0x3e6b32df4799f4f6 */
191	5.24588857848400955725e-08, /* 0x3e6c29e4f4f21cf8 */
192	9.61968535632653505972e-10, /* 0x3e1086c848df1b59 */
193	1.34829655346594463137e-08, /* 0x3e4cf456b4764130 */
194	3.65557749306383026498e-08, /* 0x3e63a02ffcb63398 */
195	3.33431709374069198903e-08, /* 0x3e61e6a6886b0976 */
196	5.13008650536088382197e-08, /* 0x3e6b8abcb97a7aa2 */
197	5.09285070380306053751e-08, /* 0x3e6b578f8aa35552 */
198	3.20853940845502057341e-08, /* 0x3e6139c871afb9fc */
199	4.06713248643004200446e-08, /* 0x3e65d5d30701ce64 */
200	5.57028186706125221168e-08, /* 0x3e6de7bcb2d12142 */
201	5.48356693724804282546e-08, /* 0x3e6d708e984e1664 */
202	1.99407553679345001938e-08, /* 0x3e556945e9c72f36 */
203	1.96585517245087232086e-09, /* 0x3e20e2f613e85bda */
204	6.68649386072067321503e-09, /* 0x3e3cb7e0b42724f6 */
205	5.89936034642113390002e-08, /* 0x3e6fac04e52846c7 */
206	2.85038578721554472484e-08, /* 0x3e5e9b14aec442be */
207	5.09746772910284482606e-08, /* 0x3e6b5de8034e7126 */
208	5.54234668933210171467e-08, /* 0x3e6dc157e1b259d3 */
209	6.29100830926604004874e-09, /* 0x3e3b05096ad69c62 */
210	2.61974119468563937716e-08, /* 0x3e5c2116faba4cdd */
211	4.16752115011186398935e-08, /* 0x3e665fcc25f95b47 */
212	2.47747534460820790327e-08, /* 0x3e5a9a08498d4850 */
213	5.56922172017964209793e-08, /* 0x3e6de647b1465f77 */
214	2.76162876992552906035e-08, /* 0x3e5da71b7bf7861d */
215	7.08169709942321478061e-09, /* 0x3e3e6a6886b09760 */
216	5.77453510221151779025e-08, /* 0x3e6f0075eab0ef64 */
217	4.43021445893361960146e-09, /* 0x3e33071282fb989b */
218	3.15140984357495864573e-08, /* 0x3e60eb43c3f1bed2 */
219	2.95077445089736670973e-08, /* 0x3e5faf06ecb35c84 */
220	1.44098510263167149349e-08, /* 0x3e4ef1e63db35f68 */
221	1.05196987538551827693e-08, /* 0x3e469743fb1a71a5 */
222	5.23641361722697546261e-08, /* 0x3e6c1cdf404e5796 */
223	7.72099925253243069458e-09, /* 0x3e4094aa0ada625e */
224	5.62089493829364197156e-08, /* 0x3e6e2d4c96fde3ec */
225	3.53090261098577946927e-08, /* 0x3e62f4d5e9a98f34 */
226	3.80080516835568242269e-08, /* 0x3e6467c96ecc5cbe */
227	5.66961038386146408282e-08, /* 0x3e6e7040d03dec5a */
228	4.42287063097349852717e-08, /* 0x3e67bebf4282de36 */
229	3.45294525105681104660e-08, /* 0x3e6289b11aeb783f */
230	2.47132034530447431509e-08, /* 0x3e5a891d1772f538 */
231	3.59655343422487209774e-08, /* 0x3e634f10be1fb591 */
232	5.51581770357780862071e-08, /* 0x3e6d9ce1d316eb93 */
233	3.60171867511861372793e-08, /* 0x3e63562a19a9c442 */
234	1.94511067964296180547e-08, /* 0x3e54e2adf548084c */
235	1.54137376631349347838e-08, /* 0x3e508ce55cc8c97a */
236	3.93171034490174464173e-09, /* 0x3e30e2f613e85bda */
237	5.52990607758839766440e-08, /* 0x3e6db03ebb0227bf */
238	3.29990737637586136511e-08, /* 0x3e61b75bb09cb098 */
239	1.18436010922446096216e-08, /* 0x3e496f16abb9df22 */
240	4.04248680368301346709e-08, /* 0x3e65b3f399411c62 */
241	2.27418915900284316293e-08, /* 0x3e586b3e59f65355 */
242	1.70263791333409206020e-08, /* 0x3e52482ceae1ac12 */
243	5.76999904754328540596e-08}; /* 0x3e6efa39ef35793c */
244
245	static const double
246	log2 = 6.931471805599453e-01, /* 0x3fe62e42fefa39ef */
247
248	/* Approximating polynomial coefficients */
249	cb_1 = 8.33333333333333593622e-02, /* 0x3fb5555555555557 */
250	cb_2 = 1.24999999978138668903e-02; /* 0x3f89999999865ede */
251
252	#if defined(COMPILING_LOG10)
253	static const double
254	log10e = 4.34294481903251827651e-01; /* 0x3fdbcb7b1526e50e */
255	#elif defined(COMPILING_LOG2)
256	static const double
257	log2e = 1.44269504088896340735e+00; /* 0x3ff71547652b82fe */
258	#endif
259
260
261	GET_BITS_DP64(x, ux);
262
263	#if !defined(COMPILING_LOG10) && !defined(COMPILING_LOG2)
264	if (ux == 0x4005bf0a80000000)
265	/* Treat this, the number closest to e in float arithmetic,
266	as a special case and return 1.0 */
267	return 1.0F;
268	#endif
269
270	if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
271	{
272	/* x is either NaN or infinity */
273	if (ux & MANTBITS_DP64)
274	/* x is NaN */
275	return fx + fx; /* Raise invalid if it is a signalling NaN */
276	else
277	{
278	/* x is infinity */
279	if (ux & SIGNBIT_DP64)
280	/* x is negative infinity. Return a NaN. */
281	return retval_errno_edom(fx);
282	else
283	return fx;
284	}
285	}
286	else if (!(ux & ~SIGNBIT_DP64))
287	/* x is +/-zero. Return -infinity with div-by-zero flag. */
288	return retval_errno_erange_overflow(fx);
289	else if (ux & SIGNBIT_DP64)
290	/* x is negative. Return a NaN. */
291	return retval_errno_edom(fx);
292
293
294	/*
295	First, we decompose the argument x to the form
296	x = 2*M (F1 + F2),
297	where 1 <= F1+F2 < 2, M has the value of an integer,
298	F1 = 1 + j/64, j ranges from 0 to 64, and \|F2\| <= 1/128.
299
300	Second, we approximate log( 1 + F2/F1 ) by an odd polynomial
301	in U, where U = 2 F2 / (2 F2 + F1).
302	Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ).
303	The core approximation calculates
304	Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U - 1.
305	Note that log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ),
306	thus, Poly = 2 arctanh( U/2 ) / U - 1.
307
308	It is not hard to see that
309	log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ).
310	Hence, we return Z1 = log(F1), and Z2 = log( 1 + F2/F1).
311	The values of log(F1) are calculated beforehand and stored
312	in the program.
313	*/
314
315	f = x;
316
317	/* Store the exponent of x in xexp and put
318	f into the range [0.5,1) */
319	xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
320	PUT_BITS_DP64((ux & MANTBITS_DP64) \| HALFEXPBITS_DP64, f);
321
322	/* Now x = 2*xexp f, 1/2 <= f < 1. */
323
324	/* Set index to be the nearest integer to 128f /
325	/*
326	r = 128.0 * f;
327	index = (int)(r + 0.5);
328	*/
329	/* This code instead of the above can save several cycles.
330	It only works because 64 <= r < 128, so
331	the nearest integer is always contained in exactly
332	7 bits, and the right shift is always the same. */
333	index = (((ux & 0x000fc00000000000) \| 0x0010000000000000) >> 46)
334	+ ((ux & 0x0000200000000000) >> 45);
335	z1 = ln_lead_table[index-64];
336	q = ln_tail_table[index-64];
337	f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
338	f2 = f - f1;
339	/* At this point, x = 2*xexp ( f1 + f2 ) where
340	f1 = j/128, j = 64, 65, ..., 128 and \|f2\| <= 1/256. */
341
342	/* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5f2 ) /
343	/* u = f2 / (f1 + 0.5 * f2); */
344	u = f2 / (f1 + 0.5 * f2);
345
346	/* Here, \|u\| <= 2(exp(1/16)-1) / (exp(1/16)+1).
347	The core approximation calculates
348	poly = [log(1 + u/2) - log(1 - u/2)]/u - 1 */
349	v = u * u;
350	poly = (v * (cb_1 + v * cb_2));
351	z2 = q + (u + u * poly);
352
353	/* Now z1,z2 is an extra-precise approximation of log(f).
354	Add xexp * log(2) to z1, z2 to get the result log(x). */
355
356	r = xexp*log2 + z1 + z2;
357	#if defined (COMPILING_LOG10)
358	return log10e*r;
359	#elif defined(COMPILING_LOG2)
360	return log2e*r;
361	#else
362	return r;
363	#endif
364	}
365
/* Alias the float routine built from this file to its public name and
   to the matching __ieee754_ name.  Exactly one pair is emitted, selected
   by COMPILING_LOG10 / COMPILING_LOG2 (neither defined: natural log).
   NOTE(review): weak_alias is not defined in this listing; presumably it
   is glibc's macro that makes the second name a weak alias of the first
   -- confirm. */
366	#if defined(COMPILING_LOG10)
367	weak_alias (__log10f, log10f)
368	weak_alias (__log10f, __ieee754_log10f)
369	#elif defined(COMPILING_LOG2)
370	weak_alias (__log2f, log2f)
371	weak_alias (__log2f, __ieee754_log2f)
372	#else
373	weak_alias (__logf, logf)
374	weak_alias (__logf, __ieee754_logf)
375	#endif

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_powf.c.x86_64-new-libm (+358 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_ZEROF_WITH_FLAGS
		13	#define USE_INFINITYF_WITH_FLAGS
		14	#define USE_NANF_WITH_FLAGS
		15	#include "libm_inlines_amd.h"
		16	#undef USE_ZEROF_WITH_FLAGS
		17	#undef USE_INFINITYF_WITH_FLAGS
		18	#undef USE_NANF_WITH_FLAGS
		19
		20	/* Deal with errno for out-of-range result */
		21	#include "libm_errno_amd.h"
		22	static inline float retval_errno_erange_overflow(float x, float y, int sign)
		23	{
		24	struct exception exc;
		25	exc.arg1 = (double)x;
		26	exc.arg2 = (double)y;
		27	exc.type = OVERFLOW;
		28	exc.name = (char *)"powf";
		29	if (_LIB_VERSION == _SVID_)
		30	{
		31	if (sign == 1)
		32	exc.retval = HUGE;
		33	else /* sign = -1 */
		34	exc.retval = -HUGE;
		35	}
		36	else
		37	{
		38	if (sign == 1)
		39	exc.retval = infinityf_with_flags(AMD_F_OVERFLOW);
		40	else /* sign == -1 */
		41	exc.retval = -infinityf_with_flags(AMD_F_OVERFLOW);
		42	}
		43	if (_LIB_VERSION == _POSIX_)
		44	__set_errno(ERANGE);
		45	else if (!matherr(&exc))
		46	__set_errno(ERANGE);
		47	return exc.retval;
		48	}
		49
		50	static inline float retval_errno_erange_underflow(float x, float y, int sign)
		51	{
		52	struct exception exc;
		53	exc.arg1 = (double)x;
		54	exc.arg2 = (double)y;
		55	exc.type = UNDERFLOW;
		56	exc.name = (char *)"powf";
		57	if (sign == 1)
		58	exc.retval = zerof_with_flags(AMD_F_UNDERFLOW \| AMD_F_INEXACT);
		59	else /* sign == -1 */
		60	exc.retval = -zerof_with_flags(AMD_F_UNDERFLOW \| AMD_F_INEXACT);
		61	if (_LIB_VERSION == _POSIX_)
		62	__set_errno(ERANGE);
		63	else if (!matherr(&exc))
		64	__set_errno(ERANGE);
65	return exc.retval;
66	}
67
68	/* Deal with errno for out-of-range arguments */
69	static inline float retval_errno_edom(float x, float y, int type)
70	{
71	struct exception exc;
72	exc.arg1 = (double)x;
73	exc.arg2 = (double)y;
74	exc.type = DOMAIN;
75	exc.name = (char *)"powf";
76	if (_LIB_VERSION == _SVID_)
77	exc.retval = 0.0;
78	else if (type == 1)
79	exc.retval = infinityf_with_flags(AMD_F_DIVBYZERO);
80	else if (type == 2)
81	exc.retval = -infinityf_with_flags(AMD_F_DIVBYZERO);
82	else /* type == 3 */
83	exc.retval = nanf_with_flags(AMD_F_INVALID);
84	if (_LIB_VERSION == _POSIX_)
85	__set_errno (EDOM);
86	if (!matherr(&exc))
87	{
88	if (_LIB_VERSION == _SVID_)
89	(void)fputs("pow: DOMAIN error\n", stderr);
90	__set_errno(EDOM);
91	}
92	return exc.retval;
93	}
94
95	float __powf(float x, float y)
96	{
97	unsigned int ux, ax, uy, ay, mask;
98	int yexp, inty, xpos, ypos, negateres;
99	double dx, dy, dw, dlog2, dr;
100	volatile int dummy;
101
102	/* Largest float, stored as a double */
103	const double large = 3.40282346638528859812e+38; /* 0x47efffffe0000000 */
104
105	/* Smallest float, stored as a double */
106	const double tiny = 1.40129846432481707092e-45; /* 0x36a0000000000000 */
107
108	GET_BITS_SP32(x, ux);
109	ax = ux & (~SIGNBIT_SP32);
110	xpos = ax == ux;
111	GET_BITS_SP32(y, uy);
112	ay = uy & (~SIGNBIT_SP32);
113	ypos = ay == uy;
114
115	if (ux == 0x3f800000)
116	{
117	/* x = +1.0. Return +1.0 for all y, even NaN,
118	raising invalid only if y is a signalling NaN */
119	if (y + 1.0F == 2.0F) dummy = 1;
120	return 1.0F;
121	}
122	else if (ay == 0)
123	{
124	/* y is zero. Return 1.0, even if x is infinity or NaN,
125	raising invalid only if x is a signalling NaN */
126	if (x + 1.0F == 2.0F) dummy = 1;
127	return 1.0F;
128	}
129	else if (((ax & EXPBITS_SP32) == EXPBITS_SP32) &&
130	(ax & MANTBITS_SP32))
131	/* x is NaN. Return NaN, with invalid exception if it's
132	a signalling NaN. */
133	return x + x;
134	else if (((ay & EXPBITS_SP32) == EXPBITS_SP32) &&
135	(ay & MANTBITS_SP32))
136	/* y is NaN. Return NaN, with invalid exception if y
137	is a signalling NaN. */
138	return y + y;
139	else if (uy == 0x3f800000)
140	/* y is 1.0; return x */
141	return x;
142	else if ((ay & EXPBITS_SP32) > 0x4f000000)
143	{
144	/* y is infinite or so large that the result would
145	overflow or underflow. Flags should be raised
146	unless y is an exact infinity. */
147	int yinf = (ay == EXPBITS_SP32);
148	if (ypos)
149	{
150	/* y is +ve */
151	if (ax == 0)
152	/* abs(x) = 0.0. */
153	return 0.0F;
154	else if (ax < 0x3f800000)
155	{
156	/* abs(x) < 1.0 */
157	if (yinf)
158	return 0.0F;
159	else
160	return retval_errno_erange_underflow(x, y, 1);
161	}
162	else if (ax == 0x3f800000)
163	/* abs(x) = 1.0. */
164	return 1.0F;
165	else
166	{
167	/* abs(x) > 1.0 */
168	if (yinf)
169	return infinityf_with_flags(0);
170	else
171	return retval_errno_erange_overflow(x, y, 1);
172	}
173	}
174	else
175	{
176	/* y is -ve */
177	if (ax == 0)
178	/* abs(x) = 0.0. Return +infinity. */
179	return retval_errno_edom(x, y, 1);
180	else if (ax < 0x3f800000)
181	{
182	/* abs(x) < 1.0; return +infinity. */
183	if (yinf)
184	return infinityf_with_flags(0);
185	else
186	return retval_errno_erange_overflow(x, y, 1);
187	}
188	else if (ax == 0x3f800000)
189	/* abs(x) = 1.0. */
190	return 1.0F;
191	else
192	{
193	/* abs(x) > 1.0 */
194	if (yinf)
195	return 0.0F;
196	else
197	return retval_errno_erange_underflow(x, y, 1);
198	}
199	}
200	}
201
202	/* See whether y is an integer.
203	inty = 0 means not an integer.
204	inty = 1 means odd integer.
205	inty = 2 means even integer.
206	*/
207	yexp = ((uy & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32 + 1;
208	if (yexp < 1)
209	inty = 0;
210	else if (yexp > 24)
211	inty = 2;
212	else /* 1 <= yexp <= 24 */
213	{
214	/* Mask out the bits of r that we don't want */
215	mask = (1 << (24 - yexp)) - 1;
216	if ((uy & mask) != 0)
217	inty = 0;
218	else if (((uy & ~mask) >> (24 - yexp)) & 0x00000001)
219	inty = 1;
220	else
221	inty = 2;
222	}
223
224	if ((ax & EXPBITS_SP32) == EXPBITS_SP32)
225	{
226	/* x is infinity (NaN was already ruled out). */
227	if (xpos)
228	{
229	/* x is +infinity */
230	if (ypos)
231	/* y > 0.0 */
232	return x;
233	else
234	return 0.0F;
235	}
236	else
237	{
238	/* x is -infinity */
239	if (inty == 1)
240	{
241	/* y is an odd integer */
242	if (ypos)
243	/* Result is -infinity */
244	return x;
245	else
246	return -0.0F;
247	}
248	else
249	{
250	if (ypos)
251	/* Result is +infinity */
252	return -x;
253	else
254	return 0.0F;
255	}
256	}
257	}
258	else if (ax == 0)
259	{
260	/* x is zero */
261	if (xpos)
262	{
263	/* x is +0.0 */
264	if (ypos)
265	/* y is positive; return +0.0 for all cases */
266	return x;
267	else
268	/* y is negative; return +infinity with div-by-zero
269	for all cases */
270	return retval_errno_edom(x, y, 1);
271	}
272	else
273	{
274	/* x is -0.0 */
275	if (ypos)
276	{
277	/* y is positive */
278	if (inty == 1)
279	/* -0.0 raised to a positive odd integer returns -0.0 */
280	return x;
281	else
282	/* Return +0.0 */
283	return -x;
284	}
285	else
286	{
287	/* y is negative */
288	if (inty == 1)
289	/* -0.0 raised to a negative odd integer returns -infinity
290	with div-by-zero */
291	return retval_errno_edom(x, y, 2);
292	else
293	/* Return +infinity with div-by-zero */
294	return retval_errno_edom(x, y, 1);
295	}
296	}
297	}
298
299	negateres = 0;
300	if (!xpos)
301	{
302	/* x is negative */
303	if (inty)
304	{
305	/* It's OK because y is an integer. */
306	ux = ax;
307	PUT_BITS_SP32(ux, x); /* x = abs(x) */
308	/* If y is odd, the result will be negative */
309	negateres = (inty == 1);
310	}
311	else
312	/* y is not an integer. Return a NaN. */
313	return retval_errno_edom(x, y, 3);
314	}
315
316	if (ay < 0x2e800000) /* abs(y) < 2^(-34) */
317	{
318	/* y is close enough to zero for the result to be 1.0
319	no matter what the size of x */
320	return 1.0F + y;
321	}
322
323	/* Simply use double precision for computation of log2(x),
324	ylog2(x) and exp2(ylog2(x)) */
325	dx = x;
326	dy = y;
327	dlog2 = log2(dx);
328	dw = y * dlog2;
329	dr = exp2(dw);
330
331	/* If dr overflowed or underflowed we need to deal with errno */
332	if (dr > large)
333	{
334	/* Double dr has overflowed range of float. */
335	if (negateres)
336	return retval_errno_erange_overflow(x, y, -1);
337	else
338	return retval_errno_erange_overflow(x, y, 1);
339	}
340	else if (dr < tiny)
341	{
342	/* Double dr has underflowed range of float. */
343	if (negateres)
344	return retval_errno_erange_underflow(x, y, -1);
345	else
346	return retval_errno_erange_underflow(x, y, 1);
347	}
348	else
349	{
350	if (negateres)
351	return -dr;
352	else
353	return dr;
354	}
355	}
356
/* Alias __powf to the public name powf and to __ieee754_powf.
   NOTE(review): weak_alias is not defined in this listing; presumably it
   is glibc's macro that makes the second name a weak alias of the first
   -- confirm. */
357	weak_alias (__powf, powf)
358	weak_alias (__powf, __ieee754_powf)

Line 0 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_remainder.c.x86_64-new-libm (+270 lines)
		1	/*
		2	(C) 2002 Advanced Micro Devices, Inc.
		3	** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS
		4	AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC
		5	LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH
		6	THIS LIBRARY**
		7	*/
		8
		9	#include "libm_amd.h"
		10	#include "libm_util_amd.h"
		11
		12	#define USE_NAN_WITH_FLAGS
		13	#define USE_SCALEDOUBLE_3
		14	#define USE_GET_FPSW_INLINE
		15	#define USE_SET_FPSW_INLINE
		16	#include "libm_inlines_amd.h"
		17	#undef USE_NAN_WITH_FLAGS
		18	#undef USE_SCALEDOUBLE_3
		19	#undef USE_GET_FPSW_INLINE
		20	#undef USE_SET_FPSW_INLINE
		21
		/* Computes the exact product of x and y, the result being the
		   nearly doublelength number (*z, *zz).

		   x, y  factors.
		   z     receives the rounded product x * y.
		   zz    receives the correction term, so that *z + *zz represents
		         the product to nearly twice double precision.

		   Each factor is split into a head (its bit pattern with the low
		   27 bits cleared) and a tail (the remainder); the partial products
		   of heads and tails are then accumulated against *z. */
		static inline void dekker_mul12(double x, double y,
		                                double *z, double *zz)
		{
		  double hx, tx, hy, ty;
		  /* Split x into hx (head) and tx (tail). Do the same for y. */
		  unsigned long u;
		  GET_BITS_DP64(x, u);
		  u &= 0xfffffffff8000000;
		  PUT_BITS_DP64(u, hx);
		  tx = x - hx;
		  GET_BITS_DP64(y, u);
		  u &= 0xfffffffff8000000;
		  PUT_BITS_DP64(u, hy);
		  ty = y - hy;
		  *z = x * y;
		  *zz = (((hx * hy - *z) + hx * ty) + tx * hy) + tx * ty;
		}
		41
		42
		43	#if defined(COMPILING_FMOD)
		44	double __fmod(double x, double y)
		45	#else
		46	double __remainder(double x, double y)
		47	#endif
		48	{
		49	double dx, dy, scale, w, t, v, c, cc;
		50	int i, ntimes, xexp, yexp;
		51	unsigned long u, ux, uy, ax, ay, todd;
		52	unsigned int sw;
		53
		54	dx = x;
		55	dy = y;
		56
		57	GET_BITS_DP64(dx, ux);
		58	GET_BITS_DP64(dy, uy);
		59	ax = ux & ~SIGNBIT_DP64;
		60	ay = uy & ~SIGNBIT_DP64;
		61	xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
		62	yexp = ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
		63
		64	if (xexp < 1 \|\| xexp > BIASEDEMAX_DP64 \|\|
65	yexp < 1 \|\| yexp > BIASEDEMAX_DP64)
66	{
67	/* x or y is zero, denormalized, NaN or infinity */
68	if (xexp > BIASEDEMAX_DP64)
69	{
70	/* x is NaN or infinity */
71	if (ux & MANTBITS_DP64)
72	/* x is NaN */
73	return dx + dx; /* Raise invalid if it is a signalling NaN */
74	else
75	/* x is infinity; result is NaN */
76	return nan_with_flags(AMD_F_INVALID);
77	}
78	else if (yexp > BIASEDEMAX_DP64)
79	{
80	/* y is NaN or infinity */
81	if (uy & MANTBITS_DP64)
82	/* y is NaN */
83	return dy + dy; /* Raise invalid if it is a signalling NaN */
84	else
85	/* y is infinity; result is x */
86	return dx;
87	}
88	else if (ax == 0x0000000000000000)
89	{
90	/* x is zero */
91	if (ay == 0x0000000000000000)
92	/* y is zero */
93	return nan_with_flags(AMD_F_INVALID);
94	else
95	return dx;
96	}
97	else if (ay == 0x0000000000000000)
98	/* y is zero */
99	return nan_with_flags(AMD_F_INVALID);
100
101	/* We've exhausted all other possibilities. One or both of x and
102	y must be denormalized */
103	if (xexp < 1)
104	{
105	/* x is denormalized. Figure out its exponent. */
106	u = ax;
107	while (u < IMPBIT_DP64)
108	{
109	xexp--;
110	u <<= 1;
111	}
112	}
113	if (yexp < 1)
114	{
115	/* y is denormalized. Figure out its exponent. */
116	u = ay;
117	while (u < IMPBIT_DP64)
118	{
119	yexp--;
120	u <<= 1;
121	}
122	}
123	}
124	else if (ax == ay)
125	{
126	/* abs(x) == abs(y); return zero with the sign of x */
127	PUT_BITS_DP64(ux & SIGNBIT_DP64, dx);
128	return dx;
129	}
130
131	/* Set x = abs(x), y = abs(y) */
132	PUT_BITS_DP64(ax, dx);
133	PUT_BITS_DP64(ay, dy);
134
135	if (ax < ay)
136	{
137	/* abs(x) < abs(y) */
138	#if !defined(COMPILING_FMOD)
139	if (dx > 0.5*dy)
140	dx -= dy;
141	#endif
142	return x < 0.0? -dx : dx;
143	}
144
145	/* Save the current floating-point status word. We need
146	to do this because the remainder function is always
147	exact for finite arguments, but our algorithm causes
148	the inexact flag to be raised. We therefore need to
149	restore the entry status before exiting. */
150	sw = get_fpsw_inline();
151
152	/* Set ntimes to the number of times we need to do a
153	partial remainder. If the exponent of x is an exact multiple
154	of 52 larger than the exponent of y, and the mantissa of x is
155	less than the mantissa of y, ntimes will be one too large
156	but it doesn't matter - it just means that we'll go round
157	the loop below one extra time. */
158	if (xexp <= yexp)
159	ntimes = 0;
160	else
161	ntimes = (xexp - yexp) / 52;
162
163	if (ntimes == 0)
164	{
165	w = dy;
166	scale = 1.0;
167	}
168	else
169	{
170	/* Set w = y * 2^(52ntimes) /
171	w = scaleDouble_3(dy, ntimes * 52);
172
173	/* Set scale = 2^(-52) */
174	PUT_BITS_DP64((unsigned long)(-52 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
175	scale);
176	}
177
178	/* Each time round the loop we compute a partial remainder.
179	This is done by subtracting a large multiple of w
180	from x each time, where w is a scaled up version of y.
181	The subtraction must be performed exactly in quad
182	precision, though the result at each stage can
183	fit exactly in a double precision number. */
184	for (i = 0; i < ntimes; i++)
185	{
186	/* t is the integer multiple of w that we will subtract.
187	We use a truncated value for t.
188
189	N.B. w has been chosen so that the integer t will have
190	at most 52 significant bits. This is the amount by
191	which the exponent of the partial remainder dx gets reduced
192	every time around the loop. In theory we could use
193	53 bits in t, but the quad precision multiplication
194	routine dekker_mul12 does not allow us to do that because
195	it loses the last (106th) bit of its quad precision result. */
196
197	/* Set dx = dx - w * t, where t is equal to trunc(dx/w). */
198	t = (double)(long)(dx / w);
199	/* At this point, t may be one too large due to
200	rounding of dx/w */
201
202	/* Compute w * t in quad precision */
203	dekker_mul12(w, t, &c, &cc);
204
205	/* Subtract w * t from dx */
206	v = dx - c;
207	dx = v + (((dx - v) - c) - cc);
208
209	/* If t was one too large, dx will be negative. Add back
210	one w */
211	/* It might be possible to speed up this loop by finding
212	a way to compute correctly truncated t directly from dx and w.
213	We would then avoid the need for this check on negative dx. */
214	if (dx < 0.0)
215	dx += w;
216
217	/* Scale w down by 2^(-52) for the next iteration */
218	w *= scale;
219	}
220
221	/* One more time */
222	/* Variable todd says whether the integer t is odd or not */
223	t = (double)(long)(dx / w);
224	todd = ((long)(dx / w)) & 1;
225	dekker_mul12(w, t, &c, &cc);
226	v = dx - c;
227	dx = v + (((dx - v) - c) - cc);
228	if (dx < 0.0)
229	{
230	todd = !todd;
231	dx += w;
232	}
233
234	/* At this point, dx lies in the range [0,dy) */
235	#if !defined(COMPILING_FMOD)
236	/* For the fmod function, we're done apart from setting
237	the correct sign. */
238	/* For the remainder function, we need to adjust dx
239	so that it lies in the range (-y/2, y/2] by carefully
240	subtracting w (== dy == y) if necessary. The rigmarole
241	with todd is to get the correct sign of the result
242	when x/y lies exactly half way between two integers,
243	when we need to choose the even integer. */
244	if (ay < 0x7fd0000000000000)
245	{
246	if (dx + dx > w \|\| (todd && (dx + dx == w)))
247	dx -= w;
248	}
249	else if (dx > 0.5 * w \|\| (todd && (dx == 0.5 * w)))
250	dx -= w;
251
252	#endif
253
254	/* **** N.B. for some reason this breaks the 32 bit version
255	of remainder when compiling with optimization. */
256	/* Restore the entry status flags */
257	set_fpsw_inline(sw);
258
259	/* Set the result sign according to input argument x */
260	return x < 0.0? -dx : dx;
261
262	}
263
264	#if defined(COMPILING_FMOD) /* This source is built either as fmod or as remainder; pick the matching exported names. */
265	weak_alias (__fmod, fmod) /* NOTE(review): weak_alias is defined outside this excerpt; presumably it makes the second name a weak alias of the first -- confirm. */
266	weak_alias (__fmod, __ieee754_fmod) /* Second alias of the same __fmod implementation. */
267	#else /* Not COMPILING_FMOD: this is the remainder build. */
268	weak_alias (__remainder, remainder) /* Same aliasing scheme as the fmod branch, for __remainder. */
269	weak_alias (__remainder, __ieee754_remainder) /* Second alias of the same __remainder implementation. */
270	#endif /* COMPILING_FMOD */

Lines 1-52 Link Here

(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_copysign.S.x86_64-new-libm (-52 lines)
1	/* copy sign, double version.
2	Copyright (C) 2002 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Andreas Jaeger <aj@suse.de>, 2002.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, write to the Free
18	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19	02111-1307 USA. */
20
21	#include <machine/asm.h>
22
23	#ifdef __ELF__ /* Choose the section that will hold the mask constants defined below. */
24	.section .rodata /* ELF: place the constants in .rodata. */
25	#else /* Not ELF. */
26	.text /* Otherwise the constants are emitted into .text. */
27	#endif /* __ELF__ -- NOTE(review): nothing visible here switches back to .text before ENTRY; presumably the ENTRY macro does -- confirm. */
28
29	.align ALIGNARG(4) /* NOTE(review): presumably 2^4 = 16-byte alignment, matching the 16-byte operands read by andpd below -- confirm ALIGNARG. */
30	ASM_TYPE_DIRECTIVE(signmask,@object) /* NOTE(review): no matching ASM_SIZE_DIRECTIVE(signmask) appears in this file. */
31	signmask: /* 16-byte mask: only the top bit of the low 8 bytes is set. */
32	.byte 0, 0, 0, 0, 0, 0, 0, 0x80 /* Low 8 bytes: 0x80 in the last (most significant, little-endian) byte, i.e. the sign bit of a double. */
33	.byte 0, 0, 0, 0, 0, 0, 0, 0 /* High 8 bytes: all zero. */
34	othermask: /* 16-byte mask: every bit of the low 8 bytes except the top one. NOTE(review): no ASM_TYPE_DIRECTIVE(othermask) appears in this file. */
35	.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f /* Low 8 bytes: 0x7f in the last byte clears the sign bit and keeps exponent and mantissa. */
36	.byte 0, 0, 0, 0, 0, 0, 0, 0 /* High 8 bytes: all zero, so ANDing with this mask also zeroes the upper half of the register. */
37	ASM_SIZE_DIRECTIVE(othermask) /* Size directive is emitted for othermask only. */
38
39	#ifdef PIC /* MO(op) wraps a symbol so it can be used as a memory operand in both PIC and non-PIC builds. */
40	#define MO(op) op##(%rip)
41	#else /* Non-PIC: the bare symbol is used as the operand; the PIC form above appends (%rip) for RIP-relative addressing. */
42	#define MO(op) op
43	#endif /* PIC */
44
45	ENTRY(__copysign) /* copysign, double version. NOTE(review): presumably x arrives in %xmm0 and y in %xmm1 per the x86-64 SysV ABI -- confirm. */
46	andpd MO(othermask),%xmm0 /* Clear the sign bit of the low double in %xmm0 (othermask's upper 8 bytes are zero, so the upper half is zeroed too). */
47	andpd MO(signmask),%xmm1 /* Keep only the sign bit of the low double in %xmm1. */
48	orpd %xmm1,%xmm0 /* Combine: magnitude bits from %xmm0 with the sign bit from %xmm1. */
49	ret /* Result is left in %xmm0. */
50	END (__copysign) /* Closes the function opened by ENTRY. */
51
52	weak_alias (__copysign, copysign) /* NOTE(review): weak_alias is defined outside this file; presumably it exports copysign as a weak alias of __copysign -- confirm. */