Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 100289 | Differences between
and this patch

Collapse All | Expand All

(-)glibc-2.3.3/sysdeps/x86_64/fpu/Makefile.x86_64-new-libm (+3 lines)
Line 0 Link Here
1
ifeq ($(subdir),math)
2
libm-sysdep_routines += w_remainder_piby2 w_remainder_piby2f
3
endif
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_acos.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_acosf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_asin.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_asinf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_exp.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_exp10.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_exp10f.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_exp2.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_exp2f.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_expf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_fmod.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_fmodf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_hypot.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_hypotf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_log.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_log10.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_log10f.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_log2.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_log2f.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_logf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_pow.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_powf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_remainder.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_remainderf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_sinh.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/e_sinhf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/libm_amd.h.x86_64-new-libm (+32 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#ifndef LIBM_AMD_H_INCLUDED
10
#define LIBM_AMD_H_INCLUDED 1
11
12
/* The following definition of weak_alias is extracted from
13
   libc-symbols.h */
14
15
/* Define ALIASNAME as a weak alias for NAME.
   Unlike libc-symbols.h, this copy has no strong-alias fallback: the
   definition below always uses the weak and alias attributes.  */
17
#  define weak_alias(name, aliasname) _weak_alias (name, aliasname)
18
#  define _weak_alias(name, aliasname) \
19
  extern __typeof (name) aliasname __attribute__ ((weak, alias (#name)));
20
21
#include <math.h>
22
23
extern double chgsign(double x);
24
extern float chgsignf(float x);
25
26
extern double fma(double x, double y, double z);
27
extern float fmaf(float x, float y, float z);
28
29
extern void __remainder_piby2(double x, double *r, double *rr, int *region);
30
extern void __remainder_piby2f(float x, double *r, int *region);
31
32
#endif /* LIBM_AMD_H_INCLUDED */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/libm_errno_amd.h.x86_64-new-libm (+18 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#ifndef LIBM_ERRNO_AMD_H_INCLUDED
10
#define LIBM_ERRNO_AMD_H_INCLUDED 1
11
12
#include <stdio.h>
13
#include <errno.h>
14
#ifndef __set_errno
/* Fallback used only when no __set_errno is already defined: store X in
   errno.  The whole expansion is parenthesised so the macro behaves as a
   single expression wherever it is used (e.g. as an operand or inside a
   conditional expression).  */
#define __set_errno(x) (errno = (x))
#endif
17
18
#endif /* LIBM_ERRNO_AMD_H_INCLUDED */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/libm_inlines_amd.h.x86_64-new-libm (+2260 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#ifndef LIBM_INLINES_AMD_H_INCLUDED
10
#define LIBM_INLINES_AMD_H_INCLUDED 1
11
12
#include "libm_util_amd.h"
13
14
#ifdef WIN32
15
#define inline __inline
16
#endif
17
18
/* Set defines for inline functions calling other inlines */
19
#if defined(USE_VAL_WITH_FLAGS) || defined(USE_VALF_WITH_FLAGS) || \
20
    defined(USE_ZERO_WITH_FLAGS) || defined(USE_ZEROF_WITH_FLAGS) || \
21
    defined(USE_NAN_WITH_FLAGS) || defined(USE_NANF_WITH_FLAGS) || \
22
    defined(USE_INFINITY_WITH_FLAGS) || defined(USE_INFINITYF_WITH_FLAGS) || \
23
    defined(USE_SQRT_AMD_INLINE) || defined(USE_SQRTF_AMD_INLINE)
24
#undef USE_RAISE_FPSW_FLAGS
25
#define USE_RAISE_FPSW_FLAGS 1
26
#endif
27
28
#if defined(USE_SPLITDOUBLE)
29
/* Splits double x into exponent e and mantissa m, where 0.5 <= abs(m) < 1.0.
30
   Assumes that x is not zero, denormal, infinity or NaN, but these conditions
31
   are not checked */
32
static inline void splitDouble(double x, int *e, double *m)
33
{
34
  unsigned long ux, uy;
35
  GET_BITS_DP64(x, ux);
36
  uy = ux;
37
  ux &= EXPBITS_DP64;
38
  ux >>= EXPSHIFTBITS_DP64;
39
  *e = (int)ux - EXPBIAS_DP64 + 1;
40
  uy = (uy & (SIGNBIT_DP64 | MANTBITS_DP64)) | HALFEXPBITS_DP64;
41
  PUT_BITS_DP64(uy, x);
42
  *m = x;
43
}
44
#endif /* USE_SPLITDOUBLE */
45
46
47
#if defined(USE_SPLITDOUBLE_2)
48
/* Splits double x into exponent e and mantissa m, where 1.0 <= abs(m) < 4.0.
49
   Assumes that x is not zero, denormal, infinity or NaN, but these conditions
50
   are not checked. Also assumes EXPBIAS_DP is odd. With this
51
   assumption, e will be even on exit. */
52
static inline void splitDouble_2(double x, int *e, double *m)
53
{
54
  unsigned long ux, vx;
55
  GET_BITS_DP64(x, ux);
56
  vx = ux;
57
  ux &= EXPBITS_DP64;
58
  ux >>= EXPSHIFTBITS_DP64;
59
  if (ux & 1)
60
    {
61
      /* The exponent is odd */
62
      vx = (vx & (SIGNBIT_DP64 | MANTBITS_DP64)) | ONEEXPBITS_DP64;
63
      PUT_BITS_DP64(vx, x);
64
      *m = x;
65
      *e = ux - EXPBIAS_DP64;
66
    }
67
  else
68
    {
69
      /* The exponent is even */
70
      vx = (vx & (SIGNBIT_DP64 | MANTBITS_DP64)) | TWOEXPBITS_DP64;
71
      PUT_BITS_DP64(vx, x);
72
      *m = x;
73
      *e = ux - EXPBIAS_DP64 - 1;
74
    }
75
}
76
#endif /* USE_SPLITDOUBLE_2 */
77
78
79
#if defined(USE_SPLITFLOAT)
80
/* Splits float x into exponent e and mantissa m, where 0.5 <= abs(m) < 1.0.
81
   Assumes that x is not zero, denormal, infinity or NaN, but these conditions
82
   are not checked */
83
static inline void splitFloat(float x, int *e, float *m)
84
{
85
  unsigned int ux, uy;
86
  GET_BITS_SP32(x, ux);
87
  uy = ux;
88
  ux &= EXPBITS_SP32;
89
  ux >>= EXPSHIFTBITS_SP32;
90
  *e = (int)ux - EXPBIAS_SP32 + 1;
91
  uy = (uy & (SIGNBIT_SP32 | MANTBITS_SP32)) | HALFEXPBITS_SP32;
92
  PUT_BITS_SP32(uy, x);
93
  *m = x;
94
}
95
#endif /* USE_SPLITFLOAT */
96
97
98
#if defined(USE_SCALEDOUBLE_1)
99
/* Scales the double x by 2.0**n.
100
   Assumes EMIN <= n <= EMAX, though this condition is not checked. */
101
static inline double scaleDouble_1(double x, int n)
102
{
103
  double t;
104
  /* Construct the number t = 2.0**n */
105
  PUT_BITS_DP64(((long)n + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t);
106
  return x*t;
107
}
108
#endif /* USE_SCALEDOUBLE_1 */
109
110
111
#if defined(USE_SCALEDOUBLE_2)
112
/* Scales the double x by 2.0**n.
113
   Assumes 2*EMIN <= n <= 2*EMAX, though this condition is not checked. */
114
static inline double scaleDouble_2(double x, int n)
115
{
116
  double t1, t2;
117
  int n1, n2;
118
  n1 = n / 2;
119
  n2 = n - n1;
120
  /* Construct the numbers t1 = 2.0**n1 and t2 = 2.0**n2 */
121
  PUT_BITS_DP64(((long)n1 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t1);
122
  PUT_BITS_DP64(((long)n2 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t2);
123
  return (x*t1)*t2;
124
}
125
#endif /* USE_SCALEDOUBLE_2 */
126
127
128
#if defined(USE_SCALEDOUBLE_3)
129
/* Scales the double x by 2.0**n.
130
   Assumes 3*EMIN <= n <= 3*EMAX, though this condition is not checked. */
131
static inline double scaleDouble_3(double x, int n)
132
{
133
  double t1, t2, t3;
134
  int n1, n2, n3;
135
  n1 = n / 3;
136
  n2 = (n - n1) / 2;
137
  n3 = n - n1 - n2;
138
  /* Construct the numbers t1 = 2.0**n1, t2 = 2.0**n2 and t3 = 2.0**n3 */
139
  PUT_BITS_DP64(((long)n1 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t1);
140
  PUT_BITS_DP64(((long)n2 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t2);
141
  PUT_BITS_DP64(((long)n3 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, t3);
142
  return ((x*t1)*t2)*t3;
143
}
144
#endif /* USE_SCALEDOUBLE_3 */
145
146
147
#if defined(USE_SCALEFLOAT_1)
148
/* Scales the float x by 2.0**n.
149
   Assumes EMIN <= n <= EMAX, though this condition is not checked. */
150
static inline double scaleFloat_1(float x, int n)
151
{
152
  float t;
153
  /* Construct the number t = 2.0**n */
154
  PUT_BITS_SP32((n + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t);
155
  return x*t;
156
}
157
#endif /* USE_SCALEFLOAT_1 */
158
159
160
#if defined(USE_SCALEFLOAT_2)
161
/* Scales the float x by 2.0**n.
162
   Assumes 2*EMIN <= n <= 2*EMAX, though this condition is not checked. */
163
static inline float scaleFloat_2(float x, int n)
164
{
165
  float t1, t2;
166
  int n1, n2;
167
  n1 = n / 2;
168
  n2 = n - n1;
169
  /* Construct the numbers t1 = 2.0**n1 and t2 = 2.0**n2 */
170
  PUT_BITS_SP32((n1 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t1);
171
  PUT_BITS_SP32((n2 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t2);
172
  return (x*t1)*t2;
173
}
174
#endif /* USE_SCALEFLOAT_2 */
175
176
177
#if defined(USE_SCALEFLOAT_3)
178
/* Scales the float x by 2.0**n.
179
   Assumes 3*EMIN <= n <= 3*EMAX, though this condition is not checked. */
180
static inline double scaleFloat_3(float x, int n)
181
{
182
  float t1, t2, t3;
183
  int n1, n2, n3;
184
  n1 = n / 3;
185
  n2 = (n - n1) / 2;
186
  n3 = n - n1 - n2;
187
  /* Construct the numbers t1 = 2.0**n1, t2 = 2.0**n2 and t3 = 2.0**n3 */
188
  PUT_BITS_SP32((n1 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t1);
189
  PUT_BITS_SP32((n2 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t2);
190
  PUT_BITS_SP32((n3 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, t3);
191
  return ((x*t1)*t2)*t3;
192
}
193
#endif /* USE_SCALEFLOAT_3 */
194
195
#if defined(USE_SETPRECISIONDOUBLE)
196
/* Switch the x87 rounding precision to AMD_F_DOUBLE and return the
   control word that was in force beforehand, for use with
   restorePrecision().
   Only the WIN32 build touches the hardware; every other build leaves
   the FPU alone and returns 0.
   Declared static inline, like the other helpers in this header, so
   that enabling it in more than one translation unit does not produce
   duplicate external definitions at link time. */
static inline unsigned int setPrecisionDouble(void)
{
  unsigned int cw, cwold = 0;
#if defined(WIN32)
  __asm fstcw cwold;
  cw = cwold & (~0x00000300); /* These two bits control rounding precision */
  cw |= AMD_F_DOUBLE;
  __asm fldcw cw;
#elif defined(linux)
  /* There is no precision control on Hammer */
  (void)cw;
#else
  /* Nothing to do on other platforms */
  (void)cw;
#endif
  return cwold;
}
211
#endif /* USE_SETPRECISIONDOUBLE */
212
213
#if defined(USE_RESTOREPRECISION)
214
/* Reload the x87 control word from cwold, normally the value returned
   earlier by setPrecisionDouble().
   Only the WIN32 build touches the hardware; every other build is a
   no-op.
   Declared static inline, like the other helpers in this header, so
   that enabling it in more than one translation unit does not produce
   duplicate external definitions at link time. */
static inline void restorePrecision(unsigned int cwold)
{
#if defined(WIN32)
  __asm fldcw cwold;
#elif defined(linux)
  /* There is no precision control on Hammer */
  (void)cwold;
#else
  /* Nothing to do on other platforms */
  (void)cwold;
#endif
  return;
}
225
#endif /* USE_RESTOREPRECISION */
226
227
228
#if defined(USE_CLEAR_FPSW_FLAGS)
229
/* Clear floating-point status flags.  FLAGS is the bitwise OR of the
   AMD_F_* flag values to clear, e.g.
     clear_fpsw_flags(AMD_F_INEXACT | AMD_F_INVALID);
   WIN32: the bits are cleared in the status word of the x87
   environment (fnstenv/fldenv).
   Linux: the bits are cleared in MXCSR (stmxcsr/ldmxcsr).
   The Linux branch is selected by either `linux' or `__linux__', and
   the asm statements use the __asm__ spelling, so the function also
   builds in strict ISO modes where `linux' and `asm' are unavailable.
 */
static inline void clear_fpsw_flags(int flags)
{
#if defined(WIN32)
  fpenv_type fenv;
  /* Get the current floating-point environment */
  __asm fnstenv fenv;
  fenv.status_word &= (~flags);
  /* Put the floating-point environment back */
  __asm fldenv fenv;
#elif defined(linux) || defined(__linux__)
  unsigned int mxcsr;
  /* Read MXCSR, drop the requested bits, write it back */
  __asm__ __volatile__ ("STMXCSR %0" : "=m" (mxcsr));
  mxcsr &= (~flags);
  __asm__ __volatile__ ("LDMXCSR %0" : : "m" (mxcsr));
#else
#error Unknown machine
#endif
}
253
#endif /* USE_CLEAR_FPSW_FLAGS */
254
255
256
#if defined(USE_RAISE_FPSW_FLAGS)
257
/* Raise floating-point status flags.  FLAGS is the bitwise OR of the
   AMD_F_* flag values to raise, e.g.
     raise_fpsw_flags(AMD_F_INEXACT | AMD_F_INVALID);
   WIN32: the bits are set in the status word of the x87 environment
   (fnstenv/fldenv).
   Linux: the bits are set in MXCSR (stmxcsr/ldmxcsr).
   The Linux branch is selected by either `linux' or `__linux__', and
   the asm statements use the __asm__ spelling, so the function also
   builds in strict ISO modes where `linux' and `asm' are unavailable.
 */
static inline void raise_fpsw_flags(int flags)
{
#if defined(WIN32)
  fpenv_type fenv;
  /* Get the current floating-point environment */
  __asm fnstenv fenv;
  fenv.status_word |= flags;
  /* Put the floating-point environment back */
  __asm fldenv fenv;
#elif defined(linux) || defined(__linux__)
  unsigned int mxcsr;
  /* Read MXCSR, set the requested bits, write it back */
  __asm__ __volatile__ ("STMXCSR %0" : "=m" (mxcsr));
  mxcsr |= flags;
  __asm__ __volatile__ ("LDMXCSR %0" : : "m" (mxcsr));
#else
#error Unknown machine
#endif
}
281
#endif /* USE_RAISE_FPSW_FLAGS */
282
283
284
#if defined(USE_GET_FPSW_INLINE)
285
/* Return the current floating-point status word.
   WIN32: the x87 status word (fstsw), widened to unsigned int.
   Linux: the full contents of MXCSR as stored by stmxcsr.
   The Linux branch is selected by either `linux' or `__linux__', and
   the asm statement uses the __asm__ spelling, so the function also
   builds in strict ISO modes where `linux' and `asm' are unavailable. */
static inline unsigned int get_fpsw_inline(void)
{
#if defined(WIN32)
  unsigned short sw;
  __asm fstsw sw;
  return (unsigned int)sw;
#elif defined(linux) || defined(__linux__)
  unsigned int mxcsr;
  __asm__ __volatile__ ("STMXCSR %0" : "=m" (mxcsr));
  return mxcsr;
#else
#error Unknown machine
#endif
}
300
#endif /* USE_GET_FPSW_INLINE */
301
302
#if defined(USE_SET_FPSW_INLINE)
303
/* Set the floating-point status word to SW.
   WIN32: the low 16 bits of SW replace the status word of the x87
   environment (fnstenv/fldenv).
   Linux: SW is loaded into MXCSR in its entirety (ldmxcsr).
   The Linux branch is selected by either `linux' or `__linux__', and
   the asm statement uses the __asm__ spelling, so the function also
   builds in strict ISO modes where `linux' and `asm' are unavailable. */
static inline void set_fpsw_inline(unsigned int sw)
{
#if defined(WIN32)
  fpenv_type fenv;
  /* Get the current floating-point environment */
  __asm fnstenv fenv;
  /* Set the status word to sw */
  fenv.status_word = (unsigned short)sw;
  /* Put the floating-point environment back */
  __asm fldenv fenv;
#elif defined(linux) || defined(__linux__)
  /* Load MXCSR directly from the caller's value */
  __asm__ __volatile__ ("LDMXCSR %0" : : "m" (sw));
#else
#error Unknown machine
#endif
}
321
#endif /* USE_SET_FPSW_INLINE */
322
323
#if defined(USE_CLEAR_FPSW_INLINE)
324
/* Clear all exceptions from the floating-point status word */
325
static inline void clear_fpsw_inline(void)
326
{
327
#if defined(WIN32)
328
  fpenv_type fenv;
329
  /* Get the current floating-point environment */
330
  __asm fnstenv fenv;
331
  /* Set the status word to 0 */
332
  fenv.status_word = 0;
333
  /* Put the floating-point environment back */
334
  __asm fldenv fenv;
335
#elif defined(linux)
336
  unsigned int cw;
337
  /* Get the current floating-point control/status word */
338
  asm volatile ("STMXCSR %0" : "=m" (cw));
339
  cw &= ~(AMD_F_INEXACT | AMD_F_UNDERFLOW | AMD_F_OVERFLOW |
340
          AMD_F_DIVBYZERO | AMD_F_INVALID);
341
  asm volatile ("LDMXCSR %0" : : "m" (cw));
342
#else
343
#error Unknown machine
344
#endif
345
}
346
#endif /* USE_CLEAR_FPSW_INLINE */
347
348
349
#if defined(USE_VAL_WITH_FLAGS)
350
/* Raise the floating-point status flags in FLAGS (a bitwise OR of
   AMD_F_* values) and then hand VAL back unchanged,
   e.g.  val_with_flags(x, AMD_F_INEXACT);
 */
static inline double val_with_flags(double val, int flags)
{
  const double result = val;

  raise_fpsw_flags(flags);
  return result;
}
358
#endif /* USE_VAL_WITH_FLAGS */
359
360
#if defined(USE_VALF_WITH_FLAGS)
361
/* Raise the floating-point status flags in FLAGS (a bitwise OR of
   AMD_F_* values) and then hand VAL back unchanged,
   e.g.  valf_with_flags(x, AMD_F_INEXACT);
 */
static inline float valf_with_flags(float val, int flags)
{
  const float result = val;

  raise_fpsw_flags(flags);
  return result;
}
369
#endif /* USE_VALF_WITH_FLAGS */
370
371
372
#if defined(USE_ZERO_WITH_FLAGS)
373
/* Raise the floating-point status flags in FLAGS (a bitwise OR of
   AMD_F_* values) and return a double positive zero,
   e.g.  zero_with_flags(AMD_F_INEXACT | AMD_F_INVALID);
 */
static inline double zero_with_flags(int flags)
{
  const double pos_zero = 0.0;

  raise_fpsw_flags(flags);
  return pos_zero;
}
381
#endif /* USE_ZERO_WITH_FLAGS */
382
383
384
#if defined(USE_ZEROF_WITH_FLAGS)
385
/* Raise the floating-point status flags in FLAGS (a bitwise OR of
   AMD_F_* values) and return a float positive zero,
   e.g.  zerof_with_flags(AMD_F_INEXACT | AMD_F_INVALID);
 */
static inline float zerof_with_flags(int flags)
{
  const float pos_zero = 0.0F;

  raise_fpsw_flags(flags);
  return pos_zero;
}
393
#endif /* USE_ZEROF_WITH_FLAGS */
394
395
396
#if defined(USE_NAN_WITH_FLAGS)
397
/* Raise the floating-point status flags in FLAGS (a bitwise OR of
   AMD_F_* values) and return a double quiet NaN with the sign bit
   clear, e.g.  nan_with_flags(AMD_F_INVALID);
*/
static inline double nan_with_flags(int flags)
{
  double qnan;

  raise_fpsw_flags(flags);
  /* Bit pattern of the NaN that is returned. */
  PUT_BITS_DP64(0x7ff8000000000000, qnan);
  return qnan;
}
407
#endif /* USE_NAN_WITH_FLAGS */
408
409
#if defined(USE_NANF_WITH_FLAGS)
410
/* Raise the floating-point status flags in FLAGS (a bitwise OR of
   AMD_F_* values) and return a float quiet NaN with the sign bit
   clear, e.g.  nanf_with_flags(AMD_F_INVALID);
*/
static inline float nanf_with_flags(int flags)
{
  float qnan;

  raise_fpsw_flags(flags);
  /* Bit pattern of the NaN that is returned. */
  PUT_BITS_SP32(0x7fc00000, qnan);
  return qnan;
}
420
#endif /* USE_NANF_WITH_FLAGS */
421
422
423
#ifdef USE_INFINITY_WITH_FLAGS
424
/* Returns a positive double infinity after raising the given flags,
425
   e.g.  infinity_with_flags(AMD_F_OVERFLOW);
426
*/
427
static inline double infinity_with_flags(int flags)
428
{
429
  double z;
430
  raise_fpsw_flags(flags);
431
  PUT_BITS_DP64((unsigned long)(BIASEDEMAX_DP64 + 1) << EXPSHIFTBITS_DP64, z);
432
  return z;
433
}
434
#endif /* USE_INFINITY_WITH_FLAGS */
435
436
#ifdef USE_INFINITYF_WITH_FLAGS
437
/* Returns a positive float infinity after raising the given flags,
438
   e.g.  infinityf_with_flags(AMD_F_OVERFLOW);
439
*/
440
static inline float infinityf_with_flags(int flags)
441
{
442
  float z;
443
  raise_fpsw_flags(flags);
444
  PUT_BITS_SP32((BIASEDEMAX_SP32 + 1) << EXPSHIFTBITS_SP32, z);
445
  return z;
446
}
447
#endif /* USE_INFINITYF_WITH_FLAGS */
448
449
450
#if defined(USE_SPLITEXP)
451
/* Produce *m, *z1 and *z2 such that base**x = 2**(*m) * (*z1 + *z2).

   Parameters:
     x                        the exponent argument
     logbase                  multiplier applied to the reduced argument
                              before the polynomial (ln(base))
     thirtytwo_by_logbaseof2  32 / log_base(2)
     logbaseof2_by_32_lead    leading part of log_base(2) / 32
     logbaseof2_by_32_trail   trailing part of log_base(2) / 32
     m, z1, z2                outputs

   Callers are expected to deal separately with small arguments,
   abs(x) < 1/(16*ln(base)), and with extreme arguments,
   abs(x) > large/(ln(base)) where large is the largest representable
   floating point number.  The original notes name exp_amd, exp2_amd,
   exp10_amd, cosh_amd and sinh_amd as the users of this routine. */
static inline void splitexp(double x, double logbase,
                            double thirtytwo_by_logbaseof2,
                            double logbaseof2_by_32_lead,
                            double logbaseof2_by_32_trail,
                            int *m, double *z1, double *z2)
{
  double scaled, rem_lead, rem_trail, reduced, poly, tab_lead, tab_trail;
  int n, j;

  /* Precomputed pow(2.0, j/32.0) for j = 0 .. 31, held as a leading
     part (first 25 bits of precision) and a trailing part (a further
     53 bits of precision). */

  static const double two_to_jby32_lead_table[32] = {
    1.00000000000000000000e+00,   /* 0x3ff0000000000000 */
    1.02189713716506958008e+00,   /* 0x3ff059b0d0000000 */
    1.04427373409271240234e+00,   /* 0x3ff0b55860000000 */
    1.06714040040969848633e+00,   /* 0x3ff11301d0000000 */
    1.09050768613815307617e+00,   /* 0x3ff172b830000000 */
    1.11438673734664916992e+00,   /* 0x3ff1d48730000000 */
    1.13878858089447021484e+00,   /* 0x3ff2387a60000000 */
    1.16372483968734741211e+00,   /* 0x3ff29e9df0000000 */
    1.18920707702636718750e+00,   /* 0x3ff306fe00000000 */
    1.21524733304977416992e+00,   /* 0x3ff371a730000000 */
    1.24185776710510253906e+00,   /* 0x3ff3dea640000000 */
    1.26905095577239990234e+00,   /* 0x3ff44e0860000000 */
    1.29683953523635864258e+00,   /* 0x3ff4bfdad0000000 */
    1.32523661851882934570e+00,   /* 0x3ff5342b50000000 */
    1.35425549745559692383e+00,   /* 0x3ff5ab07d0000000 */
    1.38390988111495971680e+00,   /* 0x3ff6247eb0000000 */
    1.41421353816986083984e+00,   /* 0x3ff6a09e60000000 */
    1.44518077373504638672e+00,   /* 0x3ff71f75e0000000 */
    1.47682613134384155273e+00,   /* 0x3ff7a11470000000 */
    1.50916439294815063477e+00,   /* 0x3ff8258990000000 */
    1.54221081733703613281e+00,   /* 0x3ff8ace540000000 */
    1.57598084211349487305e+00,   /* 0x3ff93737b0000000 */
    1.61049032211303710938e+00,   /* 0x3ff9c49180000000 */
    1.64575546979904174805e+00,   /* 0x3ffa5503b0000000 */
    1.68179279565811157227e+00,   /* 0x3ffae89f90000000 */
    1.71861928701400756836e+00,   /* 0x3ffb7f76f0000000 */
    1.75625211000442504883e+00,   /* 0x3ffc199bd0000000 */
    1.79470902681350708008e+00,   /* 0x3ffcb720d0000000 */
    1.83400803804397583008e+00,   /* 0x3ffd5818d0000000 */
    1.87416762113571166992e+00,   /* 0x3ffdfc9730000000 */
    1.91520655155181884766e+00,   /* 0x3ffea4afa0000000 */
    1.95714408159255981445e+00};  /* 0x3fff507650000000 */

  static const double two_to_jby32_trail_table[32] = {
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
    1.14890470981563546737e-08,   /* 0x3e48ac2ba1d73e2a */
    4.83347014379782142328e-08,   /* 0x3e69f3121ec53172 */
    2.67125131841396124714e-10,   /* 0x3df25b50a4ebbf1b */
    4.65271045830351350190e-08,   /* 0x3e68faa2f5b9bef9 */
    5.24924336638693782574e-09,   /* 0x3e368b9aa7805b80 */
    5.38622214388600821910e-08,   /* 0x3e6ceac470cd83f6 */
    1.90902301017041969782e-08,   /* 0x3e547f7b84b09745 */
    3.79763538792174980894e-08,   /* 0x3e64636e2a5bd1ab */
    2.69306947081946450986e-08,   /* 0x3e5ceaa72a9c5154 */
    4.49683815095311756138e-08,   /* 0x3e682468446b6824 */
    1.41933332021066904914e-09,   /* 0x3e18624b40c4dbd0 */
    1.94146510233556266402e-08,   /* 0x3e54d8a89c750e5e */
    2.46409119489264118569e-08,   /* 0x3e5a753e077c2a0f */
    4.94812958044698886494e-08,   /* 0x3e6a90a852b19260 */
    8.48872238075784476136e-10,   /* 0x3e0d2ac258f87d03 */
    2.42032342089579394887e-08,   /* 0x3e59fcef32422cbf */
    3.32420002333182569170e-08,   /* 0x3e61d8bee7ba46e2 */
    1.45956577586525322754e-08,   /* 0x3e4f580c36bea881 */
    3.46452721050003920866e-08,   /* 0x3e62999c25159f11 */
    8.07090469079979051284e-09,   /* 0x3e415506dadd3e2a */
    2.99439161340839520436e-09,   /* 0x3e29b8bc9e8a0388 */
    9.83621719880452147153e-09,   /* 0x3e451f8480e3e236 */
    8.35492309647188080486e-09,   /* 0x3e41f12ae45a1224 */
    3.48493175137966283582e-08,   /* 0x3e62b5a75abd0e6a */
    1.11084703472699692902e-08,   /* 0x3e47daf237553d84 */
    5.03688744342840346564e-08,   /* 0x3e6b0aa538444196 */
    4.81896001063495806249e-08,   /* 0x3e69df20d22a0798 */
    4.83653666334089557746e-08,   /* 0x3e69f7490e4bb40b */
    1.29745882314081237628e-08,   /* 0x3e4bdcdaf5cb4656 */
    9.84532844621636118964e-09,   /* 0x3e452486cc2c7b9d */
    4.25828404545651943883e-08};  /* 0x3e66dc8a80ce9f09 */

  /* Step 1: argument reduction.

     Find the integer n with
       x = n * logbaseof2/32 + remainder, |remainder| <= logbaseof2/64,
     where n = round-to-nearest-integer(x*32/logbaseof2).  Getting n is
     easy; forming x - n*logbaseof2/32 needs care.  logbaseof2/32 is
     supplied in two pieces so that (1) it is known to extra precision,
     (2) n times the leading piece is a model number and therefore
     exact, and (3) subtracting that product from x is again a model
     number and therefore exact. */

  scaled = x * thirtytwo_by_logbaseof2;

  /* Round to nearest by biasing half a unit away from zero before the
     truncating conversion (noted in the original as the faster form
     on Hammer). */
  n = (scaled > 0) ? (int)(scaled + 0.5) : (int)(scaled - 0.5);

  rem_lead = x - n * logbaseof2_by_32_lead;
  rem_trail =  - n * logbaseof2_by_32_trail;

  /* j = n mod 32 with a non-negative result (5 -> 5, -5 -> 27);
     the mask stands in for  j = n % 32; if (j < 0) j += 32;  */
  j = n & 0x0000001f;

  /* n - j is a multiple of 32, so this division is exact. */
  *m = (n - j) / 32;

  tab_lead = two_to_jby32_lead_table[j];
  tab_trail = two_to_jby32_trail_table[j];

  /* Step 2: core approximation.  exp(rem_lead + rem_trail) - 1 is
     approximated by a polynomial in the reduced argument. */

  rem_lead *= logbase;
  rem_trail *= logbase;

  reduced = rem_lead + rem_trail;
  poly = rem_lead + (rem_trail +
            reduced*reduced*( 5.00000000000000008883e-01 +
                      reduced*( 1.66666666665260878863e-01 +
                      reduced*( 4.16666666662260795726e-02 +
                      reduced*( 8.33336798434219616221e-03 +
                      reduced*( 1.38889490863777199667e-03 ))))));

  /* Step 3: reconstruction, giving exp(x) = 2**m * (z1 + z2).
     The evaluation order of these expressions must not be changed. */

  *z1 = tab_lead;
  *z2 = tab_trail + ((tab_lead + tab_trail) * poly);
}
599
#endif /* USE_SPLITEXP */
600
601
602
#if defined(USE_SPLITEXPF)
603
/* Single-precision routine producing *m, *z1 and *z2 such that
   base**x = 2**(*m) * (*z1 + *z2).

   Parameters:
     x                        the exponent argument
     logbase                  multiplier applied to the reduced argument
                              before the polynomial (ln(base))
     thirtytwo_by_logbaseof2  32 / log_base(2)
     logbaseof2_by_32_lead    leading part of log_base(2) / 32
     logbaseof2_by_32_trail   trailing part of log_base(2) / 32
     m, z1, z2                outputs

   Callers are expected to deal separately with small arguments,
   abs(x) < 1/(16*ln(base)), and with extreme arguments,
   abs(x) > large/(ln(base)) where large is the largest representable
   floating point number.  The original notes name exp_amd, exp2_amd,
   exp10_amd, cosh_amd and sinh_amd as the users of this routine. */
static inline void splitexpf(float x, float logbase,
                             float thirtytwo_by_logbaseof2,
                             float logbaseof2_by_32_lead,
                             float logbaseof2_by_32_trail,
                             int *m, float *z1, float *z2)
{
  float scaled, rem_lead, rem_trail, reduced, poly, tab_lead, tab_trail;
  int n, j;

  /* Precomputed pow(2.0, j/32.0) for j = 0 .. 31, held as a leading
     part (first 10 bits of precision) and a trailing part (a further
     24 bits of precision). */

  static const float two_to_jby32_lead_table[32] = {
    1.0000000000E+00F,  /* 0x3F800000 */
    1.0214843750E+00F,  /* 0x3F82C000 */
    1.0429687500E+00F,  /* 0x3F858000 */
    1.0664062500E+00F,  /* 0x3F888000 */
    1.0898437500E+00F,  /* 0x3F8B8000 */
    1.1132812500E+00F,  /* 0x3F8E8000 */
    1.1386718750E+00F,  /* 0x3F91C000 */
    1.1621093750E+00F,  /* 0x3F94C000 */
    1.1875000000E+00F,  /* 0x3F980000 */
    1.2148437500E+00F,  /* 0x3F9B8000 */
    1.2402343750E+00F,  /* 0x3F9EC000 */
    1.2675781250E+00F,  /* 0x3FA24000 */
    1.2949218750E+00F,  /* 0x3FA5C000 */
    1.3242187500E+00F,  /* 0x3FA98000 */
    1.3535156250E+00F,  /* 0x3FAD4000 */
    1.3828125000E+00F,  /* 0x3FB10000 */
    1.4140625000E+00F,  /* 0x3FB50000 */
    1.4433593750E+00F,  /* 0x3FB8C000 */
    1.4765625000E+00F,  /* 0x3FBD0000 */
    1.5078125000E+00F,  /* 0x3FC10000 */
    1.5410156250E+00F,  /* 0x3FC54000 */
    1.5742187500E+00F,  /* 0x3FC98000 */
    1.6093750000E+00F,  /* 0x3FCE0000 */
    1.6445312500E+00F,  /* 0x3FD28000 */
    1.6816406250E+00F,  /* 0x3FD74000 */
    1.7167968750E+00F,  /* 0x3FDBC000 */
    1.7558593750E+00F,  /* 0x3FE0C000 */
    1.7929687500E+00F,  /* 0x3FE58000 */
    1.8339843750E+00F,  /* 0x3FEAC000 */
    1.8730468750E+00F,  /* 0x3FEFC000 */
    1.9140625000E+00F,  /* 0x3FF50000 */
    1.9570312500E+00F}; /* 0x3FFA8000 */

  static const float two_to_jby32_trail_table[32] = {
    0.0000000000E+00F,  /* 0x00000000 */
    4.1277357377E-04F,  /* 0x39D86988 */
    1.3050324051E-03F,  /* 0x3AAB0D9F */
    7.3415064253E-04F,  /* 0x3A407404 */
    6.6398258787E-04F,  /* 0x3A2E0F1E */
    1.1054925853E-03F,  /* 0x3A90E62D */
    1.1675967835E-04F,  /* 0x38F4DCE0 */
    1.6154836630E-03F,  /* 0x3AD3BEA3 */
    1.7071149778E-03F,  /* 0x3ADFC146 */
    4.0360994171E-04F,  /* 0x39D39B9C */
    1.6234370414E-03F,  /* 0x3AD4C982 */
    1.4728321694E-03F,  /* 0x3AC10C0C */
    1.9176795613E-03F,  /* 0x3AFB5AA6 */
    1.0178930825E-03F,  /* 0x3A856AD3 */
    7.3992193211E-04F,  /* 0x3A41F752 */
    1.0973819299E-03F,  /* 0x3A8FD607 */
    1.5106226783E-04F,  /* 0x391E6678 */
    1.8214319134E-03F,  /* 0x3AEEBD1D */
    2.6364589576E-04F,  /* 0x398A39F4 */
    1.3519275235E-03F,  /* 0x3AB13329 */
    1.1952003697E-03F,  /* 0x3A9CA845 */
    1.7620950239E-03F,  /* 0x3AE6F619 */
    1.1153318919E-03F,  /* 0x3A923054 */
    1.2242280645E-03F,  /* 0x3AA07647 */
    1.5220546629E-04F,  /* 0x391F9958 */
    1.8224230735E-03F,  /* 0x3AEEDE5F */
    3.9278529584E-04F,  /* 0x39CDEEC0 */
    1.7403248930E-03F,  /* 0x3AE41B9D */
    2.3711356334E-05F,  /* 0x37C6E7C0 */
    1.1207590578E-03F,  /* 0x3A92E66F */
    1.1440613307E-03F,  /* 0x3A95F454 */
    1.1287408415E-04F}; /* 0x38ECB6D0 */

  /* Step 1: argument reduction.

     Find the integer n with
       x = n * logbaseof2/32 + remainder, |remainder| <= logbaseof2/64,
     where n = round-to-nearest-integer(x*32/logbaseof2).  Getting n is
     easy; forming x - n*logbaseof2/32 needs care.  logbaseof2/32 is
     supplied in two pieces so that (1) it is known to extra precision,
     (2) n times the leading piece is a model number and therefore
     exact, and (3) subtracting that product from x is again a model
     number and therefore exact. */

  scaled = x * thirtytwo_by_logbaseof2;

  /* Round to nearest by biasing half a unit away from zero before the
     truncating conversion (noted in the original as the faster form
     on Hammer). */
  n = (scaled > 0) ? (int)(scaled + 0.5F) : (int)(scaled - 0.5F);

  rem_lead = x - n * logbaseof2_by_32_lead;
  rem_trail =  - n * logbaseof2_by_32_trail;

  /* j = n mod 32 with a non-negative result (5 -> 5, -5 -> 27);
     the mask stands in for  j = n % 32; if (j < 0) j += 32;  */
  j = n & 0x0000001f;

  /* n - j is a multiple of 32, so this division is exact. */
  *m = (n - j) / 32;

  tab_lead = two_to_jby32_lead_table[j];
  tab_trail = two_to_jby32_trail_table[j];

  /* Step 2: core approximation.  exp(rem_lead + rem_trail) - 1 is
     approximated by a polynomial in the reduced argument. */

  rem_lead *= logbase;
  rem_trail *= logbase;

  reduced = rem_lead + rem_trail;
  poly = rem_lead + (rem_trail +
            reduced*reduced*( 5.00000000000000008883e-01F +
                      reduced*( 1.66666666665260878863e-01F )));

  /* Step 3: reconstruction, giving exp(x) = 2**m * (z1 + z2).
     The evaluation order of these expressions must not be changed. */

  *z1 = tab_lead;
  *z2 = tab_trail + ((tab_lead + tab_trail) * poly);
}
748
#endif /* SPLITEXPF */
749
750
751
#if defined(USE_SCALEUPDOUBLE1024)
752
/* Scales up a double (normal or denormal) whose bit pattern is given
753
   as ux by 2**1024. There are no checks that the input number is
754
   scalable by that amount. */
755
static inline void scaleUpDouble1024(unsigned long ux, unsigned long *ur)
756
{
757
  unsigned long uy;
758
  double y;
759
760
  if ((ux & EXPBITS_DP64) == 0)
761
    {
762
      /* ux is denormalised */
763
      PUT_BITS_DP64(ux | 0x4010000000000000, y);
764
      if (ux & SIGNBIT_DP64)
765
        y += 4.0;
766
      else
767
        y -= 4.0;
768
      GET_BITS_DP64(y, uy);
769
    }
770
  else
771
    /* ux is normal */
772
    uy = ux + 0x4000000000000000;
773
774
  *ur = uy;
775
  return;
776
}
777
778
#endif /* SCALEUPDOUBLE1024 */
779
780
781
#if defined(USE_SCALEDOWNDOUBLE)
782
/* Scales down a double whose bit pattern is given as ux by 2**k.
783
   There are no checks that the input number is scalable by that amount. */
784
static inline void scaleDownDouble(unsigned long ux, int k,
785
                                   unsigned long *ur)
786
{
787
  unsigned long uy, uk, ax, xsign;
788
  int n, shift;
789
  xsign = ux & SIGNBIT_DP64;
790
  ax = ux & ~SIGNBIT_DP64;
791
  n = ((ax & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - k;
792
  if (n > 0)
793
    {
794
      uk = (unsigned long)n << EXPSHIFTBITS_DP64;
795
      uy = (ax & ~EXPBITS_DP64) | uk;
796
    }
797
  else
798
    {
799
      uy = (ax & ~EXPBITS_DP64) | 0x0010000000000000;
800
      shift = (1 - n);
801
      if (shift > MANTLENGTH_DP64 + 1)
802
        /* Sigh. Shifting works mod 64 so be careful not to shift too much */
803
        uy = 0;
804
      else
805
        {
806
          /* Make sure we round the result */
807
          uy >>= shift - 1;
808
          uy = (uy >> 1) + (uy & 1);
809
        }
810
    }
811
  *ur = uy | xsign;
812
}
813
814
#endif /* SCALEDOWNDOUBLE */
815
816
817
#if defined(USE_SCALEUPFLOAT128)
818
/* Scales up a float (normal or denormal) whose bit pattern is given
819
   as ux by 2**128. There are no checks that the input number is
820
   scalable by that amount. */
821
static inline void scaleUpFloat128(unsigned int ux, unsigned int *ur)
822
{
823
  unsigned int uy;
824
  float y;
825
826
  if ((ux & EXPBITS_SP32) == 0)
827
    {
828
      /* ux is denormalised */
829
      PUT_BITS_SP32(ux | 0x40800000, y);
830
      /* Compensate for the implicit bit just added */
831
      if (ux & SIGNBIT_SP32)
832
        y += 4.0F;
833
      else
834
        y -= 4.0F;
835
      GET_BITS_SP32(y, uy);
836
    }
837
  else
838
    /* ux is normal */
839
    uy = ux + 0x40000000;
840
  *ur = uy;
841
}
842
#endif /* SCALEUPFLOAT128 */
843
844
845
#if defined(USE_SCALEDOWNFLOAT)
846
/* Scales down a float whose bit pattern is given as ux by 2**k.
847
   There are no checks that the input number is scalable by that amount. */
848
static inline void scaleDownFloat(unsigned int ux, int k,
849
                                  unsigned int *ur)
850
{
851
  unsigned int uy, uk, ax, xsign;
852
  int n, shift;
853
854
  xsign = ux & SIGNBIT_SP32;
855
  ax = ux & ~SIGNBIT_SP32;
856
  n = ((ax & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - k;
857
  if (n > 0)
858
    {
859
      uk = (unsigned int)n << EXPSHIFTBITS_SP32;
860
      uy = (ax & ~EXPBITS_SP32) | uk;
861
    }
862
  else
863
    {
864
      uy = (ax & ~EXPBITS_SP32) | 0x00800000;
865
      shift = (1 - n);
866
      if (shift > MANTLENGTH_SP32 + 1)
867
        /* Sigh. Shifting works mod 32 so be careful not to shift too much */
868
        uy = 0;
869
      else
870
        {
871
          /* Make sure we round the result */
872
          uy >>= shift - 1;
873
          uy = (uy >> 1) + (uy & 1);
874
        }
875
    }
876
  *ur = uy | xsign;
877
}
878
#endif /* SCALEDOWNFLOAT */
879
880
881
#if defined(USE_SQRT_AMD_INLINE)
882
static inline double sqrt_amd_inline(double x)
883
{
884
  /*
885
     Computes the square root of x.
886
887
     The calculation is carried out in three steps.
888
889
     Step 1. Reduction.
890
     The input argument is scaled to the interval [1, 4) by
891
     computing
892
               x = 2^e * y, where y in [1,4).
893
     Furthermore y is decomposed as y = c + t where
894
               c = 1 + j/32, j = 0,1,..,96; and |t| <= 1/64.
895
896
     Step 2. Approximation.
897
     An approximation q = sqrt(1 + (t/c)) - 1  is obtained
898
     from a basic series expansion using precomputed values
899
     stored in rt_jby32_lead_table_dbl and rt_jby32_trail_table_dbl.
900
901
     Step 3. Reconstruction.
902
     The value of sqrt(x) is reconstructed via
903
       sqrt(x) = 2^(e/2) * sqrt(y)
904
               = 2^(e/2) * sqrt(c) * sqrt(y/c)
905
               = 2^(e/2) * sqrt(c) * sqrt(1 + t/c)
906
               = 2^(e/2) * [ sqrt(c) + sqrt(c)*q ]
907
    */
908
909
  unsigned long ux, ax, u;
910
  double r1, r2, c, y, p, q, r, twop, z, rtc, rtc_lead, rtc_trail;
911
  int e, denorm = 0, index;
912
913
/* Arrays rt_jby32_lead_table_dbl and rt_jby32_trail_table_dbl contain
914
   leading and trailing parts respectively of precomputed
915
   values of sqrt(j/32), for j = 32, 33, ..., 128.
916
   rt_jby32_lead_table_dbl contains the first 21 bits of precision,
917
   and rt_jby32_trail_table_dbl contains a further 53 bits precision. */
918
919
  static const double rt_jby32_lead_table_dbl[97] = {
920
    1.00000000000000000000e+00,   /* 0x3ff0000000000000 */
921
    1.01550388336181640625e+00,   /* 0x3ff03f8100000000 */
922
    1.03077602386474609375e+00,   /* 0x3ff07e0f00000000 */
923
    1.04582500457763671875e+00,   /* 0x3ff0bbb300000000 */
924
    1.06065940856933593750e+00,   /* 0x3ff0f87600000000 */
925
    1.07528972625732421875e+00,   /* 0x3ff1346300000000 */
926
    1.08972454071044921875e+00,   /* 0x3ff16f8300000000 */
927
    1.10396957397460937500e+00,   /* 0x3ff1a9dc00000000 */
928
    1.11803340911865234375e+00,   /* 0x3ff1e37700000000 */
929
    1.13192272186279296875e+00,   /* 0x3ff21c5b00000000 */
930
    1.14564323425292968750e+00,   /* 0x3ff2548e00000000 */
931
    1.15920162200927734375e+00,   /* 0x3ff28c1700000000 */
932
    1.17260360717773437500e+00,   /* 0x3ff2c2fc00000000 */
933
    1.18585395812988281250e+00,   /* 0x3ff2f94200000000 */
934
    1.19895744323730468750e+00,   /* 0x3ff32eee00000000 */
935
    1.21191978454589843750e+00,   /* 0x3ff3640600000000 */
936
    1.22474479675292968750e+00,   /* 0x3ff3988e00000000 */
937
    1.23743629455566406250e+00,   /* 0x3ff3cc8a00000000 */
938
    1.25000000000000000000e+00,   /* 0x3ff4000000000000 */
939
    1.26243782043457031250e+00,   /* 0x3ff432f200000000 */
940
    1.27475452423095703125e+00,   /* 0x3ff4656500000000 */
941
    1.28695297241210937500e+00,   /* 0x3ff4975c00000000 */
942
    1.29903793334960937500e+00,   /* 0x3ff4c8dc00000000 */
943
    1.31101036071777343750e+00,   /* 0x3ff4f9e600000000 */
944
    1.32287502288818359375e+00,   /* 0x3ff52a7f00000000 */
945
    1.33463478088378906250e+00,   /* 0x3ff55aaa00000000 */
946
    1.34629058837890625000e+00,   /* 0x3ff58a6800000000 */
947
    1.35784721374511718750e+00,   /* 0x3ff5b9be00000000 */
948
    1.36930561065673828125e+00,   /* 0x3ff5e8ad00000000 */
949
    1.38066959381103515625e+00,   /* 0x3ff6173900000000 */
950
    1.39194107055664062500e+00,   /* 0x3ff6456400000000 */
951
    1.40312099456787109375e+00,   /* 0x3ff6732f00000000 */
952
    1.41421318054199218750e+00,   /* 0x3ff6a09e00000000 */
953
    1.42521858215332031250e+00,   /* 0x3ff6cdb200000000 */
954
    1.43614006042480468750e+00,   /* 0x3ff6fa6e00000000 */
955
    1.44697952270507812500e+00,   /* 0x3ff726d400000000 */
956
    1.45773792266845703125e+00,   /* 0x3ff752e500000000 */
957
    1.46841716766357421875e+00,   /* 0x3ff77ea300000000 */
958
    1.47901916503906250000e+00,   /* 0x3ff7aa1000000000 */
959
    1.48954677581787109375e+00,   /* 0x3ff7d52f00000000 */
960
    1.50000000000000000000e+00,   /* 0x3ff8000000000000 */
961
    1.51038074493408203125e+00,   /* 0x3ff82a8500000000 */
962
    1.52068996429443359375e+00,   /* 0x3ff854bf00000000 */
963
    1.53093051910400390625e+00,   /* 0x3ff87eb100000000 */
964
    1.54110336303710937500e+00,   /* 0x3ff8a85c00000000 */
965
    1.55120849609375000000e+00,   /* 0x3ff8d1c000000000 */
966
    1.56124877929687500000e+00,   /* 0x3ff8fae000000000 */
967
    1.57122516632080078125e+00,   /* 0x3ff923bd00000000 */
968
    1.58113861083984375000e+00,   /* 0x3ff94c5800000000 */
969
    1.59099006652832031250e+00,   /* 0x3ff974b200000000 */
970
    1.60078048706054687500e+00,   /* 0x3ff99ccc00000000 */
971
    1.61051177978515625000e+00,   /* 0x3ff9c4a800000000 */
972
    1.62018489837646484375e+00,   /* 0x3ff9ec4700000000 */
973
    1.62979984283447265625e+00,   /* 0x3ffa13a900000000 */
974
    1.63935947418212890625e+00,   /* 0x3ffa3ad100000000 */
975
    1.64886283874511718750e+00,   /* 0x3ffa61be00000000 */
976
    1.65831184387207031250e+00,   /* 0x3ffa887200000000 */
977
    1.66770744323730468750e+00,   /* 0x3ffaaeee00000000 */
978
    1.67705059051513671875e+00,   /* 0x3ffad53300000000 */
979
    1.68634128570556640625e+00,   /* 0x3ffafb4100000000 */
980
    1.69558238983154296875e+00,   /* 0x3ffb211b00000000 */
981
    1.70477199554443359375e+00,   /* 0x3ffb46bf00000000 */
982
    1.71391296386718750000e+00,   /* 0x3ffb6c3000000000 */
983
    1.72300529479980468750e+00,   /* 0x3ffb916e00000000 */
984
    1.73204994201660156250e+00,   /* 0x3ffbb67a00000000 */
985
    1.74104785919189453125e+00,   /* 0x3ffbdb5500000000 */
986
    1.75000000000000000000e+00,   /* 0x3ffc000000000000 */
987
    1.75890541076660156250e+00,   /* 0x3ffc247a00000000 */
988
    1.76776695251464843750e+00,   /* 0x3ffc48c600000000 */
989
    1.77658367156982421875e+00,   /* 0x3ffc6ce300000000 */
990
    1.78535652160644531250e+00,   /* 0x3ffc90d200000000 */
991
    1.79408740997314453125e+00,   /* 0x3ffcb49500000000 */
992
    1.80277538299560546875e+00,   /* 0x3ffcd82b00000000 */
993
    1.81142139434814453125e+00,   /* 0x3ffcfb9500000000 */
994
    1.82002735137939453125e+00,   /* 0x3ffd1ed500000000 */
995
    1.82859230041503906250e+00,   /* 0x3ffd41ea00000000 */
996
    1.83711719512939453125e+00,   /* 0x3ffd64d500000000 */
997
    1.84560203552246093750e+00,   /* 0x3ffd879600000000 */
998
    1.85404872894287109375e+00,   /* 0x3ffdaa2f00000000 */
999
    1.86245727539062500000e+00,   /* 0x3ffdcca000000000 */
1000
    1.87082862854003906250e+00,   /* 0x3ffdeeea00000000 */
1001
    1.87916183471679687500e+00,   /* 0x3ffe110c00000000 */
1002
    1.88745784759521484375e+00,   /* 0x3ffe330700000000 */
1003
    1.89571857452392578125e+00,   /* 0x3ffe54dd00000000 */
1004
    1.90394306182861328125e+00,   /* 0x3ffe768d00000000 */
1005
    1.91213226318359375000e+00,   /* 0x3ffe981800000000 */
1006
    1.92028617858886718750e+00,   /* 0x3ffeb97e00000000 */
1007
    1.92840576171875000000e+00,   /* 0x3ffedac000000000 */
1008
    1.93649101257324218750e+00,   /* 0x3ffefbde00000000 */
1009
    1.94454288482666015625e+00,   /* 0x3fff1cd900000000 */
1010
    1.95256233215332031250e+00,   /* 0x3fff3db200000000 */
1011
    1.96054744720458984375e+00,   /* 0x3fff5e6700000000 */
1012
    1.96850109100341796875e+00,   /* 0x3fff7efb00000000 */
1013
    1.97642326354980468750e+00,   /* 0x3fff9f6e00000000 */
1014
    1.98431301116943359375e+00,   /* 0x3fffbfbf00000000 */
1015
    1.99217128753662109375e+00,   /* 0x3fffdfef00000000 */
1016
    2.00000000000000000000e+00};  /* 0x4000000000000000 */
1017
1018
  static const double rt_jby32_trail_table_dbl[97] = {
1019
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
1020
    9.17217678638807524014e-07,   /* 0x3eaec6d70177881c */
1021
    3.82539669043705364790e-07,   /* 0x3e99abfb41bd6b24 */
1022
    2.85899577162227138140e-08,   /* 0x3e5eb2bf6bab55a2 */
1023
    7.63210485349101216659e-07,   /* 0x3ea99bed9b2d8d0c */
1024
    9.32123004127716212874e-07,   /* 0x3eaf46e029c1b296 */
1025
    1.95174719169309219157e-07,   /* 0x3e8a3226fc42f30c */
1026
    5.34316371481845492427e-07,   /* 0x3ea1edbe20701d73 */
1027
    5.79631242504454563052e-07,   /* 0x3ea372fe94f82be7 */
1028
    4.20404384109571705948e-07,   /* 0x3e9c367e08e7bb06 */
1029
    6.89486030314147010716e-07,   /* 0x3ea722a3d0a66608 */
1030
    6.89927685625314560328e-07,   /* 0x3ea7266f067ca1d6 */
1031
    3.32778123013641425828e-07,   /* 0x3e965515a9b34850 */
1032
    1.64433259436999584387e-07,   /* 0x3e8611e23ef6c1bd */
1033
    4.37590875197899335723e-07,   /* 0x3e9d5dc1059ed8e7 */
1034
    1.79808183816018617413e-07,   /* 0x3e88222982d0e4f4 */
1035
    7.46386593615986477624e-08,   /* 0x3e7409212e7d0322 */
1036
    5.72520794105201454728e-07,   /* 0x3ea335ea8a5fcf39 */
1037
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
1038
    2.96860689431670420344e-07,   /* 0x3e93ec071e938bfe */
1039
    3.54167239176257065345e-07,   /* 0x3e97c48bfd9862c6 */
1040
    7.95211265664474710063e-07,   /* 0x3eaaaed010f74671 */
1041
    1.72327048595145565621e-07,   /* 0x3e87211cbfeb62e0 */
1042
    6.99494915996239297020e-07,   /* 0x3ea7789d9660e72d */
1043
    6.32644111701500844315e-07,   /* 0x3ea53a5f1d36f1cf */
1044
    6.20124838851440463844e-10,   /* 0x3e054eacff2057dc */
1045
    6.13404719757812629969e-07,   /* 0x3ea4951b3e6a83cc */
1046
    3.47654909777986407387e-07,   /* 0x3e9754aa76884c66 */
1047
    7.83106177002392475763e-07,   /* 0x3eaa46d4b1de1074 */
1048
    5.33337372440526357008e-07,   /* 0x3ea1e55548f92635 */
1049
    2.01508648555298681765e-08,   /* 0x3e55a3070dd17788 */
1050
    5.25472356925843939587e-07,   /* 0x3ea1a1c5eedb0801 */
1051
    3.81831102861301692797e-07,   /* 0x3e999fcef32422cc */
1052
    6.99220602161420018738e-07,   /* 0x3ea776425d6b0199 */
1053
    6.01209702477462624811e-07,   /* 0x3ea42c5a1e0191a2 */
1054
    9.01437000591944740554e-08,   /* 0x3e7832a0bdff1327 */
1055
    5.10428680864685379950e-08,   /* 0x3e6b674743636676 */
1056
    3.47895267104621031421e-07,   /* 0x3e9758cb90d2f714 */
1057
    7.80735841510641848628e-07,   /* 0x3eaa3278459cde25 */
1058
    1.35158752025506517690e-07,   /* 0x3e822404f4a103ee */
1059
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
1060
    1.76523947728535489812e-09,   /* 0x3e1e539af6892ac5 */
1061
    6.68280121328499932183e-07,   /* 0x3ea66c7b872c9cd0 */
1062
    5.70135482405123276616e-07,   /* 0x3ea3216d2f43887d */
1063
    1.37705134737562525897e-07,   /* 0x3e827b832cbedc0e */
1064
    7.09655107074516613672e-07,   /* 0x3ea7cfe41579091d */
1065
    7.20302724551461693011e-07,   /* 0x3ea82b5a713c490a */
1066
    4.69926266058212796694e-07,   /* 0x3e9f8945932d872e */
1067
    2.19244345915999437026e-07,   /* 0x3e8d6d2da9490251 */
1068
    1.91141411617401877927e-07,   /* 0x3e89a791a3114e4a */
1069
    5.72297665296622053774e-07,   /* 0x3ea333ffe005988d */
1070
    5.61055484436830560103e-07,   /* 0x3ea2d36e0ed49ab1 */
1071
    2.76225500213991506100e-07,   /* 0x3e92898498f55f9e */
1072
    7.58466189522395692908e-07,   /* 0x3ea9732cca1032a3 */
1073
    1.56893371256836029827e-07,   /* 0x3e850ed0b02a22d2 */
1074
    4.06038997708867066507e-07,   /* 0x3e9b3fb265b1e40a */
1075
    5.51305629612057435809e-07,   /* 0x3ea27fade682d1de */
1076
    5.64778487026561123207e-07,   /* 0x3ea2f36906f707ba */
1077
    3.92609705553556897517e-07,   /* 0x3e9a58fbbee883b6 */
1078
    9.09698438776943827802e-07,   /* 0x3eae864005bca6d7 */
1079
    1.05949774066016139743e-07,   /* 0x3e7c70d02300f263 */
1080
    7.16578798392844784244e-07,   /* 0x3ea80b5d712d8e3e */
1081
    6.86233073531233972561e-07,   /* 0x3ea706b27cc7d390 */
1082
    7.99211473033494452908e-07,   /* 0x3eaad12c9d849a97 */
1083
    8.65552275731027456121e-07,   /* 0x3ead0b09954e764b */
1084
    6.75456120386058448618e-07,   /* 0x3ea6aa1fb7826cbd */
1085
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
1086
    4.99167184520462138743e-07,   /* 0x3ea0bfd03f46763c */
1087
    4.51720373502110930296e-10,   /* 0x3dff0abfb4adfb9e */
1088
    1.28874162718371367439e-07,   /* 0x3e814c151f991b2e */
1089
    5.85529267186999798656e-07,   /* 0x3ea3a5a879b09292 */
1090
    1.01827770937125531924e-07,   /* 0x3e7b558d173f9796 */
1091
    2.54736389177809626508e-07,   /* 0x3e9118567cd83fb8 */
1092
    6.98925535290464831294e-07,   /* 0x3ea773b981896751 */
1093
    1.20940735036524314513e-07,   /* 0x3e803b7df49f48a8 */
1094
    5.43759351196479689657e-08,   /* 0x3e6d315f22491900 */
1095
    1.11957989042397958409e-07,   /* 0x3e7e0db1c5bb84b2 */
1096
    8.47006714134442661218e-07,   /* 0x3eac6bbb7644ff76 */
1097
    8.92831044643427836228e-07,   /* 0x3eadf55c3afec01f */
1098
    7.77828292464916501663e-07,   /* 0x3eaa197e81034da3 */
1099
    6.48469316302918797451e-08,   /* 0x3e71683f4920555d */
1100
    2.12579816658859849140e-07,   /* 0x3e8c882fd78bb0b0 */
1101
    7.61222472580559138435e-07,   /* 0x3ea98ad9eb7b83ec */
1102
    2.86488961857314189607e-07,   /* 0x3e9339d7c7777273 */
1103
    2.14637363790165363515e-07,   /* 0x3e8ccee237cae6fe */
1104
    5.44137005612605847831e-08,   /* 0x3e6d368fe324a146 */
1105
    2.58378284856442408413e-07,   /* 0x3e9156e7b6d99b45 */
1106
    3.15848939061134843091e-07,   /* 0x3e95323e5310b5c1 */
1107
    6.60530466255089632309e-07,   /* 0x3ea629e9db362f5d */
1108
    7.63436345535852301127e-07,   /* 0x3ea99dde4728d7ec */
1109
    8.68233432860324345268e-08,   /* 0x3e774e746878544d */
1110
    9.45465175398023087082e-07,   /* 0x3eafb97be873a87d */
1111
    8.77499534786171267246e-07,   /* 0x3ead71a9e23c2f63 */
1112
    2.74055432394999316135e-07,   /* 0x3e92643c89cda173 */
1113
    4.72129009349126213532e-07,   /* 0x3e9faf1d57a4d56c */
1114
    8.93777032327078947306e-07,   /* 0x3eadfd7c7ab7b282 */
1115
    0.00000000000000000000e+00};  /* 0x0000000000000000 */
1116
1117
1118
  /* Handle special arguments first */
1119
1120
  GET_BITS_DP64(x, ux);
1121
  ax = ux & (~SIGNBIT_DP64);
1122
1123
  if(ax >= 0x7ff0000000000000)
1124
    {
1125
      /* x is either NaN or infinity */
1126
      if (ux & MANTBITS_DP64)
1127
        /* x is NaN */
1128
        return x + x; /* Raise invalid if it is a signalling NaN */
1129
      else if (ux & SIGNBIT_DP64)
1130
        /* x is negative infinity */
1131
        return nan_with_flags(AMD_F_INVALID);
1132
      else
1133
        /* x is positive infinity */
1134
        return x;
1135
    }
1136
  else if (ux & SIGNBIT_DP64)
1137
    {
1138
      /* x is negative. */
1139
      if (ux == SIGNBIT_DP64)
1140
        /* Handle negative zero first */
1141
        return x;
1142
      else
1143
        return nan_with_flags(AMD_F_INVALID);
1144
    }
1145
  else if (ux <= 0x000fffffffffffff)
1146
    {
1147
      /* x is denormalised or zero */
1148
      if (ux == 0)
1149
        /* x is zero */
1150
        return x;
1151
      else
1152
        {
1153
          /* x is denormalised; scale it up */
1154
          /* Normalize x by increasing the exponent by 60
1155
             and subtracting a correction to account for the implicit
1156
             bit. This replaces a slow denormalized
1157
             multiplication by a fast normal subtraction. */
1158
          static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */
1159
          denorm = 1;
1160
          GET_BITS_DP64(x, ux);
1161
          PUT_BITS_DP64(ux | 0x03d0000000000000, x);
1162
          x -= corr;
1163
          GET_BITS_DP64(x, ux);
1164
        }
1165
    }
1166
1167
  /* Main algorithm */
1168
1169
  /*
1170
     Find y and e such that x = 2^e * y, where y in [1,4).
1171
     This is done using an in-lined variant of splitDouble,
1172
     which also ensures that e is even.
1173
   */
1174
  y = x;
1175
  ux &= EXPBITS_DP64;
1176
  ux >>= EXPSHIFTBITS_DP64;
1177
  if (ux & 1)
1178
    {
1179
      GET_BITS_DP64(y, u);
1180
      u &= (SIGNBIT_DP64 | MANTBITS_DP64);
1181
      u |= ONEEXPBITS_DP64;
1182
      PUT_BITS_DP64(u, y);
1183
      e = ux - EXPBIAS_DP64;
1184
    }
1185
  else
1186
    {
1187
      GET_BITS_DP64(y, u);
1188
      u &= (SIGNBIT_DP64 | MANTBITS_DP64);
1189
      u |= TWOEXPBITS_DP64;
1190
      PUT_BITS_DP64(u, y);
1191
      e = ux - EXPBIAS_DP64 - 1;
1192
    }
1193
1194
1195
  /* Find the index of the sub-interval of [1,4) in which y lies. */
1196
1197
  index = (int)(32.0*y+0.5);
1198
1199
  /* Look up the table values and compute c and r = c/t */
1200
1201
  rtc_lead = rt_jby32_lead_table_dbl[index-32];
1202
  rtc_trail = rt_jby32_trail_table_dbl[index-32];
1203
  c = 0.03125*index;
1204
  r = (y - c)/c;
1205
1206
  /*
1207
    Find q = sqrt(1+r) - 1.
1208
    From one step of Newton on (q+1)^2 = 1+r
1209
  */
1210
1211
  p = r*0.5 - r*r*(0.1250079870 - r*(0.6250522999E-01));
1212
  twop = p + p;
1213
  q = p - (p*p + (twop - r))/(twop + 2.0);
1214
1215
  /* Reconstruction */
1216
1217
  rtc = rtc_lead + rtc_trail;
1218
  e >>= 1; /* e = e/2 */
1219
  z = rtc_lead + (rtc*q+rtc_trail);
1220
1221
  if (denorm)
1222
    {
1223
      /* Scale by 2**(e-30) */
1224
      PUT_BITS_DP64(((long)(e - 30) + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, r);
1225
      z *= r;
1226
    }
1227
  else
1228
    {
1229
      /* Scale by 2**e */
1230
      PUT_BITS_DP64(((long)e + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, r);
1231
      z *= r;
1232
    }
1233
1234
  return z;
1235
1236
}
1237
#endif /* SQRT_AMD_INLINE */
1238
1239
#if defined(USE_SQRTF_AMD_INLINE)
1240
1241
static inline float sqrtf_amd_inline(float x)
1242
{
1243
  /*
1244
     Computes the square root of x.
1245
1246
     The calculation is carried out in three steps.
1247
1248
     Step 1. Reduction.
1249
     The input argument is scaled to the interval [1, 4) by
1250
     computing
1251
               x = 2^e * y, where y in [1,4).
1252
     Furthermore y is decomposed as y = c + t where
1253
               c = 1 + j/32, j = 0,1,..,96; and |t| <= 1/64.
1254
1255
     Step 2. Approximation.
1256
     An approximation q = sqrt(1 + (t/c)) - 1  is obtained
1257
     from a basic series expansion using precomputed values
1258
     stored in rt_jby32_lead_table_float and rt_jby32_trail_table_float.
1259
1260
     Step 3. Reconstruction.
1261
     The value of sqrt(x) is reconstructed via
1262
       sqrt(x) = 2^(e/2) * sqrt(y)
1263
               = 2^(e/2) * sqrt(c) * sqrt(y/c)
1264
               = 2^(e/2) * sqrt(c) * sqrt(1 + t/c)
1265
               = 2^(e/2) * [ sqrt(c) + sqrt(c)*q ]
1266
    */
1267
1268
  unsigned int ux, ax, u;
1269
  float r1, r2, c, y, p, q, r, twop, z, rtc, rtc_lead, rtc_trail;
1270
  int e, denorm = 0, index;
1271
1272
/* Arrays rt_jby32_lead_table_float and rt_jby32_trail_table_float contain
1273
   leading and trailing parts respectively of precomputed
1274
   values of sqrt(j/32), for j = 32, 33, ..., 128.
1275
   rt_jby32_lead_table_float contains the first 13 bits of precision,
1276
   and rt_jby32_trail_table_float contains a further 24 bits precision. */
1277
1278
static const float rt_jby32_lead_table_float[97] = {
1279
    1.00000000000000000000e+00F,   /* 0x3f800000 */
1280
    1.01538085937500000000e+00F,   /* 0x3f81f800 */
1281
    1.03076171875000000000e+00F,   /* 0x3f83f000 */
1282
    1.04565429687500000000e+00F,   /* 0x3f85d800 */
1283
    1.06054687500000000000e+00F,   /* 0x3f87c000 */
1284
    1.07519531250000000000e+00F,   /* 0x3f89a000 */
1285
    1.08959960937500000000e+00F,   /* 0x3f8b7800 */
1286
    1.10375976562500000000e+00F,   /* 0x3f8d4800 */
1287
    1.11791992187500000000e+00F,   /* 0x3f8f1800 */
1288
    1.13183593750000000000e+00F,   /* 0x3f90e000 */
1289
    1.14550781250000000000e+00F,   /* 0x3f92a000 */
1290
    1.15917968750000000000e+00F,   /* 0x3f946000 */
1291
    1.17236328125000000000e+00F,   /* 0x3f961000 */
1292
    1.18579101562500000000e+00F,   /* 0x3f97c800 */
1293
    1.19873046875000000000e+00F,   /* 0x3f997000 */
1294
    1.21191406250000000000e+00F,   /* 0x3f9b2000 */
1295
    1.22460937500000000000e+00F,   /* 0x3f9cc000 */
1296
    1.23730468750000000000e+00F,   /* 0x3f9e6000 */
1297
    1.25000000000000000000e+00F,   /* 0x3fa00000 */
1298
    1.26220703125000000000e+00F,   /* 0x3fa19000 */
1299
    1.27465820312500000000e+00F,   /* 0x3fa32800 */
1300
    1.28686523437500000000e+00F,   /* 0x3fa4b800 */
1301
    1.29882812500000000000e+00F,   /* 0x3fa64000 */
1302
    1.31079101562500000000e+00F,   /* 0x3fa7c800 */
1303
    1.32275390625000000000e+00F,   /* 0x3fa95000 */
1304
    1.33447265625000000000e+00F,   /* 0x3faad000 */
1305
    1.34619140625000000000e+00F,   /* 0x3fac5000 */
1306
    1.35766601562500000000e+00F,   /* 0x3fadc800 */
1307
    1.36914062500000000000e+00F,   /* 0x3faf4000 */
1308
    1.38061523437500000000e+00F,   /* 0x3fb0b800 */
1309
    1.39184570312500000000e+00F,   /* 0x3fb22800 */
1310
    1.40307617187500000000e+00F,   /* 0x3fb39800 */
1311
    1.41406250000000000000e+00F,   /* 0x3fb50000 */
1312
    1.42504882812500000000e+00F,   /* 0x3fb66800 */
1313
    1.43603515625000000000e+00F,   /* 0x3fb7d000 */
1314
    1.44677734375000000000e+00F,   /* 0x3fb93000 */
1315
    1.45751953125000000000e+00F,   /* 0x3fba9000 */
1316
    1.46826171875000000000e+00F,   /* 0x3fbbf000 */
1317
    1.47900390625000000000e+00F,   /* 0x3fbd5000 */
1318
    1.48950195312500000000e+00F,   /* 0x3fbea800 */
1319
    1.50000000000000000000e+00F,   /* 0x3fc00000 */
1320
    1.51025390625000000000e+00F,   /* 0x3fc15000 */
1321
    1.52050781250000000000e+00F,   /* 0x3fc2a000 */
1322
    1.53076171875000000000e+00F,   /* 0x3fc3f000 */
1323
    1.54101562500000000000e+00F,   /* 0x3fc54000 */
1324
    1.55102539062500000000e+00F,   /* 0x3fc68800 */
1325
    1.56103515625000000000e+00F,   /* 0x3fc7d000 */
1326
    1.57104492187500000000e+00F,   /* 0x3fc91800 */
1327
    1.58105468750000000000e+00F,   /* 0x3fca6000 */
1328
    1.59082031250000000000e+00F,   /* 0x3fcba000 */
1329
    1.60058593750000000000e+00F,   /* 0x3fcce000 */
1330
    1.61035156250000000000e+00F,   /* 0x3fce2000 */
1331
    1.62011718750000000000e+00F,   /* 0x3fcf6000 */
1332
    1.62963867187500000000e+00F,   /* 0x3fd09800 */
1333
    1.63916015625000000000e+00F,   /* 0x3fd1d000 */
1334
    1.64868164062500000000e+00F,   /* 0x3fd30800 */
1335
    1.65820312500000000000e+00F,   /* 0x3fd44000 */
1336
    1.66748046875000000000e+00F,   /* 0x3fd57000 */
1337
    1.67700195312500000000e+00F,   /* 0x3fd6a800 */
1338
    1.68627929687500000000e+00F,   /* 0x3fd7d800 */
1339
    1.69555664062500000000e+00F,   /* 0x3fd90800 */
1340
    1.70458984375000000000e+00F,   /* 0x3fda3000 */
1341
    1.71386718750000000000e+00F,   /* 0x3fdb6000 */
1342
    1.72290039062500000000e+00F,   /* 0x3fdc8800 */
1343
    1.73193359375000000000e+00F,   /* 0x3fddb000 */
1344
    1.74096679687500000000e+00F,   /* 0x3fded800 */
1345
    1.75000000000000000000e+00F,   /* 0x3fe00000 */
1346
    1.75878906250000000000e+00F,   /* 0x3fe12000 */
1347
    1.76757812500000000000e+00F,   /* 0x3fe24000 */
1348
    1.77636718750000000000e+00F,   /* 0x3fe36000 */
1349
    1.78515625000000000000e+00F,   /* 0x3fe48000 */
1350
    1.79394531250000000000e+00F,   /* 0x3fe5a000 */
1351
    1.80273437500000000000e+00F,   /* 0x3fe6c000 */
1352
    1.81127929687500000000e+00F,   /* 0x3fe7d800 */
1353
    1.81982421875000000000e+00F,   /* 0x3fe8f000 */
1354
    1.82836914062500000000e+00F,   /* 0x3fea0800 */
1355
    1.83691406250000000000e+00F,   /* 0x3feb2000 */
1356
    1.84545898437500000000e+00F,   /* 0x3fec3800 */
1357
    1.85400390625000000000e+00F,   /* 0x3fed5000 */
1358
    1.86230468750000000000e+00F,   /* 0x3fee6000 */
1359
    1.87060546875000000000e+00F,   /* 0x3fef7000 */
1360
    1.87915039062500000000e+00F,   /* 0x3ff08800 */
1361
    1.88745117187500000000e+00F,   /* 0x3ff19800 */
1362
    1.89550781250000000000e+00F,   /* 0x3ff2a000 */
1363
    1.90380859375000000000e+00F,   /* 0x3ff3b000 */
1364
    1.91210937500000000000e+00F,   /* 0x3ff4c000 */
1365
    1.92016601562500000000e+00F,   /* 0x3ff5c800 */
1366
    1.92822265625000000000e+00F,   /* 0x3ff6d000 */
1367
    1.93627929687500000000e+00F,   /* 0x3ff7d800 */
1368
    1.94433593750000000000e+00F,   /* 0x3ff8e000 */
1369
    1.95239257812500000000e+00F,   /* 0x3ff9e800 */
1370
    1.96044921875000000000e+00F,   /* 0x3ffaf000 */
1371
    1.96826171875000000000e+00F,   /* 0x3ffbf000 */
1372
    1.97631835937500000000e+00F,   /* 0x3ffcf800 */
1373
    1.98413085937500000000e+00F,   /* 0x3ffdf800 */
1374
    1.99194335937500000000e+00F,   /* 0x3ffef800 */
1375
    2.00000000000000000000e+00F};  /* 0x40000000 */
1376
1377
static const float rt_jby32_trail_table_float[97] = {
1378
    0.00000000000000000000e+00F,   /* 0x00000000 */
1379
    1.23941208585165441036e-04F,   /* 0x3901f637 */
1380
    1.46876545841223560274e-05F,   /* 0x37766aff */
1381
    1.70736297150142490864e-04F,   /* 0x393307ad */
1382
    1.13296780909877270460e-04F,   /* 0x38ed99bf */
1383
    9.53458802541717886925e-05F,   /* 0x38c7f46e */
1384
    1.25126505736261606216e-04F,   /* 0x39033464 */
1385
    2.10342666832730174065e-04F,   /* 0x395c8f6e */
1386
    1.14066875539720058441e-04F,   /* 0x38ef3730 */
1387
    8.72047676239162683487e-05F,   /* 0x38b6e1b4 */
1388
    1.36111237225122749805e-04F,   /* 0x390eb915 */
1389
    2.26244374061934649944e-05F,   /* 0x37bdc99c */
1390
    2.40658700931817293167e-04F,   /* 0x397c5954 */
1391
    6.31069415248930454254e-05F,   /* 0x38845848 */
1392
    2.27412077947519719601e-04F,   /* 0x396e7577 */
1393
    5.90185391047270968556e-06F,   /* 0x36c6088a */
1394
    1.35496389702893793583e-04F,   /* 0x390e1409 */
1395
    1.32179571664892137051e-04F,   /* 0x390a99af */
1396
    0.00000000000000000000e+00F,   /* 0x00000000 */
1397
    2.31086043640971183777e-04F,   /* 0x39724fb0 */
1398
    9.66752704698592424393e-05F,   /* 0x38cabe24 */
1399
    8.85332483449019491673e-05F,   /* 0x38b9aaed */
1400
    2.09980673389509320259e-04F,   /* 0x395c2e42 */
1401
    2.20044588786549866199e-04F,   /* 0x3966bbc5 */
1402
    1.21749282698146998882e-04F,   /* 0x38ff53a6 */
1403
    1.62125259521417319775e-04F,   /* 0x392a002b */
1404
    9.97955357888713479042e-05F,   /* 0x38d14952 */
1405
    1.81545779923908412457e-04F,   /* 0x393e5d53 */
1406
    1.65768768056295812130e-04F,   /* 0x392dd237 */
1407
    5.48927710042335093021e-05F,   /* 0x38663caa */
1408
    9.53875860432162880898e-05F,   /* 0x38c80ad2 */
1409
    4.53481625299900770187e-05F,   /* 0x383e3438 */
1410
    1.51062369695864617825e-04F,   /* 0x391e667f */
1411
    1.70453247847035527229e-04F,   /* 0x3932bbb2 */
1412
    1.05505387182347476482e-04F,   /* 0x38dd42c6 */
1413
    2.02269104192964732647e-04F,   /* 0x39541833 */
1414
    2.18442466575652360916e-04F,   /* 0x39650db4 */
1415
    1.55796806211583316326e-04F,   /* 0x39235d63 */
1416
    1.60395247803535312414e-05F,   /* 0x37868c9e */
1417
    4.49578510597348213196e-05F,   /* 0x383c9120 */
1418
    0.00000000000000000000e+00F,   /* 0x00000000 */
1419
    1.26840444863773882389e-04F,   /* 0x39050079 */
1420
    1.82820076588541269302e-04F,   /* 0x393fb364 */
1421
    1.69370483490638434887e-04F,   /* 0x3931990b */
1422
    8.78757418831810355186e-05F,   /* 0x38b849ee */
1423
    1.83815121999941766262e-04F,   /* 0x3940be7f */
1424
    2.14343352126888930798e-04F,   /* 0x3960c15b */
1425
    1.80714370799250900745e-04F,   /* 0x393d7e25 */
1426
    8.41425862745381891727e-05F,   /* 0x38b075b5 */
1427
    1.69945167726837098598e-04F,   /* 0x3932334f */
1428
    1.95121858268976211548e-04F,   /* 0x394c99a0 */
1429
    1.60778334247879683971e-04F,   /* 0x3928969b */
1430
    6.79871009197086095810e-05F,   /* 0x388e944c */
1431
    1.61929419846273958683e-04F,   /* 0x3929cb99 */
1432
    1.99474830878898501396e-04F,   /* 0x39512a1e */
1433
    1.81604162207804620266e-04F,   /* 0x393e6cff */
1434
    1.09270178654696792364e-04F,   /* 0x38e527fb */
1435
    2.27539261686615645885e-04F,   /* 0x396e979b */
1436
    4.90300008095800876617e-05F,   /* 0x384da590 */
1437
    6.28985289949923753738e-05F,   /* 0x3883e864 */
1438
    2.58551553997676819563e-05F,   /* 0x37d8e386 */
1439
    1.82868374395184218884e-04F,   /* 0x393fc05b */
1440
    4.64625991298817098141e-05F,   /* 0x3842e0d6 */
1441
    1.05703387816902250051e-04F,   /* 0x38ddad13 */
1442
    1.17213814519345760345e-04F,   /* 0x38f5d0b0 */
1443
    8.17377731436863541603e-05F,   /* 0x38ab6aa2 */
1444
    0.00000000000000000000e+00F,   /* 0x00000000 */
1445
    1.16847433673683553934e-04F,   /* 0x38f50bfd */
1446
    1.88827965757809579372e-04F,   /* 0x3946001f */
1447
    2.16612941585481166840e-04F,   /* 0x39632298 */
1448
    2.00857131858356297016e-04F,   /* 0x39529d2d */
1449
    1.42199307447299361229e-04F,   /* 0x39151b56 */
1450
    4.12627305195201188326e-05F,   /* 0x382d1185 */
1451
    1.42796401632949709892e-04F,   /* 0x3915bb9e */
1452
    2.03253570361994206905e-04F,   /* 0x39552077 */
1453
    2.23214170546270906925e-04F,   /* 0x396a0e99 */
1454
    2.03244591830298304558e-04F,   /* 0x39551e0e */
1455
    1.43898156238719820976e-04F,   /* 0x3916e35e */
1456
    4.57155256299301981926e-05F,   /* 0x383fbeac */
1457
    1.53365719597786664963e-04F,   /* 0x3920d0cc */
1458
    2.23224633373320102692e-04F,   /* 0x396a1168 */
1459
    1.16566716314991936088e-05F,   /* 0x37439106 */
1460
    7.43694272387074306607e-06F,   /* 0x36f98ada */
1461
    2.11048507480882108212e-04F,   /* 0x395d4ce7 */
1462
    1.34682719362899661064e-04F,   /* 0x390d399e */
1463
    2.29425968427676707506e-05F,   /* 0x37c074da */
1464
    1.20421340398024767637e-04F,   /* 0x38fc8ab7 */
1465
    1.83421318070031702518e-04F,   /* 0x394054c9 */
1466
    2.12376224226318299770e-04F,   /* 0x395eb14f */
1467
    2.07710763788782060146e-04F,   /* 0x3959ccef */
1468
    1.69840845046564936638e-04F,   /* 0x3932174e */
1469
    9.91739216260612010956e-05F,   /* 0x38cffb98 */
1470
    2.40249748458154499531e-04F,   /* 0x397beb8d */
1471
    1.05178231024183332920e-04F,   /* 0x38dc9322 */
1472
    1.82623916771262884140e-04F,   /* 0x393f7ebc */
1473
    2.28821940254420042038e-04F,   /* 0x396fefec */
1474
    0.00000000000000000000e+00F};  /* 0x00000000 */
1475
1476
1477
/* Handle special arguments first */
1478
1479
  GET_BITS_SP32(x, ux);
1480
  ax = ux & (~SIGNBIT_SP32);
1481
1482
  if(ax >= 0x7f800000)
1483
    {
1484
      /* x is either NaN or infinity */
1485
      if (ux & MANTBITS_SP32)
1486
        /* x is NaN */
1487
        return x + x; /* Raise invalid if it is a signalling NaN */
1488
      else if (ux & SIGNBIT_SP32)
1489
        return nanf_with_flags(AMD_F_INVALID);
1490
      else
1491
        /* x is positive infinity */
1492
        return x;
1493
    }
1494
  else if (ux & SIGNBIT_SP32)
1495
    {
1496
      /* x is negative. */
1497
      if (x == 0.0F)
1498
        /* Handle negative zero first */
1499
        return x;
1500
      else
1501
        return nanf_with_flags(AMD_F_INVALID);
1502
    }
1503
  else if (ux <= 0x007fffff)
1504
    {
1505
      /* x is denormalised or zero */
1506
      if (ux == 0)
1507
        /* x is zero */
1508
        return x;
1509
      else
1510
        {
1511
          /* x is denormalised; scale it up */
1512
          /* Normalize x by increasing the exponent by 26
1513
             and subtracting a correction to account for the implicit
1514
             bit. This replaces a slow denormalized
1515
             multiplication by a fast normal subtraction. */
1516
          static const float corr = 7.888609052210118054e-31F; /* 0x0d800000 */
1517
          denorm = 1;
1518
          GET_BITS_SP32(x, ux);
1519
          PUT_BITS_SP32(ux | 0x0d800000, x);
1520
          x -= corr;
1521
          GET_BITS_SP32(x, ux);
1522
        }
1523
    }
1524
1525
  /* Main algorithm */
1526
1527
  /*
1528
     Find y and e such that x = 2^e * y, where y in [1,4).
1529
     This is done using an in-lined variant of splitFloat,
1530
     which also ensures that e is even.
1531
   */
1532
  y = x;
1533
  ux &= EXPBITS_SP32;
1534
  ux >>= EXPSHIFTBITS_SP32;
1535
  if (ux & 1)
1536
    {
1537
      GET_BITS_SP32(y, u);
1538
      u &= (SIGNBIT_SP32 | MANTBITS_SP32);
1539
      u |= ONEEXPBITS_SP32;
1540
      PUT_BITS_SP32(u, y);
1541
      e = ux - EXPBIAS_SP32;
1542
    }
1543
  else
1544
    {
1545
      GET_BITS_SP32(y, u);
1546
      u &= (SIGNBIT_SP32 | MANTBITS_SP32);
1547
      u |= TWOEXPBITS_SP32;
1548
      PUT_BITS_SP32(u, y);
1549
      e = ux - EXPBIAS_SP32 - 1;
1550
    }
1551
1552
  /* Find the index of the sub-interval of [1,4) in which y lies. */
1553
1554
  index = (int)(32.0F*y+0.5);
1555
1556
  /* Look up the table values and compute c and r = c/t */
1557
1558
  rtc_lead = rt_jby32_lead_table_float[index-32];
1559
  rtc_trail = rt_jby32_trail_table_float[index-32];
1560
  c = 0.03125F*index;
1561
  r = (y - c)/c;
1562
1563
  /*
1564
  Find q = sqrt(1+r) - 1.
1565
  From one step of Newton on (q+1)^2 = 1+r
1566
  */
1567
1568
  p = r*0.5F - r*r*(0.1250079870F - r*(0.6250522999e-01F));
1569
  twop = p + p;
1570
  q = p - (p*p + (twop - r))/(twop + 2.0);
1571
1572
  /* Reconstruction */
1573
1574
  rtc = rtc_lead + rtc_trail;
1575
  e >>= 1; /* e = e/2 */
1576
  z = rtc_lead + (rtc*q+rtc_trail);
1577
1578
  if (denorm)
1579
    {
1580
      /* Scale by 2**(e-13) */
1581
      PUT_BITS_SP32(((e - 13) + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, r);
1582
      z *= r;
1583
    }
1584
  else
1585
    {
1586
      /* Scale by 2**e */
1587
      PUT_BITS_SP32((e + EXPBIAS_SP32) << EXPSHIFTBITS_SP32, r);
1588
      z *= r;
1589
    }
1590
1591
  return z;
1592
1593
}
1594
#endif /* SQRTF_AMD_INLINE */
1595
1596
#ifdef USE_LOG_KERNEL_AMD
1597
/* log_kernel_amd64: table-driven core of the natural logarithm.

   Inputs:
     x     - the argument.
     ux    - used throughout as the raw 64-bit pattern of x (it is compared
             against bit-pattern thresholds and masked with EXPBITS_DP64 /
             MANTBITS_DP64); the caller supplies it, it is not re-derived
             from x here except on the denormal path.
   Outputs (all written through pointers, nothing is returned):
     *xexp - power-of-two exponent M (0 on the "x close to 1" path).
     *r1   - leading part of the result (r on the near-1 path, otherwise
             the ln_lead_table entry).
     *r2   - trailing part (polynomial / table-tail corrections).
   Per the derivation comments in the body, the intended reconstruction is
       log(x) = xexp*log(2) + r1 + r2.

   NOTE(review): no handling of zero, negative, infinite or NaN arguments
   is visible in this function; presumably callers filter those first --
   confirm against the call sites. */
static inline void log_kernel_amd64(double x, unsigned long ux, int *xexp, double *r1, double *r2)
1598
{
1599
1600
  int expadjust;
1601
  double r, z1, z2, correction, f, f1, f2, q, u, v, poly;
1602
  int index;
1603
1604
  /*
1605
    Computes natural log(x). Algorithm based on:
1606
    Ping-Tak Peter Tang
1607
    "Table-driven implementation of the logarithm function in IEEE
1608
    floating-point arithmetic"
1609
    ACM Transactions on Mathematical Software (TOMS)
1610
    Volume 16, Issue 4 (December 1990)
1611
  */
1612
1613
/* Arrays ln_lead_table and ln_tail_table contain
1614
   leading and trailing parts respectively of precomputed
1615
   values of natural log(1+i/64), for i = 0, 1, ..., 64.
1616
   ln_lead_table contains the first 24 bits of precision,
1617
   and ln_tail_table contains a further 53 bits precision. */
1618
1619
  static const double ln_lead_table[65] = {
1620
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
1621
    1.55041813850402832031e-02,   /* 0x3f8fc0a800000000 */
1622
    3.07716131210327148438e-02,   /* 0x3f9f829800000000 */
1623
    4.58095073699951171875e-02,   /* 0x3fa7745800000000 */
1624
    6.06245994567871093750e-02,   /* 0x3faf0a3000000000 */
1625
    7.52233862876892089844e-02,   /* 0x3fb341d700000000 */
1626
    8.96121263504028320312e-02,   /* 0x3fb6f0d200000000 */
1627
    1.03796780109405517578e-01,   /* 0x3fba926d00000000 */
1628
    1.17783010005950927734e-01,   /* 0x3fbe270700000000 */
1629
    1.31576299667358398438e-01,   /* 0x3fc0d77e00000000 */
1630
    1.45181953907012939453e-01,   /* 0x3fc2955280000000 */
1631
    1.58604979515075683594e-01,   /* 0x3fc44d2b00000000 */
1632
    1.71850204467773437500e-01,   /* 0x3fc5ff3000000000 */
1633
    1.84922337532043457031e-01,   /* 0x3fc7ab8900000000 */
1634
    1.97825729846954345703e-01,   /* 0x3fc9525a80000000 */
1635
    2.10564732551574707031e-01,   /* 0x3fcaf3c900000000 */
1636
    2.23143517971038818359e-01,   /* 0x3fcc8ff780000000 */
1637
    2.35566020011901855469e-01,   /* 0x3fce270700000000 */
1638
    2.47836112976074218750e-01,   /* 0x3fcfb91800000000 */
1639
    2.59957492351531982422e-01,   /* 0x3fd0a324c0000000 */
1640
    2.71933674812316894531e-01,   /* 0x3fd1675c80000000 */
1641
    2.83768117427825927734e-01,   /* 0x3fd22941c0000000 */
1642
    2.95464158058166503906e-01,   /* 0x3fd2e8e280000000 */
1643
    3.07025015354156494141e-01,   /* 0x3fd3a64c40000000 */
1644
    3.18453729152679443359e-01,   /* 0x3fd4618bc0000000 */
1645
    3.29753279685974121094e-01,   /* 0x3fd51aad80000000 */
1646
    3.40926527976989746094e-01,   /* 0x3fd5d1bd80000000 */
1647
    3.51976394653320312500e-01,   /* 0x3fd686c800000000 */
1648
    3.62905442714691162109e-01,   /* 0x3fd739d7c0000000 */
1649
    3.73716354370117187500e-01,   /* 0x3fd7eaf800000000 */
1650
    3.84411692619323730469e-01,   /* 0x3fd89a3380000000 */
1651
    3.94993782043457031250e-01,   /* 0x3fd9479400000000 */
1652
    4.05465066432952880859e-01,   /* 0x3fd9f323c0000000 */
1653
    4.15827870368957519531e-01,   /* 0x3fda9cec80000000 */
1654
    4.26084339618682861328e-01,   /* 0x3fdb44f740000000 */
1655
    4.36236739158630371094e-01,   /* 0x3fdbeb4d80000000 */
1656
    4.46287095546722412109e-01,   /* 0x3fdc8ff7c0000000 */
1657
    4.56237375736236572266e-01,   /* 0x3fdd32fe40000000 */
1658
    4.66089725494384765625e-01,   /* 0x3fddd46a00000000 */
1659
    4.75845873355865478516e-01,   /* 0x3fde744240000000 */
1660
    4.85507786273956298828e-01,   /* 0x3fdf128f40000000 */
1661
    4.95077252388000488281e-01,   /* 0x3fdfaf5880000000 */
1662
    5.04556000232696533203e-01,   /* 0x3fe02552a0000000 */
1663
    5.13945698738098144531e-01,   /* 0x3fe0723e40000000 */
1664
    5.23248136043548583984e-01,   /* 0x3fe0be72e0000000 */
1665
    5.32464742660522460938e-01,   /* 0x3fe109f380000000 */
1666
    5.41597247123718261719e-01,   /* 0x3fe154c3c0000000 */
1667
    5.50647079944610595703e-01,   /* 0x3fe19ee6a0000000 */
1668
    5.59615731239318847656e-01,   /* 0x3fe1e85f40000000 */
1669
    5.68504691123962402344e-01,   /* 0x3fe23130c0000000 */
1670
    5.77315330505371093750e-01,   /* 0x3fe2795e00000000 */
1671
    5.86049020290374755859e-01,   /* 0x3fe2c0e9e0000000 */
1672
    5.94707071781158447266e-01,   /* 0x3fe307d720000000 */
1673
    6.03290796279907226562e-01,   /* 0x3fe34e2880000000 */
1674
    6.11801505088806152344e-01,   /* 0x3fe393e0c0000000 */
1675
    6.20240390300750732422e-01,   /* 0x3fe3d90260000000 */
1676
    6.28608644008636474609e-01,   /* 0x3fe41d8fe0000000 */
1677
    6.36907458305358886719e-01,   /* 0x3fe4618bc0000000 */
1678
    6.45137906074523925781e-01,   /* 0x3fe4a4f840000000 */
1679
    6.53301239013671875000e-01,   /* 0x3fe4e7d800000000 */
1680
    6.61398470401763916016e-01,   /* 0x3fe52a2d20000000 */
1681
    6.69430613517761230469e-01,   /* 0x3fe56bf9c0000000 */
1682
    6.77398800849914550781e-01,   /* 0x3fe5ad4040000000 */
1683
    6.85303986072540283203e-01,   /* 0x3fe5ee02a0000000 */
1684
    6.93147122859954833984e-01};  /* 0x3fe62e42e0000000 */
1685
1686
  static const double ln_tail_table[65] = {
1687
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
1688
    5.15092497094772879206e-09,   /* 0x3e361f807c79f3db */
1689
    4.55457209735272790188e-08,   /* 0x3e6873c1980267c8 */
1690
    2.86612990859791781788e-08,   /* 0x3e5ec65b9f88c69e */
1691
    2.23596477332056055352e-08,   /* 0x3e58022c54cc2f99 */
1692
    3.49498983167142274770e-08,   /* 0x3e62c37a3a125330 */
1693
    3.23392843005887000414e-08,   /* 0x3e615cad69737c93 */
1694
    1.35722380472479366661e-08,   /* 0x3e4d256ab1b285e9 */
1695
    2.56504325268044191098e-08,   /* 0x3e5b8abcb97a7aa2 */
1696
    5.81213608741512136843e-08,   /* 0x3e6f34239659a5dc */
1697
    5.59374849578288093334e-08,   /* 0x3e6e07fd48d30177 */
1698
    5.06615629004996189970e-08,   /* 0x3e6b32df4799f4f6 */
1699
    5.24588857848400955725e-08,   /* 0x3e6c29e4f4f21cf8 */
1700
    9.61968535632653505972e-10,   /* 0x3e1086c848df1b59 */
1701
    1.34829655346594463137e-08,   /* 0x3e4cf456b4764130 */
1702
    3.65557749306383026498e-08,   /* 0x3e63a02ffcb63398 */
1703
    3.33431709374069198903e-08,   /* 0x3e61e6a6886b0976 */
1704
    5.13008650536088382197e-08,   /* 0x3e6b8abcb97a7aa2 */
1705
    5.09285070380306053751e-08,   /* 0x3e6b578f8aa35552 */
1706
    3.20853940845502057341e-08,   /* 0x3e6139c871afb9fc */
1707
    4.06713248643004200446e-08,   /* 0x3e65d5d30701ce64 */
1708
    5.57028186706125221168e-08,   /* 0x3e6de7bcb2d12142 */
1709
    5.48356693724804282546e-08,   /* 0x3e6d708e984e1664 */
1710
    1.99407553679345001938e-08,   /* 0x3e556945e9c72f36 */
1711
    1.96585517245087232086e-09,   /* 0x3e20e2f613e85bda */
1712
    6.68649386072067321503e-09,   /* 0x3e3cb7e0b42724f6 */
1713
    5.89936034642113390002e-08,   /* 0x3e6fac04e52846c7 */
1714
    2.85038578721554472484e-08,   /* 0x3e5e9b14aec442be */
1715
    5.09746772910284482606e-08,   /* 0x3e6b5de8034e7126 */
1716
    5.54234668933210171467e-08,   /* 0x3e6dc157e1b259d3 */
1717
    6.29100830926604004874e-09,   /* 0x3e3b05096ad69c62 */
1718
    2.61974119468563937716e-08,   /* 0x3e5c2116faba4cdd */
1719
    4.16752115011186398935e-08,   /* 0x3e665fcc25f95b47 */
1720
    2.47747534460820790327e-08,   /* 0x3e5a9a08498d4850 */
1721
    5.56922172017964209793e-08,   /* 0x3e6de647b1465f77 */
1722
    2.76162876992552906035e-08,   /* 0x3e5da71b7bf7861d */
1723
    7.08169709942321478061e-09,   /* 0x3e3e6a6886b09760 */
1724
    5.77453510221151779025e-08,   /* 0x3e6f0075eab0ef64 */
1725
    4.43021445893361960146e-09,   /* 0x3e33071282fb989b */
1726
    3.15140984357495864573e-08,   /* 0x3e60eb43c3f1bed2 */
1727
    2.95077445089736670973e-08,   /* 0x3e5faf06ecb35c84 */
1728
    1.44098510263167149349e-08,   /* 0x3e4ef1e63db35f68 */
1729
    1.05196987538551827693e-08,   /* 0x3e469743fb1a71a5 */
1730
    5.23641361722697546261e-08,   /* 0x3e6c1cdf404e5796 */
1731
    7.72099925253243069458e-09,   /* 0x3e4094aa0ada625e */
1732
    5.62089493829364197156e-08,   /* 0x3e6e2d4c96fde3ec */
1733
    3.53090261098577946927e-08,   /* 0x3e62f4d5e9a98f34 */
1734
    3.80080516835568242269e-08,   /* 0x3e6467c96ecc5cbe */
1735
    5.66961038386146408282e-08,   /* 0x3e6e7040d03dec5a */
1736
    4.42287063097349852717e-08,   /* 0x3e67bebf4282de36 */
1737
    3.45294525105681104660e-08,   /* 0x3e6289b11aeb783f */
1738
    2.47132034530447431509e-08,   /* 0x3e5a891d1772f538 */
1739
    3.59655343422487209774e-08,   /* 0x3e634f10be1fb591 */
1740
    5.51581770357780862071e-08,   /* 0x3e6d9ce1d316eb93 */
1741
    3.60171867511861372793e-08,   /* 0x3e63562a19a9c442 */
1742
    1.94511067964296180547e-08,   /* 0x3e54e2adf548084c */
1743
    1.54137376631349347838e-08,   /* 0x3e508ce55cc8c97a */
1744
    3.93171034490174464173e-09,   /* 0x3e30e2f613e85bda */
1745
    5.52990607758839766440e-08,   /* 0x3e6db03ebb0227bf */
1746
    3.29990737637586136511e-08,   /* 0x3e61b75bb09cb098 */
1747
    1.18436010922446096216e-08,   /* 0x3e496f16abb9df22 */
1748
    4.04248680368301346709e-08,   /* 0x3e65b3f399411c62 */
1749
    2.27418915900284316293e-08,   /* 0x3e586b3e59f65355 */
1750
    1.70263791333409206020e-08,   /* 0x3e52482ceae1ac12 */
1751
    5.76999904754328540596e-08};  /* 0x3e6efa39ef35793c */
1752
1753
  /* Approximating polynomial coefficients for x near 1.0 */
1754
  static const double
1755
    ca_1 = 8.33333333333317923934e-02,  /* 0x3fb55555555554e6 */
1756
    ca_2 = 1.25000000037717509602e-02,  /* 0x3f89999999bac6d4 */
1757
    ca_3 = 2.23213998791944806202e-03,  /* 0x3f62492307f1519f */
1758
    ca_4 = 4.34887777707614552256e-04;  /* 0x3f3c8034c85dfff0 */
1759
1760
  /* Approximating polynomial coefficients for other x */
1761
  static const double
1762
    cb_1 = 8.33333333333333593622e-02,  /* 0x3fb5555555555557 */
1763
    cb_2 = 1.24999999978138668903e-02,  /* 0x3f89999999865ede */
1764
    cb_3 = 2.23219810758559851206e-03;  /* 0x3f6249423bd94741 */
1765
1766
  /* Bit-pattern bounds of the "x close to 1" interval; they are compared
     against ux as integers, not against x as doubles. */
  static const unsigned long
1767
    log_thresh1 = 0x3fee0faa00000000,
1768
    log_thresh2 = 0x3ff1082c00000000;
1769
1770
  /* log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000
1771
     log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000 */
1772
  if (ux >= log_thresh1 && ux <= log_thresh2)
1773
    {
1774
      /* Arguments close to 1.0 are handled separately to maintain
1775
         accuracy.
1776
1777
         The approximation in this region exploits the identity
1778
             log( 1 + r ) = log( 1 + u/2 )  -  log( 1 - u/2 ), where
1779
             u  = 2r / (2+r).
1780
         Note that the right hand side has an odd Taylor series expansion
1781
         which converges much faster than the Taylor series expansion of
1782
         log( 1 + r ) in r. Thus, we approximate log( 1 + r ) by
1783
             u + A1 * u^3 + A2 * u^5 + ... + An * u^(2n+1).
1784
1785
         One subtlety is that since u cannot be calculated from
1786
         r exactly, the rounding error in the first u should be
1787
         avoided if possible. To accomplish this, we observe that
1788
                       u  =  r  -  r*r/(2+r).
1789
         Since x (=1+r) is the input argument, and thus presumed exact,
1790
         the formula above approximates u accurately because
1791
                       u  =  r  -  correction,
1792
         and the magnitude of "correction" (of the order of r*r)
1793
         is small.
1794
         With these observations, we will approximate log( 1 + r ) by
1795
            r + (  (A1*u^3 + ... + An*u^(2n+1)) - correction ).
1796
1797
         We approximate log(1+r) by an odd polynomial in u, where
1798
                  u = 2r/(2+r) = r - r*r/(2+r).
1799
      */
1800
      r = x - 1.0;
1801
      /* u temporarily holds r/(2+r); it is doubled two lines below. */
      u = r / (2.0 + r);
1802
      /* correction = r*r/(2+r), the amount by which r exceeds u. */
      correction = r * u;
1803
      u = u + u;
1804
      v = u * u;
1805
      z1 = r;
1806
      z2 = (u * v * (ca_1 + v * (ca_2 + v * (ca_3 + v * ca_4))) - correction);
1807
      *r1 = z1;
1808
      *r2 = z2;
1809
      *xexp = 0;
1810
    }
1811
  else
1812
    {
1813
      /*
1814
        First, we decompose the argument x to the form
1815
        x  =  2**M  *  (F1  +  F2),
1816
        where  1 <= F1+F2 < 2, M has the value of an integer,
1817
        F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128.
1818
1819
        Second, we approximate log( 1 + F2/F1 ) by an odd polynomial
1820
        in U, where U  =  2 F2 / (2 F1 + F2).
1821
        Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ).
1822
        The core approximation calculates
1823
        Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U   -   1.
1824
        Note that  log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ),
1825
        thus, Poly =  2 arctanh( U/2 ) / U  -  1.
1826
1827
        It is not hard to see that
1828
          log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ).
1829
        Hence, we return Z1 = log(F1), and  Z2 = log( 1 + F2/F1).
1830
        The values of log(F1) are calculated beforehand and stored
1831
        in the program.
1832
      */
1833
1834
      f = x;
1835
      if (ux < IMPBIT_DP64)
1836
        {
1837
          /* The input argument x is denormalized */
1838
          /* Normalize f by increasing the exponent by 60
1839
             and subtracting a correction to account for the implicit
1840
             bit. This replaces a slow denormalized
1841
             multiplication by a fast normal subtraction. */
1842
          static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */
1843
          GET_BITS_DP64(f, ux);
1844
          ux |= 0x03d0000000000000;
1845
          PUT_BITS_DP64(ux, f);
1846
          f -= corr;
1847
          GET_BITS_DP64(f, ux);
1848
          /* Remembered so the 60 added above is removed from *xexp below. */
          expadjust = 60;
1849
        }
1850
      else
1851
        expadjust = 0;
1852
1853
      /* Store the exponent of x in xexp and put
1854
         f into the range [0.5,1) */
1855
      *xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust;
1856
      PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f);
1857
1858
      /* Now  x = 2**xexp  * f,  1/2 <= f < 1. */
      /* NOTE(review): if EXPBIAS_DP64 is the usual IEEE bias and
         HALFEXPBITS_DP64 is the exponent field of 0.5 (both defined
         elsewhere -- confirm), then strictly x = 2**(xexp+1) * f.  The
         table lookup below appears to absorb the factor of two: entry
         index-64 is documented as log(1 + (index-64)/64) = log(2*f1). */
1859
1860
      /* Set index to be the nearest integer to 128*f */
1861
      r = 128.0 * f;
1862
      index = (int)(r + 0.5);
1863
1864
      z1 = ln_lead_table[index-64];
1865
      q = ln_tail_table[index-64];
1866
      f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
1867
      f2 = f - f1;
1868
      /* At this point, x = 2**xexp * ( f1  +  f2 ) where
1869
         f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */
1870
1871
      /* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5*f2 ) */
1872
      /* u = f2 / (f1 + 0.5 * f2); */
1873
      u = f2 / (f1 + 0.5 * f2);
1874
1875
      /* Here, |u| <= 2(exp(1/16)-1) / (exp(1/16)+1).
1876
         The core approximation calculates
1877
         poly = [log(1 + u/2) - log(1 - u/2)]/u  -  1  */
1878
      v = u * u;
1879
      poly = (v * (cb_1 + v * (cb_2 + v * cb_3)));
1880
      /* z2 = table tail q + (u + u*poly); the u terms are summed first. */
      z2 = q + (u + u * poly);
1881
      *r1 = z1;
1882
      *r2 = z2;
1883
    }
1884
  return;
1885
}
1886
#endif /* USE_LOG_KERNEL_AMD */
1887
1888
#if defined(USE_REMAINDER_PIBY2F_INLINE)
1889
/* Define this to get debugging print statements activated */
1890
#define DEBUGGING_PRINT
1891
#undef DEBUGGING_PRINT
1892
1893
1894
#ifdef DEBUGGING_PRINT
1895
#include <stdio.h>
1896
/* Debugging helper: render the low `bitsper` bits of `d` as a string of
   '0'/'1' characters, most significant bit first.

   d        - value to print; its two's-complement bit pattern is used, so
              negative values show their set bits.
   bitsper  - number of bits to emit; clamped to what the static buffer
              can hold (and to 0 if negative).
   point    - number of digits that appear before a '.' inserted into the
              output.  0 puts the '.' first, `bitsper` puts it last, and
              any value outside [0, bitsper] suppresses the '.'.

   Returns a pointer to a static buffer that is overwritten by the next
   call, so the result must be consumed (or copied) before calling again. */
char *d2b(long d, int bitsper, int point)
{
  static char buff[200];
  unsigned long bits = (unsigned long)d;
  int has_point, i, j;

  /* Keep room for the optional '.' and the terminating NUL. */
  if (bitsper < 0)
    bitsper = 0;
  else if (bitsper > (int)sizeof(buff) - 2)
    bitsper = (int)sizeof(buff) - 2;

  has_point = (point >= 0 && point <= bitsper);

  /* Fill the buffer from the right-hand end, least significant bit first. */
  j = bitsper + has_point;
  buff[j] = '\0';

  /* A point after the last digit is never reached by the loop below
     (i only runs up to bitsper - 1), so emit it here. */
  if (has_point && point == bitsper)
    buff[--j] = '.';

  for (i = bitsper - 1; i >= 0; i--)
    {
      buff[--j] = (bits & 1UL) ? '1' : '0';
      if (i == point)
        buff[--j] = '.';
      bits >>= 1;
    }
  return buff;
}
1920
#endif
1921
1922
/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using
1923
   extra precision, and return the result in r.
1924
   Return value "region" tells how many lots of pi/2 were subtracted
1925
   from x to put it in the range [-pi/4,pi/4], mod 4. */
1926
static inline void __remainder_piby2f_inline(double x, unsigned long ux, double *r, int *region)
1927
{
1928
1929
  /* eleven_piby4 is the closest machine number BELOW 11*pi/4 */
1930
  static const double
1931
    eleven_piby4 = 8.6393797973719301808159e+00; /* 0x4021475cc9eedf00 */
1932
1933
  static const double
1934
    piby2 = 1.57079632679489655800e+00, /* 0x3ff921fb54442d18 */
1935
    twobypi = 6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */
1936
    pi = 3.14159265358979311600e+00, /* 0x400921fb54442d18 */
1937
    three_piby2 = 4.71238898038468967400e+00, /* 0x4012d97c7f3321d2 */
1938
    two_pi = 6.28318530717958623200e+00, /* 0x401921fb54442d18 */
1939
    five_piby2 = 7.85398163397448278999e+00; /* 0x401f6a7a2955385e */
1940
1941
  /* Each of these threshold values is the closest machine
1942
     number BELOW a multiple of pi/4, i.e. they are not
1943
     rounded to nearest. thresh1 is 1*pi/4, thresh3 is 3*pi/4, etc.
1944
     This ensures that we end up in precisely the correct region. */
1945
  static const double
1946
    thresh1 = 7.8539816339744827899949e-01, /* 0x3fe921fb54442d18 */
1947
    thresh3 = 2.3561944901923448369984e+00, /* 0x4002d97c7f3321d2 */
1948
    thresh5 = 3.9269908169872413949974e+00, /* 0x400f6a7a2955385e */
1949
    thresh7 = 5.4977871437821379529964e+00, /* 0x4015fdbbe9bba775 */
1950
    thresh9 = 7.0685834705770345109954e+00; /* 0x401c463abeccb2bb */
1951
1952
  static const double cancellationThresh = 1.0e-5;
1953
  int done = 0;
1954
1955
  /* For small values of x, up to 11*pi/4, we do double precision
1956
     subtraction of the relevant multiple of pi/2 */
1957
  if (x <= eleven_piby4) /* x <= 11*pi/4 */
1958
    {
1959
      double t, ctest;
1960
1961
      if (x <= thresh5) /* x < 5*pi/4 */
1962
        {
1963
          if (x <= thresh1) /* x < pi/4 */
1964
            {
1965
              /* Quick return if x is already less than pi/4 */
1966
              *r = x;
1967
              *region = 0;
1968
              return;
1969
            }
1970
          else if (x <= thresh3) /* x < 3*pi/4 */
1971
            {
1972
              t = x - piby2;
1973
              *region = 1;
1974
            }
1975
          else /* x < 5*pi/4 */
1976
            {
1977
              t = x - pi;
1978
              *region = 2;
1979
            }
1980
        }
1981
      else
1982
        {
1983
          if (x <= thresh7) /* x < 7*pi/4 */
1984
            {
1985
              t = x - three_piby2;
1986
              *region = 3;
1987
            }
1988
          else if (x <= thresh9) /* x < 9*pi/4 */
1989
            {
1990
              t = x - two_pi;
1991
              *region = 0;
1992
            }
1993
          else /* x < 11*pi/4 */
1994
            {
1995
              t = x - five_piby2;
1996
              *region = 1;
1997
            }
1998
        }
1999
2000
      /* Check for massive cancellation which may happen very close
2001
         to multiples of pi/2 */
2002
      if (t < 0.0)
2003
        ctest = -t;
2004
      else
2005
        ctest = t;
2006
#ifdef DEBUGGING_PRINT
2007
      printf("Cancellation threshold test = (%g > %g)\n",
2008
             ctest, cancellationThresh);
2009
#endif
2010
2011
      /* Check if cancellation error was not too large */
2012
      if (ctest > cancellationThresh)
2013
        {
2014
          *r = t;
2015
          done = 1;
2016
        }
2017
      /* Otherwise fall through to the expensive method */
2018
    }
2019
  else if (x <= 1.0e6)
2020
    {
2021
      /* This range reduction is accurate enough for x up to
2022
         approximately 2**(20) except near multiples of pi/2 */
2023
2024
      /* We perform double precision arithmetic to find the
2025
         nearest multiple of pi/2 to x */
2026
      int reg;
2027
      double z, w, c, ctest;
2028
2029
      /* Multiply x by 2/pi in double precision, result in z */
2030
      z = x * twobypi;
2031
2032
#ifdef DEBUGGING_PRINT
2033
      printf("z = %30.20e = %s\n", z, double2hex(&z));
2034
#endif
2035
2036
      /* Find reg, the nearest integer to z */
2037
      reg = (int)(z + 0.5);
2038
2039
#ifdef DEBUGGING_PRINT
2040
      printf("reg = %d\n", reg);
2041
#endif
2042
2043
      /* Subtract reg from z, result in w */
2044
      w = z - reg;
2045
2046
#ifdef DEBUGGING_PRINT
2047
      printf("w = %30.20e = %s\n", w, double2hex(&w));
2048
#endif
2049
2050
     /* Check for massive cancellation which may happen very close
2051
        to multiples of pi/2 */
2052
      if (w < 0.0)
2053
        ctest = -w;
2054
      else
2055
        ctest = w;
2056
2057
      /* If cancellation is not too severe, continue with this method.
2058
         Otherwise we fall through to the expensive, accurate method */
2059
      if (ctest > cancellationThresh)
2060
        {
2061
          /* Multiply w by pi/2 */
2062
          c = w * piby2;
2063
          *r = c;
2064
          *region = reg & 3;
2065
2066
#ifdef DEBUGGING_PRINT
2067
          printf("r = %30.20e = %s\n", *r, double2hex(r));
2068
#endif
2069
          done = 1;
2070
        }
2071
    }
2072
2073
  if (!done)
2074
    {
2075
      /* This method simulates multi-precision floating-point
2076
         arithmetic and is accurate for all 1 <= x < infinity */
2077
#if 0
2078
      const int bitsper = 36;
2079
#else
2080
#define bitsper 36
2081
#endif
2082
      unsigned long res[10];
2083
      unsigned long u, carry, mask, mant, nextbits;
2084
      int first, last, i, rexp, xexp, resexp, ltb, determ, bc;
2085
      double dx;
2086
      static const double
2087
        piby2 = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
2088
      static unsigned long pibits[] =
2089
      {
2090
        0L,
2091
        5215L, 13000023176L, 11362338026L, 67174558139L,
2092
        34819822259L, 10612056195L, 67816420731L, 57840157550L,
2093
        19558516809L, 50025467026L, 25186875954L, 18152700886L
2094
      };
2095
2096
#ifdef DEBUGGING_PRINT
2097
      printf("On entry, x = %25.20e = %s\n", x, double2hex(&x));
2098
#endif
2099
2100
      xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
2101
      ux = ((ux & MANTBITS_DP64) | IMPBIT_DP64) >> 29;
2102
2103
#ifdef DEBUGGING_PRINT
2104
      printf("ux = %s\n", d2b(ux, 64, -1));
2105
#endif
2106
2107
      /* Now ux is the mantissa bit pattern of x as a long integer */
2108
      mask = (1L << bitsper) - 1;
2109
2110
      /* Set first and last to the positions of the first
2111
         and last chunks of 2/pi that we need */
2112
      first = xexp / bitsper;
2113
      resexp = xexp - first * bitsper;
2114
      /* 120 is the theoretical maximum number of bits (actually
2115
         115 for IEEE single precision) that we need to extract
2116
         from the middle of 2/pi to compute the reduced argument
2117
         accurately enough for our purposes */
2118
      last = first + 120 / bitsper;
2119
2120
#ifdef DEBUGGING_PRINT
2121
      printf("first = %d, last = %d\n", first, last);
2122
#endif
2123
2124
      /* Do a long multiplication of the bits of 2/pi by the
2125
         integer mantissa */
2126
#if 0
2127
      for (i = last; i >= first; i--)
2128
        {
2129
          u = pibits[i] * ux + carry;
2130
          res[i - first] = u & mask;
2131
          carry = u >> bitsper;
2132
        }
2133
      res[last - first + 1] = 0;
2134
#else
2135
      /* Unroll the loop. This is only correct because we know
2136
         that bitsper is fixed as 36. */
2137
      res[4] = 0;
2138
      u = pibits[last] * ux;
2139
      res[3] = u & mask;
2140
      carry = u >> bitsper;
2141
      u = pibits[last - 1] * ux + carry;
2142
      res[2] = u & mask;
2143
      carry = u >> bitsper;
2144
      u = pibits[last - 2] * ux + carry;
2145
      res[1] = u & mask;
2146
      carry = u >> bitsper;
2147
      u = pibits[first] * ux + carry;
2148
      res[0] = u & mask;
2149
#endif
2150
2151
#ifdef DEBUGGING_PRINT
2152
      printf("resexp = %d\n", resexp);
2153
      printf("Significant part of x * 2/pi with binary"
2154
             " point in correct place:\n");
2155
      for (i = 0; i <= last - first; i++)
2156
        {
2157
          if (i > 0 && i % 5 == 0)
2158
            printf("\n ");
2159
          if (i == 1)
2160
            printf("%s ", d2b(res[i], bitsper, resexp));
2161
          else
2162
            printf("%s ", d2b(res[i], bitsper, -1));
2163
        }
2164
      printf("\n");
2165
#endif
2166
2167
      /* Reconstruct the result */
2168
      ltb = (int)((((res[0] << bitsper) | res[1])
2169
                   >> (bitsper - 1 - resexp)) & 7);
2170
2171
      /* determ says whether the fractional part is >= 0.5 */
2172
      determ = ltb & 1;
2173
2174
#ifdef DEBUGGING_PRINT
2175
      printf("ltb = %d (last two bits before binary point"
2176
             " and first bit after)\n", ltb);
2177
      printf("determ = %d (1 means need to negate because the fractional\n"
2178
             "            part of x * 2/pi is greater than 0.5)\n", determ);
2179
#endif
2180
2181
      i = 1;
2182
      if (determ)
2183
        {
2184
          /* The mantissa is >= 0.5. We want to subtract it
2185
             from 1.0 by negating all the bits */
2186
          *region = ((ltb >> 1) + 1) & 3;
2187
          mant = ~(res[1]) & ((1L << (bitsper - resexp)) - 1);
2188
          while (mant < 0x0000000000010000)
2189
            {
2190
              i++;
2191
              mant = (mant << bitsper) | (~(res[i]) & mask);
2192
            }
2193
          nextbits = (~(res[i+1]) & mask);
2194
        }
2195
      else
2196
        {
2197
          *region = (ltb >> 1);
2198
          mant = res[1] & ((1L << (bitsper - resexp)) - 1);
2199
          while (mant < 0x0000000000010000)
2200
            {
2201
              i++;
2202
              mant = (mant << bitsper) | res[i];
2203
            }
2204
          nextbits = res[i+1];
2205
        }
2206
2207
#ifdef DEBUGGING_PRINT
2208
      printf("First bits of mant = %s\n", d2b(mant, bitsper, -1));
2209
#endif
2210
2211
      /* Normalize the mantissa. The shift value 6 here, determined by
2212
         trial and error, seems to give optimal speed. */
2213
      bc = 0;
2214
      while (mant < 0x0000400000000000)
2215
        {
2216
          bc += 6;
2217
          mant <<= 6;
2218
        }
2219
      while (mant < 0x0010000000000000)
2220
        {
2221
          bc++;
2222
          mant <<= 1;
2223
        }
2224
      mant |= nextbits >> (bitsper - bc);
2225
2226
      rexp = 52 + resexp - bc - i * bitsper;
2227
2228
#ifdef DEBUGGING_PRINT
2229
      printf("Normalised mantissa = 0x%016lx\n", mant);
2230
      printf("Exponent to be inserted on mantissa = rexp = %d\n", rexp);
2231
#endif
2232
2233
      /* Put the result exponent rexp onto the mantissa pattern */
2234
      u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64;
2235
      ux = (mant & MANTBITS_DP64) | u;
2236
      if (determ)
2237
        /* If we negated the mantissa we negate x too */
2238
        ux |= SIGNBIT_DP64;
2239
      PUT_BITS_DP64(ux, dx);
2240
2241
#ifdef DEBUGGING_PRINT
2242
      printf("(x*2/pi) = %25.20e = %s\n", dx, double2hex(&dx));
2243
#endif
2244
2245
      /* x is a double precision version of the fractional part of
2246
         x * 2 / pi. Multiply x by pi/2 in double precision
2247
         to get the reduced argument r. */
2248
      *r = dx * piby2;
2249
2250
#ifdef DEBUGGING_PRINT
2251
      printf(" r = frac(x*2/pi) * pi/2:\n");
2252
      printf(" r = %25.20e = %s\n", *r, double2hex(r));
2253
      printf("region = (number of pi/2 subtracted from x) mod 4 = %d\n",
2254
             *region);
2255
#endif
2256
    }
2257
}
2258
#endif /* USE_REMAINDER_PIBY2F_INLINE */
2259
2260
#endif /* LIBM_INLINES_AMD_H_INCLUDED */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/libm_util_amd.h.x86_64-new-libm (+101 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#ifndef LIBM_UTIL_AMD_H_INCLUDED
10
#define LIBM_UTIL_AMD_H_INCLUDED 1
11
12
/* Compile-time verification that type long is the same size
   as type double (i.e. we are really on a 64-bit machine).
   The array bound of the parameter evaluates to 1 when the sizes
   match and to -1 when they differ; a negative array size is
   invalid, so a mismatch makes this declaration fail to compile.
   Only the declaration is visible here; it acts as a static
   assertion. */
14
void check_long_against_double_size(int machine_is_64_bit[(sizeof(long) == sizeof(double))?1:-1]); 
15
16
17
/* Definitions for double functions on 64 bit machines.
   The masks and bit patterns below describe a 64-bit double viewed
   as an unsigned long: sign in bit 63, biased exponent in bits
   62..52, stored mantissa in bits 51..0. */
18
#define SIGNBIT_DP64      0x8000000000000000  /* mask: sign bit (bit 63) */
19
#define EXPBITS_DP64      0x7ff0000000000000  /* mask: exponent field (bits 62..52) */
20
#define MANTBITS_DP64     0x000fffffffffffff  /* mask: stored mantissa (bits 51..0) */
21
#define ONEEXPBITS_DP64   0x3ff0000000000000  /* bit pattern of 1.0 (biased exponent 1023) */
22
#define TWOEXPBITS_DP64   0x4000000000000000  /* bit pattern of 2.0 (biased exponent 1024) */
23
#define HALFEXPBITS_DP64  0x3fe0000000000000  /* bit pattern of 0.5 (biased exponent 1022) */
24
#define IMPBIT_DP64       0x0010000000000000  /* bit 52: the implicit leading mantissa bit */
25
#define QNANBITPATT_DP64  0x7ff8000000000000  /* exponent all ones, top mantissa bit set: quiet NaN */
26
#define PINFBITPATT_DP64  0x7ff0000000000000  /* exponent all ones, mantissa zero: +infinity */
27
#define NINFBITPATT_DP64  0xfff0000000000000  /* as above with the sign bit set: -infinity */
28
#define EXPBIAS_DP64      1023  /* value subtracted from the exponent field to get the true exponent */
29
#define EXPSHIFTBITS_DP64 52    /* right shift that brings the exponent field to bit 0 */
30
#define BIASEDEMIN_DP64   1     /* smallest biased exponent (1 - 1023 = EMIN_DP64) */
31
#define EMIN_DP64         -1022 /* unbiased counterpart of BIASEDEMIN_DP64 */
32
#define BIASEDEMAX_DP64   2046  /* largest biased exponent (2046 - 1023 = EMAX_DP64) */
33
#define EMAX_DP64         1023  /* unbiased counterpart of BIASEDEMAX_DP64 */
34
#define LAMBDA_DP64       1.0e300 /* NOTE(review): large constant; its use is not visible in this header - confirm at call sites */
35
#define MANTLENGTH_DP64   53    /* mantissa length in bits, counting the implicit bit */
36
#define BASEDIGITS_DP64   15    /* presumably decimal digits of precision for double - confirm */
37
38
39
/* These definitions, used by float functions,
   are for both 32 and 64 bit machines.
   The masks and bit patterns below describe a 32-bit float viewed
   as an unsigned int: sign in bit 31, biased exponent in bits
   30..23, stored mantissa in bits 22..0. */
41
#define SIGNBIT_SP32      0x80000000  /* mask: sign bit (bit 31) */
42
#define EXPBITS_SP32      0x7f800000  /* mask: exponent field (bits 30..23) */
43
#define MANTBITS_SP32     0x007fffff  /* mask: stored mantissa (bits 22..0) */
44
#define ONEEXPBITS_SP32   0x3f800000  /* bit pattern of 1.0f (biased exponent 127) */
45
#define TWOEXPBITS_SP32   0x40000000  /* bit pattern of 2.0f (biased exponent 128) */
46
#define HALFEXPBITS_SP32  0x3f000000  /* bit pattern of 0.5f (biased exponent 126) */
47
#define IMPBIT_SP32       0x00800000  /* bit 23: the implicit leading mantissa bit */
48
#define QNANBITPATT_SP32  0x7fc00000  /* exponent all ones, top mantissa bit set: quiet NaN */
49
#define PINFBITPATT_SP32  0x7f800000  /* exponent all ones, mantissa zero: +infinity */
50
#define NINFBITPATT_SP32  0xff800000  /* as above with the sign bit set: -infinity */
51
#define EXPBIAS_SP32      127   /* value subtracted from the exponent field to get the true exponent */
52
#define EXPSHIFTBITS_SP32 23    /* right shift that brings the exponent field to bit 0 */
53
#define BIASEDEMIN_SP32   1     /* smallest biased exponent (1 - 127 = EMIN_SP32) */
54
#define EMIN_SP32         -126  /* unbiased counterpart of BIASEDEMIN_SP32 */
55
#define BIASEDEMAX_SP32   254   /* largest biased exponent (254 - 127 = EMAX_SP32) */
56
#define EMAX_SP32         127   /* unbiased counterpart of BIASEDEMAX_SP32 */
57
#define LAMBDA_SP32       1.0e30 /* NOTE(review): large constant; its use is not visible in this header - confirm at call sites */
58
#define MANTLENGTH_SP32   24    /* mantissa length in bits, counting the implicit bit */
59
#define BASEDIGITS_SP32   7     /* presumably decimal digits associated with float - confirm */
60
61
/* Integer codes naming the classes of floating-point value (NaNs,
   infinities, normal and denormal numbers, zeros, each split by sign).
   NOTE(review): presumably returned by a classification helper defined
   elsewhere; no user of these codes is visible in this header. */
#define CLASS_SIGNALLING_NAN 1
62
#define CLASS_QUIET_NAN 2
63
#define CLASS_NEGATIVE_INFINITY 3
64
#define CLASS_NEGATIVE_NORMAL_NONZERO 4
65
#define CLASS_NEGATIVE_DENORMAL 5
66
#define CLASS_NEGATIVE_ZERO 6
67
#define CLASS_POSITIVE_ZERO 7
68
#define CLASS_POSITIVE_DENORMAL 8
69
#define CLASS_POSITIVE_NORMAL_NONZERO 9
70
#define CLASS_POSITIVE_INFINITY 10
71
72
/* Legacy accessors: reinterpret the storage of the lvalue x as an
   unsigned integer of the same width by pointer cast.  The argument is
   parenthesised so that any lvalue expression can be passed.  These
   casts break the compiler's aliasing assumptions, which is why the
   union-based macros below are the preferred alternatives. */
#define OLD_BITS_SP32(x) (*((unsigned int *)&(x)))
#define OLD_BITS_DP64(x) (*((unsigned long *)&(x)))

/* Alternatives to the above functions which don't have
   problems when using high optimization levels on gcc.

   GET_BITS_xx(x, ux) stores the bit pattern of the floating-point
   value x into the unsigned integer lvalue ux.
   PUT_BITS_xx(ux, x) stores the value whose bit pattern is ux into
   the floating-point lvalue x.

   Each macro expands to a single do { ... } while (0) statement, so
   it must be followed by a semicolon and can be used safely as the
   body of an if/else without extra braces. */
#define GET_BITS_SP32(x, ux) \
  do { union {float f; unsigned int i;} _bitsy; _bitsy.f = (x); (ux) = _bitsy.i; } while (0)
#define PUT_BITS_SP32(ux, x) \
  do { union {float f; unsigned int i;} _bitsy; _bitsy.i = (ux); (x) = _bitsy.f; } while (0)
#define GET_BITS_DP64(x, ux) \
  do { union {double d; unsigned long i;} _bitsy; _bitsy.d = (x); (ux) = _bitsy.i; } while (0)
#define PUT_BITS_DP64(ux, x) \
  do { union {double d; unsigned long i;} _bitsy; _bitsy.i = (ux); (x) = _bitsy.d; } while (0)
81
82
83
/* Processor-dependent floating-point status flags.
   Each value is a single-bit mask.
   NOTE(review): the bit positions coincide with the exception flags
   of the x86 MXCSR register - confirm against the code that reads
   and writes the status word. */
84
#define AMD_F_INEXACT 0x00000020
85
#define AMD_F_UNDERFLOW 0x00000010
86
#define AMD_F_OVERFLOW 0x00000008
87
#define AMD_F_DIVBYZERO 0x00000004
88
#define AMD_F_INVALID 0x00000001
89
90
/* Processor-dependent floating-point precision-control flags.
   Values of a two-bit field occupying bits 9..8.
   NOTE(review): this matches the precision-control field of the x87
   control word rather than MXCSR - confirm at the point of use. */
91
#define AMD_F_EXTENDED 0x00000300
92
#define AMD_F_DOUBLE   0x00000200
93
#define AMD_F_SINGLE   0x00000000
94
95
/* Processor-dependent floating-point rounding-control flags.
   Values of a two-bit field occupying bits 14..13.
   NOTE(review): this matches the rounding-control field of MXCSR -
   confirm at the point of use. */
96
#define AMD_F_RC_NEAREST 0x00000000
97
#define AMD_F_RC_DOWN    0x00002000
98
#define AMD_F_RC_UP      0x00004000
99
#define AMD_F_RC_ZERO    0x00006000
100
101
#endif /* LIBM_UTIL_AMD_H_INCLUDED */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_atan.c.x86_64-new-libm (+105 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_VAL_WITH_FLAGS
13
#define USE_SCALEDOUBLE_1
14
#define USE_SCALEUPDOUBLE1024
15
#include "libm_inlines_amd.h"
16
#undef USE_SCALEUPDOUBLE1024
17
#undef USE_SCALEDOUBLE_1
18
#undef USE_VAL_WITH_FLAGS
19
20
double __atan(double y)
21
{
22
23
  /* Some constants and split constants. */
24
25
  static double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
26
  double chi, clo, v, s, q, z;
27
28
  /* Find properties of argument y. */
29
30
  unsigned long uy, auy, yneg;
31
  GET_BITS_DP64(y, uy);
32
  auy = uy & ~SIGNBIT_DP64;
33
  yneg = (uy != auy);
34
35
  if (yneg) v = -y;
36
  else v = y;
37
38
  /* Argument reduction to range [-7/16,7/16] */
39
40
  if (auy > 0x4003800000000000) /* v > 39./16. */
41
    {
42
43
      if (auy > PINFBITPATT_DP64) return y + y; /* y is NaN */  
44
      else if (v > 0x4370000000000000)
45
	{ /* abs(y) > 2^56 => arctan(1/y) is 
46
	     insignificant compared to piby2 */
47
	  if (yneg) return val_with_flags(-piby2, AMD_F_INEXACT);
48
	  else return val_with_flags(piby2, AMD_F_INEXACT);
49
	}
50
51
      y = -1.0/v;
52
      /* (chi + clo) = arctan(infinity) */
53
      chi = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
54
      clo = 6.12323399573676480327e-17; /* 0x3c91a62633145c06 */
55
    }
56
  else if (auy > 0x3ff3000000000000) /* 39./16. > v > 19./16. */
57
    {
58
      y = (v-1.5)/(1.0+1.5*v);
59
      /* (chi + clo) = arctan(1.5) */
60
      chi = 9.82793723247329054082e-01; /* 0x3fef730bd281f69b */
61
      clo = 1.39033110312309953701e-17; /* 0x3c7007887af0cbbc */
62
    }
63
  else if (auy > 0x3fe6000000000000) /* 19./16. > v > 11./16. */
64
    {
65
      y = (v-1.)/(1.0+v);
66
      /* (chi + clo) = arctan(1.) */
67
      chi = 7.85398163397448278999e-01; /* 0x3fe921fb54442d18 */
68
      clo = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */
69
    }
70
  else if (auy > 0x3fdc000000000000) /* 11./16. > v > 7./16. */
71
    {
72
      y = (2*v-1.0)/(2.0+v);
73
      /* (chi + clo) = arctan(0.5) */
74
      chi = 4.63647609000806093515e-01; /* 0x3fddac670561bb4f */
75
      clo = 2.26987774529616809294e-17; /* 0x3c7a2b7f222f65e0 */
76
    }
77
  else  /* v < 7./16. */
78
    {
79
      y = v;
80
      chi = 0.0;
81
      clo = 0.0;
82
    }
83
84
  /* Core approximation: Remez(4,4) on [-7/16,7/16] */
85
86
  s = y*y;
87
  q = y*s*
88
       (0.268297920532545909e0 + 
89
	(0.447677206805497472e0 + 
90
	 (0.220638780716667420e0 + 
91
	  (0.304455919504853031e-1 + 
92
	    0.142316903342317766e-3*s)*s)*s)*s)/
93
       (0.804893761597637733e0 + 
94
	(0.182596787737507063e1 + 
95
	 (0.141254259931958921e1 + 
96
	  (0.424602594203847109e0 + 
97
	    0.389525873944742195e-1*s)*s)*s)*s);
98
99
  z = chi - ((q - clo) - y);
100
101
  if (yneg) z = -z;
102
  return z;
103
}
104
105
weak_alias (__atan, atan)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_atan2.c.x86_64-new-libm (+746 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_VAL_WITH_FLAGS
13
#define USE_SCALEDOUBLE_1
14
#define USE_SCALEDOUBLE_2
15
#define USE_SCALEUPDOUBLE1024
16
#define USE_SCALEDOWNDOUBLE
17
#include "libm_inlines_amd.h"
18
#undef USE_SCALEDOWNDOUBLE
19
#undef USE_SCALEUPDOUBLE1024
20
#undef USE_SCALEDOUBLE_1
21
#undef USE_SCALEDOUBLE_2
22
#undef USE_VAL_WITH_FLAGS
23
24
/* Deal with errno for out-of-range arguments
25
   (only used when _LIB_VERSION is _SVID_) */
26
#include "libm_errno_amd.h"
27
static inline double retval_errno_edom(double x, double y)
28
{
29
  struct exception exc;
30
  exc.arg1 = x;
31
  exc.arg2 = y;
32
  exc.type = DOMAIN;
33
  exc.name = (char *)"atan2";
34
  exc.retval = HUGE;
35
  if (!matherr(&exc))
36
    {
37
      (void)fputs("atan2: DOMAIN error\n", stderr);
38
      __set_errno(EDOM);
39
    }
40
  return exc.retval;
41
}
42
43
double __atan2(double y, double x)
44
{
45
  /* Arrays atan_jby256_lead and atan_jby256_tail contain
46
     leading and trailing parts respectively of precomputed
47
     values of atan(j/256), for j = 16, 17, ..., 256.
48
     atan_jby256_lead contains the first 21 bits of precision,
49
     and atan_jby256_tail contains a further 53 bits precision. */
50
51
  static const double atan_jby256_lead[  241] = {
52
    6.24187886714935302734e-02,  /* 0x3faff55b00000000 */
53
    6.63088560104370117188e-02,  /* 0x3fb0f99e00000000 */
54
    7.01969265937805175781e-02,  /* 0x3fb1f86d00000000 */
55
    7.40829110145568847656e-02,  /* 0x3fb2f71900000000 */
56
    7.79666304588317871094e-02,  /* 0x3fb3f59f00000000 */
57
    8.18479657173156738281e-02,  /* 0x3fb4f3fd00000000 */
58
    8.57268571853637695312e-02,  /* 0x3fb5f23200000000 */
59
    8.96031260490417480469e-02,  /* 0x3fb6f03b00000000 */
60
    9.34767723083496093750e-02,  /* 0x3fb7ee1800000000 */
61
    9.73475575447082519531e-02,  /* 0x3fb8ebc500000000 */
62
    1.01215422153472900391e-01,  /* 0x3fb9e94100000000 */
63
    1.05080246925354003906e-01,  /* 0x3fbae68a00000000 */
64
    1.08941912651062011719e-01,  /* 0x3fbbe39e00000000 */
65
    1.12800359725952148438e-01,  /* 0x3fbce07c00000000 */
66
    1.16655409336090087891e-01,  /* 0x3fbddd2100000000 */
67
    1.20507001876831054688e-01,  /* 0x3fbed98c00000000 */
68
    1.24354958534240722656e-01,  /* 0x3fbfd5ba00000000 */
69
    1.28199219703674316406e-01,  /* 0x3fc068d500000000 */
70
    1.32039666175842285156e-01,  /* 0x3fc0e6ad00000000 */
71
    1.35876297950744628906e-01,  /* 0x3fc1646500000000 */
72
    1.39708757400512695312e-01,  /* 0x3fc1e1fa00000000 */
73
    1.43537282943725585938e-01,  /* 0x3fc25f6e00000000 */
74
    1.47361397743225097656e-01,  /* 0x3fc2dcbd00000000 */
75
    1.51181221008300781250e-01,  /* 0x3fc359e800000000 */
76
    1.54996633529663085938e-01,  /* 0x3fc3d6ee00000000 */
77
    1.58807516098022460938e-01,  /* 0x3fc453ce00000000 */
78
    1.62613749504089355469e-01,  /* 0x3fc4d08700000000 */
79
    1.66415214538574218750e-01,  /* 0x3fc54d1800000000 */
80
    1.70211911201477050781e-01,  /* 0x3fc5c98100000000 */
81
    1.74003481864929199219e-01,  /* 0x3fc645bf00000000 */
82
    1.77790164947509765625e-01,  /* 0x3fc6c1d400000000 */
83
    1.81571602821350097656e-01,  /* 0x3fc73dbd00000000 */
84
    1.85347914695739746094e-01,  /* 0x3fc7b97b00000000 */
85
    1.89118742942810058594e-01,  /* 0x3fc8350b00000000 */
86
    1.92884206771850585938e-01,  /* 0x3fc8b06e00000000 */
87
    1.96644186973571777344e-01,  /* 0x3fc92ba300000000 */
88
    2.00398445129394531250e-01,  /* 0x3fc9a6a800000000 */
89
    2.04147100448608398438e-01,  /* 0x3fca217e00000000 */
90
    2.07889914512634277344e-01,  /* 0x3fca9c2300000000 */
91
    2.11626768112182617188e-01,  /* 0x3fcb169600000000 */
92
    2.15357661247253417969e-01,  /* 0x3fcb90d700000000 */
93
    2.19082474708557128906e-01,  /* 0x3fcc0ae500000000 */
94
    2.22801089286804199219e-01,  /* 0x3fcc84bf00000000 */
95
    2.26513504981994628906e-01,  /* 0x3fccfe6500000000 */
96
    2.30219483375549316406e-01,  /* 0x3fcd77d500000000 */
97
    2.33919143676757812500e-01,  /* 0x3fcdf11000000000 */
98
    2.37612247467041015625e-01,  /* 0x3fce6a1400000000 */
99
    2.41298794746398925781e-01,  /* 0x3fcee2e100000000 */
100
    2.44978547096252441406e-01,  /* 0x3fcf5b7500000000 */
101
    2.48651623725891113281e-01,  /* 0x3fcfd3d100000000 */
102
    2.52317905426025390625e-01,  /* 0x3fd025fa00000000 */
103
    2.55977153778076171875e-01,  /* 0x3fd061ee00000000 */
104
    2.59629487991333007812e-01,  /* 0x3fd09dc500000000 */
105
    2.63274669647216796875e-01,  /* 0x3fd0d97e00000000 */
106
    2.66912937164306640625e-01,  /* 0x3fd1151a00000000 */
107
    2.70543813705444335938e-01,  /* 0x3fd1509700000000 */
108
    2.74167299270629882812e-01,  /* 0x3fd18bf500000000 */
109
    2.77783632278442382812e-01,  /* 0x3fd1c73500000000 */
110
    2.81392335891723632812e-01,  /* 0x3fd2025500000000 */
111
    2.84993648529052734375e-01,  /* 0x3fd23d5600000000 */
112
    2.88587331771850585938e-01,  /* 0x3fd2783700000000 */
113
    2.92173147201538085938e-01,  /* 0x3fd2b2f700000000 */
114
    2.95751571655273437500e-01,  /* 0x3fd2ed9800000000 */
115
    2.99322128295898437500e-01,  /* 0x3fd3281800000000 */
116
    3.02884817123413085938e-01,  /* 0x3fd3627700000000 */
117
    3.06439399719238281250e-01,  /* 0x3fd39cb400000000 */
118
    3.09986352920532226562e-01,  /* 0x3fd3d6d100000000 */
119
    3.13524961471557617188e-01,  /* 0x3fd410cb00000000 */
120
    3.17055702209472656250e-01,  /* 0x3fd44aa400000000 */
121
    3.20578098297119140625e-01,  /* 0x3fd4845a00000000 */
122
    3.24092388153076171875e-01,  /* 0x3fd4bdee00000000 */
123
    3.27598333358764648438e-01,  /* 0x3fd4f75f00000000 */
124
    3.31095933914184570312e-01,  /* 0x3fd530ad00000000 */
125
    3.34585189819335937500e-01,  /* 0x3fd569d800000000 */
126
    3.38066101074218750000e-01,  /* 0x3fd5a2e000000000 */
127
    3.41538190841674804688e-01,  /* 0x3fd5dbc300000000 */
128
    3.45002174377441406250e-01,  /* 0x3fd6148400000000 */
129
    3.48457098007202148438e-01,  /* 0x3fd64d1f00000000 */
130
    3.51903676986694335938e-01,  /* 0x3fd6859700000000 */
131
    3.55341434478759765625e-01,  /* 0x3fd6bdea00000000 */
132
    3.58770608901977539062e-01,  /* 0x3fd6f61900000000 */
133
    3.62190723419189453125e-01,  /* 0x3fd72e2200000000 */
134
    3.65602254867553710938e-01,  /* 0x3fd7660700000000 */
135
    3.69004726409912109375e-01,  /* 0x3fd79dc600000000 */
136
    3.72398376464843750000e-01,  /* 0x3fd7d56000000000 */
137
    3.75782966613769531250e-01,  /* 0x3fd80cd400000000 */
138
    3.79158496856689453125e-01,  /* 0x3fd8442200000000 */
139
    3.82525205612182617188e-01,  /* 0x3fd87b4b00000000 */
140
    3.85882616043090820312e-01,  /* 0x3fd8b24d00000000 */
141
    3.89230966567993164062e-01,  /* 0x3fd8e92900000000 */
142
    3.92570018768310546875e-01,  /* 0x3fd91fde00000000 */
143
    3.95900011062622070312e-01,  /* 0x3fd9566d00000000 */
144
    3.99220705032348632812e-01,  /* 0x3fd98cd500000000 */
145
    4.02532100677490234375e-01,  /* 0x3fd9c31600000000 */
146
    4.05834197998046875000e-01,  /* 0x3fd9f93000000000 */
147
    4.09126996994018554688e-01,  /* 0x3fda2f2300000000 */
148
    4.12410259246826171875e-01,  /* 0x3fda64ee00000000 */
149
    4.15684223175048828125e-01,  /* 0x3fda9a9200000000 */
150
    4.18948888778686523438e-01,  /* 0x3fdad00f00000000 */
151
    4.22204017639160156250e-01,  /* 0x3fdb056400000000 */
152
    4.25449609756469726562e-01,  /* 0x3fdb3a9100000000 */
153
    4.28685665130615234375e-01,  /* 0x3fdb6f9600000000 */
154
    4.31912183761596679688e-01,  /* 0x3fdba47300000000 */
155
    4.35129165649414062500e-01,  /* 0x3fdbd92800000000 */
156
    4.38336372375488281250e-01,  /* 0x3fdc0db400000000 */
157
    4.41534280776977539062e-01,  /* 0x3fdc421900000000 */
158
    4.44722414016723632812e-01,  /* 0x3fdc765500000000 */
159
    4.47900772094726562500e-01,  /* 0x3fdcaa6800000000 */
160
    4.51069593429565429688e-01,  /* 0x3fdcde5300000000 */
161
    4.54228639602661132812e-01,  /* 0x3fdd121500000000 */
162
    4.57377910614013671875e-01,  /* 0x3fdd45ae00000000 */
163
    4.60517644882202148438e-01,  /* 0x3fdd791f00000000 */
164
    4.63647603988647460938e-01,  /* 0x3fddac6700000000 */
165
    4.66767549514770507812e-01,  /* 0x3fdddf8500000000 */
166
    4.69877958297729492188e-01,  /* 0x3fde127b00000000 */
167
    4.72978591918945312500e-01,  /* 0x3fde454800000000 */
168
    4.76069211959838867188e-01,  /* 0x3fde77eb00000000 */
169
    4.79150056838989257812e-01,  /* 0x3fdeaa6500000000 */
170
    4.82221126556396484375e-01,  /* 0x3fdedcb600000000 */
171
    4.85282421112060546875e-01,  /* 0x3fdf0ede00000000 */
172
    4.88333940505981445312e-01,  /* 0x3fdf40dd00000000 */
173
    4.91375446319580078125e-01,  /* 0x3fdf72b200000000 */
174
    4.94406938552856445312e-01,  /* 0x3fdfa45d00000000 */
175
    4.97428894042968750000e-01,  /* 0x3fdfd5e000000000 */
176
    5.00440597534179687500e-01,  /* 0x3fe0039c00000000 */
177
    5.03442764282226562500e-01,  /* 0x3fe01c3400000000 */
178
    5.06434917449951171875e-01,  /* 0x3fe034b700000000 */
179
    5.09417057037353515625e-01,  /* 0x3fe04d2500000000 */
180
    5.12389183044433593750e-01,  /* 0x3fe0657e00000000 */
181
    5.15351772308349609375e-01,  /* 0x3fe07dc300000000 */
182
    5.18304347991943359375e-01,  /* 0x3fe095f300000000 */
183
    5.21246910095214843750e-01,  /* 0x3fe0ae0e00000000 */
184
    5.24179458618164062500e-01,  /* 0x3fe0c61400000000 */
185
    5.27101993560791015625e-01,  /* 0x3fe0de0500000000 */
186
    5.30014991760253906250e-01,  /* 0x3fe0f5e200000000 */
187
    5.32917976379394531250e-01,  /* 0x3fe10daa00000000 */
188
    5.35810947418212890625e-01,  /* 0x3fe1255d00000000 */
189
    5.38693904876708984375e-01,  /* 0x3fe13cfb00000000 */
190
    5.41567325592041015625e-01,  /* 0x3fe1548500000000 */
191
    5.44430732727050781250e-01,  /* 0x3fe16bfa00000000 */
192
    5.47284126281738281250e-01,  /* 0x3fe1835a00000000 */
193
    5.50127506256103515625e-01,  /* 0x3fe19aa500000000 */
194
    5.52961349487304687500e-01,  /* 0x3fe1b1dc00000000 */
195
    5.55785179138183593750e-01,  /* 0x3fe1c8fe00000000 */
196
    5.58598995208740234375e-01,  /* 0x3fe1e00b00000000 */
197
    5.61403274536132812500e-01,  /* 0x3fe1f70400000000 */
198
    5.64197540283203125000e-01,  /* 0x3fe20de800000000 */
199
    5.66981792449951171875e-01,  /* 0x3fe224b700000000 */
200
    5.69756031036376953125e-01,  /* 0x3fe23b7100000000 */
201
    5.72520732879638671875e-01,  /* 0x3fe2521700000000 */
202
    5.75275897979736328125e-01,  /* 0x3fe268a900000000 */
203
    5.78021049499511718750e-01,  /* 0x3fe27f2600000000 */
204
    5.80756187438964843750e-01,  /* 0x3fe2958e00000000 */
205
    5.83481788635253906250e-01,  /* 0x3fe2abe200000000 */
206
    5.86197376251220703125e-01,  /* 0x3fe2c22100000000 */
207
    5.88903427124023437500e-01,  /* 0x3fe2d84c00000000 */
208
    5.91599464416503906250e-01,  /* 0x3fe2ee6200000000 */
209
    5.94285964965820312500e-01,  /* 0x3fe3046400000000 */
210
    5.96962928771972656250e-01,  /* 0x3fe31a5200000000 */
211
    5.99629878997802734375e-01,  /* 0x3fe3302b00000000 */
212
    6.02287292480468750000e-01,  /* 0x3fe345f000000000 */
213
    6.04934692382812500000e-01,  /* 0x3fe35ba000000000 */
214
    6.07573032379150390625e-01,  /* 0x3fe3713d00000000 */
215
    6.10201358795166015625e-01,  /* 0x3fe386c500000000 */
216
    6.12820148468017578125e-01,  /* 0x3fe39c3900000000 */
217
    6.15428924560546875000e-01,  /* 0x3fe3b19800000000 */
218
    6.18028640747070312500e-01,  /* 0x3fe3c6e400000000 */
219
    6.20618820190429687500e-01,  /* 0x3fe3dc1c00000000 */
220
    6.23198986053466796875e-01,  /* 0x3fe3f13f00000000 */
221
    6.25770092010498046875e-01,  /* 0x3fe4064f00000000 */
222
    6.28331184387207031250e-01,  /* 0x3fe41b4a00000000 */
223
    6.30883216857910156250e-01,  /* 0x3fe4303200000000 */
224
    6.33425712585449218750e-01,  /* 0x3fe4450600000000 */
225
    6.35958671569824218750e-01,  /* 0x3fe459c600000000 */
226
    6.38482093811035156250e-01,  /* 0x3fe46e7200000000 */
227
    6.40995979309082031250e-01,  /* 0x3fe4830a00000000 */
228
    6.43500804901123046875e-01,  /* 0x3fe4978f00000000 */
229
    6.45996093750000000000e-01,  /* 0x3fe4ac0000000000 */
230
    6.48482322692871093750e-01,  /* 0x3fe4c05e00000000 */
231
    6.50959014892578125000e-01,  /* 0x3fe4d4a800000000 */
232
    6.53426170349121093750e-01,  /* 0x3fe4e8de00000000 */
233
    6.55884265899658203125e-01,  /* 0x3fe4fd0100000000 */
234
    6.58332824707031250000e-01,  /* 0x3fe5111000000000 */
235
    6.60772323608398437500e-01,  /* 0x3fe5250c00000000 */
236
    6.63202762603759765625e-01,  /* 0x3fe538f500000000 */
237
    6.65623664855957031250e-01,  /* 0x3fe54cca00000000 */
238
    6.68035984039306640625e-01,  /* 0x3fe5608d00000000 */
239
    6.70438766479492187500e-01,  /* 0x3fe5743c00000000 */
240
    6.72832489013671875000e-01,  /* 0x3fe587d800000000 */
241
    6.75216674804687500000e-01,  /* 0x3fe59b6000000000 */
242
    6.77592277526855468750e-01,  /* 0x3fe5aed600000000 */
243
    6.79958820343017578125e-01,  /* 0x3fe5c23900000000 */
244
    6.82316303253173828125e-01,  /* 0x3fe5d58900000000 */
245
    6.84664726257324218750e-01,  /* 0x3fe5e8c600000000 */
246
    6.87004089355468750000e-01,  /* 0x3fe5fbf000000000 */
247
    6.89334869384765625000e-01,  /* 0x3fe60f0800000000 */
248
    6.91656589508056640625e-01,  /* 0x3fe6220d00000000 */
249
    6.93969249725341796875e-01,  /* 0x3fe634ff00000000 */
250
    6.96272850036621093750e-01,  /* 0x3fe647de00000000 */
251
    6.98567867279052734375e-01,  /* 0x3fe65aab00000000 */
252
    7.00854301452636718750e-01,  /* 0x3fe66d6600000000 */
253
    7.03131675720214843750e-01,  /* 0x3fe6800e00000000 */
254
    7.05400466918945312500e-01,  /* 0x3fe692a400000000 */
255
    7.07660198211669921875e-01,  /* 0x3fe6a52700000000 */
256
    7.09911346435546875000e-01,  /* 0x3fe6b79800000000 */
257
    7.12153911590576171875e-01,  /* 0x3fe6c9f700000000 */
258
    7.14387893676757812500e-01,  /* 0x3fe6dc4400000000 */
259
    7.16613292694091796875e-01,  /* 0x3fe6ee7f00000000 */
260
    7.18829631805419921875e-01,  /* 0x3fe700a700000000 */
261
    7.21037864685058593750e-01,  /* 0x3fe712be00000000 */
262
    7.23237514495849609375e-01,  /* 0x3fe724c300000000 */
263
    7.25428581237792968750e-01,  /* 0x3fe736b600000000 */
264
    7.27611064910888671875e-01,  /* 0x3fe7489700000000 */
265
    7.29785442352294921875e-01,  /* 0x3fe75a6700000000 */
266
    7.31950759887695312500e-01,  /* 0x3fe76c2400000000 */
267
    7.34108448028564453125e-01,  /* 0x3fe77dd100000000 */
268
    7.36257076263427734375e-01,  /* 0x3fe78f6b00000000 */
269
    7.38397598266601562500e-01,  /* 0x3fe7a0f400000000 */
270
    7.40530014038085937500e-01,  /* 0x3fe7b26c00000000 */
271
    7.42654323577880859375e-01,  /* 0x3fe7c3d300000000 */
272
    7.44770050048828125000e-01,  /* 0x3fe7d52800000000 */
273
    7.46877670288085937500e-01,  /* 0x3fe7e66c00000000 */
274
    7.48976707458496093750e-01,  /* 0x3fe7f79e00000000 */
275
    7.51068115234375000000e-01,  /* 0x3fe808c000000000 */
276
    7.53150939941406250000e-01,  /* 0x3fe819d000000000 */
277
    7.55226135253906250000e-01,  /* 0x3fe82ad000000000 */
278
    7.57292747497558593750e-01,  /* 0x3fe83bbe00000000 */
279
    7.59351730346679687500e-01,  /* 0x3fe84c9c00000000 */
280
    7.61402606964111328125e-01,  /* 0x3fe85d6900000000 */
281
    7.63445377349853515625e-01,  /* 0x3fe86e2500000000 */
282
    7.65480041503906250000e-01,  /* 0x3fe87ed000000000 */
283
    7.67507076263427734375e-01,  /* 0x3fe88f6b00000000 */
284
    7.69526004791259765625e-01,  /* 0x3fe89ff500000000 */
285
    7.71537303924560546875e-01,  /* 0x3fe8b06f00000000 */
286
    7.73540973663330078125e-01,  /* 0x3fe8c0d900000000 */
287
    7.75536537170410156250e-01,  /* 0x3fe8d13200000000 */
288
    7.77523994445800781250e-01,  /* 0x3fe8e17a00000000 */
289
    7.79504299163818359375e-01,  /* 0x3fe8f1b300000000 */
290
    7.81476497650146484375e-01,  /* 0x3fe901db00000000 */
291
    7.83441066741943359375e-01,  /* 0x3fe911f300000000 */
292
    7.85398006439208984375e-01}; /* 0x3fe921fb00000000 */
293
294
  static const double atan_jby256_tail[  241] = {
295
    2.13244638182005395671e-08,  /* 0x3e56e59fbd38db2c */
296
    3.89093864761712760656e-08,  /* 0x3e64e3aa54dedf96 */
297
    4.44780900009437454576e-08,  /* 0x3e67e105ab1bda88 */
298
    1.15344768460112754160e-08,  /* 0x3e48c5254d013fd0 */
299
    3.37271051945395312705e-09,  /* 0x3e2cf8ab3ad62670 */
300
    2.40857608736109859459e-08,  /* 0x3e59dca4bec80468 */
301
    1.85853810450623807768e-08,  /* 0x3e53f4b5ec98a8da */
302
    5.14358299969225078306e-08,  /* 0x3e6b9d49619d81fe */
303
    8.85023985412952486748e-09,  /* 0x3e43017887460934 */
304
    1.59425154214358432060e-08,  /* 0x3e511e3eca0b9944 */
305
    1.95139937737755753164e-08,  /* 0x3e54f3f73c5a332e */
306
    2.64909755273544319715e-08,  /* 0x3e5c71c8ae0e00a6 */
307
    4.43388037881231070144e-08,  /* 0x3e67cde0f86fbdc7 */
308
    2.14757072421821274557e-08,  /* 0x3e570f328c889c72 */
309
    2.61049792670754218852e-08,  /* 0x3e5c07ae9b994efe */
310
    7.81439350674466302231e-09,  /* 0x3e40c8021d7b1698 */
311
    3.60125207123751024094e-08,  /* 0x3e635585edb8cb22 */
312
    6.15276238179343767917e-08,  /* 0x3e70842567b30e96 */
313
    9.54387964641184285058e-08,  /* 0x3e799e811031472e */
314
    3.02789566851502754129e-08,  /* 0x3e6041821416bcee */
315
    1.16888650949870856331e-07,  /* 0x3e7f6086e4dc96f4 */
316
    1.07580956468653338863e-08,  /* 0x3e471a535c5f1b58 */
317
    8.33454265379535427653e-08,  /* 0x3e765f743fe63ca1 */
318
    1.10790279272629526068e-07,  /* 0x3e7dbd733472d014 */
319
    1.08394277896366207424e-07,  /* 0x3e7d18cc4d8b0d1d */
320
    9.22176086126841098800e-08,  /* 0x3e78c12553c8fb29 */
321
    7.90938592199048786990e-08,  /* 0x3e753b49e2e8f991 */
322
    8.66445407164293125637e-08,  /* 0x3e77422ae148c141 */
323
    1.40839973537092438671e-08,  /* 0x3e4e3ec269df56a8 */
324
    1.19070438507307600689e-07,  /* 0x3e7ff6754e7e0ac9 */
325
    6.40451663051716197071e-08,  /* 0x3e7131267b1b5aad */
326
    1.08338682076343674522e-07,  /* 0x3e7d14fa403a94bc */
327
    3.52999550187922736222e-08,  /* 0x3e62f396c089a3d8 */
328
    1.05983273930043077202e-07,  /* 0x3e7c731d78fa95bb */
329
    1.05486124078259553339e-07,  /* 0x3e7c50f385177399 */
330
    5.82167732281776477773e-08,  /* 0x3e6f41409c6f2c20 */
331
    1.08696483983403942633e-07,  /* 0x3e7d2d90c4c39ec0 */
332
    4.47335086122377542835e-08,  /* 0x3e680420696f2106 */
333
    1.26896287162615723528e-08,  /* 0x3e4b40327943a2e8 */
334
    4.06534471589151404531e-08,  /* 0x3e65d35e02f3d2a2 */
335
    3.84504846300557026690e-08,  /* 0x3e64a498288117b0 */
336
    3.60715006404807269080e-08,  /* 0x3e635da119afb324 */
337
    6.44725903165522722801e-08,  /* 0x3e714e85cdb9a908 */
338
    3.63749249976409461305e-08,  /* 0x3e638754e5547b9a */
339
    1.03901294413833913794e-07,  /* 0x3e7be40ae6ce3246 */
340
    6.25379756302167880580e-08,  /* 0x3e70c993b3bea7e7 */
341
    6.63984302368488828029e-08,  /* 0x3e71d2dd89ac3359 */
342
    3.21844598971548278059e-08,  /* 0x3e61476603332c46 */
343
    1.16030611712765830905e-07,  /* 0x3e7f25901bac55b7 */
344
    1.17464622142347730134e-07,  /* 0x3e7f881b7c826e28 */
345
    7.54604017965808996596e-08,  /* 0x3e7441996d698d20 */
346
    1.49234929356206556899e-07,  /* 0x3e8407ac521ea089 */
347
    1.41416924523217430259e-07,  /* 0x3e82fb0c6c4b1723 */
348
    2.13308065617483489011e-07,  /* 0x3e8ca135966a3e18 */
349
    5.04230937933302320146e-08,  /* 0x3e6b1218e4d646e4 */
350
    5.45874922281655519035e-08,  /* 0x3e6d4e72a350d288 */
351
    1.51849028914786868886e-07,  /* 0x3e84617e2f04c329 */
352
    3.09004308703769273010e-08,  /* 0x3e6096ec41e82650 */
353
    9.67574548184738317664e-08,  /* 0x3e79f91f25773e6e */
354
    4.02508285529322212824e-08,  /* 0x3e659c0820f1d674 */
355
    3.01222268096861091157e-08,  /* 0x3e602bf7a2df1064 */
356
    2.36189860670079288680e-07,  /* 0x3e8fb36bfc40508f */
357
    1.14095158111080887695e-07,  /* 0x3e7ea08f3f8dc892 */
358
    7.42349089746573467487e-08,  /* 0x3e73ed6254656a0e */
359
    5.12515583196230380184e-08,  /* 0x3e6b83f5e5e69c58 */
360
    2.19290391828763918102e-07,  /* 0x3e8d6ec2af768592 */
361
    3.83263512187553886471e-08,  /* 0x3e6493889a226f94 */
362
    1.61513486284090523855e-07,  /* 0x3e85ad8fa65279ba */
363
    5.09996743535589922261e-08,  /* 0x3e6b615784d45434 */
364
    1.23694037861246766534e-07,  /* 0x3e809a184368f145 */
365
    8.23367955351123783984e-08,  /* 0x3e761a2439b0d91c */
366
    1.07591766213053694014e-07,  /* 0x3e7ce1a65e39a978 */
367
    1.42789947524631815640e-07,  /* 0x3e832a39a93b6a66 */
368
    1.32347123024711878538e-07,  /* 0x3e81c3699af804e7 */
369
    2.17626067316598149229e-08,  /* 0x3e575e0f4e44ede8 */
370
    2.34454866923044288656e-07,  /* 0x3e8f77ced1a7a83b */
371
    2.82966370261766916053e-09,  /* 0x3e284e7f0cb1b500 */
372
    2.29300919890907632975e-07,  /* 0x3e8ec6b838b02dfe */
373
    1.48428270450261284915e-07,  /* 0x3e83ebf4dfbeda87 */
374
    1.87937408574313982512e-07,  /* 0x3e89397aed9cb475 */
375
    6.13685946813334055347e-08,  /* 0x3e707937bc239c54 */
376
    1.98585022733583817493e-07,  /* 0x3e8aa754553131b6 */
377
    7.68394131623752961662e-08,  /* 0x3e74a05d407c45dc */
378
    1.28119052312436745644e-07,  /* 0x3e8132231a206dd0 */
379
    7.02119104719236502733e-08,  /* 0x3e72d8ecfdd69c88 */
380
    9.87954793820636301943e-08,  /* 0x3e7a852c74218606 */
381
    1.72176752381034986217e-07,  /* 0x3e871bf2baeebb50 */
382
    1.12877225146169704119e-08,  /* 0x3e483d7db7491820 */
383
    5.33549829555851737993e-08,  /* 0x3e6ca50d92b6da14 */
384
    2.13833275710816521345e-08,  /* 0x3e56f5cde8530298 */
385
    1.16243518048290556393e-07,  /* 0x3e7f343198910740 */
386
    6.29926408369055877943e-08,  /* 0x3e70e8d241ccd80a */
387
    6.45429039328021963791e-08,  /* 0x3e71535ac619e6c8 */
388
    8.64001922814281933403e-08,  /* 0x3e77316041c36cd2 */
389
    9.50767572202325800240e-08,  /* 0x3e7985a000637d8e */
390
    5.80851497508121135975e-08,  /* 0x3e6f2f29858c0a68 */
391
    1.82350561135024766232e-07,  /* 0x3e8879847f96d909 */
392
    1.98948680587390608655e-07,  /* 0x3e8ab3d319e12e42 */
393
    7.83548663450197659846e-08,  /* 0x3e75088162dfc4c2 */
394
    3.04374234486798594427e-08,  /* 0x3e605749a1cd9d8c */
395
    2.76135725629797411787e-08,  /* 0x3e5da65c6c6b8618 */
396
    4.32610105454203065470e-08,  /* 0x3e6739bf7df1ad64 */
397
    5.17107515324127256994e-08,  /* 0x3e6bc31252aa3340 */
398
    2.82398327875841444660e-08,  /* 0x3e5e528191ad3aa8 */
399
    1.87482469524195595399e-07,  /* 0x3e8929d93df19f18 */
400
    2.97481891662714096139e-08,  /* 0x3e5ff11eb693a080 */
401
    9.94421570843584316402e-09,  /* 0x3e455ae3f145a3a0 */
402
    1.07056210730391848428e-07,  /* 0x3e7cbcd8c6c0ca82 */
403
    6.25589580466881163081e-08,  /* 0x3e70cb04d425d304 */
404
    9.56641013869464593803e-08,  /* 0x3e79adfcab5be678 */
405
    1.88056307148355440276e-07,  /* 0x3e893d90c5662508 */
406
    8.38850689379557880950e-08,  /* 0x3e768489bd35ff40 */
407
    5.01215865527674122924e-09,  /* 0x3e3586ed3da2b7e0 */
408
    1.74166095998522089762e-07,  /* 0x3e87604d2e850eee */
409
    9.96779574395363585849e-08,  /* 0x3e7ac1d12bfb53d8 */
410
    5.98432026368321460686e-09,  /* 0x3e39b3d468274740 */
411
    1.18362922366887577169e-07,  /* 0x3e7fc5d68d10e53c */
412
    1.86086833284154215946e-07,  /* 0x3e88f9e51884becb */
413
    1.97671457251348941011e-07,  /* 0x3e8a87f0869c06d1 */
414
    1.42447160717199237159e-07,  /* 0x3e831e7279f685fa */
415
    1.05504240785546574184e-08,  /* 0x3e46a8282f9719b0 */
416
    3.13335218371639189324e-08,  /* 0x3e60d2724a8a44e0 */
417
    1.96518418901914535399e-07,  /* 0x3e8a60524b11ad4e */
418
    2.17692035039173536059e-08,  /* 0x3e575fdf832750f0 */
419
    2.15613114426529981675e-07,  /* 0x3e8cf06902e4cd36 */
420
    5.68271098300441214948e-08,  /* 0x3e6e82422d4f6d10 */
421
    1.70331455823369124256e-08,  /* 0x3e524a091063e6c0 */
422
    9.17590028095709583247e-08,  /* 0x3e78a1a172dc6f38 */
423
    2.77266304112916566247e-07,  /* 0x3e929b6619f8a92d */
424
    9.37041937614656939690e-08,  /* 0x3e79274d9c1b70c8 */
425
    1.56116346368316796511e-08,  /* 0x3e50c34b1fbb7930 */
426
    4.13967433808382727413e-08,  /* 0x3e6639866c20eb50 */
427
    1.70164749185821616276e-07,  /* 0x3e86d6d0f6832e9e */
428
    4.01708788545600086008e-07,  /* 0x3e9af54def99f25e */
429
    2.59663539226050551563e-07,  /* 0x3e916cfc52a00262 */
430
    2.22007487655027469542e-07,  /* 0x3e8dcc1e83569c32 */
431
    2.90542250809644081369e-07,  /* 0x3e937f7a551ed425 */
432
    4.67720537666628903341e-07,  /* 0x3e9f6360adc98887 */
433
    2.79799803956772554802e-07,  /* 0x3e92c6ec8d35a2c1 */
434
    2.07344552327432547723e-07,  /* 0x3e8bd44df84cb036 */
435
    2.54705698692735196368e-07,  /* 0x3e9117cf826e310e */
436
    4.26848589539548450728e-07,  /* 0x3e9ca533f332cfc9 */
437
    2.52506723633552216197e-07,  /* 0x3e90f208509dbc2e */
438
    2.14684129933849704964e-07,  /* 0x3e8cd07d93c945de */
439
    3.20134822201596505431e-07,  /* 0x3e957bdfd67e6d72 */
440
    9.93537565749855712134e-08,  /* 0x3e7aab89c516c658 */
441
    3.70792944827917252327e-08,  /* 0x3e63e823b1a1b8a0 */
442
    1.41772749369083698972e-07,  /* 0x3e8307464a9d6d3c */
443
    4.22446601490198804306e-07,  /* 0x3e9c5993cd438843 */
444
    4.11818433724801511540e-07,  /* 0x3e9ba2fca02ab554 */
445
    1.19976381502605310519e-07,  /* 0x3e801a5b6983a268 */
446
    3.43703078571520905265e-08,  /* 0x3e6273d1b350efc8 */
447
    1.66128705555453270379e-07,  /* 0x3e864c238c37b0c6 */
448
    5.00499610023283006540e-08,  /* 0x3e6aded07370a300 */
449
    1.75105139941208062123e-07,  /* 0x3e878091197eb47e */
450
    7.70807146729030327334e-08,  /* 0x3e74b0f245e0dabc */
451
    2.45918607526895836121e-07,  /* 0x3e9080d9794e2eaf */
452
    2.18359020958626199345e-07,  /* 0x3e8d4ec242b60c76 */
453
    8.44342887976445333569e-09,  /* 0x3e4221d2f940caa0 */
454
    1.07506148687888629299e-07,  /* 0x3e7cdbc42b2bba5c */
455
    5.36544954316820904572e-08,  /* 0x3e6cce37bb440840 */
456
    3.39109101518396596341e-07,  /* 0x3e96c1d999cf1dd0 */
457
    2.60098720293920613340e-08,  /* 0x3e5bed8a07eb0870 */
458
    8.42678991664621455827e-08,  /* 0x3e769ed88f490e3c */
459
    5.36972237470183633197e-08,  /* 0x3e6cd41719b73ef0 */
460
    4.28192558171921681288e-07,  /* 0x3e9cbc4ac95b41b7 */
461
    2.71535491483955143294e-07,  /* 0x3e9238f1b890f5d7 */
462
    7.84094998145075780203e-08,  /* 0x3e750c4282259cc4 */
463
    3.43880599134117431863e-07,  /* 0x3e9713d2de87b3e2 */
464
    1.32878065060366481043e-07,  /* 0x3e81d5a7d2255276 */
465
    4.18046802627967629428e-07,  /* 0x3e9c0dfd48227ac1 */
466
    2.65042411765766019424e-07,  /* 0x3e91c964dab76753 */
467
    1.70383695347518643694e-07,  /* 0x3e86de56d5704496 */
468
    1.54096497259613515678e-07,  /* 0x3e84aeb71fd19968 */
469
    2.36543402412459813461e-07,  /* 0x3e8fbf91c57b1918 */
470
    4.38416350106876736790e-07,  /* 0x3e9d6bef7fbe5d9a */
471
    3.03892161339927775731e-07,  /* 0x3e9464d3dc249066 */
472
    3.31136771605664899240e-07,  /* 0x3e9638e2ec4d9073 */
473
    6.49494294526590682218e-08,  /* 0x3e716f4a7247ea7c */
474
    4.10423429887181345747e-09,  /* 0x3e31a0a740f1d440 */
475
    1.70831640869113847224e-07,  /* 0x3e86edbb0114a33c */
476
    1.10811512657909180966e-07,  /* 0x3e7dbee8bf1d513c */
477
    3.23677724749783611964e-07,  /* 0x3e95b8bdb0248f73 */
478
    3.55662734259192678528e-07,  /* 0x3e97de3d3f5eac64 */
479
    2.30102333489738219140e-07,  /* 0x3e8ee24187ae448a */
480
    4.47429004000738629714e-07,  /* 0x3e9e06c591ec5192 */
481
    7.78167135617329598659e-08,  /* 0x3e74e3861a332738 */
482
    9.90345291908535415737e-08,  /* 0x3e7a9599dcc2bfe4 */
483
    5.85800913143113728314e-08,  /* 0x3e6f732fbad43468 */
484
    4.57859062410871843857e-07,  /* 0x3e9eb9f573b727d9 */
485
    3.67993069723390929794e-07,  /* 0x3e98b212a2eb9897 */
486
    2.90836464322977276043e-07,  /* 0x3e9384884c167215 */
487
    2.51621574250131388318e-07,  /* 0x3e90e2d363020051 */
488
    2.75789824740652815545e-07,  /* 0x3e92820879fbd022 */
489
    3.88985776250314403593e-07,  /* 0x3e9a1ab9893e4b30 */
490
    1.40214080183768019611e-07,  /* 0x3e82d1b817a24478 */
491
    3.23451432223550478373e-08,  /* 0x3e615d7b8ded4878 */
492
    9.15979180730608444470e-08,  /* 0x3e78968f9db3a5e4 */
493
    3.44371402498640470421e-07,  /* 0x3e971c4171fe135f */
494
    3.40401897215059498077e-07,  /* 0x3e96d80f605d0d8c */
495
    1.06431813453707950243e-07,  /* 0x3e7c91f043691590 */
496
    1.46204238932338846248e-07,  /* 0x3e839f8a15fce2b2 */
497
    9.94610376972039046878e-09,  /* 0x3e455beda9d94b80 */
498
    2.01711528092681771039e-07,  /* 0x3e8b12c15d60949a */
499
    2.72027977986191568296e-07,  /* 0x3e924167b312bfe3 */
500
    2.48402602511693757964e-07,  /* 0x3e90ab8633070277 */
501
    1.58480011219249621715e-07,  /* 0x3e854554ebbc80ee */
502
    3.00372828113368713281e-08,  /* 0x3e60204aef5a4bb8 */
503
    3.67816204583541976394e-07,  /* 0x3e98af08c679cf2c */
504
    2.46169793032343824291e-07,  /* 0x3e90852a330ae6c8 */
505
    1.70080468270204253247e-07,  /* 0x3e86d3eb9ec32916 */
506
    1.67806717763872914315e-07,  /* 0x3e8685cb7fcbbafe */
507
    2.67715622006907942620e-07,  /* 0x3e91f751c1e0bd95 */
508
    2.14411342550299170574e-08,  /* 0x3e5705b1b0f72560 */
509
    4.11228221283669073277e-07,  /* 0x3e9b98d8d808ca92 */
510
    3.52311752396749662260e-08,  /* 0x3e62ea22c75cc980 */
511
    3.52718000397367821054e-07,  /* 0x3e97aba62bca0350 */
512
    4.38857387992911129814e-07,  /* 0x3e9d73833442278c */
513
    3.22574606753482540743e-07,  /* 0x3e95a5ca1fb18bf9 */
514
    3.28730371182804296828e-08,  /* 0x3e61a6092b6ecf28 */
515
    7.56672470607639279700e-08,  /* 0x3e744fd049aac104 */
516
    3.26750155316369681821e-09,  /* 0x3e2c114fd8df5180 */
517
    3.21724445362095284743e-07,  /* 0x3e95972f130feae5 */
518
    1.06639427371776571151e-07,  /* 0x3e7ca034a55fe198 */
519
    3.41020788139524715063e-07,  /* 0x3e96e2b149990227 */
520
    1.00582838631232552824e-07,  /* 0x3e7b00000294592c */
521
    3.68439433859276640065e-07,  /* 0x3e98b9bdc442620e */
522
    2.20403078342388012027e-07,  /* 0x3e8d94fdfabf3e4e */
523
    1.62841467098298142534e-07,  /* 0x3e85db30b145ad9a */
524
    2.25325348296680733838e-07,  /* 0x3e8e3e1eb95022b0 */
525
    4.37462238226421614339e-07,  /* 0x3e9d5b8b45442bd6 */
526
    3.52055880555040706500e-07,  /* 0x3e97a046231ecd2e */
527
    4.75614398494781776825e-07,  /* 0x3e9feafe3ef55232 */
528
    3.60998399033215317516e-07,  /* 0x3e9839e7bfd78267 */
529
    3.79292434611513945954e-08,  /* 0x3e645cf49d6fa900 */
530
    1.29859015528549300061e-08,  /* 0x3e4be3132b27f380 */
531
    3.15927546985474913188e-07,  /* 0x3e9533980bb84f9f */
532
    2.28533679887379668031e-08,  /* 0x3e5889e2ce3ba390 */
533
    1.17222541823553133877e-07,  /* 0x3e7f7778c3ad0cc8 */
534
    1.51991208405464415857e-07,  /* 0x3e846660cec4eba2 */
535
    1.56958239325240655564e-07}; /* 0x3e85110b4611a626 */
536
537
  /* Some constants and split constants. */
538
539
  static double pi = 3.1415926535897932e+00, /* 0x400921fb54442d18 */
540
             piby2 = 1.5707963267948966e+00, /* 0x3ff921fb54442d18 */
541
             piby4 = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */
542
       three_piby4 = 2.3561944901923449e+00, /* 0x4002d97c7f3321d2 */
543
           pi_head = 3.1415926218032836e+00, /* 0x400921fb50000000 */
544
           pi_tail = 3.1786509547056392e-08, /* 0x3e6110b4611a6263 */
545
        piby2_head = 1.5707963267948965e+00, /* 0x3ff921fb54442d18 */
546
        piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */
547
548
  double u, v, vbyu, q1, q2, s, u1, vu1, u2, vu2, uu, c, r;
549
  unsigned int swap_vu, index, xzero, yzero, xnan, ynan, xinf, yinf;
550
  int m, xexp, yexp, diffexp;
551
552
  /* Find properties of arguments x and y. */
553
554
  unsigned long ux, ui, aux, xneg, uy, auy, yneg;
555
556
  GET_BITS_DP64(x, ux);
557
  GET_BITS_DP64(y, uy);
558
  aux = ux & ~SIGNBIT_DP64;
559
  auy = uy & ~SIGNBIT_DP64;
560
  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
561
  yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
562
  xneg = ux & SIGNBIT_DP64;
563
  yneg = uy & SIGNBIT_DP64;
564
  xzero = (aux == 0);
565
  yzero = (auy == 0);
566
  xnan = (aux > PINFBITPATT_DP64);
567
  ynan = (auy > PINFBITPATT_DP64);
568
  xinf = (aux == PINFBITPATT_DP64);
569
  yinf = (auy == PINFBITPATT_DP64);
570
  
571
  diffexp = yexp - xexp;
572
573
  /* Special cases */
574
575
  if (xnan)
576
    return x + x;
577
  else if (ynan)
578
    return y + y;
579
  else if (yzero)
580
    { /* Zero y gives +-0 for positive x 
581
         and +-pi for negative x */
582
      if ((_LIB_VERSION == _SVID_) && xzero)
583
        /* Sigh - _SVID_ defines atan2(0,0) as a domain error */
584
        return retval_errno_edom(x, y);
585
      else if (xneg)
586
	{
587
	  if (yneg) return val_with_flags(-pi,AMD_F_INEXACT);
588
          else return val_with_flags(pi,AMD_F_INEXACT);
589
	}
590
      else return y;
591
    }
592
  else if (xzero)
593
    { /* Zero x gives +- pi/2 
594
         depending on sign of y */
595
      if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT);
596
      else val_with_flags(piby2,AMD_F_INEXACT);
597
    }
598
599
  /* Scale up both x and y if they are both below 1/4.
600
     This avoids any possible later denormalised arithmetic. */
601
602
  if ((xexp < 1021 && yexp < 1021))
603
    {
604
      scaleUpDouble1024(ux, &ux);
605
      scaleUpDouble1024(uy, &uy);
606
      PUT_BITS_DP64(ux, x);
607
      PUT_BITS_DP64(uy, y);
608
      xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
609
      yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
610
      diffexp = yexp - xexp;
611
    }
612
613
  if (diffexp > 56)
614
    { /* abs(y)/abs(x) > 2^56 => arctan(x/y) 
615
         is insignificant compared to piby2 */
616
      if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT);
617
      else return val_with_flags(piby2,AMD_F_INEXACT);
618
    }
619
  else if (diffexp < -28 && (!xneg))
620
    { /* x positive and dominant over y by a factor of 2^28.
621
         In this case atan(y/x) is y/x to machine accuracy. */
622
623
      if (diffexp < -1074) /* Result underflows */
624
        {
625
          if (yneg)
626
            return val_with_flags(-0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW);
627
          else
628
            return val_with_flags(0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW);
629
        }
630
      else
631
        {
632
          if (diffexp < -1022)
633
            {
634
              /* Result will likely be denormalized */
635
              y = scaleDouble_1(y, 100);
636
              y /= x;
637
              /* Now y is 2^100 times the true result. Scale it back down. */
638
              GET_BITS_DP64(y, uy);
639
	      scaleDownDouble(uy, 100, &uy);
640
              PUT_BITS_DP64(uy, y);
641
	      if ((uy & EXPBITS_DP64) == 0)
642
		return val_with_flags(y, AMD_F_INEXACT | AMD_F_UNDERFLOW);
643
	      else
644
		return y;
645
             }
646
          else
647
            return y / x;
648
        }
649
    }
650
  else if (diffexp < -56 && xneg)
651
    { /* abs(x)/abs(y) > 2^56 and x < 0 => arctan(y/x) 
652
         is insignificant compared to pi */
653
    if (yneg) return val_with_flags(-pi,AMD_F_INEXACT);
654
    else return val_with_flags(pi,AMD_F_INEXACT);
655
    }
656
  else if (yinf && xinf)
657
    { /* If abs(x) and abs(y) are both infinity
658
         return +-pi/4 or +- 3pi/4 according to 
659
         signs.  */ 
660
    if (xneg)
661
      {
662
      if (yneg) return val_with_flags(-three_piby4,AMD_F_INEXACT);
663
      else return val_with_flags(three_piby4,AMD_F_INEXACT);
664
      }
665
    else
666
      {
667
      if (yneg) return val_with_flags(-piby4,AMD_F_INEXACT);
668
      else return val_with_flags(piby4,AMD_F_INEXACT);
669
      }
670
    }
671
672
  /* General case: take absolute values of arguments */
673
674
  u = x; v = y;
675
  if (xneg) u = -x;
676
  if (yneg) v = -y;
677
678
  /* Swap u and v if necessary to obtain 0 < v < u. Compute v/u. */
679
680
  swap_vu = (u < v);
681
  if (swap_vu) { uu = u; u = v; v = uu; }
682
  vbyu = v/u;
683
684
  if (vbyu > 0.0625)
685
    { /* General values of v/u. Use a look-up  
686
         table and series expansion. */
687
688
      index = (int)(256*vbyu + 0.5);
689
      q1 = atan_jby256_lead[index-16];
690
      q2 = atan_jby256_tail[index-16];
691
      c = index*1./256;
692
      GET_BITS_DP64(u, ui);
693
      m = (int)((ui & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
694
      u = scaleDouble_2(u,-m);
695
      v = scaleDouble_2(v,-m);
696
      GET_BITS_DP64(u, ui);
697
      PUT_BITS_DP64(0xfffffffff8000000 & ui, u1); /* 26 leading bits of u */
698
      u2 = u - u1;
699
700
      r = ((v-c*u1)-c*u2)/(u+c*v);
701
702
      /* Polynomial approximation to atan(r) */
703
704
      s = r*r;
705
      q2 = q2 + r - r*(s * (0.33333333333224095522 - s*(0.19999918038989143496)));
706
    }
707
  else if (vbyu < 1.e-8)
708
    { /* v/u is small enough that atan(v/u) = v/u */
709
      q1 = 0.0;
710
      q2 = vbyu;
711
    }
712
  else  /* vbyu <= 0.0625 */
713
    {
714
      /* Small values of v/u. Use a series expansion 
715
	 computed carefully to minimise cancellation */
716
      
717
      GET_BITS_DP64(u, ui);
718
      PUT_BITS_DP64(0xffffffff00000000 & ui, u1);
719
      GET_BITS_DP64(vbyu, ui);
720
      PUT_BITS_DP64(0xffffffff00000000 & ui, vu1);
721
      u2 = u - u1;
722
      vu2 = vbyu - vu1;
723
	  
724
      q1 = 0.0;
725
      s  = vbyu*vbyu;
726
      q2 = vbyu +
727
	((((v - u1*vu1) - u2*vu1) - u*vu2)/u -
728
	 (vbyu*s*(0.33333333333333170500 -
729
		  s*(0.19999999999393223405 -
730
		     s*(0.14285713561807169030 -
731
			s*(0.11110736283514525407 - 
732
			   s*(0.90029810285449784439E-01)))))));
733
    }
734
735
  /* Tidy-up according to which quadrant the arguments lie in */
736
737
  if (swap_vu) {q1 = piby2_head - q1; q2 = piby2_tail - q2;}
738
  if (xneg) {q1 = pi_head - q1; q2 = pi_tail - q2;}
739
  q1 = q1 + q2;
740
741
  if (yneg) q1 = - q1;
742
743
  return q1;
744
}
745
746
/* Publish __atan2 under the name atan2.  weak_alias is not defined in
   this file; NOTE(review): presumably it is glibc's symbol-alias macro
   and emits atan2 as a weak symbol -- confirm against the build headers. */
weak_alias (__atan2, atan2)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_atan2f.c.x86_64-new-libm (+459 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
10
#include "libm_amd.h"
11
#include "libm_util_amd.h"
12
13
#define USE_VAL_WITH_FLAGS
14
#define USE_SCALEDOUBLE_1
15
#define USE_SCALEDOUBLE_2
16
#define USE_SCALEUPDOUBLE1024
17
#define USE_SCALEDOWNDOUBLE
18
#include "libm_inlines_amd.h"
19
#undef USE_SCALEDOWNDOUBLE
20
#undef USE_SCALEUPDOUBLE1024
21
#undef USE_SCALEDOUBLE_1
22
#undef USE_SCALEDOUBLE_2
23
#undef USE_VAL_WITH_FLAGS
24
25
/* Deal with errno for out-of-range arguments
26
   (only used when _LIB_VERSION is _SVID_) */
27
#include "libm_errno_amd.h"
28
static inline float retval_errno_edom(float x, float y)
29
{
30
  struct exception exc;
31
  exc.arg1 = (double)x;
32
  exc.arg2 = (double)y;
33
  exc.type = DOMAIN;
34
  exc.name = (char *)"atan2f";
35
  exc.retval = HUGE;
36
  if (!matherr(&exc))
37
    {
38
      (void)fputs("atan2f: DOMAIN error\n", stderr);
39
      __set_errno(EDOM);
40
    }
41
  return exc.retval;
42
}
43
44
float __atan2f(float fy, float fx)
45
{
46
  /* Array atan_jby256 contains precomputed values of atan(j/256), 
47
     for j = 16, 17, ..., 256. */
48
49
  static const double atan_jby256[  241] = {
50
    6.24188099959573430842e-02,  /* 0x3faff55bb72cfde9 */
51
    6.63088949198234745008e-02,  /* 0x3fb0f99ea71d52a6 */
52
    7.01969710718705064423e-02,  /* 0x3fb1f86dbf082d58 */
53
    7.40829225490337306415e-02,  /* 0x3fb2f719318a4a9a */
54
    7.79666338315423007588e-02,  /* 0x3fb3f59f0e7c559d */
55
    8.18479898030765457007e-02,  /* 0x3fb4f3fd677292fb */
56
    8.57268757707448092464e-02,  /* 0x3fb5f2324fd2d7b2 */
57
    8.96031774848717321724e-02,  /* 0x3fb6f03bdcea4b0c */
58
    9.34767811585894559112e-02,  /* 0x3fb7ee182602f10e */
59
    9.73475734872236708739e-02,  /* 0x3fb8ebc54478fb28 */
60
    1.01215441667466668485e-01,  /* 0x3fb9e94153cfdcf1 */
61
    1.05080273416329528224e-01,  /* 0x3fbae68a71c722b8 */
62
    1.08941956989865793015e-01,  /* 0x3fbbe39ebe6f07c3 */
63
    1.12800381201659388752e-01,  /* 0x3fbce07c5c3cca32 */
64
    1.16655435441069349478e-01,  /* 0x3fbddd21701eba6e */
65
    1.20507009691224548087e-01,  /* 0x3fbed98c2190043a */
66
    1.24354994546761424279e-01,  /* 0x3fbfd5ba9aac2f6d */
67
    1.28199281231298117811e-01,  /* 0x3fc068d584212b3d */
68
    1.32039761614638734288e-01,  /* 0x3fc0e6adccf40881 */
69
    1.35876328229701304195e-01,  /* 0x3fc1646541060850 */
70
    1.39708874289163620386e-01,  /* 0x3fc1e1fafb043726 */
71
    1.43537293701821222491e-01,  /* 0x3fc25f6e171a535c */
72
    1.47361481088651630200e-01,  /* 0x3fc2dcbdb2fba1ff */
73
    1.51181331798580037562e-01,  /* 0x3fc359e8edeb99a3 */
74
    1.54996741923940972718e-01,  /* 0x3fc3d6eee8c6626c */
75
    1.58807608315631065832e-01,  /* 0x3fc453cec6092a9e */
76
    1.62613828597948567589e-01,  /* 0x3fc4d087a9da4f17 */
77
    1.66415301183114927586e-01,  /* 0x3fc54d18ba11570a */
78
    1.70211925285474380276e-01,  /* 0x3fc5c9811e3ec269 */
79
    1.74003600935367680469e-01,  /* 0x3fc645bfffb3aa73 */
80
    1.77790228992676047071e-01,  /* 0x3fc6c1d4898933d8 */
81
    1.81571711160032150945e-01,  /* 0x3fc73dbde8a7d201 */
82
    1.85347949995694760705e-01,  /* 0x3fc7b97b4bce5b02 */
83
    1.89118848926083965578e-01,  /* 0x3fc8350be398ebc7 */
84
    1.92884312257974643856e-01,  /* 0x3fc8b06ee2879c28 */
85
    1.96644245190344985064e-01,  /* 0x3fc92ba37d050271 */
86
    2.00398553825878511514e-01,  /* 0x3fc9a6a8e96c8626 */
87
    2.04147145182116990236e-01,  /* 0x3fca217e601081a5 */
88
    2.07889927202262986272e-01,  /* 0x3fca9c231b403279 */
89
    2.11626808765629753628e-01,  /* 0x3fcb1696574d780b */
90
    2.15357699697738047551e-01,  /* 0x3fcb90d7529260a2 */
91
    2.19082510780057748701e-01,  /* 0x3fcc0ae54d768466 */
92
    2.22801153759394493514e-01,  /* 0x3fcc84bf8a742e6d */
93
    2.26513541356919617664e-01,  /* 0x3fccfe654e1d5395 */
94
    2.30219587276843717927e-01,  /* 0x3fcd77d5df205736 */
95
    2.33919206214733416127e-01,  /* 0x3fcdf110864c9d9d */
96
    2.37612313865471241892e-01,  /* 0x3fce6a148e96ec4d */
97
    2.41298826930858800743e-01,  /* 0x3fcee2e1451d980c */
98
    2.44978663126864143473e-01,  /* 0x3fcf5b75f92c80dd */
99
    2.48651741190513253521e-01,  /* 0x3fcfd3d1fc40dbe4 */
100
    2.52317980886427151166e-01,  /* 0x3fd025fa510665b5 */
101
    2.55977303013005474952e-01,  /* 0x3fd061eea03d6290 */
102
    2.59629629408257511791e-01,  /* 0x3fd09dc597d86362 */
103
    2.63274882955282396590e-01,  /* 0x3fd0d97ee509acb3 */
104
    2.66912987587400396539e-01,  /* 0x3fd1151a362431c9 */
105
    2.70543868292936529052e-01,  /* 0x3fd150973a9ce546 */
106
    2.74167451119658789338e-01,  /* 0x3fd18bf5a30bf178 */
107
    2.77783663178873208022e-01,  /* 0x3fd1c735212dd883 */
108
    2.81392432649178403370e-01,  /* 0x3fd2025567e47c95 */
109
    2.84993688779881237938e-01,  /* 0x3fd23d562b381041 */
110
    2.88587361894077354396e-01,  /* 0x3fd278372057ef45 */
111
    2.92173383391398755471e-01,  /* 0x3fd2b2f7fd9b5fe2 */
112
    2.95751685750431536626e-01,  /* 0x3fd2ed987a823cfe */
113
    2.99322202530807379706e-01,  /* 0x3fd328184fb58951 */
114
    3.02884868374971361060e-01,  /* 0x3fd362773707ebcb */
115
    3.06439619009630070945e-01,  /* 0x3fd39cb4eb76157b */
116
    3.09986391246883430384e-01,  /* 0x3fd3d6d129271134 */
117
    3.13525122985043869228e-01,  /* 0x3fd410cbad6c7d32 */
118
    3.17055753209146973237e-01,  /* 0x3fd44aa436c2af09 */
119
    3.20578221991156986359e-01,  /* 0x3fd4845a84d0c21b */
120
    3.24092470489871664618e-01,  /* 0x3fd4bdee586890e6 */
121
    3.27598440950530811477e-01,  /* 0x3fd4f75f73869978 */
122
    3.31096076704132047386e-01,  /* 0x3fd530ad9951cd49 */
123
    3.34585322166458920545e-01,  /* 0x3fd569d88e1b4cd7 */
124
    3.38066122836825466713e-01,  /* 0x3fd5a2e0175e0f4e */
125
    3.41538425296541714449e-01,  /* 0x3fd5dbc3fbbe768d */
126
    3.45002177207105076295e-01,  /* 0x3fd614840309cfe1 */
127
    3.48457327308122011278e-01,  /* 0x3fd64d1ff635c1c5 */
128
    3.51903825414964732676e-01,  /* 0x3fd685979f5fa6fd */
129
    3.55341622416168290144e-01,  /* 0x3fd6bdeac9cbd76c */
130
    3.58770670270572189509e-01,  /* 0x3fd6f61941e4def0 */
131
    3.62190922004212156882e-01,  /* 0x3fd72e22d53aa2a9 */
132
    3.65602331706966821034e-01,  /* 0x3fd7660752817501 */
133
    3.69004854528964421068e-01,  /* 0x3fd79dc6899118d1 */
134
    3.72398446676754202311e-01,  /* 0x3fd7d5604b63b3f7 */
135
    3.75783065409248884237e-01,  /* 0x3fd80cd46a14b1d0 */
136
    3.79158669033441808605e-01,  /* 0x3fd84422b8df95d7 */
137
    3.82525216899905096124e-01,  /* 0x3fd87b4b0c1ebedb */
138
    3.85882669398073752109e-01,  /* 0x3fd8b24d394a1b25 */
139
    3.89230987951320717144e-01,  /* 0x3fd8e92916f5cde8 */
140
    3.92570135011828580396e-01,  /* 0x3fd91fde7cd0c662 */
141
    3.95900074055262896078e-01,  /* 0x3fd9566d43a34907 */
142
    3.99220769575252543149e-01,  /* 0x3fd98cd5454d6b18 */
143
    4.02532187077682512832e-01,  /* 0x3fd9c3165cc58107 */
144
    4.05834293074804064450e-01,  /* 0x3fd9f93066168001 */
145
    4.09127055079168300278e-01,  /* 0x3fda2f233e5e530b */
146
    4.12410441597387267265e-01,  /* 0x3fda64eec3cc23fc */
147
    4.15684422123729413467e-01,  /* 0x3fda9a92d59e98cf */
148
    4.18948967133552840902e-01,  /* 0x3fdad00f5422058b */
149
    4.22204048076583571270e-01,  /* 0x3fdb056420ae9343 */
150
    4.25449637370042266227e-01,  /* 0x3fdb3a911da65c6c */
151
    4.28685708391625730496e-01,  /* 0x3fdb6f962e737efb */
152
    4.31912235472348193799e-01,  /* 0x3fdba473378624a5 */
153
    4.35129193889246812521e-01,  /* 0x3fdbd9281e528191 */
154
    4.38336559857957774877e-01,  /* 0x3fdc0db4c94ec9ef */
155
    4.41534310525166673322e-01,  /* 0x3fdc42191ff11eb6 */
156
    4.44722423960939305942e-01,  /* 0x3fdc76550aad71f8 */
157
    4.47900879150937292206e-01,  /* 0x3fdcaa6872f3631b */
158
    4.51069655988523443568e-01,  /* 0x3fdcde53432c1350 */
159
    4.54228735266762495559e-01,  /* 0x3fdd121566b7f2ad */
160
    4.57378098670320809571e-01,  /* 0x3fdd45aec9ec862b */
161
    4.60517728767271039558e-01,  /* 0x3fdd791f5a1226f4 */
162
    4.63647609000806093515e-01,  /* 0x3fddac670561bb4f */
163
    4.66767723680866497560e-01,  /* 0x3fdddf85bb026974 */
164
    4.69878057975686880265e-01,  /* 0x3fde127b6b0744af */
165
    4.72978597903265574054e-01,  /* 0x3fde4548066cf51a */
166
    4.76069330322761219421e-01,  /* 0x3fde77eb7f175a34 */
167
    4.79150242925822533735e-01,  /* 0x3fdeaa65c7cf28c4 */
168
    4.82221324227853687105e-01,  /* 0x3fdedcb6d43f8434 */
169
    4.85282563559221225002e-01,  /* 0x3fdf0ede98f393cf */
170
    4.88333951056405479729e-01,  /* 0x3fdf40dd0b541417 */
171
    4.91375477653101910835e-01,  /* 0x3fdf72b221a4e495 */
172
    4.94407135071275316562e-01,  /* 0x3fdfa45dd3029258 */
173
    4.97428915812172245392e-01,  /* 0x3fdfd5e0175fdf83 */
174
    5.00440813147294050189e-01,  /* 0x3fe0039c73c1a40b */
175
    5.03442821109336358099e-01,  /* 0x3fe01c341e82422d */
176
    5.06434934483096732549e-01,  /* 0x3fe034b709250488 */
177
    5.09417148796356245022e-01,  /* 0x3fe04d25314342e5 */
178
    5.12389460310737621107e-01,  /* 0x3fe0657e94db30cf */
179
    5.15351866012543347040e-01,  /* 0x3fe07dc3324e9b38 */
180
    5.18304363603577900044e-01,  /* 0x3fe095f30861a58f */
181
    5.21246951491958210312e-01,  /* 0x3fe0ae0e1639866c */
182
    5.24179628782913242802e-01,  /* 0x3fe0c6145b5b43da */
183
    5.27102395269579471204e-01,  /* 0x3fe0de05d7aa6f7c */
184
    5.30015251423793132268e-01,  /* 0x3fe0f5e28b67e295 */
185
    5.32918198386882147055e-01,  /* 0x3fe10daa77307a0d */
186
    5.35811237960463593311e-01,  /* 0x3fe1255d9bfbd2a8 */
187
    5.38694372597246617929e-01,  /* 0x3fe13cfbfb1b056e */
188
    5.41567605391844897333e-01,  /* 0x3fe1548596376469 */
189
    5.44430940071603086672e-01,  /* 0x3fe16bfa6f5137e1 */
190
    5.47284380987436924748e-01,  /* 0x3fe1835a88be7c13 */
191
    5.50127933104692989907e-01,  /* 0x3fe19aa5e5299f99 */
192
    5.52961601994028217888e-01,  /* 0x3fe1b1dc87904284 */
193
    5.55785393822313511514e-01,  /* 0x3fe1c8fe7341f64f */
194
    5.58599315343562330405e-01,  /* 0x3fe1e00babdefeb3 */
195
    5.61403373889889367732e-01,  /* 0x3fe1f7043557138a */
196
    5.64197577362497537656e-01,  /* 0x3fe20de813e823b1 */
197
    5.66981934222700489912e-01,  /* 0x3fe224b74c1d192a */
198
    5.69756453482978431069e-01,  /* 0x3fe23b71e2cc9e6a */
199
    5.72521144698072359525e-01,  /* 0x3fe25217dd17e501 */
200
    5.75276017956117824426e-01,  /* 0x3fe268a940696da6 */
201
    5.78021083869819540801e-01,  /* 0x3fe27f261273d1b3 */
202
    5.80756353567670302596e-01,  /* 0x3fe2958e59308e30 */
203
    5.83481838685214859730e-01,  /* 0x3fe2abe21aded073 */
204
    5.86197551356360535557e-01,  /* 0x3fe2c2215e024465 */
205
    5.88903504204738026395e-01,  /* 0x3fe2d84c2961e48b */
206
    5.91599710335111383941e-01,  /* 0x3fe2ee628406cbca */
207
    5.94286183324841177367e-01,  /* 0x3fe30464753b090a */
208
    5.96962937215401501234e-01,  /* 0x3fe31a52048874be */
209
    5.99629986503951384336e-01,  /* 0x3fe3302b39b78856 */
210
    6.02287346134964152178e-01,  /* 0x3fe345f01cce37bb */
211
    6.04935031491913965951e-01,  /* 0x3fe35ba0b60eccce */
212
    6.07573058389022313541e-01,  /* 0x3fe3713d0df6c503 */
213
    6.10201443063065118722e-01,  /* 0x3fe386c52d3db11e */
214
    6.12820202165241245673e-01,  /* 0x3fe39c391cd41719 */
215
    6.15429352753104952356e-01,  /* 0x3fe3b198e5e2564a */
216
    6.18028912282561737612e-01,  /* 0x3fe3c6e491c78dc4 */
217
    6.20618898599929469384e-01,  /* 0x3fe3dc1c2a188504 */
218
    6.23199329934065904268e-01,  /* 0x3fe3f13fb89e96f4 */
219
    6.25770224888563042498e-01,  /* 0x3fe4064f47569f48 */
220
    6.28331602434009650615e-01,  /* 0x3fe41b4ae06fea41 */
221
    6.30883481900321840818e-01,  /* 0x3fe430328e4b26d5 */
222
    6.33425882969144482537e-01,  /* 0x3fe445065b795b55 */
223
    6.35958825666321447834e-01,  /* 0x3fe459c652badc7f */
224
    6.38482330354437466191e-01,  /* 0x3fe46e727efe4715 */
225
    6.40996417725432032775e-01,  /* 0x3fe4830aeb5f7bfd */
226
    6.43501108793284370968e-01,  /* 0x3fe4978fa3269ee1 */
227
    6.45996424886771558604e-01,  /* 0x3fe4ac00b1c71762 */
228
    6.48482387642300484032e-01,  /* 0x3fe4c05e22de94e4 */
229
    6.50959018996812410762e-01,  /* 0x3fe4d4a8023414e8 */
230
    6.53426341180761927063e-01,  /* 0x3fe4e8de5bb6ec04 */
231
    6.55884376711170835605e-01,  /* 0x3fe4fd013b7dd17e */
232
    6.58333148384755983962e-01,  /* 0x3fe51110adc5ed81 */
233
    6.60772679271132590273e-01,  /* 0x3fe5250cbef1e9fa */
234
    6.63202992706093175102e-01,  /* 0x3fe538f57b89061e */
235
    6.65624112284960989250e-01,  /* 0x3fe54ccaf0362c8f */
236
    6.68036061856020157990e-01,  /* 0x3fe5608d29c70c34 */
237
    6.70438865514021320458e-01,  /* 0x3fe5743c352b33b9 */
238
    6.72832547593763097282e-01,  /* 0x3fe587d81f732fba */
239
    6.75217132663749830535e-01,  /* 0x3fe59b60f5cfab9d */
240
    6.77592645519925151909e-01,  /* 0x3fe5aed6c5909517 */
241
    6.79959111179481823228e-01,  /* 0x3fe5c2399c244260 */
242
    6.82316554874748071313e-01,  /* 0x3fe5d58987169b18 */
243
    6.84665002047148862907e-01,  /* 0x3fe5e8c6941043cf */
244
    6.87004478341244895212e-01,  /* 0x3fe5fbf0d0d5cc49 */
245
    6.89335009598845749323e-01,  /* 0x3fe60f084b46e05e */
246
    6.91656621853199760075e-01,  /* 0x3fe6220d115d7b8d */
247
    6.93969341323259825138e-01,  /* 0x3fe634ff312d1f3b */
248
    6.96273194408023488045e-01,  /* 0x3fe647deb8e20b8f */
249
    6.98568207680949848637e-01,  /* 0x3fe65aabb6c07b02 */
250
    7.00854407884450081312e-01,  /* 0x3fe66d663923e086 */
251
    7.03131821924453670469e-01,  /* 0x3fe6800e4e7e2857 */
252
    7.05400476865049030906e-01,  /* 0x3fe692a40556fb6a */
253
    7.07660399923197958039e-01,  /* 0x3fe6a5276c4b0575 */
254
    7.09911618463524796141e-01,  /* 0x3fe6b798920b3d98 */
255
    7.12154159993178659249e-01,  /* 0x3fe6c9f7855c3198 */
256
    7.14388052156768926793e-01,  /* 0x3fe6dc44551553ae */
257
    7.16613322731374569052e-01,  /* 0x3fe6ee7f10204aef */
258
    7.18829999621624415873e-01,  /* 0x3fe700a7c5784633 */
259
    7.21038110854851588272e-01,  /* 0x3fe712be84295198 */
260
    7.23237684576317874097e-01,  /* 0x3fe724c35b4fae7b */
261
    7.25428749044510712274e-01,  /* 0x3fe736b65a172dff */
262
    7.27611332626510676214e-01,  /* 0x3fe748978fba8e0f */
263
    7.29785463793429123314e-01,  /* 0x3fe75a670b82d8d8 */
264
    7.31951171115916565668e-01,  /* 0x3fe76c24dcc6c6c0 */
265
    7.34108483259739652560e-01,  /* 0x3fe77dd112ea22c7 */
266
    7.36257428981428097003e-01,  /* 0x3fe78f6bbd5d315e */
267
    7.38398037123989547936e-01,  /* 0x3fe7a0f4eb9c19a2 */
268
    7.40530336612692630105e-01,  /* 0x3fe7b26cad2e50fd */
269
    7.42654356450917929600e-01,  /* 0x3fe7c3d311a6092b */
270
    7.44770125716075148681e-01,  /* 0x3fe7d528289fa093 */
271
    7.46877673555587429099e-01,  /* 0x3fe7e66c01c114fd */
272
    7.48977029182941400620e-01,  /* 0x3fe7f79eacb97898 */
273
    7.51068221873802288613e-01,  /* 0x3fe808c03940694a */
274
    7.53151280962194302759e-01,  /* 0x3fe819d0b7158a4c */
275
    7.55226235836744863583e-01,  /* 0x3fe82ad036000005 */
276
    7.57293115936992444759e-01,  /* 0x3fe83bbec5cdee22 */
277
    7.59351950749757920178e-01,  /* 0x3fe84c9c7653f7ea */
278
    7.61402769805578416573e-01,  /* 0x3fe85d69576cc2c5 */
279
    7.63445602675201784315e-01,  /* 0x3fe86e2578f87ae5 */
280
    7.65480478966144461950e-01,  /* 0x3fe87ed0eadc5a2a */
281
    7.67507428319308182552e-01,  /* 0x3fe88f6bbd023118 */
282
    7.69526480405658186434e-01,  /* 0x3fe89ff5ff57f1f7 */
283
    7.71537664922959498526e-01,  /* 0x3fe8b06fc1cf3dfe */
284
    7.73541011592573490852e-01,  /* 0x3fe8c0d9145cf49d */
285
    7.75536550156311621507e-01,  /* 0x3fe8d13206f8c4ca */
286
    7.77524310373347682379e-01,  /* 0x3fe8e17aa99cc05d */
287
    7.79504322017186335181e-01,  /* 0x3fe8f1b30c44f167 */
288
    7.81476614872688268854e-01,  /* 0x3fe901db3eeef187 */
289
    7.83441218733151756304e-01,  /* 0x3fe911f35199833b */
290
    7.85398163397448278999e-01}; /* 0x3fe921fb54442d18 */
291
292
  /* Some constants. */
293
294
  static double pi = 3.1415926535897932e+00, /* 0x400921fb54442d18 */
295
             piby2 = 1.5707963267948966e+00, /* 0x3ff921fb54442d18 */
296
             piby4 = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */
297
       three_piby4 = 2.3561944901923449e+00; /* 0x4002d97c7f3321d2 */
298
299
  double u, v, vbyu, q, s, uu, r;
300
  unsigned int swap_vu, index, xzero, yzero, xnan, ynan, xinf, yinf;
301
  int xexp, yexp, diffexp;
302
303
  double x = fx;
304
  double y = fy;
305
306
  /* Find properties of arguments x and y. */
307
308
  unsigned long ux, aux, xneg, uy, auy, yneg;
309
310
  GET_BITS_DP64(x, ux);
311
  GET_BITS_DP64(y, uy);
312
  aux = ux & ~SIGNBIT_DP64;
313
  auy = uy & ~SIGNBIT_DP64;
314
  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
315
  yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
316
  xneg = ux & SIGNBIT_DP64;
317
  yneg = uy & SIGNBIT_DP64;
318
  xzero = (aux == 0);
319
  yzero = (auy == 0);
320
  xnan = (aux > PINFBITPATT_DP64);
321
  ynan = (auy > PINFBITPATT_DP64);
322
  xinf = (aux == PINFBITPATT_DP64);
323
  yinf = (auy == PINFBITPATT_DP64);
324
  
325
  diffexp = yexp - xexp;
326
327
  /* Special cases */
328
329
  if (xnan)
330
    return x + x;
331
  else if (ynan)
332
    return y + y;
333
  else if (yzero)
334
    { /* Zero y gives +-0 for positive x 
335
         and +-pi for negative x */
336
      if ((_LIB_VERSION == _SVID_) && xzero)
337
        /* Sigh - _SVID_ defines atan2(0,0) as a domain error */
338
        return retval_errno_edom(x, y);
339
      else if (xneg)
340
	{
341
	  if (yneg) return val_with_flags(-pi,AMD_F_INEXACT);
342
          else return val_with_flags(pi,AMD_F_INEXACT);
343
	}
344
      else return y;
345
    }
346
  else if (xzero)
347
    { /* Zero x gives +- pi/2 
348
         depending on sign of y */
349
      if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT);
350
      else val_with_flags(piby2,AMD_F_INEXACT);
351
    }
352
353
  if (diffexp > 26)
354
    { /* abs(y)/abs(x) > 2^26 => arctan(x/y) 
355
         is insignificant compared to piby2 */
356
      if (yneg) return val_with_flags(-piby2,AMD_F_INEXACT);
357
      else return val_with_flags(piby2,AMD_F_INEXACT);
358
    }
359
  else if (diffexp < -13 && (!xneg))
360
    { /* x positive and dominant over y by a factor of 2^13.
361
         In this case atan(y/x) is y/x to machine accuracy. */
362
363
      if (diffexp < -150) /* Result underflows */
364
        {
365
          if (yneg)
366
            return val_with_flags(-0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW);
367
          else
368
            return val_with_flags(0.0,AMD_F_INEXACT | AMD_F_UNDERFLOW);
369
        }
370
      else
371
        {
372
          if (diffexp < -126)
373
            {
374
              /* Result will likely be denormalized */
375
              y = scaleDouble_1(y, 100);
376
              y /= x;
377
              /* Now y is 2^100 times the true result. Scale it back down. */
378
              GET_BITS_DP64(y, uy);
379
	      scaleDownDouble(uy, 100, &uy);
380
              PUT_BITS_DP64(uy, y);
381
	      if ((uy & EXPBITS_DP64) == 0)
382
		return val_with_flags(y, AMD_F_INEXACT | AMD_F_UNDERFLOW);
383
	      else
384
		return y;
385
             }
386
          else
387
            return y / x;
388
        }
389
    }
390
  else if (diffexp < -26 && xneg)
391
    { /* abs(x)/abs(y) > 2^56 and x < 0 => arctan(y/x) 
392
         is insignificant compared to pi */
393
    if (yneg) return val_with_flags(-pi,AMD_F_INEXACT);
394
    else return val_with_flags(pi,AMD_F_INEXACT);
395
    }
396
  else if (yinf && xinf)
397
    { /* If abs(x) and abs(y) are both infinity
398
         return +-pi/4 or +- 3pi/4 according to 
399
         signs.  */ 
400
    if (xneg)
401
      {
402
      if (yneg) return val_with_flags(-three_piby4,AMD_F_INEXACT);
403
      else return val_with_flags(three_piby4,AMD_F_INEXACT);
404
      }
405
    else
406
      {
407
      if (yneg) return val_with_flags(-piby4,AMD_F_INEXACT);
408
      else return val_with_flags(piby4,AMD_F_INEXACT);
409
      }
410
    }
411
412
  /* General case: take absolute values of arguments */
413
414
  u = x; v = y;
415
  if (xneg) u = -x;
416
  if (yneg) v = -y;
417
418
  /* Swap u and v if necessary to obtain 0 < v < u. Compute v/u. */
419
420
  swap_vu = (u < v);
421
  if (swap_vu) { uu = u; u = v; v = uu; }
422
  vbyu = v/u;
423
424
  if (vbyu > 0.0625)
425
    { /* General values of v/u. Use a look-up  
426
         table and series expansion. */
427
428
      index = (int)(256*vbyu + 0.5);
429
      r = (256*v-index*u)/(256*u+index*v);
430
431
      /* Polynomial approximation to atan(vbyu) */
432
433
      s = r*r;
434
      q = atan_jby256[index-16] + r - r*s*0.33333333333224095522;
435
    }
436
  else if (vbyu < 1.e-4)
437
    { /* v/u is small enough that atan(v/u) = v/u */	  
438
      q = vbyu;
439
    }
440
  else /* vbyu <= 0.0625 */
441
    { 
442
      /* Small values of v/u. Use a series expansion */
443
      
444
      s  = vbyu*vbyu;
445
      q = vbyu -
446
	vbyu*s*(0.33333333333333170500 -
447
		s*(0.19999999999393223405 -
448
		   s*0.14285713561807169030));
449
    }
450
451
  /* Tidy-up according to which quadrant the arguments lie in */
452
453
  if (swap_vu) {q = piby2 - q;}
454
  if (xneg) {q = pi - q;}
455
  if (yneg) q = - q;
456
  return q;
457
}
458
459
weak_alias (__atan2f, atan2f)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_atanf.c.x86_64-new-libm (+103 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_VAL_WITH_FLAGS
13
#define USE_SCALEFLOAT_1
14
#define USE_SCALEUPFLOAT128
15
#include "libm_inlines_amd.h"
16
#undef USE_SCALEUPFLOAT128
17
#undef USE_SCALEFLOAT_1
18
#undef USE_VAL_WITH_FLAGS
19
20
float __atanf(float fy)
21
{
22
23
  /* Some constants and split constants. */
24
25
  static double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
26
27
  double c, v, s, q, z;
28
  unsigned int ynan;
29
30
  double y = fy;
31
32
  /* Find properties of argument y. */
33
34
  unsigned long uy, auy, yneg;
35
36
  GET_BITS_DP64(y, uy);
37
  auy = uy & ~SIGNBIT_DP64;
38
  yneg = uy & SIGNBIT_DP64;
39
40
  v = y;
41
  if (yneg) v = -y;
42
43
  /* Argument reduction to range [-7/16,7/16] */
44
45
  if (auy < 0x3fdc000000000000) /* v < 7./16. */
46
    {
47
      y = v;
48
      c = 0.0;
49
    }
50
  else if (auy < 0x3fe6000000000000) /* v < 11./16. */
51
    {
52
      y = (2*v-1.0)/(2.0+v);
53
      /* c = arctan(0.5) */
54
      c = 4.63647609000806093515e-01; /* 0x3fddac670561bb4f */
55
    }
56
  else if (auy < 0x3ff3000000000000) /* v < 19./16. */
57
    {
58
      y = (v-1.)/(1.0+v);
59
      /* c = arctan(1.) */
60
      c = 7.85398163397448278999e-01; /* 0x3fe921fb54442d18 */
61
    }
62
  else if (auy < 0x4003800000000000) /* v < 39./16. */
63
    {
64
      y = (v-1.5)/(1.0+1.5*v);
65
      /* c = arctan(1.5) */
66
      c = 9.82793723247329054082e-01; /* 0x3fef730bd281f69b */
67
    }
68
  else
69
    {
70
71
      ynan = (auy > PINFBITPATT_DP64);  
72
73
      if (ynan) return y + y;
74
      else if (v > 0x4c80000000000000)
75
	{ /* abs(y) > 2^26 => arctan(1/y) is 
76
	     insignificant compared to piby2 */
77
	  if (yneg) return val_with_flags(-piby2, AMD_F_INEXACT);
78
	  else return val_with_flags(piby2, AMD_F_INEXACT);
79
	}
80
81
      y = -1.0/v;
82
      /* c = arctan(infinity) */
83
      c = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
84
    }
85
86
  /* Core approximation: Remez(2,2) on [-7/16,7/16] */
87
88
  s = y*y;
89
  q = y*s*
90
    (0.296528598819239217902158651186e0 + 
91
     (0.192324546402108583211697690500e0 + 
92
       0.470677934286149214138357545549e-2*s)*s)/
93
    (0.889585796862432286486651434570e0 +
94
     (0.111072499995399550138837673349e1 + 
95
       0.299309699959659728404442796915e0*s)*s);
96
97
  z = c - (q - y);
98
99
  if (yneg) z = -z;
100
  return z;
101
}
102
103
weak_alias (__atanf, atanf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_ceil.c.x86_64-new-libm (+57 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
double __ceil(double x)
13
{
14
  double r;
15
  long rexp, xneg;
16
  unsigned long ux, ax, ur, mask;
17
18
  GET_BITS_DP64(x, ux);
19
  ax = ux & (~SIGNBIT_DP64);
20
  xneg = (ux != ax);
21
22
  if (ax >= 0x4340000000000000)
23
    {
24
      /* abs(x) is either NaN, infinity, or >= 2^53 */
25
      if (ax > 0x7ff0000000000000)
26
        /* x is NaN */
27
        return x + x; /* Raise invalid if it is a signalling NaN */
28
      else
29
        return x;
30
    }
31
  else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */
32
    {
33
      if (ax == 0x0000000000000000)
34
        /* x is +zero or -zero; return the same zero */
35
          return x;
36
      else if (xneg) /* x < 0.0 */
37
        return -0.0;
38
      else
39
        return 1.0;
40
    }
41
  else
42
    {
43
      rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
44
      /* Mask out the bits of r that we don't want */
45
      mask = (1L << (EXPSHIFTBITS_DP64 - rexp)) - 1;
46
      ur = (ux & ~mask);
47
      PUT_BITS_DP64(ur, r);
48
      if (xneg || (ur == ux))
49
        return r;
50
      else
51
        /* We threw some bits away and x was positive */
52
        return r + 1.0;
53
    }
54
55
}
56
57
weak_alias (__ceil, ceil)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_ceilf.c.x86_64-new-libm (+56 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
float __ceilf(float x)
13
{
14
  float r;
15
  int rexp, xneg;
16
  unsigned int ux, ax, ur, mask;
17
18
  GET_BITS_SP32(x, ux);
19
  ax = ux & (~SIGNBIT_SP32);
20
  xneg = (ux != ax);
21
22
  if (ax >= 0x4b800000)
23
    {
24
      /* abs(x) is either NaN, infinity, or >= 2^24 */
25
      if (ax > 0x7f800000)
26
        /* x is NaN */
27
        return x + x; /* Raise invalid if it is a signalling NaN */
28
      else
29
        return x;
30
    }
31
  else if (ax < 0x3f800000) /* abs(x) < 1.0 */
32
    {
33
      if (ax == 0x00000000)
34
        /* x is +zero or -zero; return the same zero */
35
        return x;
36
      else if (xneg) /* x < 0.0 */
37
        return -0.0F;
38
      else
39
        return 1.0F;
40
    }
41
  else
42
    {
43
      rexp = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
44
      /* Mask out the bits of r that we don't want */
45
      mask = (1 << (EXPSHIFTBITS_SP32 - rexp)) - 1;
46
      ur = (ux & ~mask);
47
      PUT_BITS_SP32(ur, r);
48
49
      if (xneg || (ux == ur)) return r;
50
      else 
51
        /* We threw some bits away and x was positive */
52
        return r + 1.0F;
53
    }
54
}
55
56
weak_alias (__ceilf, ceilf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_copysign.c.x86_64-new-libm (+29 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
/* Returns the absolute value of x with the sign of y.
13
   NaNs are not considered special; their sign bits are handled
14
   the same as for any other number. */
15
16
double __copysign(double x, double y)
17
{
18
  /* This works on Hammer */
19
  double temp = -0.0; /* 0x8000000000000000 */
20
  /* AND the bit pattern with y, result in y */
21
  asm volatile ("andpd	%0, %1" : : "x" (temp), "x" (y));
22
  /* AND the ones-complement of the bit pattern with x, result in temp */
23
  asm volatile ("andnpd	%0, %1" : : "x" (x), "x" (temp));
24
  asm volatile ("orpd	%0, %1" : : "x" (temp), "x" (y));
25
  return y;
26
}
27
28
29
weak_alias (__copysign, copysign)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_copysignf.c.x86_64-new-libm (+29 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
  /* Returns the absolute value of x with the sign of y.
13
     NaNs are not considered special; their sign bits are handled
14
     the same as for any other number. */
15
16
float __copysignf(float x, float y)
17
{
18
  /* This works on Hammer */
19
  float temp = -0.0; /* 0x80000000 */
20
  /* AND the bit pattern with y, result in y */
21
  asm volatile ("andps	%0, %1" : : "x" (temp), "x" (y));
22
  /* AND the ones-complement of the bit pattern with x, result in temp */
23
  asm volatile ("andnps	%0, %1" : : "x" (x), "x" (temp));
24
  asm volatile ("orps	%0, %1" : : "x" (temp), "x" (y));
25
  return y;
26
}
27
28
29
weak_alias (__copysignf, copysignf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_cos.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_cosf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_finite.c.x86_64-new-libm (+28 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
/* Returns 0 if x is infinite or NaN, otherwise returns 1 */
13
14
int __finite(double x)
15
{
16
  /* This works on Hammer */
17
  double temp = 1.0e444; /* = infinity = 0x7ff0000000000000 */
18
  volatile int retval;
19
  retval = 0;
20
  asm volatile ("andpd	%0, %1;" : : "x" (temp), "x" (x));
21
  asm volatile ("comisd	%0, %1" : : "x" (temp), "x" (x));
22
  asm volatile ("setnz	%0" : "=g" (retval));
23
  return retval;
24
}
25
26
hidden_def (__finite)
27
weak_alias (__finite, finite)
28
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_finitef.c.x86_64-new-libm (+27 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
/* Returns 0 if x is infinite or NaN, otherwise returns 1 */
13
14
int __finitef(float x)
15
{
16
  /* This works on Hammer */
17
  float temp = 1.0e444; /* = infinity = 0x7f800000 */
18
  volatile int retval;
19
  retval = 0;
20
  asm volatile ("andps	%0, %1;" : : "x" (temp), "x" (x));
21
  asm volatile ("comiss	%0, %1" : : "x" (temp), "x" (x));
22
  asm volatile ("setnz	%0" : "=g" (retval));
23
  return retval;
24
}
25
26
hidden_def (__finitef)
27
weak_alias (__finitef, finitef)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_floor.c.x86_64-new-libm (+60 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
double __floor(double x)
13
{
14
  double r;
15
  long rexp, xneg;
16
17
18
  unsigned long ux, ax, ur, mask;
19
20
  GET_BITS_DP64(x, ux);
21
  ax = ux & (~SIGNBIT_DP64);
22
  xneg = (ux != ax);
23
24
  if (ax >= 0x4340000000000000)
25
    {
26
      /* abs(x) is either NaN, infinity, or >= 2^53 */
27
      if (ax > 0x7ff0000000000000)
28
        /* x is NaN */
29
        return x + x; /* Raise invalid if it is a signalling NaN */
30
      else
31
        return x;
32
    }
33
  else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */
34
    {
35
      if (ax == 0x0000000000000000)
36
        /* x is +zero or -zero; return the same zero */
37
        return x;
38
      else if (xneg) /* x < 0.0 */
39
        return -1.0;
40
      else
41
        return 0.0;
42
    }
43
  else
44
    {
45
      r = x;
46
      rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
47
      /* Mask out the bits of r that we don't want */
48
      mask = (1L << (EXPSHIFTBITS_DP64 - rexp)) - 1;
49
      ur = (ux & ~mask);
50
      PUT_BITS_DP64(ur, r);
51
      if (xneg && (ur != ux))
52
        /* We threw some bits away and x was negative */
53
        return r - 1.0;
54
      else
55
        return r;
56
    }
57
58
}
59
60
weak_alias (__floor, floor)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_floorf.c.x86_64-new-libm (+56 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
float __floorf(float x)
13
{
14
  float r;
15
  int rexp, xneg;
16
  unsigned int ux, ax, ur, mask;
17
18
  GET_BITS_SP32(x, ux);
19
  ax = ux & (~SIGNBIT_SP32);
20
  xneg = (ux != ax);
21
22
  if (ax >= 0x4b800000)
23
    {
24
      /* abs(x) is either NaN, infinity, or >= 2^24 */
25
      if (ax > 0x7f800000)
26
        /* x is NaN */
27
        return x + x; /* Raise invalid if it is a signalling NaN */
28
      else
29
        return x;
30
    }
31
  else if (ax < 0x3f800000) /* abs(x) < 1.0 */
32
    {
33
      if (ax == 0x00000000)
34
        /* x is +zero or -zero; return the same zero */
35
        return x;
36
      else if (xneg) /* x < 0.0 */
37
        return -1.0F;
38
      else
39
        return 0.0F;
40
    }
41
  else
42
    {
43
      rexp = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
44
      /* Mask out the bits of r that we don't want */
45
      mask = (1 << (EXPSHIFTBITS_SP32 - rexp)) - 1;
46
      ur = (ux & ~mask);
47
      PUT_BITS_SP32(ur, r);
48
      if (xneg && (ux != ur))
49
        /* We threw some bits away and x was negative */
50
        return r - 1.0F;
51
      else
52
        return r;
53
    }
54
}
55
56
weak_alias (__floorf, floorf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_fma.c.x86_64-new-libm (+117 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SCALEDOUBLE_1
13
#define USE_SCALEDOUBLE_2
14
#include "libm_inlines_amd.h"
15
#undef USE_SCALEDOUBLE_1
16
#undef USE_SCALEDOUBLE_2
17
18
double __fma(double a, double b, double sum)
19
{
20
  /* Returns a * b + sum with no intermediate loss of precision */
21
22
  double ha, ta, hb, tb, z, zz, r, s, az, asum;
23
  int ua, ub, usum;
24
  int scaled, expover, expunder, scaleexp;
25
  unsigned long u;
26
27
  GET_BITS_DP64(a, u);
28
  ua = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
29
  GET_BITS_DP64(b, u);
30
  ub = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
31
  GET_BITS_DP64(sum, u);
32
  usum = (int)((u & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
33
34
  if (ua == EMAX_DP64 + 1 || ub == EMAX_DP64 + 1 || usum == EMAX_DP64 + 1)
35
    {
36
      /* One or more of the arguments is NaN or infinity. The
37
         result will also be NaN or infinity. */
38
      return a * b + sum;
39
    }
40
  else if (ua + ub > usum + 2 * MANTLENGTH_DP64)
41
    {
42
      /* sum is negligible compared with the extra-length product a*b */
43
      return a*b;
44
    }
45
  else if (usum > ua + ub + MANTLENGTH_DP64)
46
    {
47
      /* The product a*b is negligible compared with sum */
48
      return sum;
49
    }
50
51
  expover = EMAX_DP64 - 2;
52
  expunder = EMIN_DP64 + MANTLENGTH_DP64;
53
  scaleexp = 0;
54
55
56
  if (ua + ub > expover || usum > expover)
57
    {
58
      /* The result is likely to overflow. Scale down in an attempt
59
         to avoid unnecessary overflow. The true result may still overflow. */
60
      scaled = 1;
61
      scaleexp = expover / 2;
62
      a = scaleDouble_1(a, -scaleexp);
63
      b = scaleDouble_1(b, -scaleexp);
64
      sum = scaleDouble_2(sum, -2*scaleexp);
65
    }
66
  else if (ua + ub < expunder)
67
    {
68
      /* The product a*b is near underflow; scale up */
69
      scaled = 1;
70
      scaleexp = expunder / 2;
71
      a = scaleDouble_1(a, -scaleexp);
72
      b = scaleDouble_1(b, -scaleexp);
73
      sum = scaleDouble_2(sum, -2*scaleexp);
74
    }
75
  else
76
    scaled = 0;
77
78
  /* Split a into ha (head) and ta (tail). Do the same for b. */
79
  ha = a;
80
  GET_BITS_DP64(ha, u);
81
  u &= 0xfffffffff8000000;
82
  PUT_BITS_DP64(u, ha);
83
  ta = a - ha;
84
  hb = b;
85
  GET_BITS_DP64(hb, u);
86
  u &= 0xfffffffff8000000;
87
  PUT_BITS_DP64(u, hb);
88
  tb = b - hb;
89
90
  /* Carefully multiply the parts together. z is the most significant
91
     part of the result, and zz the least significant part */
92
  z = a * b;
93
  zz = (((ha * hb - z) + ha * tb) + ta * hb) + ta * tb;
94
95
  /* Set az = abs(z), asum = abs(sum) */
96
  GET_BITS_DP64(z, u);
97
  u &= ~SIGNBIT_DP64;
98
  PUT_BITS_DP64(u, az);
99
  GET_BITS_DP64(sum, u);
100
  u &= ~SIGNBIT_DP64;
101
  PUT_BITS_DP64(u, asum);
102
103
  /* Carefully add (z,zz) to sum */
104
  r = z + sum;
105
106
  if (az > asum)
107
    s = ((z - r) + sum) + zz;
108
  else
109
    s = ((sum - r) + z) + zz;
110
111
  if (scaled)
112
    return scaleDouble_1(r + s, 2*scaleexp);
113
  else
114
    return r + s;
115
}
116
117
weak_alias (__fma, fma)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_fmaf.c.x86_64-new-libm (+116 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SCALEFLOAT_1
13
#define USE_SCALEFLOAT_2
14
#include "libm_inlines_amd.h"
15
#undef USE_SCALEFLOAT_1
16
#undef USE_SCALEFLOAT_2
17
18
float __fmaf(float a, float b, float sum)
19
{
20
  /* Returns a * b + sum with no intermediate loss of precision */
21
22
  float ha, ta, hb, tb, z, zz, r, s, az, asum;
23
  int ua, ub, usum;
24
  int scaled, expover, expunder, scaleexp;
25
  unsigned int u;
26
27
  GET_BITS_SP32(a, u);
28
  ua = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
29
  GET_BITS_SP32(b, u);
30
  ub = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
31
  GET_BITS_SP32(sum, u);
32
  usum = (int)((u & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
33
34
  if (ua == EMAX_SP32 + 1 || ub == EMAX_SP32 + 1 || usum == EMAX_SP32 + 1)
35
    {
36
      /* One or more of the arguments is NaN or infinity. The
37
         result will also be NaN or infinity. */
38
      return a * b + sum;
39
    }
40
  else if (ua + ub > usum + 2 * MANTLENGTH_SP32)
41
    {
42
      /* sum is negligible compared with the extra-length product a*b */
43
      return a*b;
44
    }
45
  else if (usum > ua + ub + MANTLENGTH_SP32)
46
    {
47
      /* The product a*b is negligible compared with sum */
48
      return sum;
49
    }
50
51
  expover = EMAX_SP32 - 2;
52
  expunder = EMIN_SP32 + MANTLENGTH_SP32;
53
  scaleexp = 0;
54
55
  if (ua + ub > expover || usum > expover)
56
    {
57
      /* The result is likely to overflow. Scale down in an attempt
58
         to avoid unnecessary overflow. The true result may still overflow. */
59
      scaled = 1;
60
      scaleexp = expover / 2;
61
      a = scaleFloat_1(a, -scaleexp);
62
      b = scaleFloat_1(b, -scaleexp);
63
      sum = scaleFloat_2(sum, -2*scaleexp);
64
    }
65
  else if (ua + ub < expunder)
66
    {
67
      /* The product a*b is near underflow; scale up */
68
      scaled = 1;
69
      scaleexp = expunder / 2;
70
      a = scaleFloat_1(a, -scaleexp);
71
      b = scaleFloat_1(b, -scaleexp);
72
      sum = scaleFloat_2(sum, -2*scaleexp);
73
    }
74
  else
75
    scaled = 0;
76
77
  /* Split a into ha (head) and ta (tail). Do the same for b. */
78
  ha = a;
79
  GET_BITS_SP32(ha, u);
80
  u &= 0xfffff000;
81
  PUT_BITS_SP32(u, ha);
82
  ta = a - ha;
83
  hb = b;
84
  GET_BITS_SP32(hb, u);
85
  u &= 0xfffff000;
86
  PUT_BITS_SP32(u, hb);
87
  tb = b - hb;
88
89
  /* Carefully multiply the parts together. z is the most significant
90
     part of the result, and zz the least significant part */
91
  z = a * b;
92
  zz = (((ha * hb - z) + ha * tb) + ta * hb) + ta * tb;
93
94
  /* Set az = abs(z), asum = abs(sum) */
95
  GET_BITS_SP32(z, u);
96
  u &= ~SIGNBIT_SP32;
97
  PUT_BITS_SP32(u, az);
98
  GET_BITS_SP32(sum, u);
99
  u &= ~SIGNBIT_SP32;
100
  PUT_BITS_SP32(u, asum);
101
102
  /* Carefully add (z,zz) to sum */
103
  r = z + sum;
104
105
  if (az > asum)
106
    s = ((z - r) + sum) + zz;
107
  else
108
    s = ((sum - r) + z) + zz;
109
110
  if (scaled)
111
    return scaleFloat_1(r + s, 2*scaleexp);
112
  else
113
    return r + s;
114
}
115
116
weak_alias (__fmaf, fmaf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_logb.c.x86_64-new-libm (+62 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_INFINITY_WITH_FLAGS
13
#include "libm_inlines_amd.h"
14
#undef USE_INFINITY_WITH_FLAGS
15
16
double __logb(double x)
17
{
18
19
  unsigned long ux;
20
  long u;
21
  GET_BITS_DP64(x, ux);
22
  u = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
23
  if ((ux & ~SIGNBIT_DP64) == 0)
24
    /* x is +/-zero. Return -infinity with div-by-zero flag. */
25
    return -infinity_with_flags(AMD_F_DIVBYZERO);
26
  else if (EMIN_DP64 <= u && u <= EMAX_DP64)
27
    /* x is a normal number */
28
    return u;
29
  else if (u > EMAX_DP64)
30
    {
31
      /* x is infinity or NaN */
32
      if ((ux & MANTBITS_DP64) == 0)
33
        /* x is +/-infinity. Return +infinity with no flags. */
34
        return infinity_with_flags(0);
35
      else
36
        /* x is NaN, result is NaN */
37
        return x + x; /* Raise invalid if it is a signalling NaN */
38
    }
39
  else
40
    {
41
      /* x is denormalized. */
42
#ifdef FOLLOW_IEEE754_LOGB
43
      /* Return the value of the minimum exponent to ensure that
44
         the relationship between logb and scalb, defined in
45
         IEEE 754, holds. */
46
      return EMIN_DP64;
47
#else
48
      /* Follow the rule set by IEEE 854 for logb */
49
      ux &= MANTBITS_DP64;
50
      u = EMIN_DP64;
51
      while (ux < IMPBIT_DP64)
52
        {
53
          ux <<= 1;
54
          u--;
55
        }
56
      return u;
57
#endif
58
    }
59
60
}
61
62
weak_alias (__logb, logb)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_logbf.c.x86_64-new-libm (+60 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_INFINITYF_WITH_FLAGS
13
#include "libm_inlines_amd.h"
14
#undef USE_INFINITYF_WITH_FLAGS
15
16
float __logbf(float x)
17
{
18
  unsigned int ux;
19
  int u;
20
  GET_BITS_SP32(x, ux);
21
  u = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
22
  if ((ux & ~SIGNBIT_SP32) == 0)
23
    /* x is +/-zero. Return -infinity with div-by-zero flag. */
24
    return -infinityf_with_flags(AMD_F_DIVBYZERO);
25
  else if (EMIN_SP32 <= u && u <= EMAX_SP32)
26
    /* x is a normal number */
27
    return u;
28
  else if (u > EMAX_SP32)
29
    {
30
      /* x is infinity or NaN */
31
      if ((ux & MANTBITS_SP32) == 0)
32
        /* x is +/-infinity. Return +infinity with no flags. */
33
        return infinityf_with_flags(0);
34
      else
35
        /* x is NaN, result is NaN */
36
        return x + x; /* Raise invalid if it is a signalling NaN */
37
    }
38
  else
39
    {
40
      /* x is denormalized. */
41
#ifdef FOLLOW_IEEE754_LOGB
42
      /* Return the value of the minimum exponent to ensure that
43
         the relationship between logb and scalb, defined in
44
         IEEE 754, holds. */
45
      return EMIN_SP32;
46
#else
47
      /* Follow the rule set by IEEE 854 for logb */
48
      ux &= MANTBITS_SP32;
49
      u = EMIN_SP32;
50
      while (ux < IMPBIT_SP32)
51
        {
52
          ux <<= 1;
53
          u--;
54
        }
55
      return u;
56
#endif
57
    }
58
}
59
60
weak_alias (__logbf, logbf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_modf.c.x86_64-new-libm (+59 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
double __modf(double x, double *iptr)
13
{
14
  /* modf splits the argument x into integer and fraction parts,
15
     each with the same sign as x. */
16
17
18
  long xexp;
19
  unsigned long ux, ax, mask;
20
21
  GET_BITS_DP64(x, ux);
22
  ax = ux & (~SIGNBIT_DP64);
23
24
  if (ax >= 0x4340000000000000)
25
    {
26
      /* abs(x) is either NaN, infinity, or >= 2^53 */
27
      if (ax > 0x7ff0000000000000)
28
        {
29
          /* x is NaN */
30
          *iptr = x;
31
          return x + x; /* Raise invalid if it is a signalling NaN */
32
        }
33
      else
34
        {
35
          /* x is infinity or large. Return zero with the sign of x */
36
          *iptr = x;
37
          PUT_BITS_DP64(ux & SIGNBIT_DP64, x);
38
          return x;
39
        }
40
    }
41
  else if (ax < 0x3ff0000000000000)
42
    {
43
      /* abs(x) < 1.0. Set iptr to zero with the sign of x
44
         and return x. */
45
      PUT_BITS_DP64(ux & SIGNBIT_DP64, *iptr);
46
      return x;
47
    }
48
  else
49
    {
50
      xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
51
      /* Mask out the bits of x that we don't want */
52
      mask = (1L << (EXPSHIFTBITS_DP64 - xexp)) - 1;
53
      PUT_BITS_DP64(ux & ~mask, *iptr);
54
      return x - *iptr;
55
    }
56
57
}
58
59
weak_alias (__modf, modf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_modff.c.x86_64-new-libm (+54 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
float __modff(float x, float *iptr)
13
{
14
  /* modff splits the argument x into integer and fraction parts,
15
     each with the same sign as x. */
16
17
  unsigned int ux, mask;
18
  int xexp;
19
20
  GET_BITS_SP32(x, ux);
21
  xexp = ((ux & (~SIGNBIT_SP32)) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
22
23
  if (xexp < 0)
24
    {
25
      /* abs(x) < 1.0. Set iptr to zero with the sign of x
26
         and return x. */
27
      PUT_BITS_SP32(ux & SIGNBIT_SP32, *iptr);
28
      return x;
29
    }
30
  else if (xexp < EXPSHIFTBITS_SP32)
31
    {
32
      /* x lies between 1.0 and 2**(24) */
33
      /* Mask out the bits of x that we don't want */
34
      mask = (1 << (EXPSHIFTBITS_SP32 - xexp)) - 1;
35
      PUT_BITS_SP32(ux & ~mask, *iptr);
36
      return x - *iptr;
37
    }
38
  else if ((ux & (~SIGNBIT_SP32)) > 0x7f800000)
39
    {
40
      /* x is NaN */
41
      *iptr = x;
42
      return x + x; /* Raise invalid if it is a signalling NaN */
43
    }
44
  else
45
    {
46
      /* x is infinity or large. Set iptr to x and return zero
47
         with the sign of x. */
48
      *iptr = x;
49
      PUT_BITS_SP32(ux & SIGNBIT_SP32, x);
50
      return x;
51
    }
52
}
53
54
weak_alias (__modff, modff)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_sin.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_sincos.c.x86_64-new-libm (+311 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
10
#include "libm_amd.h"
11
#include "libm_util_amd.h"
12
13
#define USE_NAN_WITH_FLAGS
14
#define USE_VAL_WITH_FLAGS
15
#include "libm_inlines_amd.h"
16
#undef USE_NAN_WITH_FLAGS
17
#undef USE_VAL_WITH_FLAGS
18
19
/* sin(x + xx) approximation valid on the interval [-pi/4,pi/4].
   x is the leading part of the argument and xx its tail. */
static inline double sin_piby4(double x, double xx)
{
  /* Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
                          = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
                          = x * f(w)
     where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
     A minimax approximation of (f(w) - 1) / w is used because it
     yields an expansion in even powers of x.
     When the tail xx is non-zero, a correction term
     g(x,xx) = (1-x*x/2)*xx is folded into the result; g(x,xx)
     approximates cos(x)*sin(xx), which is valid because xx is tiny
     relative to x.
  */
  static const double
    k1 = -0.166666666666666646259241729,
    k2 = 0.833333333333095043065222816e-2,
    k3 = -0.19841269836761125688538679e-3,
    k4 = 0.275573161037288022676895908448e-5,
    k5 = -0.25051132068021699772257377197e-7,
    k6 = 0.159181443044859136852668200e-9;
  const double w = x * x;
  const double cube = w * x;
  /* Polynomial tail: every coefficient except the leading k1. */
  const double tail = (k2 + w * (k3 + w * (k4 + w * (k5 + w * k6))));

  if (xx != 0.0)
    /* Tail present: evaluate with the correction term included. */
    return x - ((w * (0.5 * xx - cube * tail) - xx) - cube * k1);

  return x + cube * (k1 + w * tail);
}
49
50
/* cos(x + xx) approximation valid on the interval [-pi/4,pi/4].
   x is the leading part of the argument and xx its tail. */
static inline double cos_piby4(double x, double xx)
{
  /* Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
                          = f(w)
     where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
     A minimax approximation of (f(w) - 1 + w/2) / (w*w) is used
     because it yields an expansion in even powers of x.
     The tail xx contributes a correction term g(x,xx) = x*xx that is
     subtracted from the result; g(x,xx) approximates sin(x)*sin(xx),
     which is valid because xx is tiny relative to x.
  */
  static const double
    k1 = 0.41666666666666665390037e-1,
    k2 = -0.13888888888887398280412e-2,
    k3 = 0.248015872987670414957399e-4,
    k4 = -0.275573172723441909470836e-6,
    k5 = 0.208761463822329611076335e-8,
    k6 = -0.113826398067944859590880e-10;
  const double w = x * x;
  const double half_w = 0.5 * w;
  const double lead = 1.0 - half_w;

  /* (1.0 - lead) - half_w recovers the rounding error made when
     forming lead, so it can be added back with the small terms. */
  return lead + ((((1.0 - lead) - half_w) - x * xx) + w * w *
                 (k1 + w * (k2 + w * (k3 + w * (k4 + w * (k5 + w * k6))))));
}
78
79
void __sincos(double x, double *s, double *c)
80
{
81
  double r, rr;
82
  int region, xneg;
83
84
  unsigned long ux, ax;
85
  GET_BITS_DP64(x, ux);
86
  ax = (ux & ~SIGNBIT_DP64);
87
  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
88
    {
89
      if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
90
        {
91
          if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */
92
	    {
93
	      if (ax == 0x0000000000000000)
94
                {
95
                  *s = x;
96
                  *c = 1.0;
97
                }
98
              else
99
                {
100
                  *s = x;
101
                  *c = val_with_flags(1.0, AMD_F_INEXACT);
102
                }
103
	    }
104
          else
105
            {
106
              *s = x - x*x*x*0.166666666666666666;
107
              *c = 1.0 - x*x*0.5;
108
            }
109
        }
110
      else
111
        {
112
          *s = sin_piby4(x, 0.0);
113
          *c = cos_piby4(x, 0.0);
114
        }
115
      return;
116
    }
117
  else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
118
    {
119
      /* x is either NaN or infinity */
120
      if (ux & MANTBITS_DP64)
121
        /* x is NaN */
122
        *s = *c = x + x; /* Raise invalid if it is a signalling NaN */
123
      else
124
        /* x is infinity. Return a NaN */
125
        *s = *c = nan_with_flags(AMD_F_INVALID);
126
      return;
127
    }
128
129
  xneg = (ax != ux);
130
131
132
  if (xneg)
133
    x = -x;
134
135
  /* Reduce x into range [-pi/4,pi/4] */
136
  __remainder_piby2(x, &r, &rr, &region);
137
138
  if (xneg)
139
    {
140
      switch (region)
141
        {
142
        default:
143
        case 0:
144
          *s = -sin_piby4(r, rr);
145
          *c = cos_piby4(r, rr);
146
          break;
147
        case 1:
148
          *s = -cos_piby4(r, rr);
149
          *c = -sin_piby4(r, rr);
150
          break;
151
        case 2:
152
          *s = sin_piby4(r, rr);
153
          *c = -cos_piby4(r, rr);
154
          break;
155
        case 3:
156
          *s = cos_piby4(r, rr);
157
          *c = sin_piby4(r, rr);
158
          break;
159
        }
160
    }
161
  else
162
    {
163
      switch (region)
164
        {
165
        default:
166
        case 0:
167
          *s = sin_piby4(r, rr);
168
          *c = cos_piby4(r, rr);
169
          break;
170
        case 1:
171
          *s = cos_piby4(r, rr);
172
          *c = -sin_piby4(r, rr);
173
          break;
174
        case 2:
175
          *s = -sin_piby4(r, rr);
176
          *c = -cos_piby4(r, rr);
177
          break;
178
        case 3:
179
          *s = -cos_piby4(r, rr);
180
          *c = sin_piby4(r, rr);
181
          break;
182
        }
183
    }
184
  return;
185
}
186
187
double __sin(double x)
188
{
189
  double r, rr;
190
  int region, xneg;
191
192
  unsigned long ux, ax;
193
  GET_BITS_DP64(x, ux);
194
  ax = (ux & ~SIGNBIT_DP64);
195
  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
196
    {
197
      if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
198
        {
199
          if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */
200
	    {
201
	      if (ax == 0x0000000000000000)
202
                return x;
203
              else
204
                return val_with_flags(x, AMD_F_INEXACT);
205
	    }
206
          else
207
            return x - x*x*x*0.166666666666666666;
208
        }
209
      else
210
        return sin_piby4(x, 0.0);
211
    }
212
  else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
213
    {
214
      /* x is either NaN or infinity */
215
      if (ux & MANTBITS_DP64)
216
        /* x is NaN */
217
        return x + x; /* Raise invalid if it is a signalling NaN */
218
      else
219
        /* x is infinity. Return a NaN */
220
        return nan_with_flags(AMD_F_INVALID);
221
    }
222
  xneg = (ax != ux);
223
224
225
  if (xneg)
226
    x = -x;
227
228
  /* Reduce x into range [-pi/4,pi/4] */
229
  __remainder_piby2(x, &r, &rr, &region);
230
231
  if (xneg)
232
    {
233
      switch (region)
234
        {
235
        default:
236
        case 0: return -sin_piby4(r, rr);
237
        case 1: return -cos_piby4(r, rr);
238
        case 2: return sin_piby4(r, rr);
239
        case 3: return cos_piby4(r, rr);
240
        }
241
    }
242
  else
243
    {
244
      switch (region)
245
        {
246
        default:
247
        case 0: return sin_piby4(r, rr);
248
        case 1: return cos_piby4(r, rr);
249
        case 2: return -sin_piby4(r, rr);
250
        case 3: return -cos_piby4(r, rr);
251
        }
252
    }
253
}
254
255
double __cos(double x)
256
{
257
  double r, rr;
258
  int region, xneg;
259
260
  unsigned long ux, ax;
261
  GET_BITS_DP64(x, ux);
262
  ax = (ux & ~SIGNBIT_DP64);
263
  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
264
    {
265
      if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
266
        {
267
          if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */
268
            {
269
              if (ax == 0x0000000000000000) /* abs(x) = 0.0 */
270
                return 1.0;
271
              else
272
                return val_with_flags(1.0, AMD_F_INEXACT);
273
            }
274
          else
275
            return 1.0 - x*x*0.5;
276
        }
277
      else
278
        return cos_piby4(x, 0.0);
279
    }
280
  else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
281
    {
282
      /* x is either NaN or infinity */
283
      if (ux & MANTBITS_DP64)
284
        /* x is NaN */
285
        return x + x; /* Raise invalid if it is a signalling NaN */
286
      else
287
        /* x is infinity. Return a NaN */
288
        return nan_with_flags(AMD_F_INVALID);
289
    }
290
  xneg = (ax != ux);
291
292
293
  if (xneg)
294
    x = -x;
295
296
  /* Reduce x into range [-pi/4,pi/4] */
297
  __remainder_piby2(x, &r, &rr, &region);
298
299
  switch (region)
300
    {
301
    default:
302
    case 0: return cos_piby4(r, rr);
303
    case 1: return -sin_piby4(r, rr);
304
    case 2: return -cos_piby4(r, rr);
305
    case 3: return sin_piby4(r, rr);
306
    }
307
}
308
309
weak_alias (__sin, sin)
310
weak_alias (__cos, cos)
311
weak_alias (__sincos, sincos)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_sincosf.c.x86_64-new-libm (+321 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
10
#include "libm_amd.h"
11
#include "libm_util_amd.h"
12
13
#define USE_REMAINDER_PIBY2F_INLINE
14
#define USE_VAL_WITH_FLAGS
15
#define USE_NAN_WITH_FLAGS
16
#include "libm_inlines_amd.h"
17
#undef USE_VAL_WITH_FLAGS
18
#undef USE_NAN_WITH_FLAGS
19
#undef USE_REMAINDER_PIBY2F_INLINE
20
21
/* sin(x) approximation valid on the interval [-pi/4,pi/4].
   Evaluated in double precision with a short polynomial. */
static inline double sinf_piby4(double x)
{
  /* Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
                          = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
                          = x * f(w)
     where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
     A minimax approximation of (f(w) - 1) / w is used because it
     yields an expansion in even powers of x.
  */
  static const double
    k1 = -0.166666666638608441788607926e0,
    k2 = 0.833333187633086262120839299e-2,
    k3 = -0.198400874359527693921333720e-3,
    k4 = 0.272500015145584081596826911e-5;
  const double w = x * x;

  return (x + x * w * (k1 + w * (k2 + w * (k3 + w * k4))));
}
41
42
/* cos(x) approximation valid on the interval [-pi/4,pi/4].
   Evaluated in double precision with a short polynomial. */
static inline double cosf_piby4(double x)
{
  /* Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
                          = f(w)
     where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
     A minimax approximation of (f(w) - 1 + w/2) / (w*w) is used
     because it yields an expansion in even powers of x.
  */
  static const double
    k1 = 0.41666666664325175238031e-1,
    k2 = -0.13888887673175665567647e-2,
    k3 = 0.24800600878112441958053e-4,
    k4 = -0.27301013343179832472841e-6;
  const double w = x * x;

  return (1.0 - 0.5 * w + (w * w *
                           (k1 + w * (k2 + w * (k3 + w * k4)))));
}
62
63
64
void __sincosf(float x, float *s, float *c)
65
{
66
  double r, dx;
67
  int region, xneg;
68
69
  unsigned long ux, ax;
70
71
  dx = x;
72
73
  GET_BITS_DP64(dx, ux);
74
  ax = (ux & ~SIGNBIT_DP64);
75
76
  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
77
    {
78
      if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */
79
        {
80
          if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
81
            {
82
              if (ax == 0x0000000000000000)
83
                {
84
                  *s = dx;
85
                  *c = 1.0;
86
                }
87
              else
88
                {
89
                  *s = val_with_flags(dx, AMD_F_INEXACT);
90
                  *c = val_with_flags(1.0, AMD_F_INEXACT);
91
                }
92
            }
93
          else
94
            {
95
              *s = dx - dx*dx*dx*0.166666666666666666;
96
              *c = 1.0 - dx*dx*0.5;
97
            }
98
        }
99
      else
100
        {
101
          *s = sinf_piby4(x);
102
          *c = cosf_piby4(x);
103
        }
104
      return;
105
    }
106
  else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
107
    {
108
      /* x is either NaN or infinity */
109
      if (ux & MANTBITS_DP64)
110
        /* x is NaN */
111
        *s = *c = dx + dx; /* Raise invalid if it is a signalling NaN */
112
      else
113
        /* x is infinity. Return a NaN */
114
        *s = *c = nan_with_flags(AMD_F_INVALID);
115
      return;
116
    }
117
118
  xneg = (ux >> 63);
119
120
  if (xneg)
121
    dx = -dx;
122
123
  /* Reduce abs(x) into range [-pi/4,pi/4] */
124
  __remainder_piby2f_inline(dx, ax, &r, &region);
125
126
  if (xneg)
127
    {
128
      switch (region)
129
        {
130
        default:
131
        case 0:
132
          *s = -sinf_piby4(r);
133
          *c = cosf_piby4(r);
134
          break;
135
        case 1:
136
          *s = -cosf_piby4(r);
137
          *c = -sinf_piby4(r);
138
          break;
139
        case 2:
140
          *s = sinf_piby4(r);
141
          *c = -cosf_piby4(r);
142
          break;
143
        case 3:
144
          *s = cosf_piby4(r);
145
          *c = sinf_piby4(r);
146
          break;
147
        }
148
    }
149
  else
150
    {
151
      switch (region)
152
        {
153
        default:
154
        case 0:
155
          *s = sinf_piby4(r);
156
          *c = cosf_piby4(r);
157
          break;
158
        case 1:
159
          *s = cosf_piby4(r);
160
          *c = -sinf_piby4(r);
161
          break;
162
        case 2:
163
          *s = -sinf_piby4(r);
164
          *c = -cosf_piby4(r);
165
          break;
166
        case 3:
167
          *s = -cosf_piby4(r);
168
          *c = sinf_piby4(r);
169
          break;
170
        }
171
    }
172
}
173
174
float __sinf(float x)
175
{
176
  double r, dx;
177
  int region, xneg;
178
179
  unsigned long ux, ax;
180
181
  dx = x;
182
183
  GET_BITS_DP64(dx, ux);
184
  ax = (ux & ~SIGNBIT_DP64);
185
186
  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
187
    {
188
      if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */
189
        {
190
          if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
191
            {
192
              if (ax == 0x0000000000000000)
193
                return x;
194
              else
195
                return val_with_flags(dx, AMD_F_INEXACT);
196
            }
197
          else
198
            return x - x*x*x*0.166666666666666666;
199
        }
200
      else
201
        return sinf_piby4(dx);
202
    }
203
  else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
204
    {
205
      /* x is either NaN or infinity */
206
      if (ux & MANTBITS_DP64)
207
        /* x is NaN */
208
        return x + x; /* Raise invalid if it is a signalling NaN */
209
      else
210
        /* x is infinity. Return a NaN */
211
        return nan_with_flags(AMD_F_INVALID);
212
    }
213
214
  xneg = (ux >> 63);
215
216
  if (xneg)
217
    dx = -dx;
218
219
  /* Reduce abs(x) into range [-pi/4,pi/4] */
220
  __remainder_piby2f_inline(dx, ax, &r, &region);
221
222
  if (xneg)
223
    {
224
      switch (region)
225
        {
226
        default:
227
        case 0: return -sinf_piby4(r);
228
        case 1: return -cosf_piby4(r);
229
        case 2: return sinf_piby4(r);
230
        case 3: return cosf_piby4(r);
231
        }
232
    }
233
  else
234
    {
235
      switch (region)
236
        {
237
        default:
238
        case 0: return sinf_piby4(r);
239
        case 1: return cosf_piby4(r);
240
        case 2: return -sinf_piby4(r);
241
        case 3: return -cosf_piby4(r);
242
        }
243
    }
244
}
245
246
#if 1
/* Stupidly, computing cosf via sincosf is much faster,
   even though sincosf does the same work and more. */

/* Return cosf(x) by calling __sincosf and discarding the sine. */
float __cosf(float x)
{
  float sin_unused, cos_val;

  __sincosf(x, &sin_unused, &cos_val);
  return cos_val;
}

#else
/* This is the way cosf should be done, but it runs half
   as fast as it ought to */

/* Return cosf(x) directly (disabled alternative implementation). */
float __cosf(float x)
{
  double r, dx;
  int region, xneg;
  unsigned long ux, ax;

  dx = x;

  GET_BITS_DP64(dx, ux);
  ax = (ux & ~SIGNBIT_DP64);

  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
    {
      /* No argument reduction needed. */
      if (ax >= 0x3f80000000000000) /* abs(x) >= 2.0^(-7) */
        return cosf_piby4(dx);

      if (ax >= 0x3f20000000000000) /* abs(x) >= 2.0^(-13) */
        return 1.0F - x*x*0.5F;

      /* abs(x) < 2.0^(-13): the result is 1.0; the inexact flag is
         requested unless x is exactly zero. */
      if (ax == 0x0000000000000000)
        return 1.0F;
      return val_with_flags(1.0, AMD_F_INEXACT);
    }

  if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
    {
      /* x is either NaN or infinity */
      if (ux & MANTBITS_DP64)
        /* x is NaN */
        return x + x; /* Raise invalid if it is a signalling NaN */

      /* x is infinity. Return a NaN */
      return nan_with_flags(AMD_F_INVALID);
    }

  /* cos is even, so only abs(x) matters from here on. */
  xneg = (ux >> 63);
  if (xneg)
    dx = -dx;

  /* Reduce abs(x) into range [-pi/4,pi/4] */
  __remainder_piby2f_inline(dx, ax, &r, &region);

  switch (region)
    {
    case 1:
      return -sinf_piby4(r);
    case 2:
      return -cosf_piby4(r);
    case 3:
      return sinf_piby4(r);
    default: /* region 0 */
      return cosf_piby4(r);
    }
}
#endif
318
319
weak_alias (__sinf, sinf)
320
weak_alias (__cosf, cosf)
321
weak_alias (__sincosf, sincosf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_sinf.c.x86_64-new-libm (+1 lines)
Line 0 Link Here
1
/* Not needed.  */
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_tan.c.x86_64-new-libm (+145 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
10
#include "libm_amd.h"
11
#include "libm_util_amd.h"
12
13
#define USE_NAN_WITH_FLAGS
14
#define USE_VAL_WITH_FLAGS
15
#include "libm_inlines_amd.h"
16
#undef USE_NAN_WITH_FLAGS
17
#undef USE_VAL_WITH_FLAGS
18
19
/* tan(x + xx) approximation valid on the interval [-pi/4,pi/4].
   If recip is true return -1/tan(x + xx) instead. */
static inline double tan_piby4(double x, double xx, int recip)
{
  static const double
     piby4_lead = 7.85398163397448278999e-01, /* 0x3fe921fb54442d18 */
     piby4_tail = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */
  double r, t1, t2, xl, t;
  int transform = 0;

  /* In order to maintain relative precision transform using the identity:
     tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
     Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
     After the transform the tail has been folded into x, so xx is
     reset to zero. */
  if (x > 0.68)
    {
      transform = 1;
      x = piby4_lead - x;
      xl = piby4_tail - xx;
      x += xl;
      xx = 0.0;
    }
  else if (x < -0.68)
    {
      transform = -1;
      x = piby4_lead + x;
      xl = piby4_tail + xx;
      x += xl;
      xx = 0.0;
    }

  /* Core Remez [2,3] approximation to tan(x+xx) on the
     interval [0,0.68].  The result is kept as a leading part t1
     and a small part t2. */
  r = x*x + 2.0 * x * xx;
  t1 = x;
  t2 = xx + x*r*
    (0.372379159759792203640806338901e0 +
     (-0.229345080057565662883358588111e-1 +
      0.224044448537022097264602535574e-3*r)*r)/
    (0.111713747927937668539901657944e1 +
     (-0.515658515729031149329237816945e0 +
      (0.260656620398645407524064091208e-1 -
       0.232371494088563558304549252913e-3*r)*r)*r);
  t = t1 + t2;

  /* Reconstruct tan(x) in the transformed case; transform carries
     the sign (+1 near pi/4, -1 near -pi/4). */
  if (transform)
    return recip ? transform*(2*t/(t-1) - 1.0)
                 : transform*(1.0 - 2*t/(1+t));

  if (!recip)
    return t;

  {
    /* Compute -1.0/(t1 + t2) accurately.  z1 and trec_top are t and
       trec with the low 32 bits of their representation cleared;
       z2 is the remainder of t not captured by z1. */
    double trec, trec_top, z1, z2;
    unsigned long u;

    GET_BITS_DP64(t, u);
    u &= 0xffffffff00000000;
    PUT_BITS_DP64(u, z1);
    z2 = t2 - (z1 - t1);
    trec = -1.0 / t;
    GET_BITS_DP64(trec, u);
    u &= 0xffffffff00000000;
    PUT_BITS_DP64(u, trec_top);
    return trec_top + trec * ((1.0 + trec_top * z1) + trec_top * z2);
  }
}
96
97
double __tan(double x)
98
{
99
  double r, rr;
100
  int region, xneg;
101
102
  unsigned long ux, ax;
103
  GET_BITS_DP64(x, ux);
104
  ax = (ux & ~SIGNBIT_DP64);
105
  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
106
    {
107
      if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
108
        {
109
          if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */
110
	    {
111
	      if (ax == 0x0000000000000000) return x;
112
              else return val_with_flags(x, AMD_F_INEXACT);
113
	    }
114
          else
115
            return x + x*x*x*0.333333333333333333;
116
        }
117
      else
118
        return tan_piby4(x, 0.0, 0);
119
    }
120
  else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
121
    {
122
      /* x is either NaN or infinity */
123
      if (ux & MANTBITS_DP64)
124
        /* x is NaN */
125
        return x + x; /* Raise invalid if it is a signalling NaN */
126
      else
127
        /* x is infinity. Return a NaN */
128
        return nan_with_flags(AMD_F_INVALID);
129
    }
130
  xneg = (ax != ux);
131
132
133
  if (xneg)
134
    x = -x;
135
136
  /* Reduce x into range [-pi/4,pi/4] */
137
  __remainder_piby2(x, &r, &rr, &region);
138
139
  if (xneg)
140
    return -tan_piby4(r, rr, region & 1);
141
  else
142
    return tan_piby4(r, rr, region & 1);
143
}
144
145
weak_alias (__tan, tan)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_tanf.c.x86_64-new-libm (+97 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
10
#include "libm_amd.h"
11
#include "libm_util_amd.h"
12
13
#define USE_REMAINDER_PIBY2F_INLINE
14
#define USE_VAL_WITH_FLAGS
15
#define USE_NAN_WITH_FLAGS
16
#include "libm_inlines_amd.h"
17
#undef USE_VAL_WITH_FLAGS
18
#undef USE_NAN_WITH_FLAGS
19
#undef USE_REMAINDER_PIBY2F_INLINE
20
21
/* tan(x) approximation valid on the interval [-pi/4,pi/4].
   If recip is true return -1/tan(x) instead. */
static inline double tanf_piby4(double x, int recip)
{
  /* Core Remez [1,2] approximation to tan(x) on the
     interval [0,pi/4], evaluated as x plus a rational correction
     in w = x*x. */
  const double w = x*x;
  const double tanx = x + x*w*
    (0.385296071263995406715129e0 -
     0.172032480471481694693109e-1 * w) /
    (0.115588821434688393452299e+1 +
     (-0.51396505478854532132342e0 +
      0.1844239256901656082986661e-1 * w) * w);

  return recip ? -1.0 / tanx : tanx;
}
42
43
float __tanf(float x)
44
{
45
  double r, dx;
46
  int region, xneg;
47
48
  unsigned long ux, ax;
49
50
  dx = x;
51
52
  GET_BITS_DP64(dx, ux);
53
  ax = (ux & ~SIGNBIT_DP64);
54
55
  if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
56
    {
57
      if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */
58
        {
59
          if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
60
            {
61
              if (ax == 0x0000000000000000)
62
                return dx;
63
              else
64
                return val_with_flags(dx, AMD_F_INEXACT);
65
            }
66
          else
67
            return dx + dx*dx*dx*0.333333333333333333;
68
        }
69
      else
70
        return tanf_piby4(dx, 0);
71
    }
72
  else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
73
    {
74
      /* x is either NaN or infinity */
75
      if (ux & MANTBITS_DP64)
76
        /* x is NaN */
77
        return dx + dx; /* Raise invalid if it is a signalling NaN */
78
      else
79
        /* x is infinity. Return a NaN */
80
        return nan_with_flags(AMD_F_INVALID);
81
    }
82
83
  xneg = (ux >> 63);
84
85
  if (xneg)
86
    x = -x;
87
88
  /* Reduce x into range [-pi/4,pi/4] */
89
  __remainder_piby2f_inline(x, ax, &r, &region);
90
91
  if (xneg)
92
    return -tanf_piby4(r, region & 1);
93
  else
94
    return tanf_piby4(r, region & 1);
95
}
96
97
weak_alias (__tanf, tanf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_trunc.c.x86_64-new-libm (+48 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
double __trunc(double x)
13
{
14
  double r;
15
  long rexp;
16
  unsigned long ux, ax, mask;
17
18
  GET_BITS_DP64(x, ux);
19
  ax = ux & (~SIGNBIT_DP64);
20
21
  if (ax >= 0x4340000000000000)
22
    {
23
      /* abs(x) is either NaN, infinity, or >= 2^53 */
24
      if (ax > 0x7ff0000000000000)
25
        /* x is NaN */
26
        return x + x; /* Raise invalid if it is a signalling NaN */
27
      else
28
        return x;
29
    }
30
  else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */
31
    {
32
      /* Return zero with the sign of x */
33
      PUT_BITS_DP64(ux & SIGNBIT_DP64, x);
34
      return x;
35
    }
36
  else
37
    {
38
      r = x;
39
      rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
40
      /* Mask out the bits of r that we don't want */
41
      mask = (1L << (EXPSHIFTBITS_DP64 - rexp)) - 1;
42
      PUT_BITS_DP64(ux & ~mask, r);
43
      return r;
44
    }
45
46
}
47
48
weak_alias (__trunc, trunc)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_truncf.c.x86_64-new-libm (+47 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
float __truncf(float x)
13
{
14
  float r;
15
  int rexp;
16
  unsigned int ux, ax, mask;
17
18
  GET_BITS_SP32(x, ux);
19
  ax = ux & (~SIGNBIT_SP32);
20
21
  if (ax >= 0x4b800000)
22
    {
23
      /* abs(x) is either NaN, infinity, or >= 2^24 */
24
      if (ax > 0x7f800000)
25
        /* x is NaN */
26
        return x + x; /* Raise invalid if it is a signalling NaN */
27
      else
28
        return x;
29
    }
30
  else if (ax < 0x3f800000) /* abs(x) < 1.0 */
31
    {
32
      /* Return zero with the sign of x */
33
      PUT_BITS_SP32(ux & SIGNBIT_SP32, x);
34
      return x;
35
    }
36
  else
37
    {
38
      r = x;
39
      rexp = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
40
      /* Mask out the bits of r that we don't want */
41
      mask = (1 << (EXPSHIFTBITS_SP32 - rexp)) - 1;
42
      PUT_BITS_SP32(ux & ~mask, r);
43
      return r;
44
    }
45
}
46
47
weak_alias (__truncf, truncf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_acos.c.x86_64-new-libm (+139 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_VAL_WITH_FLAGS
13
#define USE_NAN_WITH_FLAGS
14
#include "libm_inlines_amd.h"
15
#undef USE_NAN_WITH_FLAGS
16
#undef USE_VAL_WITH_FLAGS
17
18
/* Deal with errno for out-of-range argument */
19
#include "libm_errno_amd.h"
20
static inline double retval_errno_edom(double x)
21
{
22
  struct exception exc;
23
  exc.arg1 = x;
24
  exc.arg2 = x;
25
  exc.type = DOMAIN;
26
  exc.name = (char *)"acos";
27
  if (_LIB_VERSION == _SVID_)
28
    exc.retval = HUGE;
29
  else
30
    exc.retval = nan_with_flags(AMD_F_INVALID);
31
  if (_LIB_VERSION == _POSIX_)
32
    __set_errno(EDOM);
33
  else if (!matherr(&exc))
34
    {
35
      if(_LIB_VERSION == _SVID_)
36
        (void)fputs("acos: DOMAIN error\n", stderr);
37
    __set_errno(EDOM);
38
    }
39
  return exc.retval;
40
}
41
42
double __acos(double x)
43
{
44
  /* Computes arccos(x).
45
     The argument is first reduced by noting that arccos(x) 
46
     is invalid for abs(x) > 1. For denormal and small 
47
     arguments arccos(x) = pi/2 to machine accuracy. 
48
     Remaining argument ranges are handled as follows.
49
     For abs(x) <= 0.5 use 
50
     arccos(x) = pi/2 - arcsin(x)
51
     = pi/2 - (x + x^3*R(x^2))
52
     where R(x^2) is a rational minimax approximation to 
53
     (arcsin(x) - x)/x^3.
54
     For abs(x) > 0.5 exploit the identity:
55
     arccos(x) = pi - 2*arcsin(sqrt(1-x)/2)
56
     together with the above rational approximation, and 
57
     reconstruct the terms carefully.
58
  */
59
60
  /* Some constants and split constants. */
61
62
  static const double
63
    pi         = 3.1415926535897933e+00, /* 0x400921fb54442d18 */ 
64
    piby2      = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
65
    piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
66
    piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */
67
68
  double u, y, s=0.0, r;
69
  int xexp, xnan, transform=0;
70
71
  unsigned long ux, aux, xneg;
72
  GET_BITS_DP64(x, ux);
73
  aux = ux & ~SIGNBIT_DP64;
74
  xneg = (ux & SIGNBIT_DP64);
75
  xnan = (aux > PINFBITPATT_DP64);
76
  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
77
78
  /* Special cases */
79
80
  if (xexp < -56)
81
    { /* y small enough that arccos(x) = pi/2 */
82
      return val_with_flags(piby2, AMD_F_INEXACT);
83
    }
84
  else if (xnan) return x + x;
85
  else if (xexp >= 0) 
86
    { /* abs(x) >= 1.0 */
87
      if (x == 1.0) return 0.0;
88
      else if (x == -1.0) return val_with_flags(pi, AMD_F_INEXACT);
89
      else return retval_errno_edom(x);
90
    }
91
92
  if (xneg) y = -x;
93
  else y = x;
94
95
  transform = (xexp >= -1); /* abs(x) >= 0.5 */
96
97
  if (transform)
98
    { /* Transform y into the range [0,0.5) */
99
      transform = 1;
100
      r = 0.5*(1-y);
101
      /* Hammer sqrt instruction */
102
      asm volatile ("sqrtsd %1, %0" : "=x" (s) : "x" (r));
103
      y = s;
104
    }
105
  else
106
    r = y*y;
107
108
  /* Use a rational approximation for [0.0, 0.5] */
109
110
  u = r*(0.227485835556935010735943483075 + 
111
         (-0.445017216867635649900123110649 +
112
          (0.275558175256937652532686256258 + 
113
           (-0.0549989809235685841612020091328 +
114
            (0.00109242697235074662306043804220 + 
115
             0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/
116
    (1.36491501334161032038194214209 +
117
     (-3.28431505720958658909889444194 + 
118
      (2.76568859157270989520376345954 + 
119
       (-0.943639137032492685763471240072 +
120
	0.105869422087204370341222318533*r)*r)*r)*r);
121
122
  if (transform) 
123
    { /* Reconstruct acos carefully in transformed region */
124
      if (xneg) return pi - 2*(s+(y*u - piby2_tail));
125
      else
126
	{
127
	  double c, s1;
128
	  unsigned long us;
129
	  GET_BITS_DP64(s, us);
130
	  PUT_BITS_DP64(0xffffffff00000000 & us, s1);
131
	  c = (r-s1*s1)/(s+s1);
132
          return 2*s1 + (2*c+2*y*u);
133
	}
134
    }
135
  else
136
    return piby2_head - (x - (piby2_tail - x*u));
137
}
138
139
weak_alias (__acos, acos)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_acosf.c.x86_64-new-libm (+141 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_VALF_WITH_FLAGS
13
#define USE_NANF_WITH_FLAGS
14
#include "libm_inlines_amd.h"
15
#undef USE_NANF_WITH_FLAGS
16
#undef USE_VALF_WITH_FLAGS
17
18
/* Deal with errno for out-of-range argument */
19
#include "libm_errno_amd.h"
20
static inline float retval_errno_edom(float x)
21
{
22
  struct exception exc;
23
  exc.arg1 = (double)x;
24
  exc.arg2 = (double)x;
25
  exc.type = DOMAIN;
26
  exc.name = (char *)"acosf";
27
  if (_LIB_VERSION == _SVID_)
28
    exc.retval = HUGE;
29
  else
30
    exc.retval = nanf_with_flags(AMD_F_INVALID);
31
  if (_LIB_VERSION == _POSIX_)
32
    __set_errno(EDOM);
33
  else if (!matherr(&exc))
34
    {
35
      if(_LIB_VERSION == _SVID_)
36
        (void)fputs("acosf: DOMAIN error\n", stderr);
37
    __set_errno(EDOM);
38
    }
39
  return exc.retval;
40
}
41
42
float __acosf(float x)
43
{
44
  /* Computes arccos(x).
45
     The argument is first reduced by noting that arccos(x)
46
     is invalid for abs(x) > 1. For denormal and small
47
     arguments arccos(x) = pi/2 to machine accuracy.
48
     Remaining argument ranges are handled as follows.
49
     For abs(x) <= 0.5 use
50
     arccos(x) = pi/2 - arcsin(x)
51
     = pi/2 - (x + x^3*R(x^2))
52
     where R(x^2) is a rational minimax approximation to
53
     (arcsin(x) - x)/x^3.
54
     For abs(x) > 0.5 exploit the identity:
55
     arccos(x) = pi - 2*arcsin(sqrt(1-x)/2)
56
     together with the above rational approximation, and
57
     reconstruct the terms carefully.
58
  */
59
60
  /* Some constants and split constants. */
61
62
  static const float
63
    piby2      = 1.5707963705e+00F; /* 0x3fc90fdb */
64
  static const double
65
    pi         = 3.1415926535897933e+00, /* 0x400921fb54442d18 */
66
    piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
67
    piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */
68
69
  float u, y, s = 0.0F, r;
70
  int xexp, xnan, transform = 0;
71
72
  unsigned int ux, aux, xneg;
73
74
  /* For some reason using this:
75
        GET_BITS_SP32(x, ux);
76
     instead of the following line makes acosf run like a snail on a Hammer. */
77
  ux = (*((unsigned int *)&x));
78
79
  aux = ux & ~SIGNBIT_SP32;
80
  xneg = (ux & SIGNBIT_SP32);
81
  xnan = (aux > PINFBITPATT_SP32);
82
  xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
83
84
  /* Special cases */
85
86
  if (xexp < -26)
87
    /* y small enough that arccos(x) = pi/2 */
88
    return valf_with_flags(piby2, AMD_F_INEXACT);
89
  else if (xnan) return x + x;
90
  else if (xexp >= 0)
91
    { /* abs(x) >= 1.0 */
92
      if (x == 1.0F) return 0.0F;
93
      else if (x == -1.0F) return valf_with_flags(pi, AMD_F_INEXACT);
94
      else return retval_errno_edom(x);
95
    }
96
97
  if (xneg) y = -x;
98
  else y = x;
99
100
  transform = (xexp >= -1); /* abs(x) >= 0.5 */
101
102
  if (transform)
103
    { /* Transform y into the range [0,0.5) */
104
      transform = 1;
105
      r = 0.5F*(1-y);
106
      /* Hammer sqrt instruction */
107
      asm volatile ("sqrtss %1, %0" : "=x" (s) : "x" (r));
108
      y = s;
109
    }
110
  else
111
    r = y*y;
112
113
  /* Use a rational approximation for [0.0, 0.5] */
114
115
  u=r*(0.184161606965100694821398249421F +
116
       (-0.0565298683201845211985026327361F +
117
	(-0.0133819288943925804214011424456F -
118
	 0.00396137437848476485201154797087F*r)*r)*r)/
119
    (1.10496961524520294485512696706F -
120
     0.836411276854206731913362287293F*r);
121
122
  if (transform)
123
    {
124
      /* Reconstruct acos carefully in transformed region */
125
      if (xneg)
126
        return pi - 2.0F*(s+(y*u - piby2_tail));
127
      else
128
	{
129
	  float c, s1;
130
	  unsigned int us;
131
	  GET_BITS_SP32(s, us);
132
	  PUT_BITS_SP32(0xffff0000 & us, s1);
133
	  c = (r-s1*s1)/(s+s1);
134
          return 2.0F*s1 + (2.0F*c+2.0F*y*u);
135
	}
136
    }
137
  else
138
    return piby2_head - (x - (piby2_tail - x*u));
139
}
140
141
weak_alias (__acosf, acosf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_asin.c.x86_64-new-libm (+144 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_VAL_WITH_FLAGS
13
#define USE_NAN_WITH_FLAGS
14
#include "libm_inlines_amd.h"
15
#undef USE_NAN_WITH_FLAGS
16
#undef USE_VAL_WITH_FLAGS
17
18
/* Deal with errno for out-of-range argument */
19
#include "libm_errno_amd.h"
20
static inline double retval_errno_edom(double x)
21
{
22
  struct exception exc;
23
  exc.arg1 = x;
24
  exc.arg2 = x;
25
  exc.type = DOMAIN;
26
  exc.name = (char *)"asin";
27
  if (_LIB_VERSION == _SVID_)
28
    exc.retval = HUGE;
29
  else
30
    exc.retval = nan_with_flags(AMD_F_INVALID);
31
  if (_LIB_VERSION == _POSIX_)
32
    __set_errno(EDOM);
33
  else if (!matherr(&exc))
34
    {
35
      if(_LIB_VERSION == _SVID_)
36
        (void)fputs("asin: DOMAIN error\n", stderr);
37
    __set_errno(EDOM);
38
    }
39
  return exc.retval;
40
}
41
42
double __asin(double x)
43
{
44
  /* Computes arcsin(x).
45
     The argument is first reduced by noting that arcsin(x) 
46
     is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x).
47
     For denormal and small arguments arcsin(x) = x to machine
48
     accuracy. Remaining argument ranges are handled as follows. 
49
     For abs(x) <= 0.5 use 
50
     arcsin(x) = x + x^3*R(x^2)
51
     where R(x^2) is a rational minimax approximation to 
52
     (arcsin(x) - x)/x^3.
53
     For abs(x) > 0.5 exploit the identity:
54
      arcsin(x) = pi/2 - 2*arcsin(sqrt(1-x)/2)
55
     together with the above rational approximation, and 
56
     reconstruct the terms carefully.
57
    */
58
59
  /* Some constants and split constants. */
60
61
  static const double 
62
    piby2_tail  = 6.1232339957367660e-17, /* 0x3c91a62633145c07 */
63
    hpiby2_head = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */
64
    piby2       = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */
65
  double u, v, y, s=0.0, r;
66
  int xexp, xnan, transform=0;
67
68
  unsigned long ux, aux, xneg;
69
  GET_BITS_DP64(x, ux);
70
  aux = ux & ~SIGNBIT_DP64;
71
  xneg = (ux & SIGNBIT_DP64);
72
  xnan = (aux > PINFBITPATT_DP64);
73
  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
74
75
  /* Special cases */
76
77
  if (xexp < -28)
78
    { /* y small enough that arcsin(x) = x */
79
      return val_with_flags(x, AMD_F_INEXACT);
80
    }
81
  else if (xnan) return x + x;
82
  else if (xexp >= 0) 
83
    { /* abs(x) >= 1.0 */
84
      if (x == 1.0) return val_with_flags(piby2, AMD_F_INEXACT);
85
      else if (x == -1.0) return val_with_flags(-piby2, AMD_F_INEXACT);
86
      else return retval_errno_edom(x);
87
88
    }
89
90
  if (xneg) y = -x;
91
  else y = x;
92
93
  transform = (xexp >= -1); /* abs(x) >= 0.5 */
94
95
  if (transform)
96
    { /* Transform y into the range [0,0.5) */
97
      transform = 1;
98
      r = 0.5*(1-y);
99
      /* Hammer sqrt instruction */
100
      asm volatile ("sqrtsd %1, %0" : "=x" (s) : "x" (r));
101
      y = s;
102
    }
103
  else
104
    {
105
      r = y*y;
106
    }
107
108
  /* Use a rational approximation for [0.0, 0.5] */
109
110
  u = r*(0.227485835556935010735943483075 + 
111
         (-0.445017216867635649900123110649 +
112
          (0.275558175256937652532686256258 + 
113
           (-0.0549989809235685841612020091328 +
114
            (0.00109242697235074662306043804220 + 
115
             0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/
116
    (1.36491501334161032038194214209 +
117
     (-3.28431505720958658909889444194 + 
118
      (2.76568859157270989520376345954 + 
119
       (-0.943639137032492685763471240072 +
120
	0.105869422087204370341222318533*r)*r)*r)*r);
121
122
  if (transform) 
123
    { /* Reconstruct asin carefully in transformed region */
124
      	{
125
	  double c, s1, p, q;
126
	  unsigned long us;
127
	  GET_BITS_DP64(s, us);
128
	  PUT_BITS_DP64(0xffffffff00000000 & us, s1);
129
	  c = (r-s1*s1)/(s+s1);
130
	  p = 2*s*u-(piby2_tail-2*c);
131
	  q = hpiby2_head-2*s1;
132
	  v = hpiby2_head-(p-q);
133
	}
134
    }
135
  else
136
    {
137
      v = y + y*u;
138
    }
139
140
  if (xneg) return -v;
141
  else return v;
142
}
143
144
weak_alias (__asin, asin)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_asinf.c.x86_64-new-libm (+133 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_VALF_WITH_FLAGS
13
#define USE_NANF_WITH_FLAGS
14
#include "libm_inlines_amd.h"
15
#undef USE_NANF_WITH_FLAGS
16
#undef USE_VALF_WITH_FLAGS
17
18
/* Deal with errno for out-of-range argument */
19
#include "libm_errno_amd.h"
20
static inline float retval_errno_edom(float x)
21
{
22
  struct exception exc;
23
  exc.arg1 = (double)x;
24
  exc.arg2 = (double)x;
25
  exc.type = DOMAIN;
26
  exc.name = (char *)"asinf";
27
  if (_LIB_VERSION == _SVID_)
28
    exc.retval = HUGE;
29
  else
30
    exc.retval = nanf_with_flags(AMD_F_INVALID);
31
  if (_LIB_VERSION == _POSIX_)
32
    __set_errno(EDOM);
33
  else if (!matherr(&exc))
34
    {
35
      if(_LIB_VERSION == _SVID_)
36
        (void)fputs("asinf: DOMAIN error\n", stderr);
37
    __set_errno(EDOM);
38
    }
39
  return exc.retval;
40
}
41
42
float __asinf(float x)
43
{
44
  /* Computes arcsin(x).
45
     The argument is first reduced by noting that arcsin(x) 
46
     is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x).
47
     For denormal and small arguments arcsin(x) = x to machine
48
     accuracy. Remaining argument ranges are handled as follows. 
49
     For abs(x) <= 0.5 use 
50
     arcsin(x) = x + x^3*R(x^2)
51
     where R(x^2) is a rational minimax approximation to 
52
     (arcsin(x) - x)/x^3.
53
     For abs(x) > 0.5 exploit the identity:
54
      arcsin(x) = pi/2 - 2*arcsin(sqrt(1-x)/2)
55
     together with the above rational approximation, and 
56
     reconstruct the terms carefully.
57
    */
58
59
  /* Some constants and split constants. */
60
61
  static const float
62
    piby2_tail  = 7.5497894159e-08F, /* 0x33a22168 */
63
    hpiby2_head = 7.8539812565e-01F, /* 0x3f490fda */
64
    piby2       = 1.5707963705e+00F; /* 0x3fc90fdb */
65
  float u, v, y, s = 0.0F, r;
66
  int xexp, xnan, transform = 0;
67
68
  unsigned int ux, aux, xneg;
69
  GET_BITS_SP32(x, ux);
70
  aux = ux & ~SIGNBIT_SP32;
71
  xneg = (ux & SIGNBIT_SP32);
72
  xnan = (aux > PINFBITPATT_SP32);
73
  xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
74
75
  /* Special cases */
76
77
  if (xexp < -14)
78
    /* y small enough that arcsin(x) = x */
79
    return valf_with_flags(x, AMD_F_INEXACT);
80
  else if (xnan) return x + x;
81
  else if (xexp >= 0) 
82
    {
83
      /* abs(x) >= 1.0 */
84
      if (x == 1.0F) return valf_with_flags(piby2, AMD_F_INEXACT);
85
      else if (x == -1.0F) return valf_with_flags(-piby2, AMD_F_INEXACT);
86
      else return retval_errno_edom(x);
87
    }
88
89
  if (xneg) y = -x;
90
  else y = x;
91
92
  transform = (xexp >= -1); /* abs(x) >= 0.5 */
93
94
  if (transform)
95
    { /* Transform y into the range [0,0.5) */
96
      transform = 1;
97
      r = 0.5F*(1-y);
98
      /* Hammer sqrt instruction */
99
      asm volatile ("sqrtss %1, %0" : "=x" (s) : "x" (r));
100
      y = s;
101
    }
102
  else
103
    r = y*y;
104
105
  /* Use a rational approximation for [0.0, 0.5] */
106
107
  u=r*(0.184161606965100694821398249421F + 
108
       (-0.0565298683201845211985026327361F +
109
	(-0.0133819288943925804214011424456F - 
110
	 0.00396137437848476485201154797087F*r)*r)*r)/
111
    (1.10496961524520294485512696706F - 
112
     0.836411276854206731913362287293F*r);
113
114
  if (transform) 
115
    {
116
      /* Reconstruct asin carefully in transformed region */
117
      float c, s1, p, q;
118
      unsigned int us;
119
      GET_BITS_SP32(s, us);
120
      PUT_BITS_SP32(0xffff0000 & us, s1);
121
      c = (r-s1*s1)/(s+s1);
122
      p = 2.0F*s*u-(piby2_tail-2.0F*c);
123
      q = hpiby2_head-2*s1;
124
      v = hpiby2_head-(p-q);
125
    }
126
  else
127
    v = y + y*u;
128
129
  if (xneg) return -v;
130
  else return v;
131
}
132
133
weak_alias (__asinf, asinf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_exp.c.x86_64-new-libm (+159 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SPLITEXP
13
#define USE_SCALEDOUBLE_1
14
#define USE_SCALEDOUBLE_2
15
#define USE_ZERO_WITH_FLAGS
16
#define USE_INFINITY_WITH_FLAGS
17
#include "libm_inlines_amd.h"
18
#undef USE_ZERO_WITH_FLAGS
19
#undef USE_SPLITEXP
20
#undef USE_SCALEDOUBLE_1
21
#undef USE_SCALEDOUBLE_2
22
#undef USE_INFINITY_WITH_FLAGS
23
24
/* Deal with errno for out-of-range result */
25
#include "libm_errno_amd.h"
26
static inline double retval_errno_erange_overflow(double x)
27
{
28
  struct exception exc;
29
  exc.arg1 = x;
30
  exc.arg2 = x;
31
  exc.type = OVERFLOW;
32
  exc.name = (char *)"exp";
33
  if (_LIB_VERSION == _SVID_)
34
    exc.retval = HUGE;
35
  else
36
    exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
37
  if (_LIB_VERSION == _POSIX_)
38
    __set_errno(ERANGE);
39
  else if (!matherr(&exc))
40
    __set_errno(ERANGE);
41
  return exc.retval;
42
}
43
44
static inline double retval_errno_erange_underflow(double x)
45
{
46
  struct exception exc;
47
  exc.arg1 = x;
48
  exc.arg2 = x;
49
  exc.type = UNDERFLOW;
50
  exc.name = (char *)"exp";
51
  exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
52
  if (_LIB_VERSION == _POSIX_)
53
    __set_errno(ERANGE);
54
  else if (!matherr(&exc))
55
    __set_errno(ERANGE);
56
  return exc.retval;
57
}
58
59
double __exp(double x)
60
{
61
  static const double
62
       max_exp_arg =  7.09782712893383973096e+02, /* 0x40862e42fefa39ef */
63
       min_exp_arg = -7.45133219101941108420e+02, /* 0xc0874910d52d3051 */
64
 thirtytwo_by_log2 = 4.61662413084468283841e+01,  /* 0x40471547652b82fe */
65
   log2_by_32_lead = 2.16608493356034159660e-02,  /* 0x3f962e42fe000000 */
66
  log2_by_32_trail = 5.68948749532545630390e-11;  /* 0x3dcf473de6af278e */
67
68
  double z1, z2, z;
69
  int m;
70
  unsigned long ux, ax;
71
72
  /*
73
    Computation of exp(x).
74
75
    We compute the values m, z1, and z2 such that
76
    exp(x) = 2**m * (z1 + z2),  where
77
    exp(x) is the natural exponential of x.
78
79
    Computations needed in order to obtain m, z1, and z2
80
    involve three steps.
81
82
    First, we reduce the argument x to the form
83
    x = n * log2/32 + remainder,
84
    where n has the value of an integer and |remainder| <= log2/64.
85
    The value of n = x * 32/log2 rounded to the nearest integer and
86
    the remainder = x - n*log2/32.
87
88
    Second, we approximate exp(r1 + r2) - 1 where r1 is the leading
89
    part of the remainder and r2 is the trailing part of the remainder.
90
91
    Third, we reconstruct the exponential of x so that
92
    exp(x) = 2**m * (z1 + z2).
93
  */
94
95
96
  GET_BITS_DP64(x, ux);
97
  ax = ux & (~SIGNBIT_DP64);
98
99
  if (ax >= 0x40862e42fefa39ef) /* abs(x) >= 709.78... */
100
    {
101
      if(ax >= 0x7ff0000000000000)
102
        {
103
          /* x is either NaN or infinity */
104
          if (ux & MANTBITS_DP64)
105
            /* x is NaN */
106
            return x + x; /* Raise invalid if it is a signalling NaN */
107
          else if (ux & SIGNBIT_DP64)
108
            /* x is negative infinity; return 0.0 with no flags. */
109
            return 0.0;
110
          else
111
            /* x is positive infinity */
112
            return x;
113
        }
114
      if (x > max_exp_arg)
115
        /* Return +infinity with overflow flag */
116
        return retval_errno_erange_overflow(x);
117
      else if (x < min_exp_arg)
118
        /* x is negative. Return +zero with underflow and inexact flags */
119
        return retval_errno_erange_underflow(x);
120
    }
121
122
  /* Handle small arguments separately */
123
  if (ax < 0x3fb0000000000000)   /* abs(x) < 1/16 */
124
    {
125
      if (ax < 0x3c00000000000000)   /* abs(x) < 2^(-63) */
126
        z = 1.0 + x; /* Raises inexact if x is non-zero */
127
      else
128
        z = ((((((((((
129
		      1.0/3628800)*x+
130
		     1.0/362880)*x+
131
		    1.0/40320)*x+
132
		   1.0/5040)*x+
133
		  1.0/720)*x+
134
		 1.0/120)*x+
135
		1.0/24)*x+
136
	       1.0/6)*x+
137
	      1.0/2)*x+
138
	     1.0)*x + 1.0;
139
    }
140
  else
141
    {
142
      /* Find m, z1 and z2 such that exp(x) = 2**m * (z1 + z2) */
143
144
      splitexp(x, 1.0, thirtytwo_by_log2, log2_by_32_lead, log2_by_32_trail,
145
               &m, &z1, &z2);
146
147
      /* Scale (z1 + z2) by 2.0**m */
148
149
      if (m >= EMIN_DP64 && m <= EMAX_DP64)
150
	z = scaleDouble_1((z1+z2),m);
151
      else
152
	z = scaleDouble_2((z1+z2),m);
153
    }
154
  return z;
155
}
156
157
158
weak_alias (__exp, __ieee754_exp)
159
weak_alias (__exp, exp)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_exp10.c.x86_64-new-libm (+158 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SPLITEXP
13
#define USE_SCALEDOUBLE_1
14
#define USE_SCALEDOUBLE_2
15
#define USE_ZERO_WITH_FLAGS
16
#define USE_INFINITY_WITH_FLAGS
17
#include "libm_inlines_amd.h"
18
#undef USE_SPLITEXP
19
#undef USE_SCALEDOUBLE_1
20
#undef USE_SCALEDOUBLE_2
21
#undef USE_ZERO_WITH_FLAGS
22
#undef USE_INFINITY_WITH_FLAGS
23
24
/* Deal with errno for out-of-range result */
25
#include "libm_errno_amd.h"
26
static inline double retval_errno_erange_overflow(double x)
27
{
28
  struct exception exc;
29
  exc.arg1 = x;
30
  exc.arg2 = x;
31
  exc.type = OVERFLOW;
32
  exc.name = (char *)"exp10";
33
  if (_LIB_VERSION == _SVID_)
34
    exc.retval = HUGE;
35
  else
36
    exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
37
  if (_LIB_VERSION == _POSIX_)
38
    __set_errno(ERANGE);
39
  else if (!matherr(&exc))
40
    __set_errno(ERANGE);
41
  return exc.retval;
42
}
43
44
static inline double retval_errno_erange_underflow(double x)
45
{
46
  struct exception exc;
47
  exc.arg1 = x;
48
  exc.arg2 = x;
49
  exc.type = UNDERFLOW;
50
  exc.name = (char *)"exp10";
51
  exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
52
  if (_LIB_VERSION == _POSIX_)
53
    __set_errno(ERANGE);
54
  else if (!matherr(&exc))
55
    __set_errno(ERANGE);
56
  return exc.retval;
57
}
58
59
double __exp10(double x)
60
{
61
  static const double
62
         max_exp10_arg =  3.0825471555991674677e+02, /* 0x40734413509f79ff */
63
         min_exp10_arg = -3.2330621534311580944e+02, /* 0xc07434e6420f4374 */
64
                 log10 = 2.30258509299404568401e+00, /* 0x40026bb1bbb55516 */
65
 thirtytwo_by_log10of2 = 1.06301699036395595131e+02, /* 0x405a934f0979a371 */
66
   log10of2_by_32_lead = 9.40718688070774078369e-03, /* 0x3F83441340000000 */
67
  log10of2_by_32_trail = 4.83791671566737916758e-10; /* 0x3E009F79FEF311F1 */
68
69
  double y, z1, z2, z;
70
  int m;
71
  unsigned long ux, ax;
72
73
  /*
74
    Computation of exp10(x).
75
76
    We compute the values m, z1, and z2 such that
77
    exp10(x) = 2**m * (z1 + z2),  where exp10(x) is 10**x.
78
79
    Computations needed in order to obtain m, z1, and z2
80
    involve three steps.
81
82
    First, we reduce the argument x to the form
83
    x = n * log10of2/32 + remainder,
84
    where n has the value of an integer and |remainder| <= log10of2/64.
85
    The value of n = x * 32/log10of2 rounded to the nearest integer and
86
    the remainder = x - n*log10of2/32.
87
88
    Second, we approximate exp10(r1 + r2) - 1 where r1 is the leading
89
    part of the remainder and r2 is the trailing part of the remainder.
90
91
    Third, we reconstruct exp10(x) so that
92
    exp10(x) = 2**m * (z1 + z2).
93
  */
94
95
96
  GET_BITS_DP64(x, ux);
97
  ax = ux & (~SIGNBIT_DP64);
98
99
  if (ax >= 0x40734413509f79ff) /* abs(x) >= 308.25... */
100
    {
101
       if(ax >= 0x7ff0000000000000)
102
        {
103
          /* x is either NaN or infinity */
104
          if (ux & MANTBITS_DP64)
105
            /* x is NaN */
106
            return x + x; /* Raise invalid if it is a signalling NaN */
107
          else if (ux & SIGNBIT_DP64)
108
            /* x is negative infinity; return 0.0 with no flags. */
109
            return 0.0;
110
          else
111
            /* x is positive infinity */
112
            return x;
113
        }
114
      if (x > max_exp10_arg)
115
        /* Return +infinity with overflow flag */
116
        return retval_errno_erange_overflow(x);
117
      else if (x < min_exp10_arg)
118
        /* x is negative. Return +zero with underflow and inexact flags */
119
        return retval_errno_erange_underflow(x);
120
    }
121
122
123
  /* Handle small arguments separately */
124
  if (ax < 0x3f9bcb7b131bbb9d)   /* abs(x) < 1/(16*log10) */
125
    {
126
      if (ax < 0x3c00000000000000)   /* abs(x) < 2^(-63) */
127
        return 1.0 + x; /* Raises inexact if x is non-zero */
128
      else
129
        y = log10*x;
130
        z = ((((((((((
131
		      1.0/3628800)*y+
132
		     1.0/362880)*y+
133
		    1.0/40320)*y+
134
		   1.0/5040)*y+
135
		  1.0/720)*y+
136
		 1.0/120)*y+
137
		1.0/24)*y+
138
	       1.0/6)*y+
139
	      1.0/2)*y+
140
	     1.0)*y + 1.0;
141
    }
142
  else
143
    {
144
      /* Find m, z1 and z2 such that exp10(x) = 2**m * (z1 + z2) */
145
146
      splitexp(x, log10, thirtytwo_by_log10of2, log10of2_by_32_lead,
147
               log10of2_by_32_trail, &m, &z1, &z2);
148
149
      /* Scale (z1 + z2) by 2.0**m */
150
      if (m > EMIN_DP64 && m < EMAX_DP64)
151
	return scaleDouble_1((z1+z2),m);
152
      else
153
	return scaleDouble_2((z1+z2),m);
154
    }
155
  return z;
156
}
157
158
weak_alias (__exp10, exp10)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_exp10f.c.x86_64-new-libm (+155 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SPLITEXPF
13
#define USE_SCALEFLOAT_1
14
#define USE_SCALEFLOAT_2
15
#define USE_ZEROF_WITH_FLAGS
16
#define USE_INFINITYF_WITH_FLAGS
17
#include "libm_inlines_amd.h"
18
#undef USE_SPLITEXPF
19
#undef USE_SCALEFLOAT_1
20
#undef USE_SCALEFLOAT_2
21
#undef USE_ZEROF_WITH_FLAGS
22
#undef USE_INFINITYF_WITH_FLAGS
23
24
/* Deal with errno for out-of-range result */
25
#include "libm_errno_amd.h"
26
static inline float retval_errno_erange_overflow(float x)
27
{
28
  struct exception exc;
29
  exc.arg1 = (double)x;
30
  exc.arg2 = (double)x;
31
  exc.type = OVERFLOW;
32
  exc.name = (char *)"exp10f";
33
  if (_LIB_VERSION == _SVID_)
34
    exc.retval = HUGE;
35
  else
36
    exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
37
  if (_LIB_VERSION == _POSIX_)
38
    __set_errno(ERANGE);
39
  else if (!matherr(&exc))
40
    __set_errno(ERANGE);
41
  return exc.retval;
42
}
43
44
static inline float retval_errno_erange_underflow(float x)
45
{
46
  struct exception exc;
47
  exc.arg1 = (double)x;
48
  exc.arg2 = (double)x;
49
  exc.type = UNDERFLOW;
50
  exc.name = (char *)"exp10f";
51
  exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
52
  if (_LIB_VERSION == _POSIX_)
53
    __set_errno(ERANGE);
54
  else if (!matherr(&exc))
55
    __set_errno(ERANGE);
56
  return exc.retval;
57
}
58
59
float __exp10f(float x)
60
{
61
  static const float
62
         max_exp10_arg = 3.8531841278E+01F, /* 0x421A209B */
63
         min_exp10_arg =-4.4853469848E+01F, /* 0xC23369F4 */
64
                 log10 = 2.3025850929E+00F, /* 0x40135D8E */
65
 thirtytwo_by_log10of2 = 1.0630169677E+02F, /* 0x42D49A78 */
66
   log10of2_by_32_lead = 9.4070434570E-03F, /* 0x3C1A2000 */
67
   log10of2_by_32_tail = 1.4390730030E-07F; /* 0x341A84F0 */
68
69
  float y, z1, z2, z;
70
  int m;
71
  unsigned int ux, ax;
72
73
  /*
74
    Computation of exp10f (x).
75
76
    We compute the values m, z1, and z2 such that
77
    exp10f(x) = 2**m * (z1 + z2),  where exp10f(x) is 10**x.
78
79
    Computations needed in order to obtain m, z1, and z2
80
    involve three steps.
81
82
    First, we reduce the argument x to the form
83
    x = n * log10of2/32 + remainder,
84
    where n has the value of an integer and |remainder| <= log10of2/64.
85
    The value of n = x * 32/log10of2 rounded to the nearest integer and
86
    the remainder = x - n*log10of2/32.
87
88
    Second, we approximate exp10f(r1 + r2) - 1 where r1 is the leading
89
    part of the remainder and r2 is the trailing part of the remainder.
90
91
    Third, we reconstruct exp10f(x) so that
92
    exp10f(x) = 2**m * (z1 + z2).
93
  */
94
95
  GET_BITS_SP32(x, ux);
96
  ax = ux & (~SIGNBIT_SP32);
97
98
  if (ax >= 0x421A209B) /* abs(x) >= 38.5... */
99
    {
100
      if(ax >= 0x7f800000)
101
        {
102
          /* x is either NaN or infinity */
103
          if (ux & MANTBITS_SP32)
104
            /* x is NaN */
105
            return x + x; /* Raise invalid if it is a signalling NaN */
106
          else if (ux & SIGNBIT_SP32)
107
            /* x is negative infinity; return 0.0 with no flags. */
108
            return 0.0F;
109
          else
110
            /* x is positive infinity */
111
            return x;
112
        }
113
      if (x > max_exp10_arg)
114
        /* Return +infinity with overflow flag */
115
        return retval_errno_erange_overflow(x);
116
      else if (x < min_exp10_arg)
117
        /* x is negative. Return +zero with underflow and inexact flags */
118
        return retval_errno_erange_underflow(x);
119
    }
120
121
  /* Handle small arguments separately */
122
  if (ax < 0x3bde5bd9)   /* abs(x) < 1/(64*log10) */
123
    {
124
      if (ax < 0x32800000)   /* abs(x) < 2^(-26) */
125
        return 1.0F + x; /* Raises inexact if x is non-zero */
126
      else
127
        y = log10*x;
128
      z = ((((((((
129
		    1.0F/40320)*x+
130
		   1.0F/5040)*y+
131
		  1.0F/720)*y+
132
		 1.0F/120)*y+
133
		1.0F/24)*y+
134
	       1.0F/6)*y+
135
	      1.0F/2)*y+
136
	     1.0F)*y + 1.0;
137
    }
138
  else
139
    {
140
      /* Find m, z1 and z2 such that exp10f(x) = 2**m * (z1 + z2) */
141
142
      splitexpf(x, log10, thirtytwo_by_log10of2, log10of2_by_32_lead,
143
                log10of2_by_32_tail, &m, &z1, &z2);
144
145
      /* Scale (z1 + z2) by 2.0**m */
146
147
      if (m >= EMIN_SP32 && m <= EMAX_SP32)
148
	z = scaleFloat_1((z1+z2),m);
149
      else
150
	z = scaleFloat_2((z1+z2),m);
151
    }
152
  return z;
153
}
154
155
weak_alias (__exp10f, exp10f)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_exp2.c.x86_64-new-libm (+172 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SPLITEXP
13
#define USE_SCALEDOUBLE_1
14
#define USE_SCALEDOUBLE_2
15
#define USE_ZERO_WITH_FLAGS
16
#define USE_INFINITY_WITH_FLAGS
17
#include "libm_inlines_amd.h"
18
#undef USE_ZERO_WITH_FLAGS
19
#undef USE_SPLITEXP
20
#undef USE_SCALEDOUBLE_1
21
#undef USE_SCALEDOUBLE_2
22
#undef USE_INFINITY_WITH_FLAGS
23
24
/* Deal with errno for out-of-range result */
25
#include "libm_errno_amd.h"
26
static inline double retval_errno_erange_overflow(double x)
27
{
28
  struct exception exc;
29
  exc.arg1 = x;
30
  exc.arg2 = x;
31
  exc.type = OVERFLOW;
32
  exc.name = (char *)"exp2";
33
  if (_LIB_VERSION == _SVID_)
34
    exc.retval = HUGE;
35
  else
36
    exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
37
  if (_LIB_VERSION == _POSIX_)
38
    __set_errno(ERANGE);
39
  else if (!matherr(&exc))
40
    __set_errno(ERANGE);
41
  return exc.retval;
42
}
43
44
static inline double retval_errno_erange_underflow(double x)
45
{
46
  struct exception exc;
47
  exc.arg1 = x;
48
  exc.arg2 = x;
49
  exc.type = UNDERFLOW;
50
  exc.name = (char *)"exp2";
51
  exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
52
  if (_LIB_VERSION == _POSIX_)
53
    __set_errno(ERANGE);
54
  else if (!matherr(&exc))
55
    __set_errno(ERANGE);
56
  return exc.retval;
57
}
58
59
double __exp2(double x)
60
{
61
  static const double
62
    max_exp2_arg = 1024.0,  /* 0x4090000000000000 */
63
    min_exp2_arg = -1074.0, /* 0xc090c80000000000 */
64
    log2 = 6.931471805599453094178e-01, /* 0x3fe62e42fefa39ef */
65
    log2_lead = 6.93147167563438415527E-01, /* 0x3fe62e42f8000000 */
66
    log2_tail = 1.29965068938898869640E-08, /* 0x3e4be8e7bcd5e4f1 */
67
    one_by_32_lead = 0.03125;
68
69
  double y, z1, z2, z, hx, tx, y1, y2;
70
  int m;
71
  unsigned long ux, ax;
72
73
  /*
74
    Computation of exp2(x).
75
76
    We compute the values m, z1, and z2 such that
77
    exp2(x) = 2**m * (z1 + z2),  where exp2(x) is 2**x.
78
79
    Computations needed in order to obtain m, z1, and z2
80
    involve three steps.
81
82
    First, we reduce the argument x to the form
83
    x = n/32 + remainder,
84
    where n has the value of an integer and |remainder| <= 1/64.
85
    The value of n = x * 32 rounded to the nearest integer and
86
    the remainder = x - n/32.
87
88
    Second, we approximate exp2(r1 + r2) - 1 where r1 is the leading
89
    part of the remainder and r2 is the trailing part of the remainder.
90
91
    Third, we reconstruct exp2(x) so that
92
    exp2(x) = 2**m * (z1 + z2).
93
  */
94
95
96
  GET_BITS_DP64(x, ux);
97
  ax = ux & (~SIGNBIT_DP64);
98
99
  if (ax >= 0x4090000000000000) /* abs(x) >= 1024.0 */
100
    {
101
      if(ax >= 0x7ff0000000000000)
102
        {
103
          /* x is either NaN or infinity */
104
          if (ux & MANTBITS_DP64)
105
            /* x is NaN */
106
            return x + x; /* Raise invalid if it is a signalling NaN */
107
          else if (ux & SIGNBIT_DP64)
108
            /* x is negative infinity; return 0.0 with no flags. */
109
            return 0.0;
110
          else
111
            /* x is positive infinity */
112
            return x;
113
        }
114
      if (x > max_exp2_arg)
115
        /* Return +infinity with overflow flag */
116
        return retval_errno_erange_overflow(x);
117
      else if (x < min_exp2_arg)
118
        /* x is negative. Return +zero with underflow and inexact flags */
119
        return retval_errno_erange_underflow(x);
120
    }
121
122
123
  /* Handle small arguments separately */
124
  if (ax < 0x3fb7154764ee6c2f)   /* abs(x) < 1/(16*log2) */
125
    {
126
      if (ax < 0x3c00000000000000)   /* abs(x) < 2^(-63) */
127
        return 1.0 + x; /* Raises inexact if x is non-zero */
128
      else
129
        {
130
          /* Split x into hx (head) and tx (tail). */
131
          unsigned long u;
132
          hx = x;
133
          GET_BITS_DP64(hx, u);
134
          u &= 0xfffffffff8000000;
135
          PUT_BITS_DP64(u, hx);
136
          tx = x - hx;
137
          /* Carefully multiply x by log2. y1 is the most significant
138
             part of the result, and y2 the least significant part */
139
          y1 = x * log2_lead;
140
          y2 = (((hx * log2_lead - y1) + hx * log2_tail) +
141
                  tx * log2_lead) + tx * log2_tail;
142
143
          y = y1 + y2;
144
		z = (9.99564649780173690e-1 +
145
		     (1.61251249355268050e-5 +
146
		      (2.37986978239838493e-2 +
147
		        2.68724774856111190e-7*y)*y)*y)/
148
		    (9.99564649780173692e-1 +
149
		     (-4.99766199765151309e-1 +
150
		      (1.070876894098586184e-1 +
151
		       (-1.189773642681502232e-2 +
152
			 5.9480622371960190616e-4*y)*y)*y)*y);
153
          z = ((z * y1) + (z * y2)) + 1.0;
154
        }
155
    }
156
  else
157
    {
158
      /* Find m, z1 and z2 such that exp2(x) = 2**m * (z1 + z2) */
159
160
      splitexp(x, log2, 32.0, one_by_32_lead, 0.0, &m, &z1, &z2);
161
162
      /* Scale (z1 + z2) by 2.0**m */
163
      if (m > EMIN_DP64 && m < EMAX_DP64)
164
	z = scaleDouble_1((z1+z2),m);
165
      else
166
	z = scaleDouble_2((z1+z2),m);
167
    }
168
  return z;
169
}
170
171
weak_alias (__exp2, exp2)
172
weak_alias (__exp2, __libm_ieee754_exp2)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_exp2f.c.x86_64-new-libm (+155 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SPLITEXPF
13
#define USE_SCALEFLOAT_1
14
#define USE_SCALEFLOAT_2
15
#define USE_INFINITYF_WITH_FLAGS
16
#define USE_ZEROF_WITH_FLAGS
17
#include "libm_inlines_amd.h"
18
#undef USE_SPLITEXPF
19
#undef USE_SCALEFLOAT_1
20
#undef USE_SCALEFLOAT_2
21
#undef USE_INFINITYF_WITH_FLAGS
22
#undef USE_ZEROF_WITH_FLAGS
23
24
/* Deal with errno for out-of-range result */
25
#include "libm_errno_amd.h"
26
static inline float retval_errno_erange_overflow(float x)
27
{
28
  struct exception exc;
29
  exc.arg1 = (double)x;
30
  exc.arg2 = (double)x;
31
  exc.type = OVERFLOW;
32
  exc.name = (char *)"exp2f";
33
  if (_LIB_VERSION == _SVID_)
34
    exc.retval = HUGE;
35
  else
36
    exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
37
  if (_LIB_VERSION == _POSIX_)
38
    __set_errno(ERANGE);
39
  else if (!matherr(&exc))
40
    __set_errno(ERANGE);
41
  return exc.retval;
42
}
43
44
static inline float retval_errno_erange_underflow(float x)
45
{
46
  struct exception exc;
47
  exc.arg1 = (double)x;
48
  exc.arg2 = (double)x;
49
  exc.type = UNDERFLOW;
50
  exc.name = (char *)"exp2f";
51
  exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
52
  if (_LIB_VERSION == _POSIX_)
53
    __set_errno(ERANGE);
54
  else if (!matherr(&exc))
55
    __set_errno(ERANGE);
56
  return exc.retval;
57
}
58
59
float __exp2f(float x)
60
{
61
  static const float
62
    max_exp2_arg = 128.0F,  /* 0x43000000 */
63
    min_exp2_arg = -149.0F, /* 0xc3150000 */
64
    log2 = 6.931471824645996e-01F, /* 0x3f317218 */
65
    one_by_32_lead = 0.03125F;
66
67
  float y, z1, z2, z;
68
  int m;
69
  unsigned int ux, ax;
70
71
  /*
72
    Computation of exp2f(x).
73
74
    We compute the values m, z1, and z2 such that
75
    exp2f(x) = 2**m * (z1 + z2),  where exp2f(x) is 2**x.
76
77
    Computations needed in order to obtain m, z1, and z2
78
    involve three steps.
79
80
    First, we reduce the argument x to the form
81
    x = n/32 + remainder,
82
    where n has the value of an integer and |remainder| <= 1/64.
83
    The value of n = x * 32 rounded to the nearest integer and
84
    the remainder = x - n/32.
85
86
    Second, we approximate exp2f(r1 + r2) - 1 where r1 is the leading
87
    part of the remainder and r2 is the trailing part of the remainder.
88
89
    Third, we reconstruct exp2f(x) so that
90
    exp2f(x) = 2**m * (z1 + z2).
91
  */
92
93
  GET_BITS_SP32(x, ux);
94
  ax = ux & (~SIGNBIT_SP32);
95
96
  if (ax >= 0x43000000) /* abs(x) >= 128.0 */
97
    {
98
      if(ax >= 0x7f800000)
99
        {
100
          /* x is either NaN or infinity */
101
          if (ux & MANTBITS_SP32)
102
            /* x is NaN */
103
            return x + x; /* Raise invalid if it is a signalling NaN */
104
          else if (ux & SIGNBIT_SP32)
105
            /* x is negative infinity; return 0.0 with no flags. */
106
            return 0.0F;
107
          else
108
            /* x is positive infinity */
109
            return x;
110
        }
111
      if (x > max_exp2_arg)
112
        /* Return +infinity with overflow flag */
113
        return retval_errno_erange_overflow(x);
114
      else if (x < min_exp2_arg)
115
        /* x is negative. Return +zero with underflow and inexact flags */
116
        return retval_errno_erange_underflow(x);
117
    }
118
119
  /* Handle small arguments separately */
120
  if (ax < 0x3cb8aa3b)   /* abs(x) < 1/(64*log2) */
121
    {
122
      if (ax < 0x32800000)   /* abs(x) < 2^(-26) */
123
        return 1.0F + x; /* Raises inexact if x is non-zero */
124
      else
125
        {
126
	  y = log2*x;
127
      z = ((((((((
128
		    1.0F/40320)*y+
129
		   1.0F/5040)*y+
130
		  1.0F/720)*y+
131
		 1.0F/120)*y+
132
		1.0F/24)*y+
133
	       1.0F/6)*y+
134
	      1.0F/2)*y+
135
	     1.0F)*y + 1.0;
136
        }
137
    }
138
  else
139
    {
140
      /* Find m, z1 and z2 such that exp2f(x) = 2**m * (z1 + z2) */
141
142
      splitexpf(x, log2, 32.0F, one_by_32_lead, 0.0F, &m, &z1, &z2);
143
144
      /* Scale (z1 + z2) by 2.0**m */
145
146
      if (m >= EMIN_SP32 && m <= EMAX_SP32)
147
	z = scaleFloat_1((z1+z2),m);
148
      else
149
	z = scaleFloat_2((z1+z2),m);
150
    }
151
  return z;
152
}
153
154
weak_alias (__exp2f, exp2f)
155
weak_alias (__exp2f, __libm_ieee754_exp2f)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_expf.c.x86_64-new-libm (+154 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SPLITEXPF
13
#define USE_SCALEFLOAT_1
14
#define USE_SCALEFLOAT_2
15
#define USE_ZEROF_WITH_FLAGS
16
#define USE_INFINITYF_WITH_FLAGS
17
#include "libm_inlines_amd.h"
18
#undef USE_SPLITEXPF
19
#undef USE_SCALEFLOAT_1
20
#undef USE_SCALEFLOAT_2
21
#undef USE_ZEROF_WITH_FLAGS
22
#undef USE_INFINITYF_WITH_FLAGS
23
24
/* Deal with errno for out-of-range result */
25
#include "libm_errno_amd.h"
26
static inline float retval_errno_erange_overflow(float x)
27
{
28
  struct exception exc;
29
  exc.arg1 = (double)x;
30
  exc.arg2 = (double)x;
31
  exc.type = OVERFLOW;
32
  exc.name = (char *)"expf";
33
  if (_LIB_VERSION == _SVID_)
34
    exc.retval = HUGE;
35
  else
36
    exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
37
  if (_LIB_VERSION == _POSIX_)
38
    __set_errno(ERANGE);
39
  else if (!matherr(&exc))
40
    __set_errno(ERANGE);
41
  return exc.retval;
42
}
43
44
static inline float retval_errno_erange_underflow(float x)
45
{
46
  struct exception exc;
47
  exc.arg1 = (double)x;
48
  exc.arg2 = (double)x;
49
  exc.type = UNDERFLOW;
50
  exc.name = (char *)"expf";
51
  exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
52
  if (_LIB_VERSION == _POSIX_)
53
    __set_errno(ERANGE);
54
  else if (!matherr(&exc))
55
    __set_errno(ERANGE);
56
  return exc.retval;
57
}
58
59
float __expf(float x)
60
{
61
  static const float
62
       max_exp_arg =  8.8722839355E+01, /* 0x42B17218 */
63
       min_exp_arg = -1.0327893066E+02, /* 0xC2CE8ED0 */
64
 thirtytwo_by_log2 =  4.6166240692E+01, /* 0x4238AA3B */
65
   log2_by_32_lead =  2.1659851074E-02, /* 0x3CB17000 */
66
   log2_by_32_tail =  9.9831822808E-07; /* 0x3585FDF4 */
67
68
  float z1, z2, z;
69
  int m;
70
  unsigned int ux, ax;
71
72
  /*
73
    Computation of exp(x).
74
75
    We compute the values m, z1, and z2 such that
76
    exp(x) = 2**m * (z1 + z2),  where
77
    exp(x) is the natural exponential of x.
78
79
    Computations needed in order to obtain m, z1, and z2
80
    involve three steps.
81
82
    First, we reduce the argument x to the form
83
    x = n * log2/32 + remainder,
84
    where n has the value of an integer and |remainder| <= log2/64.
85
    The value of n = x * 32/log2 rounded to the nearest integer and
86
    the remainder = x - n*log2/32.
87
88
    Second, we approximate exp(r1 + r2) - 1 where r1 is the leading
89
    part of the remainder and r2 is the trailing part of the remainder.
90
91
    Third, we reconstruct the exponential of x so that
92
    exp(x) = 2**m * (z1 + z2).
93
  */
94
95
  GET_BITS_SP32(x, ux);
96
  ax = ux & (~SIGNBIT_SP32);
97
98
  if (ax >= 0x42B17218) /* abs(x) >= 88.7... */
99
    {
100
      if(ax >= 0x7f800000)
101
        {
102
          /* x is either NaN or infinity */
103
          if (ux & MANTBITS_SP32)
104
            /* x is NaN */
105
            return x + x; /* Raise invalid if it is a signalling NaN */
106
          else if (ux & SIGNBIT_SP32)
107
            /* x is negative infinity; return 0.0 with no flags */
108
            return 0.0;
109
          else
110
            /* x is positive infinity */
111
            return x;
112
        }
113
      if (x > max_exp_arg)
114
        /* Return +infinity with overflow flag */
115
        return retval_errno_erange_overflow(x);
116
      else if (x < min_exp_arg)
117
        /* x is negative. Return +zero with underflow and inexact flags */
118
        return retval_errno_erange_underflow(x);
119
    }
120
121
  /* Handle small arguments separately */
122
  if (ax < 0x3c800000)   /* abs(x) < 1/64 */
123
    {
124
      if (ax < 0x32800000)   /* abs(x) < 2^(-26) */
125
        return 1.0 + x; /* Raises inexact if x is non-zero */
126
      else
127
        z = (((((((
128
		   1.0/5040)*x+
129
		  1.0/720)*x+
130
		 1.0/120)*x+
131
		1.0/24)*x+
132
	       1.0/6)*x+
133
	      1.0/2)*x+
134
	     1.0)*x + 1.0;
135
    }
136
  else
137
    {
138
      /* Find m and z such that exp(x) = 2**m * (z1 + z2) */
139
140
      splitexpf(x, 1.0, thirtytwo_by_log2, log2_by_32_lead,
141
                log2_by_32_tail, &m, &z1, &z2);
142
143
      /* Scale (z1 + z2) by 2.0**m */
144
145
      if (m >= EMIN_SP32 && m <= EMAX_SP32)
146
	z = scaleFloat_1((z1+z2),m);
147
      else
148
	z = scaleFloat_2((z1+z2),m);
149
    }
150
  return z;
151
}
152
153
weak_alias (__expf, expf)
154
weak_alias (__expf, __ieee754_expf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_fmod.c.x86_64-new-libm (+2 lines)
Line 0 Link Here
1
#define COMPILING_FMOD
2
#include <w_remainder.c>
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_fmodf.c.x86_64-new-libm (+2 lines)
Line 0 Link Here
1
#define COMPILING_FMOD
2
#include <w_remainderf.c>
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_hypot.c.x86_64-new-libm (+190 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SCALEDOUBLE_1
13
#define USE_INFINITY_WITH_FLAGS
14
#include "libm_inlines_amd.h"
15
#undef USE_SCALEDOUBLE_1
16
#undef USE_INFINITY_WITH_FLAGS
17
18
/* Deal with errno for out-of-range result */
19
#include "libm_errno_amd.h"
20
static inline double retval_errno_erange_overflow(double x, double y)
21
{
22
  struct exception exc;
23
  exc.arg1 = x;
24
  exc.arg2 = y;
25
  exc.type = OVERFLOW;
26
  exc.name = (char *)"hypot";
27
  if (_LIB_VERSION == _SVID_)
28
    exc.retval = HUGE;
29
  else
30
    exc.retval = infinity_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
31
  if (_LIB_VERSION == _POSIX_)
32
    __set_errno(ERANGE);
33
  else if (!matherr(&exc))
34
    __set_errno(ERANGE);
35
  return exc.retval;
36
}
37
38
double __hypot(double x, double y)
39
{
40
  /* Returns sqrt(x*x + y*y) with no overflow or underflow unless
41
     the result warrants it */
42
43
  const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */
44
45
  double u, r, retval, hx, tx, x2, hy, ty, y2, hs, ts;
46
  unsigned long xexp, yexp, ux, uy, ut;
47
  int dexp, expadjust;
48
49
  GET_BITS_DP64(x, ux);
50
  ux &= ~SIGNBIT_DP64;
51
  GET_BITS_DP64(y, uy);
52
  uy &= ~SIGNBIT_DP64;
53
  xexp = (ux >> EXPSHIFTBITS_DP64);
54
  yexp = (uy >> EXPSHIFTBITS_DP64);
55
56
  if (xexp == BIASEDEMAX_DP64 + 1 || yexp == BIASEDEMAX_DP64 + 1)
57
    {
58
      /* One or both of the arguments are NaN or infinity. The
59
         result will also be NaN or infinity. */
60
      retval = x*x + y*y;
61
      if (((xexp == BIASEDEMAX_DP64 + 1) && !(ux & MANTBITS_DP64)) ||
62
          ((yexp == BIASEDEMAX_DP64 + 1) && !(uy & MANTBITS_DP64)))
63
        /* x or y is infinity. ISO C99 defines that we must
64
           return +infinity, even if the other argument is NaN.
65
           Note that the computation of x*x + y*y above will already 
66
           have raised invalid if either x or y is a signalling NaN. */
67
        return infinity_with_flags(0);
68
      else
69
        /* One or both of x or y is NaN, and neither is infinity.
70
           Raise invalid if it's a signalling NaN */
71
        return retval;
72
    }
73
74
  /* Set x = abs(x) and y = abs(y) */
75
  PUT_BITS_DP64(ux, x);
76
  PUT_BITS_DP64(uy, y);
77
78
  /* The difference in exponents between x and y */
79
  dexp = xexp - yexp;
80
  expadjust = 0;
81
82
  if (ux == 0)
83
    /* x is zero */
84
    return y;
85
  else if (uy == 0)
86
    /* y is zero */
87
    return x;
88
  else if (dexp > MANTLENGTH_DP64 + 1 || dexp < -MANTLENGTH_DP64 - 1)
89
    /* One of x and y is insignificant compared to the other */
90
    return x + y; /* Raise inexact */
91
  else if (xexp > EXPBIAS_DP64 + 500 || yexp > EXPBIAS_DP64 + 500)
92
    {
93
      /* Danger of overflow; scale down by 2**600. */
94
      expadjust = 600;
95
      ux -= 0x2580000000000000;
96
      PUT_BITS_DP64(ux, x);
97
      uy -= 0x2580000000000000;
98
      PUT_BITS_DP64(uy, y);
99
    }
100
  else if (xexp < EXPBIAS_DP64 - 500 || yexp < EXPBIAS_DP64 - 500)
101
    {
102
      /* Danger of underflow; scale up by 2**600. */
103
      expadjust = -600;
104
      if (xexp == 0)
105
        {
106
          /* x is denormal - handle by adding 601 to the exponent
107
           and then subtracting a correction for the implicit bit */
108
          PUT_BITS_DP64(ux + 0x2590000000000000, x);
109
          x -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
110
          GET_BITS_DP64(x, ux);
111
        }
112
      else
113
        {
114
          /* x is normal - just increase the exponent by 600 */
115
          ux += 0x2580000000000000;
116
          PUT_BITS_DP64(ux, x);
117
        }
118
      if (yexp == 0)
119
        {
120
          PUT_BITS_DP64(uy + 0x2590000000000000, y);
121
          y -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
122
          GET_BITS_DP64(y, uy);
123
        }
124
      else
125
        {
126
          uy += 0x2580000000000000;
127
          PUT_BITS_DP64(uy, y);
128
        }
129
    }
130
131
132
#ifdef FAST_BUT_GREATER_THAN_ONE_ULP
133
  /* Not awful, but results in accuracy loss larger than 1 ulp */
134
  r = x*x + y*y
135
#else
136
  /* Slower but more accurate */
137
138
  /* Sort so that x is greater than y */
139
  if (x < y)
140
    {
141
      u = y;
142
      y = x;
143
      x = u;
144
      ut = ux;
145
      ux = uy;
146
      uy = ut;
147
    }
148
149
  /* Split x into hx and tx, head and tail */
150
  PUT_BITS_DP64(ux & 0xfffffffff8000000, hx);
151
  tx = x - hx;
152
153
  PUT_BITS_DP64(uy & 0xfffffffff8000000, hy);
154
  ty = y - hy;
155
156
  /* Compute r = x*x + y*y with extra precision */
157
  x2 = x*x;
158
  y2 = y*y;
159
  hs = x2 + y2;
160
161
  if (dexp == 0)
162
    /* We take most care when x and y have equal exponents,
163
       i.e. are almost the same size */
164
    ts = (((x2 - hs) + y2) +
165
          ((hx * hx - x2) + 2 * hx * tx) + tx * tx) +
166
      ((hy * hy - y2) + 2 * hy * ty) + ty * ty;
167
  else
168
    ts = (((x2 - hs) + y2) +
169
          ((hx * hx - x2) + 2 * hx * tx) + tx * tx);
170
171
  r = hs + ts;
172
#endif
173
174
  /* The sqrt can introduce another half ulp error. */
175
  /* Hammer sqrt instruction */
176
  asm volatile ("sqrtsd %1, %0" : "=x" (retval) : "x" (r));
177
178
  /* If necessary scale the result back. This may lead to
179
     overflow but if so that's the correct result. */
180
  retval = scaleDouble_1(retval, expadjust);
181
182
  if (retval > large)
183
    /* The result overflowed. Deal with errno. */
184
    return retval_errno_erange_overflow(x, y);
185
186
  return retval;
187
}
188
189
weak_alias (__hypot, hypot)
190
weak_alias (__hypot, __ieee754_hypot)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_hypotf.c.x86_64-new-libm (+98 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#if USE_SOFTWARE_SQRT
13
#define USE_SQRTF_AMD_INLINE
14
#endif
15
#define USE_INFINITYF_WITH_FLAGS
16
#include "libm_inlines_amd.h"
17
#if USE_SOFTWARE_SQRT
18
#undef USE_SQRTF_AMD_INLINE
19
#endif
20
#undef USE_INFINITYF_WITH_FLAGS
21
22
/* Deal with errno for out-of-range result */
23
#include "libm_errno_amd.h"
24
static inline float retval_errno_erange_overflow(float x, float y)
25
{
26
  struct exception exc;
27
  exc.arg1 = (double)x;
28
  exc.arg2 = (double)y;
29
  exc.type = OVERFLOW;
30
  exc.name = (char *)"hypotf";
31
  if (_LIB_VERSION == _SVID_)
32
    exc.retval = HUGE;
33
  else
34
    exc.retval = infinityf_with_flags(AMD_F_OVERFLOW | AMD_F_INEXACT);
35
  if (_LIB_VERSION == _POSIX_)
36
    __set_errno(ERANGE);
37
  else if (!matherr(&exc))
38
    __set_errno(ERANGE);
39
  return exc.retval;
40
}
41
42
float __hypotf(float x, float y)
43
{
44
  /* Returns sqrt(x*x + y*y) with no overflow or underflow unless
45
     the result warrants it */
46
47
    /* Do intermediate computations in double precision
48
       and use sqrt instruction from chip if available. */
49
    double dx = x, dy = y, dr, retval;
50
51
    /* The largest finite float, stored as a double */
52
    const double large = 3.40282346638528859812e+38; /* 0x47efffffe0000000 */
53
54
55
  unsigned long ux, uy, avx, avy;
56
57
  GET_BITS_DP64(x, avx);
58
  avx &= ~SIGNBIT_DP64;
59
  GET_BITS_DP64(y, avy);
60
  avy &= ~SIGNBIT_DP64;
61
  ux = (avx >> EXPSHIFTBITS_DP64);
62
  uy = (avy >> EXPSHIFTBITS_DP64);
63
64
  if (ux == BIASEDEMAX_DP64 + 1 || uy == BIASEDEMAX_DP64 + 1)
65
    {
66
      retval = x*x + y*y;
67
      /* One or both of the arguments are NaN or infinity. The
68
         result will also be NaN or infinity. */
69
      if (((ux == BIASEDEMAX_DP64 + 1) && !(avx & MANTBITS_DP64)) ||
70
          ((uy == BIASEDEMAX_DP64 + 1) && !(avy & MANTBITS_DP64)))
71
        /* x or y is infinity. ISO C99 defines that we must
72
           return +infinity, even if the other argument is NaN.
73
           Note that the computation of x*x + y*y above will already 
74
           have raised invalid if either x or y is a signalling NaN. */
75
        return infinityf_with_flags(0);
76
      else
77
        /* One or both of x or y is NaN, and neither is infinity.
78
           Raise invalid if it's a signalling NaN */
79
        return retval;
80
    }
81
82
    dr = (dx*dx + dy*dy);
83
84
#if USE_SOFTWARE_SQRT
85
    retval = sqrtf_amd_inline(r);
86
#else
87
    /* Hammer sqrt instruction */
88
    asm volatile ("sqrtsd %1, %0" : "=x" (retval) : "x" (dr));
89
#endif
90
91
    if (retval > large)
92
      return retval_errno_erange_overflow(x, y);
93
    else
94
      return retval;
95
  }
96
97
weak_alias (__hypotf, hypotf)
98
weak_alias (__hypotf, __ieee754_hypotf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_log.c.x86_64-new-libm (+489 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_NAN_WITH_FLAGS
13
#define USE_INFINITY_WITH_FLAGS
14
#include "libm_inlines_amd.h"
15
#undef USE_NAN_WITH_FLAGS
16
#undef USE_INFINITY_WITH_FLAGS
17
18
/* Deal with errno for out-of-range result */
19
#include "libm_errno_amd.h"
20
static inline double retval_errno_erange_overflow(double x)
21
{
22
  struct exception exc;
23
  exc.arg1 = x;
24
  exc.arg2 = x;
25
  exc.type = SING;
26
#if defined(COMPILING_LOG10)
27
  exc.name = (char *)"log10";
28
#elif defined(COMPILING_LOG2)
29
  exc.name = (char *)"log2";
30
#else
31
  exc.name = (char *)"log";
32
#endif
33
  if (_LIB_VERSION == _SVID_)
34
    exc.retval = -HUGE;
35
  else
36
    exc.retval = -infinity_with_flags(AMD_F_DIVBYZERO);
37
  if (_LIB_VERSION == _POSIX_)
38
    __set_errno(ERANGE);
39
  else if (!matherr(&exc))
40
    __set_errno(ERANGE);
41
  return exc.retval;
42
}
43
44
/* Deal with errno for out-of-range argument */
45
static inline double retval_errno_edom(double x)
46
{
47
  struct exception exc;
48
  exc.arg1 = x;
49
  exc.arg2 = x;
50
  exc.type = DOMAIN;
51
#if defined(COMPILING_LOG10)
52
  exc.name = (char *)"log10";
53
#elif defined(COMPILING_LOG2)
54
  exc.name = (char *)"log2";
55
#else
56
  exc.name = (char *)"log";
57
#endif
58
  if (_LIB_VERSION == _SVID_)
59
    exc.retval = -HUGE;
60
  else
61
    exc.retval = nan_with_flags(AMD_F_INVALID);
62
  if (_LIB_VERSION == _POSIX_)
63
    __set_errno(EDOM);
64
  else if (!matherr(&exc))
65
    {
66
      if(_LIB_VERSION == _SVID_)
67
#if defined(COMPILING_LOG10)
68
        (void)fputs("log10: DOMAIN error\n", stderr);
69
#elif defined(COMPILING_LOG2)
70
        (void)fputs("log2: DOMAIN error\n", stderr);
71
#else
72
        (void)fputs("log: DOMAIN error\n", stderr);
73
#endif
74
    __set_errno(EDOM);
75
    }
76
  return exc.retval;
77
}
78
79
#if defined(COMPILING_LOG10)
80
double __log10(double x)
81
#elif defined(COMPILING_LOG2)
82
double __log2(double x)
83
#else
84
double __log(double x)
85
#endif
86
{
87
88
  int expadjust, xexp;
89
  double r, r1, r2, correction, f, f1, f2, q, u, v, z1, z2, poly;
90
  int index;
91
  unsigned long ux;
92
#if defined(COMPILING_LOG10) || defined (COMPILING_LOG2)
93
  unsigned long ut;
94
#endif
95
96
  /*
97
    Computes natural log(x). Algorithm based on:
98
    Ping-Tak Peter Tang
99
    "Table-driven implementation of the logarithm function in IEEE
100
    floating-point arithmetic"
101
    ACM Transactions on Mathematical Software (TOMS)
102
    Volume 16, Issue 4 (December 1990)
103
  */
104
105
/* Arrays ln_lead_table and ln_tail_table contain
106
   leading and trailing parts respectively of precomputed
107
   values of natural log(1+i/64), for i = 0, 1, ..., 64.
108
   ln_lead_table contains the first 24 bits of precision,
109
   and ln_tail_table contains a further 53 bits precision. */
110
111
  static const double ln_lead_table[65] = {
112
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
113
    1.55041813850402832031e-02,   /* 0x3f8fc0a800000000 */
114
    3.07716131210327148438e-02,   /* 0x3f9f829800000000 */
115
    4.58095073699951171875e-02,   /* 0x3fa7745800000000 */
116
    6.06245994567871093750e-02,   /* 0x3faf0a3000000000 */
117
    7.52233862876892089844e-02,   /* 0x3fb341d700000000 */
118
    8.96121263504028320312e-02,   /* 0x3fb6f0d200000000 */
119
    1.03796780109405517578e-01,   /* 0x3fba926d00000000 */
120
    1.17783010005950927734e-01,   /* 0x3fbe270700000000 */
121
    1.31576299667358398438e-01,   /* 0x3fc0d77e00000000 */
122
    1.45181953907012939453e-01,   /* 0x3fc2955280000000 */
123
    1.58604979515075683594e-01,   /* 0x3fc44d2b00000000 */
124
    1.71850204467773437500e-01,   /* 0x3fc5ff3000000000 */
125
    1.84922337532043457031e-01,   /* 0x3fc7ab8900000000 */
126
    1.97825729846954345703e-01,   /* 0x3fc9525a80000000 */
127
    2.10564732551574707031e-01,   /* 0x3fcaf3c900000000 */
128
    2.23143517971038818359e-01,   /* 0x3fcc8ff780000000 */
129
    2.35566020011901855469e-01,   /* 0x3fce270700000000 */
130
    2.47836112976074218750e-01,   /* 0x3fcfb91800000000 */
131
    2.59957492351531982422e-01,   /* 0x3fd0a324c0000000 */
132
    2.71933674812316894531e-01,   /* 0x3fd1675c80000000 */
133
    2.83768117427825927734e-01,   /* 0x3fd22941c0000000 */
134
    2.95464158058166503906e-01,   /* 0x3fd2e8e280000000 */
135
    3.07025015354156494141e-01,   /* 0x3fd3a64c40000000 */
136
    3.18453729152679443359e-01,   /* 0x3fd4618bc0000000 */
137
    3.29753279685974121094e-01,   /* 0x3fd51aad80000000 */
138
    3.40926527976989746094e-01,   /* 0x3fd5d1bd80000000 */
139
    3.51976394653320312500e-01,   /* 0x3fd686c800000000 */
140
    3.62905442714691162109e-01,   /* 0x3fd739d7c0000000 */
141
    3.73716354370117187500e-01,   /* 0x3fd7eaf800000000 */
142
    3.84411692619323730469e-01,   /* 0x3fd89a3380000000 */
143
    3.94993782043457031250e-01,   /* 0x3fd9479400000000 */
144
    4.05465066432952880859e-01,   /* 0x3fd9f323c0000000 */
145
    4.15827870368957519531e-01,   /* 0x3fda9cec80000000 */
146
    4.26084339618682861328e-01,   /* 0x3fdb44f740000000 */
147
    4.36236739158630371094e-01,   /* 0x3fdbeb4d80000000 */
148
    4.46287095546722412109e-01,   /* 0x3fdc8ff7c0000000 */
149
    4.56237375736236572266e-01,   /* 0x3fdd32fe40000000 */
150
    4.66089725494384765625e-01,   /* 0x3fddd46a00000000 */
151
    4.75845873355865478516e-01,   /* 0x3fde744240000000 */
152
    4.85507786273956298828e-01,   /* 0x3fdf128f40000000 */
153
    4.95077252388000488281e-01,   /* 0x3fdfaf5880000000 */
154
    5.04556000232696533203e-01,   /* 0x3fe02552a0000000 */
155
    5.13945698738098144531e-01,   /* 0x3fe0723e40000000 */
156
    5.23248136043548583984e-01,   /* 0x3fe0be72e0000000 */
157
    5.32464742660522460938e-01,   /* 0x3fe109f380000000 */
158
    5.41597247123718261719e-01,   /* 0x3fe154c3c0000000 */
159
    5.50647079944610595703e-01,   /* 0x3fe19ee6a0000000 */
160
    5.59615731239318847656e-01,   /* 0x3fe1e85f40000000 */
161
    5.68504691123962402344e-01,   /* 0x3fe23130c0000000 */
162
    5.77315330505371093750e-01,   /* 0x3fe2795e00000000 */
163
    5.86049020290374755859e-01,   /* 0x3fe2c0e9e0000000 */
164
    5.94707071781158447266e-01,   /* 0x3fe307d720000000 */
165
    6.03290796279907226562e-01,   /* 0x3fe34e2880000000 */
166
    6.11801505088806152344e-01,   /* 0x3fe393e0c0000000 */
167
    6.20240390300750732422e-01,   /* 0x3fe3d90260000000 */
168
    6.28608644008636474609e-01,   /* 0x3fe41d8fe0000000 */
169
    6.36907458305358886719e-01,   /* 0x3fe4618bc0000000 */
170
    6.45137906074523925781e-01,   /* 0x3fe4a4f840000000 */
171
    6.53301239013671875000e-01,   /* 0x3fe4e7d800000000 */
172
    6.61398470401763916016e-01,   /* 0x3fe52a2d20000000 */
173
    6.69430613517761230469e-01,   /* 0x3fe56bf9c0000000 */
174
    6.77398800849914550781e-01,   /* 0x3fe5ad4040000000 */
175
    6.85303986072540283203e-01,   /* 0x3fe5ee02a0000000 */
176
    6.93147122859954833984e-01};  /* 0x3fe62e42e0000000 */
177
178
  static const double ln_tail_table[65] = {
179
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
180
    5.15092497094772879206e-09,   /* 0x3e361f807c79f3db */
181
    4.55457209735272790188e-08,   /* 0x3e6873c1980267c8 */
182
    2.86612990859791781788e-08,   /* 0x3e5ec65b9f88c69e */
183
    2.23596477332056055352e-08,   /* 0x3e58022c54cc2f99 */
184
    3.49498983167142274770e-08,   /* 0x3e62c37a3a125330 */
185
    3.23392843005887000414e-08,   /* 0x3e615cad69737c93 */
186
    1.35722380472479366661e-08,   /* 0x3e4d256ab1b285e9 */
187
    2.56504325268044191098e-08,   /* 0x3e5b8abcb97a7aa2 */
188
    5.81213608741512136843e-08,   /* 0x3e6f34239659a5dc */
189
    5.59374849578288093334e-08,   /* 0x3e6e07fd48d30177 */
190
    5.06615629004996189970e-08,   /* 0x3e6b32df4799f4f6 */
191
    5.24588857848400955725e-08,   /* 0x3e6c29e4f4f21cf8 */
192
    9.61968535632653505972e-10,   /* 0x3e1086c848df1b59 */
193
    1.34829655346594463137e-08,   /* 0x3e4cf456b4764130 */
194
    3.65557749306383026498e-08,   /* 0x3e63a02ffcb63398 */
195
    3.33431709374069198903e-08,   /* 0x3e61e6a6886b0976 */
196
    5.13008650536088382197e-08,   /* 0x3e6b8abcb97a7aa2 */
197
    5.09285070380306053751e-08,   /* 0x3e6b578f8aa35552 */
198
    3.20853940845502057341e-08,   /* 0x3e6139c871afb9fc */
199
    4.06713248643004200446e-08,   /* 0x3e65d5d30701ce64 */
200
    5.57028186706125221168e-08,   /* 0x3e6de7bcb2d12142 */
201
    5.48356693724804282546e-08,   /* 0x3e6d708e984e1664 */
202
    1.99407553679345001938e-08,   /* 0x3e556945e9c72f36 */
203
    1.96585517245087232086e-09,   /* 0x3e20e2f613e85bda */
204
    6.68649386072067321503e-09,   /* 0x3e3cb7e0b42724f6 */
205
    5.89936034642113390002e-08,   /* 0x3e6fac04e52846c7 */
206
    2.85038578721554472484e-08,   /* 0x3e5e9b14aec442be */
207
    5.09746772910284482606e-08,   /* 0x3e6b5de8034e7126 */
208
    5.54234668933210171467e-08,   /* 0x3e6dc157e1b259d3 */
209
    6.29100830926604004874e-09,   /* 0x3e3b05096ad69c62 */
210
    2.61974119468563937716e-08,   /* 0x3e5c2116faba4cdd */
211
    4.16752115011186398935e-08,   /* 0x3e665fcc25f95b47 */
212
    2.47747534460820790327e-08,   /* 0x3e5a9a08498d4850 */
213
    5.56922172017964209793e-08,   /* 0x3e6de647b1465f77 */
214
    2.76162876992552906035e-08,   /* 0x3e5da71b7bf7861d */
215
    7.08169709942321478061e-09,   /* 0x3e3e6a6886b09760 */
216
    5.77453510221151779025e-08,   /* 0x3e6f0075eab0ef64 */
217
    4.43021445893361960146e-09,   /* 0x3e33071282fb989b */
218
    3.15140984357495864573e-08,   /* 0x3e60eb43c3f1bed2 */
219
    2.95077445089736670973e-08,   /* 0x3e5faf06ecb35c84 */
220
    1.44098510263167149349e-08,   /* 0x3e4ef1e63db35f68 */
221
    1.05196987538551827693e-08,   /* 0x3e469743fb1a71a5 */
222
    5.23641361722697546261e-08,   /* 0x3e6c1cdf404e5796 */
223
    7.72099925253243069458e-09,   /* 0x3e4094aa0ada625e */
224
    5.62089493829364197156e-08,   /* 0x3e6e2d4c96fde3ec */
225
    3.53090261098577946927e-08,   /* 0x3e62f4d5e9a98f34 */
226
    3.80080516835568242269e-08,   /* 0x3e6467c96ecc5cbe */
227
    5.66961038386146408282e-08,   /* 0x3e6e7040d03dec5a */
228
    4.42287063097349852717e-08,   /* 0x3e67bebf4282de36 */
229
    3.45294525105681104660e-08,   /* 0x3e6289b11aeb783f */
230
    2.47132034530447431509e-08,   /* 0x3e5a891d1772f538 */
231
    3.59655343422487209774e-08,   /* 0x3e634f10be1fb591 */
232
    5.51581770357780862071e-08,   /* 0x3e6d9ce1d316eb93 */
233
    3.60171867511861372793e-08,   /* 0x3e63562a19a9c442 */
234
    1.94511067964296180547e-08,   /* 0x3e54e2adf548084c */
235
    1.54137376631349347838e-08,   /* 0x3e508ce55cc8c97a */
236
    3.93171034490174464173e-09,   /* 0x3e30e2f613e85bda */
237
    5.52990607758839766440e-08,   /* 0x3e6db03ebb0227bf */
238
    3.29990737637586136511e-08,   /* 0x3e61b75bb09cb098 */
239
    1.18436010922446096216e-08,   /* 0x3e496f16abb9df22 */
240
    4.04248680368301346709e-08,   /* 0x3e65b3f399411c62 */
241
    2.27418915900284316293e-08,   /* 0x3e586b3e59f65355 */
242
    1.70263791333409206020e-08,   /* 0x3e52482ceae1ac12 */
243
    5.76999904754328540596e-08};  /* 0x3e6efa39ef35793c */
244
245
#ifndef COMPILING_LOG2
246
  /* log2_lead and log2_tail sum to an extra-precise version
247
     of log(2) */
248
  static const double
249
    log2_lead = 6.93147122859954833984e-01,  /* 0x3fe62e42e0000000 */
250
    log2_tail = 5.76999904754328540596e-08;  /* 0x3e6efa39ef35793c */
251
#endif
252
253
  static const double
254
  /* Approximating polynomial coefficients for x near 1.0 */
255
    ca_1 = 8.33333333333317923934e-02,  /* 0x3fb55555555554e6 */
256
    ca_2 = 1.25000000037717509602e-02,  /* 0x3f89999999bac6d4 */
257
    ca_3 = 2.23213998791944806202e-03,  /* 0x3f62492307f1519f */
258
    ca_4 = 4.34887777707614552256e-04,  /* 0x3f3c8034c85dfff0 */
259
260
  /* Approximating polynomial coefficients for other x */
261
    cb_1 = 8.33333333333333593622e-02,  /* 0x3fb5555555555557 */
262
    cb_2 = 1.24999999978138668903e-02,  /* 0x3f89999999865ede */
263
    cb_3 = 2.23219810758559851206e-03;  /* 0x3f6249423bd94741 */
264
265
#if defined(COMPILING_LOG10)
266
  /* log10e_lead and log10e_tail sum to an extra-precision
267
     version of log10(e) (19 bits in lead) */
268
  static const double
269
    log10e_lead = 4.34293746948242187500e-01, /* 0x3fdbcb7800000000 */
270
    log10e_tail = 7.3495500964015109100644e-7; /* 0x3ea8a93728719535 */
271
#elif defined(COMPILING_LOG2)
272
  /* log2e_lead and log2e_tail sum to an extra-precision
273
     version of log2(e) (19 bits in lead) */
274
   static const double
275
     log2e_lead = 1.44269180297851562500E+00, /* 0x3FF7154400000000 */
276
     log2e_tail = 3.23791044778235969970E-06; /* 0x3ECB295C17F0BBBE */
277
#endif
278
279
  static const unsigned long
280
    log_thresh1 = 0x3fee0faa00000000,
281
    log_thresh2 = 0x3ff1082c00000000;
282
283
284
  GET_BITS_DP64(x, ux);
285
286
  if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
287
    {
288
      /* x is either NaN or infinity */
289
      if (ux & MANTBITS_DP64)
290
        /* x is NaN */
291
        return x + x; /* Raise invalid if it is a signalling NaN */
292
      else
293
        {
294
          /* x is infinity */
295
          if (ux & SIGNBIT_DP64)
296
            /* x is negative infinity. Return a NaN. */
297
            return retval_errno_edom(x);
298
          else
299
            return x;
300
        }
301
    }
302
  else if (!(ux & ~SIGNBIT_DP64))
303
    /* x is +/-zero. Return -infinity with div-by-zero flag. */
304
    return retval_errno_erange_overflow(x);
305
  else if (ux & SIGNBIT_DP64)
306
    /* x is negative. Return a NaN. */
307
    return retval_errno_edom(x);
308
309
310
  /* log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000
311
     log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000 */
312
  if (ux >= log_thresh1 && ux <= log_thresh2)
313
    {
314
      /* Arguments close to 1.0 are handled separately to maintain
315
         accuracy.
316
317
         The approximation in this region exploits the identity
318
             log( 1 + r ) = log( 1 + u/2 )  -  log( 1 - u/2 ), where
319
             u  = 2r / (2+r).
320
         Note that the right hand side has an odd Taylor series expansion
321
         which converges much faster than the Taylor series expansion of
322
         log( 1 + r ) in r. Thus, we approximate log( 1 + r ) by
323
             u + A1 * u^3 + A2 * u^5 + ... + An * u^(2n+1).
324
325
         One subtlety is that since u cannot be calculated from
326
         r exactly, the rounding error in the first u should be
327
         avoided if possible. To accomplish this, we observe that
328
                       u  =  r  -  r*r/(2+r).
329
         Since x (=1+r) is the input argument, and thus presumed exact,
330
         the formula above approximates u accurately because
331
                       u  =  r  -  correction,
332
         and the magnitude of "correction" (of the order of r*r)
333
         is small.
334
         With these observations, we will approximate log( 1 + r ) by
335
            r + (  (A1*u^3 + ... + An*u^(2n+1)) - correction ).
336
337
         We approximate log(1+r) by an odd polynomial in u, where
338
                  u = 2r/(2+r) = r - r*r/(2+r).
339
      */
340
      r = x - 1.0;
341
      u          = r / (2.0 + r);
342
      correction = r * u;
343
      u          = u + u;
344
      v          = u * u;
345
      r1 = r;
346
      r2 = (u * v * (ca_1 + v * (ca_2 + v * (ca_3 + v * ca_4))) - correction);
347
#if defined(COMPILING_LOG10)
348
      /* At this point r1,r2 is an extra-precise approximation to
349
         natural log(x). Convert it to log10(x) by multiplying
350
         carefully by log10(e).
351
         Shift some bits from r1 to r2 so that log10e_lead*r1
352
         can be computed without rounding error */
353
      r = r1;
354
      GET_BITS_DP64(r1, ut);
355
      PUT_BITS_DP64(ut & 0xffffffff00000000, r1);
356
      r2 = r2 + (r - r1);
357
      return (((log10e_tail*r2) + log10e_tail*r1) + log10e_lead*r2) +
358
        log10e_lead*r1;
359
#elif defined(COMPILING_LOG2)
360
      /* Similarly handle log2(x) by multiplying carefully by log2(e). */
361
      r = r1;
362
      GET_BITS_DP64(r1, ut);
363
      PUT_BITS_DP64(ut & 0xffffffff00000000, r1);
364
      r2 = r2 + (r - r1);
365
      return (((log2e_tail*r2) + log2e_tail*r1) + log2e_lead*r2) +
366
        log2e_lead*r1;
367
#else
368
      return r1 + r2;
369
#endif
370
    }
371
  else
372
    {
373
      /*
374
        First, we decompose the argument x to the form
375
        x  =  2**M  *  (F1  +  F2),
376
        where  1 <= F1+F2 < 2, M has the value of an integer,
377
        F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128.
378
379
        Second, we approximate log( 1 + F2/F1 ) by an odd polynomial
380
        in U, where U  =  2 F2 / (2 F1 + F2).
381
        Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ).
382
        The core approximation calculates
383
        Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U   -   1.
384
        Note that  log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ),
385
        thus, Poly =  2 arctanh( U/2 ) / U  -  1.
386
387
        It is not hard to see that
388
          log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ).
389
        Hence, we return Z1 = log(F1), and  Z2 = log( 1 + F2/F1).
390
        The values of log(F1) are calculated beforehand and stored
391
        in the program.
392
      */
393
394
      if (ux < IMPBIT_DP64)
395
        {
396
          /* The input argument x is denormalized */
397
          /* Normalize f by increasing the exponent by 60
398
             and subtracting a correction to account for the implicit
399
             bit. This replaces a slow denormalized
400
             multiplication by a fast normal subtraction. */
401
          static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */
402
          PUT_BITS_DP64(ux | 0x03d0000000000000, f);
403
          f -= corr;
404
          GET_BITS_DP64(f, ux);
405
          expadjust = 60;
406
        }
407
      else
408
        {
409
          f = x;
410
          expadjust = 0;
411
        }
412
413
      /* Store the exponent of x in xexp and put
414
         f into the range [0.5,1) */
415
      xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust;
416
      PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f);
417
418
      /* Now  x = 2**xexp  * f,  1/2 <= f < 1. */
419
420
      /* Set index to be the nearest integer to 128*f */
421
      /*
422
        r = 128.0 * f;
423
        index = (int)(r + 0.5);
424
      */
425
      /* This code instead of the above can save several cycles.
426
         It only works because 64 <= r < 128, so
427
         the nearest integer is always contained in exactly
428
         7 bits, and the right shift is always the same. */
429
      index = (((ux & 0x000fc00000000000) | 0x0010000000000000) >> 46)
430
        + ((ux & 0x0000200000000000) >> 45);
431
432
      z1 = ln_lead_table[index-64];
433
      q = ln_tail_table[index-64];
434
      f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
435
      f2 = f - f1;
436
      /* At this point, x = 2**xexp * ( f1  +  f2 ) where
437
         f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */
438
439
      /* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5*f2 ) */
440
      u = f2 / (f1 + 0.5 * f2);
441
442
      /* Here, |u| <= 2(exp(1/16)-1) / (exp(1/16)+1).
443
         The core approximation calculates
444
         poly = [log(1 + u/2) - log(1 - u/2)]/u  -  1  */
445
      v = u * u;
446
      poly = (v * (cb_1 + v * (cb_2 + v * cb_3)));
447
      z2 = q + (u + u * poly);
448
449
      /* Now z1,z2 is an extra-precise approximation of log(2f). */
450
451
#if defined (COMPILING_LOG10)
452
      /* Add xexp * log(2) to z1,z2 to get log(x). */
453
      r1 = (xexp * log2_lead + z1);
454
      r2 = (xexp * log2_tail + z2);
455
      /* At this point r1,r2 is an extra-precise approximation to
456
         natural log(x). Convert it to log10(x) by multiplying
457
         carefully by log10(e). */
458
      return (((log10e_tail*r2) + log10e_tail*r1) + log10e_lead*r2) +
459
        log10e_lead*r1;
460
#elif defined(COMPILING_LOG2)
461
      /* Convert to log2(x) by multiplying carefully by log2(e)
462
         and adding xexp. */
463
      r1 = xexp + log2e_lead*z1;
464
      r2 = (((log2e_tail*z2) + log2e_tail*z1) + log2e_lead*z2);
465
      return r1 + r2;
466
#else
467
      /* Add xexp * log(2) to z1,z2 to get the result log(x).
468
         The computed r1 is not subject to rounding error because
469
         xexp has at most 10 significant bits, log(2) has 24 significant
470
         bits, and z1 has up to 24 bits; and the exponents of z1
471
         and z2 differ by at most 6. */
472
      r1 = (xexp * log2_lead + z1);
473
      r2 = (xexp * log2_tail + z2);
474
      /* Natural log(x) */
475
      return r1 + r2;
476
#endif
477
    }
478
}
479
480
#if defined(COMPILING_LOG10)
481
weak_alias (__log10, log10)
482
weak_alias (__log10, __ieee754_log10)
483
#elif defined(COMPILING_LOG2)
484
weak_alias (__log2, log2)
485
weak_alias (__log2, __ieee754_log2)
486
#else
487
weak_alias (__log, log)
488
weak_alias (__log, __ieee754_log)
489
#endif
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_log10.c.x86_64-new-libm (+2 lines)
Line 0 Link Here
1
#define COMPILING_LOG10 1
2
#include <w_log.c>
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_log10f.c.x86_64-new-libm (+2 lines)
Line 0 Link Here
1
#define COMPILING_LOG10 1
2
#include <w_logf.c>
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_log2.c.x86_64-new-libm (+2 lines)
Line 0 Link Here
1
#define COMPILING_LOG2 1
2
#include <w_log.c>
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_log2f.c.x86_64-new-libm (+2 lines)
Line 0 Link Here
1
#define COMPILING_LOG2 1
2
#include <w_logf.c>
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_logf.c.x86_64-new-libm (+375 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_NANF_WITH_FLAGS
13
#define USE_INFINITYF_WITH_FLAGS
14
#include "libm_inlines_amd.h"
15
#undef USE_NANF_WITH_FLAGS
16
#undef USE_INFINITYF_WITH_FLAGS
17
18
/* Deal with errno for out-of-range result */
19
#include "libm_errno_amd.h"
20
static inline float retval_errno_erange_overflow(float x)
21
{
22
  struct exception exc;
23
  exc.arg1 = (double)x;
24
  exc.arg2 = (double)x;
25
  exc.type = SING;
26
#if defined(COMPILING_LOG10)
27
  exc.name = (char *)"log10f";
28
#elif defined(COMPILING_LOG2)
29
  exc.name = (char *)"log2f";
30
#else
31
  exc.name = (char *)"logf";
32
#endif
33
  if (_LIB_VERSION == _SVID_)
34
    exc.retval = -HUGE;
35
  else
36
    exc.retval = -infinityf_with_flags(AMD_F_DIVBYZERO);
37
  if (_LIB_VERSION == _POSIX_)
38
    __set_errno(ERANGE);
39
  else if (!matherr(&exc))
40
    __set_errno(ERANGE);
41
  return exc.retval;
42
}
43
44
/* Deal with errno for out-of-range argument */
45
static inline float retval_errno_edom(float x)
46
{
47
  struct exception exc;
48
  exc.arg1 = (double)x;
49
  exc.arg2 = (double)x;
50
  exc.type = DOMAIN;
51
#if defined(COMPILING_LOG10)
52
  exc.name = (char *)"log10f";
53
#elif defined(COMPILING_LOG2)
54
  exc.name = (char *)"log2f";
55
#else
56
  exc.name = (char *)"logf";
57
#endif
58
  if (_LIB_VERSION == _SVID_)
59
    exc.retval = -HUGE;
60
  else
61
    exc.retval = nanf_with_flags(AMD_F_INVALID);
62
  if (_LIB_VERSION == _POSIX_)
63
    __set_errno(EDOM);
64
  else if (!matherr(&exc))
65
    {
66
      if(_LIB_VERSION == _SVID_)
67
#if defined(COMPILING_LOG10)
68
        (void)fputs("log10f: DOMAIN error\n", stderr);
69
#elif defined(COMPILING_LOG2)
70
        (void)fputs("log2f: DOMAIN error\n", stderr);
71
#else
72
        (void)fputs("logf: DOMAIN error\n", stderr);
73
#endif
74
    __set_errno(EDOM);
75
    }
76
  return exc.retval;
77
}
78
79
#if defined(COMPILING_LOG10)
80
float __log10f(float fx)
81
#elif defined(COMPILING_LOG2)
82
float __log2f(float fx)
83
#else
84
float __logf(float fx)
85
#endif
86
{
87
88
  double x = fx;
89
90
  int xexp;
91
  double r, f, f1, f2, q, u, v, z1, z2, poly;
92
  int index;
93
  unsigned long ux;
94
95
  /*
96
    Computes natural log(x) for float arguments. Algorithm is 
97
    basically a promotion of the arguments to double followed 
98
    by an inlined version of the double algorithm, simplified 
99
    for efficiency (see log_amd.c). Simplifications include:
100
    * Special algorithm for arguments near 1.0 not required
101
    * Scaling of denormalised arguments not required
102
    * Shorter core series approximations used
103
  */
104
105
/* Arrays ln_lead_table and ln_tail_table contain
106
   leading and trailing parts respectively of precomputed
107
   values of natural log(1+i/64), for i = 0, 1, ..., 64.
108
   ln_lead_table contains the first 24 bits of precision,
109
   and ln_tail_table contains a further 53 bits precision. */
110
111
  static const double ln_lead_table[65] = {
112
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
113
    1.55041813850402832031e-02,   /* 0x3f8fc0a800000000 */
114
    3.07716131210327148438e-02,   /* 0x3f9f829800000000 */
115
    4.58095073699951171875e-02,   /* 0x3fa7745800000000 */
116
    6.06245994567871093750e-02,   /* 0x3faf0a3000000000 */
117
    7.52233862876892089844e-02,   /* 0x3fb341d700000000 */
118
    8.96121263504028320312e-02,   /* 0x3fb6f0d200000000 */
119
    1.03796780109405517578e-01,   /* 0x3fba926d00000000 */
120
    1.17783010005950927734e-01,   /* 0x3fbe270700000000 */
121
    1.31576299667358398438e-01,   /* 0x3fc0d77e00000000 */
122
    1.45181953907012939453e-01,   /* 0x3fc2955280000000 */
123
    1.58604979515075683594e-01,   /* 0x3fc44d2b00000000 */
124
    1.71850204467773437500e-01,   /* 0x3fc5ff3000000000 */
125
    1.84922337532043457031e-01,   /* 0x3fc7ab8900000000 */
126
    1.97825729846954345703e-01,   /* 0x3fc9525a80000000 */
127
    2.10564732551574707031e-01,   /* 0x3fcaf3c900000000 */
128
    2.23143517971038818359e-01,   /* 0x3fcc8ff780000000 */
129
    2.35566020011901855469e-01,   /* 0x3fce270700000000 */
130
    2.47836112976074218750e-01,   /* 0x3fcfb91800000000 */
131
    2.59957492351531982422e-01,   /* 0x3fd0a324c0000000 */
132
    2.71933674812316894531e-01,   /* 0x3fd1675c80000000 */
133
    2.83768117427825927734e-01,   /* 0x3fd22941c0000000 */
134
    2.95464158058166503906e-01,   /* 0x3fd2e8e280000000 */
135
    3.07025015354156494141e-01,   /* 0x3fd3a64c40000000 */
136
    3.18453729152679443359e-01,   /* 0x3fd4618bc0000000 */
137
    3.29753279685974121094e-01,   /* 0x3fd51aad80000000 */
138
    3.40926527976989746094e-01,   /* 0x3fd5d1bd80000000 */
139
    3.51976394653320312500e-01,   /* 0x3fd686c800000000 */
140
    3.62905442714691162109e-01,   /* 0x3fd739d7c0000000 */
141
    3.73716354370117187500e-01,   /* 0x3fd7eaf800000000 */
142
    3.84411692619323730469e-01,   /* 0x3fd89a3380000000 */
143
    3.94993782043457031250e-01,   /* 0x3fd9479400000000 */
144
    4.05465066432952880859e-01,   /* 0x3fd9f323c0000000 */
145
    4.15827870368957519531e-01,   /* 0x3fda9cec80000000 */
146
    4.26084339618682861328e-01,   /* 0x3fdb44f740000000 */
147
    4.36236739158630371094e-01,   /* 0x3fdbeb4d80000000 */
148
    4.46287095546722412109e-01,   /* 0x3fdc8ff7c0000000 */
149
    4.56237375736236572266e-01,   /* 0x3fdd32fe40000000 */
150
    4.66089725494384765625e-01,   /* 0x3fddd46a00000000 */
151
    4.75845873355865478516e-01,   /* 0x3fde744240000000 */
152
    4.85507786273956298828e-01,   /* 0x3fdf128f40000000 */
153
    4.95077252388000488281e-01,   /* 0x3fdfaf5880000000 */
154
    5.04556000232696533203e-01,   /* 0x3fe02552a0000000 */
155
    5.13945698738098144531e-01,   /* 0x3fe0723e40000000 */
156
    5.23248136043548583984e-01,   /* 0x3fe0be72e0000000 */
157
    5.32464742660522460938e-01,   /* 0x3fe109f380000000 */
158
    5.41597247123718261719e-01,   /* 0x3fe154c3c0000000 */
159
    5.50647079944610595703e-01,   /* 0x3fe19ee6a0000000 */
160
    5.59615731239318847656e-01,   /* 0x3fe1e85f40000000 */
161
    5.68504691123962402344e-01,   /* 0x3fe23130c0000000 */
162
    5.77315330505371093750e-01,   /* 0x3fe2795e00000000 */
163
    5.86049020290374755859e-01,   /* 0x3fe2c0e9e0000000 */
164
    5.94707071781158447266e-01,   /* 0x3fe307d720000000 */
165
    6.03290796279907226562e-01,   /* 0x3fe34e2880000000 */
166
    6.11801505088806152344e-01,   /* 0x3fe393e0c0000000 */
167
    6.20240390300750732422e-01,   /* 0x3fe3d90260000000 */
168
    6.28608644008636474609e-01,   /* 0x3fe41d8fe0000000 */
169
    6.36907458305358886719e-01,   /* 0x3fe4618bc0000000 */
170
    6.45137906074523925781e-01,   /* 0x3fe4a4f840000000 */
171
    6.53301239013671875000e-01,   /* 0x3fe4e7d800000000 */
172
    6.61398470401763916016e-01,   /* 0x3fe52a2d20000000 */
173
    6.69430613517761230469e-01,   /* 0x3fe56bf9c0000000 */
174
    6.77398800849914550781e-01,   /* 0x3fe5ad4040000000 */
175
    6.85303986072540283203e-01,   /* 0x3fe5ee02a0000000 */
176
    6.93147122859954833984e-01};  /* 0x3fe62e42e0000000 */
177
178
  static const double ln_tail_table[65] = {
179
    0.00000000000000000000e+00,   /* 0x0000000000000000 */
180
    5.15092497094772879206e-09,   /* 0x3e361f807c79f3db */
181
    4.55457209735272790188e-08,   /* 0x3e6873c1980267c8 */
182
    2.86612990859791781788e-08,   /* 0x3e5ec65b9f88c69e */
183
    2.23596477332056055352e-08,   /* 0x3e58022c54cc2f99 */
184
    3.49498983167142274770e-08,   /* 0x3e62c37a3a125330 */
185
    3.23392843005887000414e-08,   /* 0x3e615cad69737c93 */
186
    1.35722380472479366661e-08,   /* 0x3e4d256ab1b285e9 */
187
    2.56504325268044191098e-08,   /* 0x3e5b8abcb97a7aa2 */
188
    5.81213608741512136843e-08,   /* 0x3e6f34239659a5dc */
189
    5.59374849578288093334e-08,   /* 0x3e6e07fd48d30177 */
190
    5.06615629004996189970e-08,   /* 0x3e6b32df4799f4f6 */
191
    5.24588857848400955725e-08,   /* 0x3e6c29e4f4f21cf8 */
192
    9.61968535632653505972e-10,   /* 0x3e1086c848df1b59 */
193
    1.34829655346594463137e-08,   /* 0x3e4cf456b4764130 */
194
    3.65557749306383026498e-08,   /* 0x3e63a02ffcb63398 */
195
    3.33431709374069198903e-08,   /* 0x3e61e6a6886b0976 */
196
    5.13008650536088382197e-08,   /* 0x3e6b8abcb97a7aa2 */
197
    5.09285070380306053751e-08,   /* 0x3e6b578f8aa35552 */
198
    3.20853940845502057341e-08,   /* 0x3e6139c871afb9fc */
199
    4.06713248643004200446e-08,   /* 0x3e65d5d30701ce64 */
200
    5.57028186706125221168e-08,   /* 0x3e6de7bcb2d12142 */
201
    5.48356693724804282546e-08,   /* 0x3e6d708e984e1664 */
202
    1.99407553679345001938e-08,   /* 0x3e556945e9c72f36 */
203
    1.96585517245087232086e-09,   /* 0x3e20e2f613e85bda */
204
    6.68649386072067321503e-09,   /* 0x3e3cb7e0b42724f6 */
205
    5.89936034642113390002e-08,   /* 0x3e6fac04e52846c7 */
206
    2.85038578721554472484e-08,   /* 0x3e5e9b14aec442be */
207
    5.09746772910284482606e-08,   /* 0x3e6b5de8034e7126 */
208
    5.54234668933210171467e-08,   /* 0x3e6dc157e1b259d3 */
209
    6.29100830926604004874e-09,   /* 0x3e3b05096ad69c62 */
210
    2.61974119468563937716e-08,   /* 0x3e5c2116faba4cdd */
211
    4.16752115011186398935e-08,   /* 0x3e665fcc25f95b47 */
212
    2.47747534460820790327e-08,   /* 0x3e5a9a08498d4850 */
213
    5.56922172017964209793e-08,   /* 0x3e6de647b1465f77 */
214
    2.76162876992552906035e-08,   /* 0x3e5da71b7bf7861d */
215
    7.08169709942321478061e-09,   /* 0x3e3e6a6886b09760 */
216
    5.77453510221151779025e-08,   /* 0x3e6f0075eab0ef64 */
217
    4.43021445893361960146e-09,   /* 0x3e33071282fb989b */
218
    3.15140984357495864573e-08,   /* 0x3e60eb43c3f1bed2 */
219
    2.95077445089736670973e-08,   /* 0x3e5faf06ecb35c84 */
220
    1.44098510263167149349e-08,   /* 0x3e4ef1e63db35f68 */
221
    1.05196987538551827693e-08,   /* 0x3e469743fb1a71a5 */
222
    5.23641361722697546261e-08,   /* 0x3e6c1cdf404e5796 */
223
    7.72099925253243069458e-09,   /* 0x3e4094aa0ada625e */
224
    5.62089493829364197156e-08,   /* 0x3e6e2d4c96fde3ec */
225
    3.53090261098577946927e-08,   /* 0x3e62f4d5e9a98f34 */
226
    3.80080516835568242269e-08,   /* 0x3e6467c96ecc5cbe */
227
    5.66961038386146408282e-08,   /* 0x3e6e7040d03dec5a */
228
    4.42287063097349852717e-08,   /* 0x3e67bebf4282de36 */
229
    3.45294525105681104660e-08,   /* 0x3e6289b11aeb783f */
230
    2.47132034530447431509e-08,   /* 0x3e5a891d1772f538 */
231
    3.59655343422487209774e-08,   /* 0x3e634f10be1fb591 */
232
    5.51581770357780862071e-08,   /* 0x3e6d9ce1d316eb93 */
233
    3.60171867511861372793e-08,   /* 0x3e63562a19a9c442 */
234
    1.94511067964296180547e-08,   /* 0x3e54e2adf548084c */
235
    1.54137376631349347838e-08,   /* 0x3e508ce55cc8c97a */
236
    3.93171034490174464173e-09,   /* 0x3e30e2f613e85bda */
237
    5.52990607758839766440e-08,   /* 0x3e6db03ebb0227bf */
238
    3.29990737637586136511e-08,   /* 0x3e61b75bb09cb098 */
239
    1.18436010922446096216e-08,   /* 0x3e496f16abb9df22 */
240
    4.04248680368301346709e-08,   /* 0x3e65b3f399411c62 */
241
    2.27418915900284316293e-08,   /* 0x3e586b3e59f65355 */
242
    1.70263791333409206020e-08,   /* 0x3e52482ceae1ac12 */
243
    5.76999904754328540596e-08};  /* 0x3e6efa39ef35793c */
244
245
  static const double
246
    log2 = 6.931471805599453e-01,       /* 0x3fe62e42fefa39ef */
247
248
  /* Approximating polynomial coefficients */
249
    cb_1 = 8.33333333333333593622e-02,  /* 0x3fb5555555555557 */
250
    cb_2 = 1.24999999978138668903e-02;  /* 0x3f89999999865ede */
251
252
#if defined(COMPILING_LOG10)
253
  static const double
254
    log10e      = 4.34294481903251827651e-01; /* 0x3fdbcb7b1526e50e */
255
#elif defined(COMPILING_LOG2)
256
   static const double
257
     log2e      = 1.44269504088896340735e+00; /* 0x3ff71547652b82fe */
258
#endif
259
260
261
  GET_BITS_DP64(x, ux);
262
263
#if !defined(COMPILING_LOG10) && !defined(COMPILING_LOG2)
264
  if (ux == 0x4005bf0a80000000)
265
    /* Treat this, the number closest to e in float arithmetic,
266
       as a special case and return 1.0 */
267
    return 1.0F;
268
#endif
269
270
  if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
271
    {
272
      /* x is either NaN or infinity */
273
      if (ux & MANTBITS_DP64)
274
        /* x is NaN */
275
        return fx + fx; /* Raise invalid if it is a signalling NaN */
276
      else
277
        {
278
          /* x is infinity */
279
          if (ux & SIGNBIT_DP64)
280
            /* x is negative infinity. Return a NaN. */
281
            return retval_errno_edom(fx);
282
          else
283
            return fx;
284
        }
285
    }
286
  else if (!(ux & ~SIGNBIT_DP64))
287
    /* x is +/-zero. Return -infinity with div-by-zero flag. */
288
    return retval_errno_erange_overflow(fx);
289
  else if (ux & SIGNBIT_DP64)
290
    /* x is negative. Return a NaN. */
291
    return retval_errno_edom(fx);
292
293
294
  /*
295
    First, we decompose the argument x to the form
296
    x  =  2**M  *  (F1  +  F2),
297
    where  1 <= F1+F2 < 2, M has the value of an integer,
298
    F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128.
299
    
300
    Second, we approximate log( 1 + F2/F1 ) by an odd polynomial
301
    in U, where U  =  2 F2 / (2 F2 + F1).
302
    Note that log( 1 + F2/F1 ) = log( 1 + U/2 ) - log( 1 - U/2 ).
303
    The core approximation calculates
304
    Poly = [log( 1 + U/2 ) - log( 1 - U/2 )]/U   -   1.
305
    Note that  log(1 + U/2) - log(1 - U/2) = 2 arctanh ( U/2 ),
306
    thus, Poly =  2 arctanh( U/2 ) / U  -  1.
307
308
    It is not hard to see that
309
    log(x) = M*log(2) + log(F1) + log( 1 + F2/F1 ).
310
    Hence, we return Z1 = log(F1), and  Z2 = log( 1 + F2/F1).
311
    The values of log(F1) are calculated beforehand and stored
312
    in the program.
313
  */
314
315
  f = x;
316
317
  /* Store the exponent of x in xexp and put
318
     f into the range [0.5,1) */
319
  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
320
  PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f);
321
322
  /* Now  x = 2**xexp  * f,  1/2 <= f < 1. */
323
324
  /* Set index to be the nearest integer to 128*f */
325
  /*
326
    r = 128.0 * f;
327
    index = (int)(r + 0.5);
328
  */
329
  /* This code instead of the above can save several cycles.
330
     It only works because 64 <= r < 128, so
331
     the nearest integer is always contained in exactly
332
     7 bits, and the right shift is always the same. */
333
  index = (((ux & 0x000fc00000000000) | 0x0010000000000000) >> 46)
334
    + ((ux & 0x0000200000000000) >> 45);
335
  z1 = ln_lead_table[index-64];
336
  q = ln_tail_table[index-64];
337
  f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
338
  f2 = f - f1;
339
  /* At this point, x = 2**xexp * ( f1  +  f2 ) where
340
     f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */
341
  
342
  /* Calculate u = 2 f2 / ( 2 f1 + f2 ) = f2 / ( f1 + 0.5*f2 ) */
343
  /* u = f2 / (f1 + 0.5 * f2); */
344
  u = f2 / (f1 + 0.5 * f2);
345
  
346
  /* Here, |u| <= 2(exp(1/16)-1) / (exp(1/16)+1).
347
     The core approximation calculates
348
     poly = [log(1 + u/2) - log(1 - u/2)]/u  -  1  */
349
  v = u * u;
350
  poly = (v * (cb_1 + v * cb_2));
351
  z2 = q + (u + u * poly);
352
  
353
  /* Now z1,z2 is an extra-precise approximation of log(f).
354
     Add xexp * log(2) to z1, z2 to get the result log(x). */
355
356
  r = xexp*log2 + z1 + z2;
357
#if defined (COMPILING_LOG10)
358
  return log10e*r;
359
#elif defined(COMPILING_LOG2)
360
  return log2e*r;
361
#else
362
  return r;
363
#endif
364
}
365
366
#if defined(COMPILING_LOG10)
367
weak_alias (__log10f, log10f)
368
weak_alias (__log10f, __ieee754_log10f)
369
#elif defined(COMPILING_LOG2)
370
weak_alias (__log2f, log2f)
371
weak_alias (__log2f, __ieee754_log2f)
372
#else
373
weak_alias (__logf, logf)
374
weak_alias (__logf, __ieee754_logf)
375
#endif
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_pow.c.x86_64-new-libm (+807 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SPLITEXP
13
#define USE_SCALEDOUBLE_2
14
#define USE_SCALEDOUBLE_3
15
#define USE_SCALEDOWNDOUBLE
16
#define USE_INFINITY_WITH_FLAGS
17
#define USE_ZERO_WITH_FLAGS
18
#define USE_NAN_WITH_FLAGS
19
#define USE_VAL_WITH_FLAGS
20
#include "libm_inlines_amd.h"
21
#undef USE_SPLITEXP
22
#undef USE_SCALEDOUBLE_2
23
#undef USE_SCALEDOUBLE_3
24
#undef USE_SCALEDOWNDOUBLE
25
#undef USE_INFINITY_WITH_FLAGS
26
#undef USE_ZERO_WITH_FLAGS
27
#undef USE_NAN_WITH_FLAGS
28
#undef USE_VAL_WITH_FLAGS
29
30
/* Deal with errno for out-of-range result */
31
#include "libm_errno_amd.h"
32
static inline double retval_errno_erange_overflow(double x, double y, int sign)
33
{
34
  struct exception exc;
35
  exc.arg1 = x;
36
  exc.arg2 = y;
37
  exc.type = OVERFLOW;
38
  exc.name = (char *)"pow";
39
  if (_LIB_VERSION == _SVID_)
40
    {
41
      if (sign == 1)
42
        exc.retval = HUGE;
43
      else /* sign = -1 */
44
        exc.retval = -HUGE;
45
    }
46
  else
47
    {
48
      if (sign == 1)
49
        exc.retval = infinity_with_flags(AMD_F_OVERFLOW);
50
      else /* sign == -1 */
51
        exc.retval = -infinity_with_flags(AMD_F_OVERFLOW);
52
    }
53
  if (_LIB_VERSION == _POSIX_)
54
    __set_errno(ERANGE);
55
  else if (!matherr(&exc))
56
    __set_errno(ERANGE);
57
  return exc.retval;
58
}
59
60
static inline double retval_errno_erange_underflow(double x, double y, int sign)
61
{
62
  struct exception exc;
63
  exc.arg1 = x;
64
  exc.arg2 = y;
65
  exc.type = UNDERFLOW;
66
  exc.name = (char *)"pow";
67
  if (sign == 1)
68
    exc.retval = zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
69
  else /* sign == -1 */
70
    exc.retval = -zero_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
71
  if (_LIB_VERSION == _POSIX_)
72
    __set_errno(ERANGE);
73
  else if (!matherr(&exc))
74
    __set_errno(ERANGE);
75
  return exc.retval;
76
}
77
78
/* Deal with errno for out-of-range arguments */
79
static inline double retval_errno_edom(double x, double y, int type)
80
{
81
  struct exception exc;
82
  exc.arg1 = x;
83
  exc.arg2 = y;
84
  exc.type = DOMAIN;
85
  exc.name = (char *)"pow";
86
  if (_LIB_VERSION == _SVID_)
87
    exc.retval = 0.0;
88
  else if (type == 1)
89
    exc.retval = infinity_with_flags(AMD_F_DIVBYZERO);
90
  else if (type == 2)
91
    exc.retval = -infinity_with_flags(AMD_F_DIVBYZERO);
92
  else /* type == 3 */
93
    exc.retval = nan_with_flags(AMD_F_INVALID);
94
  if (_LIB_VERSION == _POSIX_)
95
    __set_errno (EDOM);
96
  if (!matherr(&exc))
97
    {
98
      if (_LIB_VERSION == _SVID_)
99
        (void)fputs("pow: DOMAIN error\n", stderr);
100
      __set_errno(EDOM);
101
    }
102
  return exc.retval;
103
}
104
105
/* Computes the exact product of x and y as the nearly doublelength
   number (*z, *zz): *z receives the rounded product x*y and *zz a
   correction term.

   Each operand is split into a head, obtained by clearing the low 27
   bits of its bit pattern, and a tail (operand minus head).  The
   correction is accumulated from the four partial products in a fixed
   order, which must not be rearranged.  */
static void mul12(double x, double y,
                  double *z, double *zz)
{
  /* Keeps the sign, the exponent and the upper mantissa bits.  */
  static const unsigned long head_mask = 0xfffffffff8000000;
  unsigned long bits;
  double x_head, x_tail, y_head, y_tail;

  /* Head and tail of x.  */
  GET_BITS_DP64(x, bits);
  bits &= head_mask;
  PUT_BITS_DP64(bits, x_head);
  x_tail = x - x_head;

  /* Head and tail of y.  */
  GET_BITS_DP64(y, bits);
  bits &= head_mask;
  PUT_BITS_DP64(bits, y_head);
  y_tail = y - y_head;

  /* Rounded product, then the part of the product it does not hold.  */
  *z = x * y;
  *zz = (((x_head * y_head - *z) + x_head * y_tail) + x_tail * y_head)
    + x_tail * y_tail;
}
124
125
/* Computes the doublelength product of (x,xx) and (y,yy), the result
   being the doublelength number (*z, *zz).

   The leading parts are multiplied with mul12; the two cross terms
   x*yy and xx*y are folded into the low part (the xx*yy term is not
   computed); and the pair is renormalised so that *z carries the
   rounded sum and *zz the remainder.  */
static void mul2(double x, double xx, double y, double yy,
                 double *z, double *zz)
{
  double head, tail;

  /* (head, tail) = x * y as a doublelength pair.  */
  mul12(x, y, &head, &tail);

  /* Add the cross terms to the low part.  */
  tail = x * yy + xx * y + tail;

  /* Renormalise.  */
  *z = head + tail;
  *zz = (head - *z) + tail;
}
136
137
double __pow(double x, double y)
138
{
139
140
  /* Arrays log2_lead_table and log2_tail_table contain
141
     leading and trailing parts respectively of precomputed
142
     values of natural log2(1+i/64), for i = 0, 1, ..., 64.
143
     ln_lead_table contains the first 24 bits of precision,
144
     and ln_tail_table contains a further 53 bits precision. */
145
146
  static const double log2_lead_table[65] = {
147
    0.00000000000000000000E+00,  /* 0x0000000000000000 */
148
    2.23678126931190490723E-02,  /* 0x3F96E79680000000 */
149
    4.43941168487071990967E-02,  /* 0x3FA6BAD360000000 */
150
    6.60891830921173095703E-02,  /* 0x3FB0EB3880000000 */
151
    8.74628350138664245605E-02,  /* 0x3FB663F6E0000000 */
152
    1.08524456620216369629E-01,  /* 0x3FBBC84240000000 */
153
    1.29283010959625244141E-01,  /* 0x3FC08C5880000000 */
154
    1.49747118353843688965E-01,  /* 0x3FC32AE9E0000000 */
155
    1.69924989342689514160E-01,  /* 0x3FC5C01A20000000 */
156
    1.89824551343917846680E-01,  /* 0x3FC84C2BC0000000 */
157
    2.09453359246253967285E-01,  /* 0x3FCACF5E20000000 */
158
    2.28818684816360473633E-01,  /* 0x3FCD49EE40000000 */
159
    2.47927501797676086426E-01,  /* 0x3FCFBC16A0000000 */
160
    2.66786515712738037109E-01,  /* 0x3FD11307C0000000 */
161
    2.85402208566665649414E-01,  /* 0x3FD24407A0000000 */
162
    3.03780734539031982422E-01,  /* 0x3FD37124C0000000 */
163
    3.21928083896636962891E-01,  /* 0x3FD49A7840000000 */
164
    3.39849978685379028320E-01,  /* 0x3FD5C01A20000000 */
165
    3.57551991939544677734E-01,  /* 0x3FD6E221C0000000 */
166
    3.75039428472518920898E-01,  /* 0x3FD800A560000000 */
167
    3.92317414283752441406E-01,  /* 0x3FD91BBA80000000 */
168
    4.09390926361083984375E-01,  /* 0x3FDA337600000000 */
169
    4.26264733076095581055E-01,  /* 0x3FDB47EBE0000000 */
170
    4.42943483591079711914E-01,  /* 0x3FDC592FA0000000 */
171
    4.59431618452072143555E-01,  /* 0x3FDD6753E0000000 */
172
    4.75733429193496704102E-01,  /* 0x3FDE726AA0000000 */
173
    4.91853088140487670898E-01,  /* 0x3FDF7A8560000000 */
174
    5.07794618606567382812E-01,  /* 0x3FE03FDA80000000 */
175
    5.23561954498291015625E-01,  /* 0x3FE0C10500000000 */
176
    5.39158761501312255859E-01,  /* 0x3FE140C9E0000000 */
177
    5.54588794708251953125E-01,  /* 0x3FE1BF3100000000 */
178
    5.69855570793151855469E-01,  /* 0x3FE23C41C0000000 */
179
    5.84962487220764160156E-01,  /* 0x3FE2B80340000000 */
180
    5.99912822246551513672E-01,  /* 0x3FE3327C60000000 */
181
    6.14709794521331787109E-01,  /* 0x3FE3ABB3E0000000 */
182
    6.29356563091278076172E-01,  /* 0x3FE423B060000000 */
183
    6.43856167793273925781E-01,  /* 0x3FE49A7840000000 */
184
    6.58211469650268554688E-01,  /* 0x3FE5101180000000 */
185
    6.72425329685211181641E-01,  /* 0x3FE5848220000000 */
186
    6.86500489711761474609E-01,  /* 0x3FE5F7CFE0000000 */
187
    7.00439691543579101562E-01,  /* 0x3FE66A0080000000 */
188
    7.14245498180389404297E-01,  /* 0x3FE6DB1960000000 */
189
    7.27920413017272949219E-01,  /* 0x3FE74B1FC0000000 */
190
    7.41466939449310302734E-01,  /* 0x3FE7BA18E0000000 */
191
    7.54887461662292480469E-01,  /* 0x3FE82809C0000000 */
192
    7.68184304237365722656E-01,  /* 0x3FE894F740000000 */
193
    7.81359672546386718750E-01,  /* 0x3FE900E600000000 */
194
    7.94415831565856933594E-01,  /* 0x3FE96BDAC0000000 */
195
    8.07354867458343505859E-01,  /* 0x3FE9D5D9E0000000 */
196
    8.20178925991058349609E-01,  /* 0x3FEA3EE7E0000000 */
197
    8.32889974117279052734E-01,  /* 0x3FEAA708E0000000 */
198
    8.45490038394927978516E-01,  /* 0x3FEB0E4120000000 */
199
    8.57980966567993164062E-01,  /* 0x3FEB749480000000 */
200
    8.70364665985107421875E-01,  /* 0x3FEBDA0700000000 */
201
    8.82643043994903564453E-01,  /* 0x3FEC3E9CA0000000 */
202
    8.94817709922790527344E-01,  /* 0x3FECA258C0000000 */
203
    9.06890571117401123047E-01,  /* 0x3FED053F60000000 */
204
    9.18863236904144287109E-01,  /* 0x3FED6753E0000000 */
205
    9.30737316608428955078E-01,  /* 0x3FEDC899A0000000 */
206
    9.42514479160308837891E-01,  /* 0x3FEE291420000000 */
207
    9.54196274280548095703E-01,  /* 0x3FEE88C6A0000000 */
208
    9.65784251689910888672E-01,  /* 0x3FEEE7B460000000 */
209
    9.77279901504516601562E-01,  /* 0x3FEF45E080000000 */
210
    9.88684654235839843750E-01,  /* 0x3FEFA34E00000000 */
211
    1.00000000000000000000E+00}; /* 0x3FF0000000000000 */
212
213
  static const double log2_tail_table[65] = {
214
    0.00000000000000000000E+00,  /* 0x0000000000000000 */
215
    3.35335459194866276130E-10,  /* 0x3DF70B48A629B89C */
216
    2.50974623855642191448E-09,  /* 0x3E258EFD87313606 */
217
    7.36565512335896390543E-09,  /* 0x3E3FA29F9AB3CF74 */
218
    6.23647298369351871453E-09,  /* 0x3E3AC913167CCC53 */
219
    1.57952684118980398844E-10,  /* 0x3DE5B5774C7658A0 */
220
    5.98534121117166302413E-09,  /* 0x3E39B4F3C72C4F78 */
221
    1.15083837244190179789E-09,  /* 0x3E13C570D0FA8F90 */
222
    1.20996228487473215213E-08,  /* 0x3E49FBD6879FA00B */
223
    7.53609938318432874467E-09,  /* 0x3E402F03B2FDD224 */
224
    6.38269581457264661091E-09,  /* 0x3E3B69D927DFC23C */
225
    5.67952040356156465017E-09,  /* 0x3E3864B2DF91E96A */
226
    1.16459094073677371864E-08,  /* 0x3E4902680A23A8D9 */
227
    2.49821633265319561946E-08,  /* 0x3E5AD30B75CB0970 */
228
    1.02955826924364881206E-08,  /* 0x3E461C0E73048B72 */
229
    1.36380709420054099385E-08,  /* 0x3E4D499BD9B32266 */
230
    1.09907253849796912371E-08,  /* 0x3E479A3715FC9256 */
231
    2.41992456974946430426E-08,  /* 0x3E59FBD6879FA00B */
232
    1.26785390154315961619E-08,  /* 0x3E4B3A19BCAF1AA4 */
233
    2.87440583546118995874E-09,  /* 0x3E28B0E2A19575B0 */
234
    8.49500784748945819113E-09,  /* 0x3E423E2E1169656A */
235
    9.77661777174938265384E-09,  /* 0x3E44FEC0A13AF880 */
236
    2.16260023578294509223E-08,  /* 0x3E573882A0A4146E */
237
    1.22576485902594488001E-08,  /* 0x3E4A52B6ACFCFDCA */
238
    1.85225112644675216321E-10,  /* 0x3DE975077F1F5F00 */
239
    1.77290105086271740075E-09,  /* 0x3E1E754D20C519E0 */
240
    8.18918703987935816281E-09,  /* 0x3E41960D9D9C3262 */
241
    2.15921288850262793860E-08,  /* 0x3E572F32FE672868 */
242
    1.55872185666914610882E-09,  /* 0x3E1AC754CB104AE0 */
243
    4.96067191344004864525E-08,  /* 0x3E6AA1E5439E15A5 */
244
    5.69693854190458063634E-08,  /* 0x3E6E95D00DE3B513 */
245
    3.75377959861950863279E-08,  /* 0x3E642727C8080ECC */
246
    1.35003920212974864041E-08,  /* 0x3E4CFDEB43CFD004 */
247
    1.99405761661543437744E-08,  /* 0x3E5569394D90D724 */
248
    4.95938764277745619566E-08,  /* 0x3E6AA02166CCCAB2 */
249
    5.69883315429349605246E-08,  /* 0x3E6E986AA9670761 */
250
    2.19814507699593824742E-08,  /* 0x3E579A3715FC9256 */
251
    1.31015261824841576777E-08,  /* 0x3E4C22A3E377A524 */
252
    1.22862844080671745121E-08,  /* 0x3E4A6274CF0E362C */
253
    3.74714569064514928410E-08,  /* 0x3E641E09AEB8CB1A */
254
    2.65975130588343109077E-08,  /* 0x3E5C8F11979A5DB6 */
255
    1.94857332324691494283E-08,  /* 0x3E54EC3293B2FBE0 */
256
    4.15459262300620263689E-08,  /* 0x3E664E0753C6E578 */
257
    4.69518366451302198484E-08,  /* 0x3E693502E409EAB7 */
258
    4.05011760638924658298E-08,  /* 0x3E65BE7072DBDC04 */
259
    2.05395606358225316367E-08,  /* 0x3E560DDF1680DD44 */
260
    4.09782728853196822622E-08,  /* 0x3E660002CCFE43F5 */
261
    3.47842490297177925737E-08,  /* 0x3E62ACB5F5EFEC49 */
262
    5.45992606015825934783E-08,  /* 0x3E6D5010B3666559 */
263
    3.64241293587091694274E-08,  /* 0x3E638E181ED0798D */
264
    4.00474626225128781862E-08,  /* 0x3E658014D37CDE37 */
265
    1.25494472416488406547E-08,  /* 0x3E4AF321AF5E9BB4 */
266
    2.85595789566572715872E-08,  /* 0x3E5EAA65B49696E2 */
267
    5.35982971014292903334E-08,  /* 0x3E6CC67E6DB516DE */
268
    5.36693769435427990824E-09,  /* 0x3E370D02A99B4C58 */
269
    5.33851529883522815863E-08,  /* 0x3E6CA9331635FEE3 */
270
    2.44911174062771809500E-08,  /* 0x3E5A4C112CE6312E */
271
    3.70450225289350432643E-10,  /* 0x3DF975077F1F5F00 */
272
    2.09544573213940723936E-08,  /* 0x3E567FEAD8BCCE74 */
273
    2.61789310367290825660E-08,  /* 0x3E5C1C02803F7554 */
274
    3.61063271131029934309E-08,  /* 0x3E63626A72AA21A3 */
275
    3.29721761549390770201E-08,  /* 0x3E61B3A9507D6DC1 */
276
    2.19953998687869412865E-08,  /* 0x3E579E0CAA9C9AB6 */
277
    3.25363260095299997864E-08,  /* 0x3E6177C23362928B */
278
    0.00000000000000000000E+00}; /* 0x0000000000000000 */
279
280
  static const double
281
    /* Reciprocal of log(2.0) */
282
    reclog2_lead =     1.44269504088896338700E+00, /* 0x3FF71547652B82FE */
283
    reclog2_tail =     2.03552737409310207851E-17; /* 0x3C7777D0FFDA0D20 */
284
285
  const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */
286
287
  unsigned long ux, ax, uy, ay, mask;
288
  int yexp, inty, xpos, ypos, negateres;
289
  double r, u1, u2, w, w1, w2;
290
  volatile int dummy;
291
292
  double u, r1, r2, f, z1, z2, q, f1, f2, poly;
293
  int xexp, expadjust, index, iw;
294
295
  double argx = x, argy = y;
296
297
  GET_BITS_DP64(x, ux);
298
  ax = ux & (~SIGNBIT_DP64);
299
  xpos = ax == ux;
300
  GET_BITS_DP64(y, uy);
301
  ay = uy & (~SIGNBIT_DP64);
302
  ypos = ay == uy;
303
304
  if (ux == 0x3ff0000000000000)
305
    {
306
      /* x = +1.0. Return +1.0 for all y, even NaN,
307
         raising invalid only if y is a signalling NaN */
308
      if (y + 1.0 == 2.0) dummy = 1;
309
      return 1.0;
310
    }
311
  else if (ay == 0)
312
    {
313
      /* y is zero. Return 1.0, even if x is infinity or NaN,
314
         raising invalid only if x is a signalling NaN */
315
      if (x + 1.0 == 2.0) dummy = 1;
316
      return 1.0;
317
    }
318
  else if (((ax & EXPBITS_DP64) == EXPBITS_DP64) &&
319
           (ax & MANTBITS_DP64))
320
    /* x is NaN. Return NaN, with invalid exception if it's
321
       a signalling NaN. */
322
    return x + x;
323
  else if (((ay & EXPBITS_DP64) == EXPBITS_DP64) &&
324
             (ay & MANTBITS_DP64))
325
   /* y is NaN. Return NaN, with invalid exception if y
326
      is a signalling NaN. */
327
   return y + y;
328
  else if (uy == 0x3ff0000000000000)
329
    /* y is 1.0; return x */
330
    return x;
331
  else if ((ay & EXPBITS_DP64) > 0x43e0000000000000)
332
    {
333
      /* y is infinite or so large that the result would
334
         overflow or underflow. Flags should be raised
335
         unless y is an exact infinity. */
336
      int yinf = (ay == EXPBITS_DP64);
337
      if (ypos)
338
        {
339
          /* y is +ve */
340
          if (ax == 0)
341
            /* abs(x) = 0.0. */
342
            return 0.0;
343
          else if (ax < 0x3ff0000000000000)
344
            {
345
              /* abs(x) < 1.0 */
346
              if (yinf)
347
                return 0.0;
348
              else
349
                return retval_errno_erange_underflow(argx, argy, 1);
350
            }
351
          else if (ax == 0x3ff0000000000000)
352
            /* abs(x) = 1.0. */
353
            return 1.0;
354
          else
355
            {
356
              /* abs(x) > 1.0 */
357
              if (yinf)
358
                return infinity_with_flags(0);
359
              else
360
                return retval_errno_erange_overflow(argx, argy, 1);
361
            }
362
        }
363
      else
364
        {
365
          /* y is -ve */
366
          if (ax == 0)
367
            /* abs(x) = 0.0. Return +infinity. */
368
            return retval_errno_edom(argx, argy, 1);
369
          else if (ax < 0x3ff0000000000000)
370
            {
371
            /* abs(x) < 1.0; return +infinity. */
372
              if (yinf)
373
                return infinity_with_flags(0);
374
              else
375
                return retval_errno_erange_overflow(argx, argy, 1);
376
            }
377
          else if (ax == 0x3ff0000000000000)
378
            /* abs(x) = 1.0. */
379
            return 1.0;
380
          else
381
            {
382
              /* abs(x) > 1.0 */
383
              if (yinf)
384
                return 0.0;
385
              else
386
                return retval_errno_erange_underflow(argx, argy, 1);
387
            }
388
        }
389
    }
390
391
  /* See whether y is an integer.
392
     inty = 0 means not an integer.
393
     inty = 1 means odd integer.
394
     inty = 2 means even integer.
395
  */
396
  yexp = ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 + 1;
397
  if (yexp < 1)
398
    inty = 0;
399
  else if (yexp > 53)
400
    inty = 2;
401
  else /* 1 <= yexp <= 53 */
402
    {
403
      /* Mask out the bits of r that we don't want */
404
      mask = (1L << (53 - yexp)) - 1;
405
      if ((uy & mask) != 0)
406
        inty = 0;
407
      else if (((uy & ~mask) >> (53 - yexp)) & 0x0000000000000001)
408
        inty = 1;
409
      else
410
        inty = 2;
411
    }
412
413
  if ((ax & EXPBITS_DP64) == EXPBITS_DP64)
414
    {
415
      /* x is infinity (NaN was already ruled out). */
416
      if (xpos)
417
        {
418
          /* x is +infinity */
419
          if (ypos)
420
            /* y > 0.0 */
421
            return x;
422
          else
423
            return 0.0;
424
        }
425
      else
426
        {
427
          /* x is -infinity */
428
          if (inty == 1)
429
            {
430
              /* y is an odd integer */
431
              if (ypos)
432
                /* Result is -infinity */
433
                return x;
434
              else
435
                return -zero_with_flags(0);
436
            }
437
          else
438
            {
439
              if (ypos)
440
                /* Result is +infinity */
441
                return -x;
442
              else
443
                return 0.0;
444
            }
445
        }
446
    }
447
  else if (ax == 0)
448
    {
449
      /* x is zero */
450
      if (xpos)
451
        {
452
          /* x is +0.0 */
453
          if (ypos)
454
            /* y is positive; return +0.0 for all cases */
455
            return x;
456
          else
457
            /* y is negative; return +infinity with div-by-zero
458
               for all cases */
459
            return retval_errno_edom(argx, argy, 1);
460
        }
461
      else
462
        {
463
          /* x is -0.0 */
464
          if (ypos)
465
            {
466
              /* y is positive */
467
              if (inty == 1)
468
                /* -0.0 raised to a positive odd integer returns -0.0 */
469
                return x;
470
              else
471
                /* Return +0.0 */
472
                return -x;
473
            }
474
          else
475
            {
476
              /* y is negative */
477
              if (inty == 1)
478
                /* -0.0 raised to a negative odd integer returns -infinity
479
                   with div-by-zero */
480
                return retval_errno_edom(argx, argy, 2);
481
              else
482
                /* Return +infinity with div-by-zero */
483
                return retval_errno_edom(argx, argy, 1);
484
            }
485
        }
486
    }
487
488
  negateres = 0;
489
  if (!xpos)
490
    {
491
      /* x is negative */
492
      if (inty)
493
        {
494
          /* It's OK because y is an integer. */
495
          ux = ax;
496
          PUT_BITS_DP64(ux, x); /* x = abs(x) */
497
          /* If y is odd, the result will be negative */
498
          negateres = (inty == 1);
499
        }
500
      else
501
        /* y is not an integer. Return a NaN. */
502
        return retval_errno_edom(argx, argy, 3);
503
    }
504
505
  if (ay < 0x3c00000000000000)   /* abs(y) < 2^(-63) */
506
      {
507
        /* y is close enough to zero for the result to be 1.0
508
           no matter what the size of x */
509
        return 1.0 + y;
510
      }
511
512
  /*
513
    Calculate log2(x)
514
515
    First, we decompose the argument x to the form
516
    x  =  2**M  *  (F1  +  F2),
517
    where  1 <= F1+F2 < 2, M has the value of an integer,
518
    F1 = 1 + j/64, j ranges from 0 to 64, and |F2| <= 1/128.
519
520
    Second, we approximate log2( 1 + F2/F1 ) by a polynomial
521
    in U, where U  =  2 F2 / (2 F1 + F2).
522
    Note that log2( 1 + F2/F1 ) = log2( 1 + U/2 ) - log2( 1 - U/2 ).
523
    The core approximation calculates
524
    Poly = [log2( 1 + U/2 ) - log2( 1 - U/2 )]/U   -   1.
525
526
    It is not hard to see that
527
    log2(x) = M + log2(F1) + log2( 1 + F2/F1 ).
528
    Hence, we return Z1 = log2(F1), and  Z2 = log2( 1 + F2/F1).
529
    The values of log2(F1) are calculated beforehand and stored
530
    in the program.
531
  */
532
533
  if (ux < IMPBIT_DP64)
534
      {
535
        /* The input argument x is denormalized */
536
        /* Normalize f by increasing the exponent by 60
537
           and subtracting a correction to account for the implicit
538
           bit. This replaces a slow denormalized
539
           multiplication by a fast normal subtraction. */
540
        static const double corr = 2.5653355008114851558350183e-290; /* 0x03d0000000000000 */
541
        PUT_BITS_DP64(ux | 0x03d0000000000000, f);
542
        f -= corr;
543
        GET_BITS_DP64(f, ux);
544
        expadjust = 60;
545
      }
546
    else
547
      {
548
        f = x;
549
        expadjust = 0;
550
      }
551
552
  /* Store the exponent of x in xexp and put
553
     f into the range [0.5,1) */
554
  xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64 - expadjust;
555
  PUT_BITS_DP64((ux & MANTBITS_DP64) | HALFEXPBITS_DP64, f);
556
557
  /* Now  x = 2**xexp  * f,  1/2 <= f < 1. */
558
559
  /* Set index to be the nearest integer to 128*f */
560
  /*
561
    r = 128.0 * f;
562
    index = (int)(r + 0.5);
563
  */
564
  /* This code instead of the above can save several cycles.
565
     It only works because 64 <= r < 128, so
566
     the nearest integer is always contained in exactly
567
     7 bits, and the right shift is always the same. */
568
      index = (((ux & 0x000fc00000000000) | 0x0010000000000000) >> 46)
569
        + ((ux & 0x0000200000000000) >> 45);
570
571
  z1 = log2_lead_table[index-64];
572
  q = log2_tail_table[index-64];
573
  f1 = index * 0.0078125; /* 0.0078125 = 1/128 */
574
  f2 = f - f1;
575
  /* At this point, x = 2**xexp * ( f1  +  f2 ) where
576
     f1 = j/128, j = 64, 65, ..., 128 and |f2| <= 1/256. */
577
578
  /* Compute z2 from Taylor series of log2(1+f1/f2) */
579
580
  u = f2/f1;
581
#if 0
582
  div2(f2,0.0,f1,0.0,&r1,&r2);
583
#else
584
  {
585
    double cc, h, hh;
586
    mul12(u, f1, &h, &hh);
587
    cc = ((f2 - h) - hh) / f1;
588
    r1 = u + cc;
589
    r2 = (u - r1) + cc;
590
  }
591
#endif
592
  poly = -u*(u*(1./2.-u*(1./3.-u*
593
                         (1./4.-u*(1./5.-u*(1./6.-u*
594
                                            (1./7.-u*(1./8.))))))));
595
#if 0
596
  add2(r1,r2,poly,0.0,&r1,&r2);
597
#else
598
  {
599
    double r, s;
600
    r = r1 + poly;
601
    s = ((r1 - r) + poly) + r2;
602
    r1 = r + s;
603
    r2 = (r - r1) + s;
604
  }
605
#endif
606
607
  mul2(reclog2_lead,reclog2_tail,r1,r2,&r1,&r2);
608
609
#if 0
610
  add2(r1,r2,q,0.0,&r1,&r2);
611
#else
612
  {
613
    if (r1 == 0.0)
614
      r1 = q;
615
    else if (q != 0.0)
616
      {
617
        double r, s;
618
        r = r1 + q;
619
        s = ((r1 - r) + q) + r2;
620
        r1 = r + s;
621
        r2 = (r - r1) + s;
622
      }
623
  }
624
#endif
625
626
#if 0
627
  add2(z1,0.0,r1,r2,&z1,&z2);
628
#else
629
  {
630
    double r, s;
631
    r = z1 + r1;
632
    s = ((z1 - r) + r1) + r2;
633
    z1 = r + s;
634
    z2 = (r - z1) + s;
635
  }
636
#endif
637
638
  /* Now z1,z2 is an extra-precise approximation of log2(2f).
639
     Add xexp to z1,z2 to get the result log2(x).
640
     The computed r1 is not subject to rounding error because
641
     xexp has at most 10 significant bits, log(2) has 24 significant
642
     bits, and z1 has up to 24 bits; and the exponents of z1
643
     and z2 differ by at most 6. */
644
645
#if 0
646
  add2(z1,z2,xexp,0.0,&u1,&u2);
647
#else
648
  {
649
    double r, s;
650
    r = z1 + xexp;
651
    s = ((xexp - r) + z1) + z2;
652
    u1 = r + s;
653
    u2 = (r - u1) + s;
654
  }
655
#endif
656
657
  /* end of log2(x) calculation*/
658
659
  /* Test for overflow and underflow due to y*log2(x)
660
     being too large or small. */
661
662
  if ((u1+u2)*y > 1025)
663
    {
664
      if (negateres)
665
        return retval_errno_erange_overflow(argx, argy, -1);
666
      else
667
        return retval_errno_erange_overflow(argx, argy, 1);
668
    }
669
  else if ((u1+u2)*y < -1074)
670
    {
671
      if (negateres)
672
        return retval_errno_erange_underflow(argx, argy, -1);
673
      else
674
        return retval_errno_erange_underflow(argx, argy, 1);
675
    }
676
677
  /* Carefully compute log2(x) * y */
678
#if 0
679
  mul2(u1, u2, y, 0.0, &w1, &w2);
680
#else
681
  {
682
    double c, cc;
683
    mul12(u1, y, &c, &cc);
684
    cc = u2 * y + cc;
685
    w1 = c + cc;
686
    w2 = (c - w1) + cc;
687
  }
688
#endif
689
690
  w = w1 + w2;
691
  iw = (int)(w);
692
693
#if 0
694
  sub2(w1, w2, (double)iw, 0.0, &w1, &w2);
695
#else
696
  {
697
    double a, b;
698
    a = w1 - iw;
699
    b = ((w1 - a) - iw) + w2;
700
    w1 = a + b;
701
    w2 = (a - w1) + b;
702
  }
703
#endif
704
705
  w = w1 + w2;
706
707
  /* The following code computes r = exp2(w) */
708
709
  {
710
    static const double
711
      log2 = 6.931471805599453094178e-01, /* 0x3fe62e42fefa39ef */
712
      log2_lead = 6.93147167563438415527E-01, /* 0x3fe62e42f8000000 */
713
      log2_tail = 1.29965068938898869640E-08, /* 0x3e4be8e7bcd5e4f1 */
714
      one_by_32_lead = 0.03125;
715
716
    double p, z1, z2, z, hx, tx, y1, y2;
717
    int m, n;
718
719
    GET_BITS_DP64(w, ux);
720
    ax = ux & (~SIGNBIT_DP64);
721
722
    /* Handle small arguments separately */
723
    if (ax < 0x3fb7154764ee6c2f)   /* abs(x) < 1/(16*log2) */
724
      {
725
        if (ax < 0x3c00000000000000)   /* abs(x) < 2^(-63) */
726
          z = 1.0 + w; /* Raises inexact if x is non-zero */
727
        else
728
          {
729
            /* Split x into hx (head) and tx (tail). */
730
            unsigned long u;
731
            hx = w;
732
            GET_BITS_DP64(hx, u);
733
            u &= 0xfffffffff8000000;
734
            PUT_BITS_DP64(u, hx);
735
            tx = w - hx;
736
            /* Carefully multiply x by log2. y1 is the most significant
737
               part of the result, and y2 the least significant part */
738
            y1 = w * log2_lead;
739
            y2 = (((hx * log2_lead - y1) + hx * log2_tail) +
740
                  tx * log2_lead) + tx * log2_tail;
741
742
            p = y1 + y2;
743
            z = (9.99564649780173690e-1 +
744
                 (1.61251249355268050e-5 +
745
                  (2.37986978239838493e-2 +
746
                   2.68724774856111190e-7*p)*p)*p)/
747
              (9.99564649780173692e-1 +
748
               (-4.99766199765151309e-1 +
749
                (1.070876894098586184e-1 +
750
                 (-1.189773642681502232e-2 +
751
                  5.9480622371960190616e-4*p)*p)*p)*p);
752
753
            z = ((z * y1) + (z * y2)) + 1.0;
754
          }
755
        r = scaleDouble_2(z, iw);
756
      }
757
    else
758
      {
759
        /* Find m, z1 and z2 such that exp2(x) = 2**m * (z1 + z2) */
760
        splitexp(w, log2, 32.0, one_by_32_lead, 0.0, &m, &z1, &z2);
761
762
        /* Scale (z1 + z2) by 2.0**(m + iw) */
763
764
        n = m+iw;
765
        z = z1+z2;
766
767
        if (n < -1022)
768
          { /* Result will be denormalised after scaling
769
               down by 2**n. Using scaleDownDouble instead
770
               of scaleDouble_3 is faster in this case. */
771
            GET_BITS_DP64(z, ux);
772
            scaleDownDouble(ux, -n, &ux);
773
            PUT_BITS_DP64(ux, r);
774
          }
775
        else
776
          r = scaleDouble_3(z, n);
777
      }
778
  }
779
780
  /* If r overflowed or underflowed we need to deal with errno */
781
  if (r > large)
782
    {
783
      /* Result has overflowed. */
784
      if (negateres)
785
        return retval_errno_erange_overflow(argx, argy, -1);
786
      else
787
        return retval_errno_erange_overflow(argx, argy, 1);
788
    }
789
  else if (r == 0.0)
790
    {
791
      /* Result has underflowed. */
792
      if (negateres)
793
        return retval_errno_erange_underflow(argx, argy, -1);
794
      else
795
        return retval_errno_erange_underflow(argx, argy, 1);
796
    }
797
  else
798
    {
799
      if (negateres)
800
        return -r;
801
      else
802
        return r;
803
    }
804
}
805
806
/* Make the implementation above available under the public name pow
   (presumably via glibc's weak_alias macro; its definition is not
   visible in this file). */
weak_alias (__pow, pow)
807
/* Also provide __pow under the internal name __ieee754_pow. */
weak_alias (__pow, __ieee754_pow)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_powf.c.x86_64-new-libm (+358 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_ZEROF_WITH_FLAGS
13
#define USE_INFINITYF_WITH_FLAGS
14
#define USE_NANF_WITH_FLAGS
15
#include "libm_inlines_amd.h"
16
#undef USE_ZEROF_WITH_FLAGS
17
#undef USE_INFINITYF_WITH_FLAGS
18
#undef USE_NANF_WITH_FLAGS
19
20
/* Deal with errno for out-of-range result */
21
#include "libm_errno_amd.h"
22
static inline float retval_errno_erange_overflow(float x, float y, int sign)
23
{
24
  struct exception exc;
25
  exc.arg1 = (double)x;
26
  exc.arg2 = (double)y;
27
  exc.type = OVERFLOW;
28
  exc.name = (char *)"powf";
29
  if (_LIB_VERSION == _SVID_)
30
    {
31
      if (sign == 1)
32
        exc.retval = HUGE;
33
      else /* sign = -1 */
34
        exc.retval = -HUGE;
35
    }
36
  else
37
    {
38
      if (sign == 1)
39
        exc.retval = infinityf_with_flags(AMD_F_OVERFLOW);
40
      else /* sign == -1 */
41
        exc.retval = -infinityf_with_flags(AMD_F_OVERFLOW);
42
    }
43
  if (_LIB_VERSION == _POSIX_)
44
    __set_errno(ERANGE);
45
  else if (!matherr(&exc))
46
    __set_errno(ERANGE);
47
  return exc.retval;
48
}
49
50
static inline float retval_errno_erange_underflow(float x, float y, int sign)
51
{
52
  struct exception exc;
53
  exc.arg1 = (double)x;
54
  exc.arg2 = (double)y;
55
  exc.type = UNDERFLOW;
56
  exc.name = (char *)"powf";
57
  if (sign == 1)
58
    exc.retval = zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
59
  else /* sign == -1 */
60
    exc.retval = -zerof_with_flags(AMD_F_UNDERFLOW | AMD_F_INEXACT);
61
  if (_LIB_VERSION == _POSIX_)
62
    __set_errno(ERANGE);
63
  else if (!matherr(&exc))
64
    __set_errno(ERANGE);
65
  return exc.retval;
66
}
67
68
/* Deal with errno for out-of-range arguments */
69
static inline float retval_errno_edom(float x, float y, int type)
70
{
71
  struct exception exc;
72
  exc.arg1 = (double)x;
73
  exc.arg2 = (double)y;
74
  exc.type = DOMAIN;
75
  exc.name = (char *)"powf";
76
  if (_LIB_VERSION == _SVID_)
77
    exc.retval = 0.0;
78
  else if (type == 1)
79
    exc.retval = infinityf_with_flags(AMD_F_DIVBYZERO);
80
  else if (type == 2)
81
    exc.retval = -infinityf_with_flags(AMD_F_DIVBYZERO);
82
  else /* type == 3 */
83
    exc.retval = nanf_with_flags(AMD_F_INVALID);
84
  if (_LIB_VERSION == _POSIX_)
85
    __set_errno (EDOM);
86
  if (!matherr(&exc))
87
    {
88
      if (_LIB_VERSION == _SVID_)
89
        (void)fputs("pow: DOMAIN error\n", stderr);
90
      __set_errno(EDOM);
91
    }
92
  return exc.retval;
93
}
94
95
float __powf(float x, float y)
96
{
97
  unsigned int ux, ax, uy, ay, mask;
98
  int yexp, inty, xpos, ypos, negateres;
99
  double dx, dy, dw, dlog2, dr;
100
  volatile int dummy;
101
102
  /* Largest float, stored as a double */
103
  const double large = 3.40282346638528859812e+38; /* 0x47efffffe0000000 */
104
105
  /* Smallest float, stored as a double */
106
  const double tiny = 1.40129846432481707092e-45; /* 0x36a0000000000000 */
107
108
  GET_BITS_SP32(x, ux);
109
  ax = ux & (~SIGNBIT_SP32);
110
  xpos = ax == ux;
111
  GET_BITS_SP32(y, uy);
112
  ay = uy & (~SIGNBIT_SP32);
113
  ypos = ay == uy;
114
115
  if (ux == 0x3f800000)
116
    {
117
      /* x = +1.0. Return +1.0 for all y, even NaN,
118
	 raising invalid only if y is a signalling NaN */
119
      if (y + 1.0F == 2.0F) dummy = 1;
120
      return 1.0F;
121
    }
122
  else if (ay == 0)
123
    {
124
      /* y is zero. Return 1.0, even if x is infinity or NaN,
125
         raising invalid only if x is a signalling NaN */
126
      if (x + 1.0F == 2.0F) dummy = 1;
127
      return 1.0F;
128
    }
129
  else if (((ax & EXPBITS_SP32) == EXPBITS_SP32) &&
130
           (ax & MANTBITS_SP32))
131
    /* x is NaN. Return NaN, with invalid exception if it's
132
       a signalling NaN. */
133
    return x + x;
134
  else if (((ay & EXPBITS_SP32) == EXPBITS_SP32) &&
135
           (ay & MANTBITS_SP32))
136
   /* y is NaN. Return NaN, with invalid exception if y
137
      is a signalling NaN. */
138
   return y + y;
139
  else if (uy == 0x3f800000)
140
    /* y is 1.0; return x */
141
    return x;
142
  else if ((ay & EXPBITS_SP32) > 0x4f000000)
143
    {
144
      /* y is infinite or so large that the result would 
145
         overflow or underflow. Flags should be raised 
146
         unless y is an exact infinity. */
147
      int yinf = (ay == EXPBITS_SP32);
148
      if (ypos)
149
        {
150
          /* y is +ve */
151
          if (ax == 0)
152
            /* abs(x) = 0.0. */
153
            return 0.0F;
154
          else if (ax < 0x3f800000)
155
	    {
156
	      /* abs(x) < 1.0 */
157
	      if (yinf)
158
                return 0.0F;
159
	      else
160
                return retval_errno_erange_underflow(x, y, 1);
161
	    }
162
          else if (ax == 0x3f800000)
163
            /* abs(x) = 1.0. */
164
            return 1.0F;
165
          else
166
	    {
167
	      /* abs(x) > 1.0 */
168
	      if (yinf)
169
                return infinityf_with_flags(0);
170
	      else
171
                return retval_errno_erange_overflow(x, y, 1);
172
	    }
173
        }
174
      else
175
        {
176
          /* y is -ve */
177
          if (ax == 0)
178
            /* abs(x) = 0.0. Return +infinity. */
179
            return retval_errno_edom(x, y, 1);
180
          else if (ax < 0x3f800000)
181
	    {
182
            /* abs(x) < 1.0; return +infinity. */
183
	      if (yinf)
184
                return infinityf_with_flags(0); 
185
	      else
186
                return retval_errno_erange_overflow(x, y, 1);
187
	    }
188
          else if (ax == 0x3f800000)
189
            /* abs(x) = 1.0. */
190
            return 1.0F;
191
          else
192
	    {
193
	      /* abs(x) > 1.0 */
194
	      if (yinf)
195
                return 0.0F;
196
	      else
197
                return retval_errno_erange_underflow(x, y, 1);
198
	    }
199
        }
200
    }
201
202
  /* See whether y is an integer.
203
     inty = 0 means not an integer.
204
     inty = 1 means odd integer.
205
     inty = 2 means even integer.
206
  */
207
  yexp = ((uy & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32 + 1;
208
  if (yexp < 1)
209
    inty = 0;
210
  else if (yexp > 24)
211
    inty = 2;
212
  else /* 1 <= yexp <= 24 */
213
    {
214
      /* Mask out the bits of r that we don't want */
215
      mask = (1 << (24 - yexp)) - 1;
216
      if ((uy & mask) != 0)
217
        inty = 0;
218
      else if (((uy & ~mask) >> (24 - yexp)) & 0x00000001)
219
        inty = 1;
220
      else
221
        inty = 2;
222
    }
223
224
  if ((ax & EXPBITS_SP32) == EXPBITS_SP32)
225
    {
226
      /* x is infinity (NaN was already ruled out). */
227
      if (xpos)
228
        {
229
          /* x is +infinity */
230
          if (ypos)
231
            /* y > 0.0 */
232
            return x;
233
          else
234
            return 0.0F;
235
        }
236
      else
237
        {
238
          /* x is -infinity */
239
          if (inty == 1)
240
            {
241
              /* y is an odd integer */
242
              if (ypos)
243
                /* Result is -infinity */
244
                return x;
245
              else
246
                return -0.0F;
247
            }
248
          else
249
            {
250
              if (ypos)
251
                /* Result is +infinity */
252
                return -x;
253
              else
254
                return 0.0F;
255
            }
256
        }
257
    }
258
  else if (ax == 0)
259
    {
260
      /* x is zero */
261
      if (xpos)
262
        {
263
          /* x is +0.0 */
264
          if (ypos)
265
            /* y is positive; return +0.0 for all cases */
266
            return x;
267
          else
268
            /* y is negative; return +infinity with div-by-zero
269
               for all cases */
270
            return retval_errno_edom(x, y, 1);
271
        }
272
      else
273
        {
274
          /* x is -0.0 */
275
          if (ypos)
276
            {
277
              /* y is positive */
278
              if (inty == 1)
279
                /* -0.0 raised to a positive odd integer returns -0.0 */
280
                return x;
281
              else
282
                /* Return +0.0 */
283
                return -x;
284
            }
285
          else
286
            {
287
              /* y is negative */
288
              if (inty == 1)
289
                /* -0.0 raised to a negative odd integer returns -infinity
290
                   with div-by-zero */
291
                return retval_errno_edom(x, y, 2);
292
              else
293
                /* Return +infinity with div-by-zero */
294
                return retval_errno_edom(x, y, 1);
295
            }
296
        }
297
    }
298
299
  negateres = 0;
300
  if (!xpos)
301
    {
302
      /* x is negative */
303
      if (inty)
304
        {
305
          /* It's OK because y is an integer. */
306
          ux = ax;
307
          PUT_BITS_SP32(ux, x); /* x = abs(x) */
308
          /* If y is odd, the result will be negative */
309
          negateres = (inty == 1);
310
        }
311
      else
312
        /* y is not an integer. Return a NaN. */
313
        return retval_errno_edom(x, y, 3);
314
    }
315
316
  if (ay < 0x2e800000)   /* abs(y) < 2^(-34) */
317
    {
318
      /* y is close enough to zero for the result to be 1.0
319
         no matter what the size of x */
320
      return 1.0F + y;
321
    }
322
323
  /* Simply use double precision for computation of log2(x),
324
     y*log2(x) and exp2(y*log2(x)) */
325
  dx = x;
326
  dy = y;
327
  dlog2 = log2(dx);
328
  dw = y * dlog2;
329
  dr = exp2(dw);
330
331
  /* If dr overflowed or underflowed we need to deal with errno */
332
  if (dr > large)
333
    {
334
      /* Double dr has overflowed range of float. */
335
      if (negateres)
336
        return retval_errno_erange_overflow(x, y, -1);
337
      else
338
        return retval_errno_erange_overflow(x, y, 1);
339
    }
340
  else if (dr < tiny)
341
    {
342
      /* Double dr has underflowed range of float. */
343
      if (negateres)
344
        return retval_errno_erange_underflow(x, y, -1);
345
      else
346
        return retval_errno_erange_underflow(x, y, 1);
347
    }
348
  else
349
    {
350
      if (negateres)
351
        return -dr;
352
      else
353
        return dr;
354
    }
355
}
356
357
/* Make the implementation above available under the public name powf
   (presumably via glibc's weak_alias macro; its definition is not
   visible in this file). */
weak_alias (__powf, powf)
358
/* Also provide __powf under the internal name __ieee754_powf. */
weak_alias (__powf, __ieee754_powf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_remainder.c.x86_64-new-libm (+270 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_NAN_WITH_FLAGS
13
#define USE_SCALEDOUBLE_3
14
#define USE_GET_FPSW_INLINE
15
#define USE_SET_FPSW_INLINE
16
#include "libm_inlines_amd.h"
17
#undef USE_NAN_WITH_FLAGS
18
#undef USE_SCALEDOUBLE_3
19
#undef USE_GET_FPSW_INLINE
20
#undef USE_SET_FPSW_INLINE
21
22
/* Dekker-style multiplication: forms the product of x and y as the
   nearly double-length pair (*z, *zz), where *z is the rounded product
   x * y and *zz is the correction term recovered from the split
   operands. */
static inline void dekker_mul12(double x, double y,
                                double *z, double *zz)
{
  unsigned long xbits, ybits;
  double x_head, x_tail, y_head, y_tail, prod;

  /* Head of x: x with the low 27 bits of its bit pattern cleared.
     The tail is whatever is left over. */
  GET_BITS_DP64(x, xbits);
  xbits &= 0xfffffffff8000000;
  PUT_BITS_DP64(xbits, x_head);
  x_tail = x - x_head;

  /* Same head/tail split for y */
  GET_BITS_DP64(y, ybits);
  ybits &= 0xfffffffff8000000;
  PUT_BITS_DP64(ybits, y_head);
  y_tail = y - y_head;

  /* Rounded product, then the partial products minus that rounded
     value, accumulated from the largest term to the smallest */
  prod = x * y;
  *z = prod;
  *zz = (((x_head * y_head - prod) + x_head * y_tail) + x_tail * y_head)
    + x_tail * y_tail;
}
41
42
43
#if defined(COMPILING_FMOD)
44
double __fmod(double x, double y)
45
#else
46
double __remainder(double x, double y)
47
#endif
48
{
49
  double dx, dy, scale, w, t, v, c, cc;
50
  int i, ntimes, xexp, yexp;
51
  unsigned long u, ux, uy, ax, ay, todd;
52
  unsigned int sw;
53
54
  dx = x;
55
  dy = y;
56
57
  GET_BITS_DP64(dx, ux);
58
  GET_BITS_DP64(dy, uy);
59
  ax = ux & ~SIGNBIT_DP64;
60
  ay = uy & ~SIGNBIT_DP64;
61
  xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
62
  yexp = ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
63
64
  if (xexp < 1 || xexp > BIASEDEMAX_DP64 ||
65
      yexp < 1 || yexp > BIASEDEMAX_DP64)
66
    {
67
      /* x or y is zero, denormalized, NaN or infinity */
68
      if (xexp > BIASEDEMAX_DP64)
69
        {
70
          /* x is NaN or infinity */
71
          if (ux & MANTBITS_DP64)
72
            /* x is NaN */
73
            return dx + dx; /* Raise invalid if it is a signalling NaN */
74
          else
75
            /* x is infinity; result is NaN */
76
            return nan_with_flags(AMD_F_INVALID);
77
        }
78
      else if (yexp > BIASEDEMAX_DP64)
79
        {
80
          /* y is NaN or infinity */
81
          if (uy & MANTBITS_DP64)
82
            /* y is NaN */
83
            return dy + dy; /* Raise invalid if it is a signalling NaN */
84
          else
85
            /* y is infinity; result is x */
86
            return dx;
87
        }
88
      else if (ax == 0x0000000000000000)
89
        {
90
          /* x is zero */
91
          if (ay == 0x0000000000000000)
92
            /* y is zero */
93
            return nan_with_flags(AMD_F_INVALID);
94
          else
95
            return dx;
96
        }
97
      else if (ay == 0x0000000000000000)
98
        /* y is zero */
99
        return nan_with_flags(AMD_F_INVALID);
100
101
      /* We've exhausted all other possibilities. One or both of x and
102
         y must be denormalized */
103
      if (xexp < 1)
104
        {
105
          /* x is denormalized. Figure out its exponent. */
106
          u = ax;
107
          while (u < IMPBIT_DP64)
108
            {
109
              xexp--;
110
              u <<= 1;
111
            }
112
        }
113
      if (yexp < 1)
114
        {
115
          /* y is denormalized. Figure out its exponent. */
116
          u = ay;
117
          while (u < IMPBIT_DP64)
118
            {
119
              yexp--;
120
              u <<= 1;
121
            }
122
        }
123
    }
124
  else if (ax == ay)
125
    {
126
      /* abs(x) == abs(y); return zero with the sign of x */
127
      PUT_BITS_DP64(ux & SIGNBIT_DP64, dx);
128
      return dx;
129
    }
130
131
  /* Set x = abs(x), y = abs(y) */
132
  PUT_BITS_DP64(ax, dx);
133
  PUT_BITS_DP64(ay, dy);
134
135
  if (ax < ay)
136
    {
137
      /* abs(x) < abs(y) */
138
#if !defined(COMPILING_FMOD)
139
      if (dx > 0.5*dy)
140
        dx -= dy;
141
#endif
142
      return x < 0.0? -dx : dx;
143
    }
144
145
  /* Save the current floating-point status word. We need
146
     to do this because the remainder function is always
147
     exact for finite arguments, but our algorithm causes
148
     the inexact flag to be raised. We therefore need to
149
     restore the entry status before exiting. */
150
  sw = get_fpsw_inline();
151
152
  /* Set ntimes to the number of times we need to do a
153
     partial remainder. If the exponent of x is an exact multiple
154
     of 52 larger than the exponent of y, and the mantissa of x is
155
     less than the mantissa of y, ntimes will be one too large
156
     but it doesn't matter - it just means that we'll go round
157
     the loop below one extra time. */
158
  if (xexp <= yexp)
159
    ntimes = 0;
160
  else
161
    ntimes = (xexp - yexp) / 52;
162
163
  if (ntimes == 0)
164
    {
165
      w = dy;
166
      scale = 1.0;
167
    }
168
  else
169
    {
170
      /* Set w = y * 2^(52*ntimes) */
171
      w = scaleDouble_3(dy, ntimes * 52);
172
173
      /* Set scale = 2^(-52) */
174
      PUT_BITS_DP64((unsigned long)(-52 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
175
                    scale);
176
    }
177
178
  /* Each time round the loop we compute a partial remainder.
179
     This is done by subtracting a large multiple of w
180
     from x each time, where w is a scaled up version of y.
181
     The subtraction must be performed exactly in quad
182
     precision, though the result at each stage can
183
     fit exactly in a double precision number. */
184
  for (i = 0; i < ntimes; i++)
185
    {
186
      /* t is the integer multiple of w that we will subtract.
187
         We use a truncated value for t.
188
189
         N.B. w has been chosen so that the integer t will have
190
         at most 52 significant bits. This is the amount by
191
         which the exponent of the partial remainder dx gets reduced
192
         every time around the loop. In theory we could use
193
         53 bits in t, but the quad precision multiplication
194
         routine dekker_mul12 does not allow us to do that because
195
         it loses the last (106th) bit of its quad precision result. */
196
197
      /* Set dx = dx - w * t, where t is equal to trunc(dx/w). */
198
      t = (double)(long)(dx / w);
199
      /* At this point, t may be one too large due to
200
         rounding of dx/w */
201
202
      /* Compute w * t in quad precision */
203
      dekker_mul12(w, t, &c, &cc);
204
205
      /* Subtract w * t from dx */
206
      v = dx - c;
207
      dx = v + (((dx - v) - c) - cc);
208
209
      /* If t was one too large, dx will be negative. Add back
210
         one w */
211
      /* It might be possible to speed up this loop by finding
212
         a way to compute correctly truncated t directly from dx and w.
213
         We would then avoid the need for this check on negative dx. */
214
      if (dx < 0.0)
215
        dx += w;
216
217
      /* Scale w down by 2^(-52) for the next iteration */
218
      w *= scale;
219
    }
220
221
  /* One more time */
222
  /* Variable todd says whether the integer t is odd or not */
223
  t = (double)(long)(dx / w);
224
  todd = ((long)(dx / w)) & 1;
225
  dekker_mul12(w, t, &c, &cc);
226
  v = dx - c;
227
  dx = v + (((dx - v) - c) - cc);
228
  if (dx < 0.0)
229
    {
230
      todd = !todd;
231
      dx += w;
232
    }
233
234
  /* At this point, dx lies in the range [0,dy) */
235
#if !defined(COMPILING_FMOD)
236
  /* For the fmod function, we're done apart from setting 
237
     the correct sign. */
238
  /* For the remainder function, we need to adjust dx
239
     so that it lies in the range (-y/2, y/2] by carefully
240
     subtracting w (== dy == y) if necessary. The rigmarole
241
     with todd is to get the correct sign of the result
242
     when x/y lies exactly half way between two integers,
243
     when we need to choose the even integer. */
244
  if (ay < 0x7fd0000000000000)
245
    {
246
      if (dx + dx > w || (todd && (dx + dx == w)))
247
        dx -= w;
248
    }
249
  else if (dx > 0.5 * w || (todd && (dx == 0.5 * w)))
250
    dx -= w;
251
252
#endif
253
254
  /* **** N.B. for some reason this breaks the 32 bit version
255
     of remainder when compiling with optimization. */
256
  /* Restore the entry status flags */
257
  set_fpsw_inline(sw);
258
259
  /* Set the result sign according to input argument x */
260
  return x < 0.0? -dx : dx;
261
262
}
263
264
/* Publish the function compiled above under its public name and under
   the corresponding __ieee754_ name. COMPILING_FMOD selects the fmod
   pair; otherwise the remainder pair is emitted.
   NOTE(review): weak_alias is not defined in this file - presumably the
   glibc weak-symbol alias macro; confirm. */
#if defined(COMPILING_FMOD)
265
weak_alias (__fmod, fmod)
266
weak_alias (__fmod,  __ieee754_fmod)
267
#else
268
weak_alias (__remainder, remainder)
269
weak_alias (__remainder,  __ieee754_remainder)
270
#endif
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_remainder_piby2.c.x86_64-new-libm (+541 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
/* Define this to get debugging print statements activated */
13
#define DEBUGGING_PRINT
14
#undef DEBUGGING_PRINT
15
16
17
#ifdef DEBUGGING_PRINT
18
#include <stdio.h>
19
/* Debugging helper: formats the low `bitsper` binary digits of d,
   most significant first, as a string of '0' and '1' characters.

   If 0 <= point <= bitsper a '.' is inserted so that exactly `point`
   digits precede it; point == bitsper therefore puts the '.' after the
   last digit. Any other value of point produces no '.'.

   d is expected to be non-negative. bitsper is clamped to what fits in
   the internal buffer. The returned pointer refers to a static buffer
   that is overwritten by the next call. */
char *d2b(int d, int bitsper, int point)
{
  static char buff[50];
  /* Leave room for one '.' and the terminating '\0' */
  const int maxbits = (int)sizeof(buff) - 2;
  int i, j;

  if (bitsper < 0)
    bitsper = 0;
  else if (bitsper > maxbits)
    bitsper = maxbits;

  j = bitsper;
  if (point >= 0 && point <= bitsper)
    j++;
  buff[j] = '\0';

  /* A point after the last digit is never met by the loop below
     (i only runs up to bitsper - 1), so place it here. Without this
     the slot reserved above stayed unwritten and buff[0] kept
     whatever the previous call left there. */
  if (point == bitsper)
    {
      j--;
      buff[j] = '.';
    }

  /* Fill the digits from least significant (rightmost) to most */
  for (i = bitsper - 1; i >= 0; i--)
    {
      j--;
      if (d % 2 == 1)
        buff[j] = '1';
      else
        buff[j] = '0';
      if (i == point)
        {
          /* The point sits immediately to the left of digit i */
          j--;
          buff[j] = '.';
        }
      d /= 2;
    }
  return buff;
}
43
#endif
44
45
/* Overview of __remainder_piby2:
     x       input argument (expected positive)
     *r,*rr  receive the head and tail of the reduced argument
     *region receives the number of pi/2 subtracted, mod 4
   Three methods are tried in turn:
     1. x <= eleven_piby4: subtract the relevant multiple of pi/2
        directly, using lead/tail constant pairs;
     2. x <= 1.0e5: multiply by 2/pi in extra precision, subtract the
        nearest integer and multiply back by pi/2;
     3. otherwise, or when 1. or 2. detect heavy cancellation
        (abs(result) <= cancellationThresh): multi-precision
        multiplication of the mantissa of x by stored chunks of 2/pi. */
/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using
46
   extra precision, and return the result in r, rr.
47
   Return value "region" tells how many lots of pi/2 were subtracted
48
   from x to put it in the range [-pi/4,pi/4], mod 4. */
49
void __remainder_piby2(double x, double *r, double *rr, int *region)
50
{
51
52
  /* eleven_piby4 is the closest machine number BELOW 11*pi/4 */
53
  static const double
54
    eleven_piby4 = 8.6393797973719301808159e+00; /* 0x4021475cc9eedf00 */
55
56
  /* Lead/tail pairs for the multiples of pi/2 used by the direct
     subtraction method */
  static const double
57
    piby2_lead = 1.57079632679489655800e+00, /* 0x3ff921fb54442d18 */
58
    piby2_tail = 6.12323399573676480327e-17, /* 0x3c91a62633145c06 */
59
    pi_lead = 3.14159265358979311600e+00, /* 0x400921fb54442d18 */
60
    pi_tail = 1.22464679914735296065e-16, /* 0x3ca1a62633145c06 */
61
    three_piby2_lead = 4.71238898038468967400e+00, /* 0x4012d97c7f3321d2 */
62
    three_piby2_tail = 1.83697019872102919446e-16, /* 0x3caa79394c9e8a08 */
63
    two_pi_lead = 6.28318530717958623200e+00, /* 0x401921fb54442d18 */
64
    two_pi_tail = 2.44929359829470592131e-16, /* 0x3cb1a62633145c06 */
65
    five_piby2_lead = 7.85398163397448278999e+00, /* 0x401f6a7a2955385e */
66
    five_piby2_tail = 3.06161699786838264816e-16; /* 0x3cb60fafbfd97308 */
67
68
  /* Each of these threshold values is the closest machine
69
     number BELOW a multiple of pi/4, i.e. they are not
70
     rounded to nearest. thresh1 is 1*pi/4, thresh2 is 2*pi/4, etc.
71
     This ensures that we end up in precisely the correct region. */
72
  static const double
73
    thresh1 = 7.8539816339744827899949e-01, /* 0x3fe921fb54442d18 */
74
    thresh2 = 1.5707963267948965579989e+00, /* 0x3ff921fb54442d18 */
75
    thresh3 = 2.3561944901923448369984e+00, /* 0x4002d97c7f3321d2 */
76
    thresh4 = 3.1415926535897931159979e+00, /* 0x400921fb54442d18 */
77
    thresh5 = 3.9269908169872413949974e+00, /* 0x400f6a7a2955385e */
78
    thresh6 = 4.7123889803846896739969e+00, /* 0x4012d97c7f3321d2 */
79
    thresh7 = 5.4977871437821379529964e+00, /* 0x4015fdbbe9bba775 */
80
    thresh8 = 6.2831853071795862319959e+00, /* 0x401921fb54442d18 */
81
    thresh9 = 7.0685834705770345109954e+00, /* 0x401c463abeccb2bb */
82
    thresh10 = 7.8539816339744827899949e+00; /* 0x401f6a7a2955385e */
83
84
  /* 2/pi and pi/2, whole and split into three parts, for the
     extra-precision multiplications below */
  static const double
85
    twobypi = 6.36619772367581271411E-01, /* 0x3FE45F306DC9C882 */
86
    twobypi_part1 = 6.36619761586189270020e-01, /* 0x3fe45f3068000000 */
87
    twobypi_part2 = 1.07813920013910546913e-08, /* 0x3e47272208000000 */
88
    twobypi_part3 = 7.16649491121506946045e-17, /* 0x3c94a7f09d5f47d6 */
89
    piby2_part1 = 1.57079631090164184570e+00, /* 0x3ff921fb50000000 */
90
    piby2_part2 = 1.58932547122958567343e-08, /* 0x3e5110b460000000 */
91
    piby2_part3 = 6.12323399573676480327e-17; /* 0x3c91a62633145c06 */
92
93
  /* A cheap method's result is accepted only if its magnitude
     exceeds this value */
  static const double cancellationThresh = 1.0e-12;
94
  /* Set to 1 once one of the two cheap methods has stored its result;
     while 0, the multi-precision method at the end still has to run */
  int done = 0;
95
96
  /* For small values of x, up to 11*pi/4, we do quad precision
97
     subtraction of the relevant multiple of pi/2 */
98
  if (x <= eleven_piby4)
99
    {
100
      /* t is the rounded difference x - lead; s collects the rounding
         error of that subtraction minus the tail constant */
      double s, t, ctest;
101
      if (x <= thresh1) /* x < pi/4 */
102
        {
103
          /* Quick return if x is already less than pi/4 */
104
          *r = x;
105
          *rr = 0.0;
106
          *region = 0;
107
          return;
108
        }
109
      /* From here on the branches come in pairs that subtract the same
         multiple of pi/2 and report the same region; they differ only
         in the order in which the rounding error of t is recovered */
      else if (x <= thresh2) /* x < 2*pi/4 */
110
        {
111
          t = x - piby2_lead;
112
          s = ((-piby2_lead - t) + x) - piby2_tail;
113
          *region = 1;
114
        }
115
      else if (x <= thresh3) /* x < 3*pi/4 */
116
        {
117
          t = x - piby2_lead;
118
          s = ((x - t) - piby2_lead) - piby2_tail;
119
          *region = 1;
120
        }
121
      else if (x <= thresh4) /* x < 4*pi/4 */
122
        {
123
          t = x - pi_lead;
124
          s = ((-pi_lead - t) + x) - pi_tail;
125
          *region = 2;
126
        }
127
      else if (x <= thresh5) /* x < 5*pi/4 */
128
        {
129
          t = x - pi_lead;
130
          s = ((x - t) - pi_lead) - pi_tail;
131
          *region = 2;
132
        }
133
      else if (x <= thresh6) /* x < 6*pi/4 */
134
        {
135
          t = x - three_piby2_lead;
136
          s = ((-three_piby2_lead - t) + x) - three_piby2_tail;
137
          *region = 3;
138
        }
139
      else if (x <= thresh7) /* x < 7*pi/4 */
140
        {
141
          t = x - three_piby2_lead;
142
          s = ((x - t) - three_piby2_lead) - three_piby2_tail;
143
          *region = 3;
144
        }
145
      else if (x <= thresh8) /* x < 8*pi/4 */
146
        {
147
          t = x - two_pi_lead;
148
          s = ((-two_pi_lead - t) + x) - two_pi_tail;
149
          *region = 0;
150
        }
151
      else if (x <= thresh9) /* x < 9*pi/4 */
152
        {
153
          t = x - two_pi_lead;
154
          s = ((x - t) - two_pi_lead) - two_pi_tail;
155
          *region = 0;
156
        }
157
      else if (x <= thresh10) /* x < 10*pi/4 */
158
        {
159
          t = x - five_piby2_lead;
160
          s = ((-five_piby2_lead - t) + x) - five_piby2_tail;
161
          *region = 1;
162
        }
163
      else /* x < 11*pi/4 */
164
        {
165
          t = x - five_piby2_lead;
166
          s = ((x - t) - five_piby2_lead) - five_piby2_tail;
167
          *region = 1;
168
        }
169
170
      /* Renormalize the pair (t, s) into head *r and tail *rr */
      *r = t + s;
171
      *rr = (t - *r) + s;
172
173
      /* Check for massive cancellation which may happen very close
174
         to multiples of pi/2 */
175
      if (*r < 0.0)
176
        ctest = -(*r);
177
      else
178
        ctest = *r;
179
#ifdef DEBUGGING_PRINT
180
      printf("Cancellation threshold test = (%g > %g)\n",
181
             ctest, cancellationThresh);
182
#endif
183
184
      /* Check if cancellation error was not too large */
185
      if (ctest > cancellationThresh)
186
        done = 1;
187
      /* Otherwise fall through to the expensive method */
188
    }
189
  else if (x <= 1.0e5)
190
    {
191
      /* This range reduction is accurate enough for x up to
192
         approximately 2**(20) except near multiples of pi/2 */
193
194
      /* We perform quad precision arithmetic to find the
195
         nearest multiple of pi/2 to x */
196
197
      int reg, it;
198
      double hx, tx, z, zz, w, ww, dreg, s, t, c, cc, ctest;
199
200
      /* Split x into head and tail, hx and tx */
201
      unsigned long u;
202
      GET_BITS_DP64(x, u);
203
      u &= 0xfffffffff8000000;
204
      PUT_BITS_DP64(u, hx);
205
      tx = x - hx;
206
207
      /* Multiply x by 2/pi in extra precision, result in (z, zz) */
208
      c = x * twobypi;
209
      cc = ((((hx * twobypi_part1 - c) + hx * twobypi_part2) +
210
            tx * twobypi_part1) + tx * twobypi_part2) + x * twobypi_part3;
211
      z = c + cc;
212
      zz = (c - z) + cc;
213
214
#ifdef DEBUGGING_PRINT
215
      printf("z = %30.20e = %s\n", z, double2hex(&z));
216
      printf("zz = %30.20e = %s\n", zz, double2hex(&zz));
217
#endif
218
219
      /* Find reg, the nearest integer to (z, zz). We need to be
220
         careful when (z,zz) is very near an odd multiple of 0.5.
221
         The simple formula
222
           reg = (int)((zz + 0.5) + z);
223
         fails in that case because the double rounding may
224
         lead us astray. */
225
      t = z + z;
226
      it = (int)t;
227
      /* 2*z being an odd integer means z is exactly k + 0.5 */
      if (it == t && it & 1)
228
        {
229
          /* z is an odd multiple of 0.5; we must use zz
230
             to discriminate */
231
          if (zz > 0.0)
232
            reg = (int)z + 1;
233
          else
234
            reg = (int)z;
235
        }
236
      else
237
        reg = (int)(z + 0.5);
238
239
#ifdef DEBUGGING_PRINT
240
      printf("reg = %d\n", reg);
241
#endif
242
243
      /* Carefully subtract reg from (z, zz), result in (w, ww) */
244
      dreg = reg;
245
      s = z - dreg;
246
      if (z > dreg)
247
        t = ((z - s) - dreg) + zz;
248
      else
249
        t = ((-dreg - s) + z) + zz;
250
      w = s + t;
251
      ww = (s - w) + t;
252
253
#ifdef DEBUGGING_PRINT
254
      printf("w = %30.20e = %s\n", w, double2hex(&w));
255
      printf("ww = %30.20e = %s\n", ww, double2hex(&ww));
256
#endif
257
258
     /* Check for massive cancellation which may happen very close
259
        to multiples of pi/2 */
260
      if (w < 0.0)
261
        ctest = -w;
262
      else
263
        ctest = w;
264
265
      /* If cancellation is not too severe, continue with this method.
266
         Otherwise we fall through to the expensive, accurate method */
267
      if (ctest > cancellationThresh)
268
        {
269
          /* Split w into (hx, tx) */
270
          GET_BITS_DP64(w, u);
271
          u &= 0xfffffffff8000000;
272
          PUT_BITS_DP64(u, hx);
273
          tx = w - hx;
274
275
          /* Carefully multiply (w, ww) by pi/2 */
276
          c = piby2_lead * w;
277
          cc = ((((piby2_part1 * hx - c) + piby2_part1 * tx) +
278
                 piby2_part2 * hx) + piby2_part2 * tx) +
279
            (piby2_lead * ww + piby2_part3 * w);
280
          *r = c + cc;
281
          *rr = (c - *r) + cc;
282
283
          *region = reg & 3;
284
285
#ifdef DEBUGGING_PRINT
286
          printf("r = %30.20e = %s\n", *r, double2hex(r));
287
          printf("rr = %30.20e = %s\n", *rr, double2hex(rr));
288
#endif
289
          done = 1;
290
        }
291
    }
292
293
  if (!done)
294
    {
295
      /* This method simulates multi-precision floating-point
296
         arithmetic and is accurate for all 1 <= x < infinity */
297
      /* Number of bits held in each element of pibits and res */
      const int bitsper = 10;
298
      /* Product chunks; the multiplication below writes only
         res[0] .. res[19] */
      unsigned long res[500];
299
      unsigned long ux, u, carry, mask, mant, highbitsrr;
300
      int first, last, i, rexp, xexp, resexp, ltb, determ;
301
      double xx, t;
302
      /* NOTE(review): presumably the binary expansion of 2/pi in
         consecutive bitsper-bit chunks, preceded by six zero chunks
         (the comments below call it "the bits of 2/pi") - the values
         themselves cannot be verified from this file. */
      static unsigned long pibits[] =
303
      {
304
        0,    0,    0,    0,    0,    0,
305
        162,  998,   54,  915,  580,   84,  671,  777,  855,  839,
306
        851,  311,  448,  877,  553,  358,  316,  270,  260,  127,
307
        593,  398,  701,  942,  965,  390,  882,  283,  570,  265,
308
        221,  184,    6,  292,  750,  642,  465,  584,  463,  903,
309
        491,  114,  786,  617,  830,  930,   35,  381,  302,  749,
310
        72,  314,  412,  448,  619,  279,  894,  260,  921,  117,
311
        569,  525,  307,  637,  156,  529,  504,  751,  505,  160,
312
        945, 1022,  151, 1023,  480,  358,   15,  956,  753,   98,
313
        858,   41,  721,  987,  310,  507,  242,  498,  777,  733,
314
        244,  399,  870,  633,  510,  651,  373,  158,  940,  506,
315
        997,  965,  947,  833,  825,  990,  165,  164,  746,  431,
316
        949, 1004,  287,  565,  464,  533,  515,  193,  111,  798
317
      };
318
319
      GET_BITS_DP64(x, ux);
320
321
#ifdef DEBUGGING_PRINT
322
      printf("On entry, x = %25.20e = %s\n", x, double2hex(&x));
323
#endif
324
325
      /* xexp is the unbiased exponent of x; ux becomes the mantissa
         field with the implicit leading bit made explicit */
      xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
326
      ux = (ux & MANTBITS_DP64) | IMPBIT_DP64;
327
328
      /* Now ux is the mantissa bit pattern of x as a long integer */
329
      carry = 0;
330
      mask = (1L << bitsper) - 1;
331
332
      /* Set first and last to the positions of the first
333
         and last chunks of 2/pi that we need */
334
      first = xexp / bitsper;
335
      /* resexp is what is left of xexp after removing whole chunks */
      resexp = xexp - first * bitsper;
336
      /* 180 is the theoretical maximum number of bits (actually
337
         175 for IEEE double precision) that we need to extract
338
         from the middle of 2/pi to compute the reduced argument
339
         accurately enough for our purposes */
340
      last = first + 180 / bitsper;
341
342
      /* Do a long multiplication of the bits of 2/pi by the
343
         integer mantissa */
344
#if 0
345
      for (i = last; i >= first; i--)
346
        {
347
          u = pibits[i] * ux + carry;
348
          res[i - first] = u & mask;
349
          carry = u >> bitsper;
350
        }
351
      res[last - first + 1] = 0;
352
#else
353
      /* Unroll the loop. This is only correct because we know
354
         that bitsper is fixed as 10. */
355
      /* Works from the least significant chunk (pibits[last]) up to the
         most significant, carrying the overflow of each partial product
         into the next. The carry out of the final product is dropped. */
      res[19] = 0;
356
      u = pibits[last] * ux;
357
      res[18] = u & mask;
358
      carry = u >> bitsper;
359
      u = pibits[last-1] * ux + carry;
360
      res[17] = u & mask;
361
      carry = u >> bitsper;
362
      u = pibits[last-2] * ux + carry;
363
      res[16] = u & mask;
364
      carry = u >> bitsper;
365
      u = pibits[last-3] * ux + carry;
366
      res[15] = u & mask;
367
      carry = u >> bitsper;
368
      u = pibits[last-4] * ux + carry;
369
      res[14] = u & mask;
370
      carry = u >> bitsper;
371
      u = pibits[last-5] * ux + carry;
372
      res[13] = u & mask;
373
      carry = u >> bitsper;
374
      u = pibits[last-6] * ux + carry;
375
      res[12] = u & mask;
376
      carry = u >> bitsper;
377
      u = pibits[last-7] * ux + carry;
378
      res[11] = u & mask;
379
      carry = u >> bitsper;
380
      u = pibits[last-8] * ux + carry;
381
      res[10] = u & mask;
382
      carry = u >> bitsper;
383
      u = pibits[last-9] * ux + carry;
384
      res[9] = u & mask;
385
      carry = u >> bitsper;
386
      u = pibits[last-10] * ux + carry;
387
      res[8] = u & mask;
388
      carry = u >> bitsper;
389
      u = pibits[last-11] * ux + carry;
390
      res[7] = u & mask;
391
      carry = u >> bitsper;
392
      u = pibits[last-12] * ux + carry;
393
      res[6] = u & mask;
394
      carry = u >> bitsper;
395
      u = pibits[last-13] * ux + carry;
396
      res[5] = u & mask;
397
      carry = u >> bitsper;
398
      u = pibits[last-14] * ux + carry;
399
      res[4] = u & mask;
400
      carry = u >> bitsper;
401
      u = pibits[last-15] * ux + carry;
402
      res[3] = u & mask;
403
      carry = u >> bitsper;
404
      u = pibits[last-16] * ux + carry;
405
      res[2] = u & mask;
406
      carry = u >> bitsper;
407
      u = pibits[last-17] * ux + carry;
408
      res[1] = u & mask;
409
      carry = u >> bitsper;
410
      u = pibits[last-18] * ux + carry;
411
      res[0] = u & mask;
412
#endif
413
414
#ifdef DEBUGGING_PRINT
415
      printf("resexp = %d\n", resexp);
416
      printf("Significant part of x * 2/pi with binary"
417
             " point in correct place:\n");
418
      for (i = 0; i <= last - first; i++)
419
        {
420
          if (i > 0 && i % 5 == 0)
421
            printf("\n ");
422
          if (i == 1)
423
            printf("%s ", d2b((int)res[i], bitsper, resexp));
424
          else
425
            printf("%s ", d2b((int)res[i], bitsper, -1));
426
        }
427
      printf("\n");
428
#endif
429
430
      /* Reconstruct the result */
431
      /* ltb: the last two bits before the binary point of the product
         and the first bit after it, taken from res[0] and res[1] */
      ltb = (int)((((res[0] << bitsper) | res[1])
432
                   >> (bitsper - 1 - resexp)) & 7);
433
434
      /* determ says whether the fractional part is >= 0.5 */
435
      determ = ltb & 1;
436
437
#ifdef DEBUGGING_PRINT
438
      printf("ltb = %d (last two bits before binary point"
439
             " and first bit after)\n", ltb);
440
      printf("determ = %d (1 means need to negate because the fractional\n"
441
             "            part of x * 2/pi is greater than 0.5)\n", determ);
442
#endif
443
444
      i = 1;
445
      if (determ)
446
        {
447
          /* The mantissa is >= 0.5. We want to subtract it
448
             from 1.0 by negating all the bits */
449
          *region = ((ltb >> 1) + 1) & 3;
450
          mant = ~(res[1]) & ((1L << (bitsper - resexp)) - 1);
451
          /* Append complemented chunks until mant reaches 2^53 */
          while (mant < 0x0020000000000000)
452
            {
453
              i++;
454
              mant = (mant << bitsper) | (~(res[i]) & mask);
455
            }
456
          /* The next chunk, left-justified, starts the tail bits */
          highbitsrr = ~(res[i + 1]) << (64 - bitsper);
457
        }
458
      else
459
        {
460
          *region = (ltb >> 1);
461
          mant = res[1] & ((1L << (bitsper - resexp)) - 1);
462
          /* Append chunks until mant reaches 2^53 */
          while (mant < 0x0020000000000000)
463
            {
464
              i++;
465
              mant = (mant << bitsper) | res[i];
466
            }
467
          /* The next chunk, left-justified, starts the tail bits */
          highbitsrr = res[i + 1] << (64 - bitsper);
468
        }
469
470
      rexp = 52 + resexp - i * bitsper;
471
472
      /* Shift mant right until it is below 2^53, moving each bit
         shifted out into the top of highbitsrr and bumping rexp */
      while (mant >= 0x0020000000000000)
473
        {
474
          rexp++;
475
          highbitsrr = (highbitsrr >> 1) | ((mant & 1) << 63);
476
          mant >>= 1;
477
        }
478
479
#ifdef DEBUGGING_PRINT
480
      printf("Normalised mantissa = 0x%016lx\n", mant);
481
      printf("High bits of rest of mantissa = 0x%016lx\n", highbitsrr);
482
      printf("Exponent to be inserted on mantissa = rexp = %d\n", rexp);
483
#endif
484
485
      /* Put the result exponent rexp onto the mantissa pattern */
486
      u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64;
487
      ux = (mant & MANTBITS_DP64) | u;
488
      if (determ)
489
        /* If we negated the mantissa we negate x too */
490
        ux |= SIGNBIT_DP64;
491
      /* From here on x holds the lead part of the reduced x * 2/pi */
      PUT_BITS_DP64(ux, x);
492
493
      /* Create the bit pattern for rr */
494
      highbitsrr >>= 12; /* Note this is shifted one place too far */
495
      u = ((unsigned long)rexp + EXPBIAS_DP64 - 53) << EXPSHIFTBITS_DP64;
496
      /* t is the bare power of two carrying that exponent; xx gets the
         same exponent with the tail bits in its mantissa field */
      PUT_BITS_DP64(u, t);
497
      u |= highbitsrr;
498
      PUT_BITS_DP64(u, xx);
499
500
      /* Subtract the implicit bit we accidentally added */
501
      xx -= t;
502
      /* Set the correct sign, and double to account for the
503
         "one place too far" shift */
504
      if (determ)
505
        xx *= -2.0;
506
      else
507
        xx *= 2.0;
508
509
#ifdef DEBUGGING_PRINT
510
      printf("(lead part of x*2/pi) = %25.20e = %s\n", x, double2hex(&x));
511
      printf("(tail part of x*2/pi) = %25.20e = %s\n", xx, double2hex(&xx));
512
#endif
513
514
      /* (x,xx) is an extra-precise version of the fractional part of
515
         x * 2 / pi. Multiply (x,xx) by pi/2 in extra precision
516
         to get the reduced argument (r,rr). */
517
      {
518
        double hx, tx, c, cc;
519
        /* Split x into hx (head) and tx (tail) */
520
        GET_BITS_DP64(x, ux);
521
        ux &= 0xfffffffff8000000;
522
        PUT_BITS_DP64(ux, hx);
523
        tx = x - hx;
524
525
        c = piby2_lead * x;
526
        cc = ((((piby2_part1 * hx - c) + piby2_part1 * tx) +
527
               piby2_part2 * hx) + piby2_part2 * tx) +
528
          (piby2_lead * xx + piby2_part3 * x);
529
        *r = c + cc;
530
        *rr = (c - *r) + cc;
531
      }
532
533
#ifdef DEBUGGING_PRINT
534
      printf(" (r,rr) = lead and tail parts of frac(x*2/pi) * pi/2:\n");
535
      printf(" r = %25.20e = %s\n", *r, double2hex(r));
536
      printf("rr = %25.20e = %s\n", *rr, double2hex(rr));
537
      printf("region = (number of pi/2 subtracted from x) mod 4 = %d\n",
538
             *region);
539
#endif
540
    }
541
}
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_remainder_piby2f.c.x86_64-new-libm (+386 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
/* Define this to get debugging print statements activated */
13
#define DEBUGGING_PRINT
14
#undef DEBUGGING_PRINT
15
16
17
#ifdef DEBUGGING_PRINT
18
#include <stdio.h>
19
/* Debugging helper: formats the low `bitsper` binary digits of d,
   most significant first, as a string of '0' and '1' characters.

   If 0 <= point <= bitsper a '.' is inserted so that exactly `point`
   digits precede it; point == bitsper therefore puts the '.' after the
   last digit. Any other value of point produces no '.'.

   d is expected to be non-negative. bitsper is clamped to what fits in
   the internal buffer. The returned pointer refers to a static buffer
   that is overwritten by the next call. */
char *d2b(long d, int bitsper, int point)
{
  static char buff[200];
  /* Leave room for one '.' and the terminating '\0' */
  const int maxbits = (int)sizeof(buff) - 2;
  int i, j;

  if (bitsper < 0)
    bitsper = 0;
  else if (bitsper > maxbits)
    bitsper = maxbits;

  j = bitsper;
  if (point >= 0 && point <= bitsper)
    j++;
  buff[j] = '\0';

  /* A point after the last digit is never met by the loop below
     (i only runs up to bitsper - 1), so place it here. Without this
     the slot reserved above stayed unwritten and buff[0] kept
     whatever the previous call left there. */
  if (point == bitsper)
    {
      j--;
      buff[j] = '.';
    }

  /* Fill the digits from least significant (rightmost) to most */
  for (i = bitsper - 1; i >= 0; i--)
    {
      j--;
      if (d % 2 == 1)
        buff[j] = '1';
      else
        buff[j] = '0';
      if (i == point)
        {
          /* The point sits immediately to the left of digit i */
          j--;
          buff[j] = '.';
        }
      d /= 2;
    }
  return buff;
}
43
#endif
44
45
/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using
46
   extra precision, and return the result in r.
47
   Return value "region" tells how many lots of pi/2 were subtracted
48
   from x to put it in the range [-pi/4,pi/4], mod 4. */
49
void __remainder_piby2f(float x, double *r, int *region)
50
{
51
52
  /* eleven_piby4 is the closest machine number BELOW 11*pi/4 */
53
  static const double
54
    eleven_piby4 = 8.6393797973719301808159e+00; /* 0x4021475cc9eedf00 */
55
56
  static const double
57
    piby2 = 1.57079632679489655800e+00, /* 0x3ff921fb54442d18 */
58
    twobypi = 6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */
59
    pi = 3.14159265358979311600e+00, /* 0x400921fb54442d18 */
60
    three_piby2 = 4.71238898038468967400e+00, /* 0x4012d97c7f3321d2 */
61
    two_pi = 6.28318530717958623200e+00, /* 0x401921fb54442d18 */
62
    five_piby2 = 7.85398163397448278999e+00; /* 0x401f6a7a2955385e */
63
64
  /* Each of these threshold values is the closest machine
65
     number BELOW a multiple of pi/4, i.e. they are not
66
     rounded to nearest. thresh1 is 1*pi/4, thresh3 is 3*pi/4, etc.
67
     This ensures that we end up in precisely the correct region. */
68
  static const double
69
    thresh1 = 7.8539816339744827899949e-01, /* 0x3fe921fb54442d18 */
70
    thresh3 = 2.3561944901923448369984e+00, /* 0x4002d97c7f3321d2 */
71
    thresh5 = 3.9269908169872413949974e+00, /* 0x400f6a7a2955385e */
72
    thresh7 = 5.4977871437821379529964e+00, /* 0x4015fdbbe9bba775 */
73
    thresh9 = 7.0685834705770345109954e+00; /* 0x401c463abeccb2bb */
74
75
  static const double cancellationThresh = 1.0e-5;
76
  int done = 0;
77
  double dx;
78
79
  dx = x;
80
81
  /* For small values of x, up to 11*pi/4, we do double precision
82
     subtraction of the relevant multiple of pi/2 */
83
  if (dx <= eleven_piby4) /* x <= 11*pi/4 */
84
    {
85
      double t, ctest;
86
87
     if (dx <= thresh5) /* x < 5*pi/4 */
88
       {
89
         if (dx <= thresh1) /* x < pi/4 */
90
           {
91
             /* Quick return if x is already less than pi/4 */
92
             *r = dx;
93
             *region = 0;
94
             return;
95
           }
96
         else if (dx <= thresh3) /* x < 3*pi/4 */
97
           {
98
             t = dx - piby2;
99
             *region = 1;
100
           }
101
         else /* x < 5*pi/4 */
102
           {
103
             t = dx - pi;
104
             *region = 2;
105
           }
106
       }
107
     else
108
       {
109
         if (dx <= thresh7) /* x < 7*pi/4 */
110
           {
111
             t = dx - three_piby2;
112
             *region = 3;
113
           }
114
         else if (dx <= thresh9) /* x < 9*pi/4 */
115
           {
116
             t = dx - two_pi;
117
             *region = 0;
118
           }
119
         else /* x < 11*pi/4 */
120
           {
121
             t = dx - five_piby2;
122
             *region = 1;
123
           }
124
       }
125
126
     /* Check for massive cancellation which may happen very close
127
        to multiples of pi/2 */
128
     if (t < 0.0)
129
       ctest = -t;
130
     else
131
       ctest = t;
132
#ifdef DEBUGGING_PRINT
133
     printf("Cancellation threshold test = (%g > %g)\n",
134
            ctest, cancellationThresh);
135
#endif
136
137
     /* Check if cancellation error was not too large */
138
     if (ctest > cancellationThresh)
139
       {
140
         *r = t;
141
         done = 1;
142
       }
143
     /* Otherwise fall through to the expensive method */
144
    }
145
  else if (dx <= 1.0e6)
146
    {
147
      /* This range reduction is accurate enough for x up to
148
         approximately 2**(20) except near multiples of pi/2 */
149
150
      /* We perform double precision arithmetic to find the
151
         nearest multiple of pi/2 to x */
152
      int reg;
153
      double z, w, c, ctest;
154
155
      /* Multiply x by 2/pi in double precision, result in z */
156
      z = dx * twobypi;
157
158
#ifdef DEBUGGING_PRINT
159
      printf("z = %30.20e = %s\n", z, double2hex(&z));
160
#endif
161
162
      /* Find reg, the nearest integer to z */
163
      reg = (int)(z + 0.5);
164
165
#ifdef DEBUGGING_PRINT
166
      printf("reg = %d\n", reg);
167
#endif
168
169
      /* Subtract reg from z, result in w */
170
      w = z - reg;
171
172
#ifdef DEBUGGING_PRINT
173
      printf("w = %30.20e = %s\n", w, double2hex(&w));
174
#endif
175
176
     /* Check for massive cancellation which may happen very close
177
        to multiples of pi/2 */
178
      if (w < 0.0)
179
        ctest = -w;
180
      else
181
        ctest = w;
182
183
      /* If cancellation is not too severe, continue with this method.
184
         Otherwise we fall through to the expensive, accurate method */
185
      if (ctest > cancellationThresh)
186
        {
187
          /* Multiply w by pi/2 */
188
          c = w * piby2;
189
          *r = c;
190
          *region = reg & 3;
191
192
#ifdef DEBUGGING_PRINT
193
          printf("r = %30.20e = %s\n", *r, double2hex(r));
194
#endif
195
          done = 1;
196
        }
197
    }
198
199
  if (!done)
200
    {
201
      /* This method simulates multi-precision floating-point
202
         arithmetic and is accurate for all 1 <= x < infinity */
203
#if 0
204
      const int bitsper = 36;
205
#else
206
#define bitsper 36
207
#endif
208
      unsigned long res[10];
209
      unsigned long u, carry, mask, mant, nextbits;
210
      unsigned long ux;
211
      int first, last, i, rexp, xexp, resexp, ltb, determ, bc;
212
      static const double
213
        piby2 = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
214
      static unsigned long pibits[] =
215
      {
216
        0L,
217
        5215L, 13000023176L, 11362338026L, 67174558139L,
218
        34819822259L, 10612056195L, 67816420731L, 57840157550L,
219
        19558516809L, 50025467026L, 25186875954L, 18152700886L
220
      };
221
222
#ifdef DEBUGGING_PRINT
223
      printf("On entry, x = %25.20e = %s\n", dx, double2hex(&dx));
224
#endif
225
226
227
  GET_BITS_DP64(dx, ux);
228
229
      xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
230
      ux = ((ux & MANTBITS_DP64) | IMPBIT_DP64) >> 29;
231
232
#ifdef DEBUGGING_PRINT
233
      printf("ux = %s\n", d2b(ux, 64, -1));
234
#endif
235
236
      /* Now ux is the mantissa bit pattern of x as a long integer */
237
      mask = (1L << bitsper) - 1;
238
239
      /* Set first and last to the positions of the first
240
         and last chunks of 2/pi that we need */
241
      first = xexp / bitsper;
242
      resexp = xexp - first * bitsper;
243
      /* 120 is the theoretical maximum number of bits (actually
244
         115 for IEEE single precision) that we need to extract
245
         from the middle of 2/pi to compute the reduced argument
246
         accurately enough for our purposes */
247
      last = first + 120 / bitsper;
248
249
#ifdef DEBUGGING_PRINT
250
      printf("first = %d, last = %d\n", first, last);
251
#endif
252
253
      /* Do a long multiplication of the bits of 2/pi by the
254
         integer mantissa */
255
#if 0
256
      for (i = last; i >= first; i--)
257
        {
258
          u = pibits[i] * ux + carry;
259
          res[i - first] = u & mask;
260
          carry = u >> bitsper;
261
        }
262
      res[last - first + 1] = 0;
263
#else
264
      /* Unroll the loop. This is only correct because we know
265
         that bitsper is fixed as 36. */
266
      res[4] = 0;
267
      u = pibits[last] * ux;
268
      res[3] = u & mask;
269
      carry = u >> bitsper;
270
      u = pibits[last - 1] * ux + carry;
271
      res[2] = u & mask;
272
      carry = u >> bitsper;
273
      u = pibits[last - 2] * ux + carry;
274
      res[1] = u & mask;
275
      carry = u >> bitsper;
276
      u = pibits[first] * ux + carry;
277
      res[0] = u & mask;
278
#endif
279
280
#ifdef DEBUGGING_PRINT
281
      printf("resexp = %d\n", resexp);
282
      printf("Significant part of x * 2/pi with binary"
283
             " point in correct place:\n");
284
      for (i = 0; i <= last - first; i++)
285
        {
286
          if (i > 0 && i % 5 == 0)
287
            printf("\n ");
288
          if (i == 1)
289
            printf("%s ", d2b(res[i], bitsper, resexp));
290
          else
291
            printf("%s ", d2b(res[i], bitsper, -1));
292
        }
293
      printf("\n");
294
#endif
295
296
      /* Reconstruct the result */
297
      ltb = (int)((((res[0] << bitsper) | res[1])
298
                   >> (bitsper - 1 - resexp)) & 7);
299
300
      /* determ says whether the fractional part is >= 0.5 */
301
      determ = ltb & 1;
302
303
#ifdef DEBUGGING_PRINT
304
      printf("ltb = %d (last two bits before binary point"
305
             " and first bit after)\n", ltb);
306
      printf("determ = %d (1 means need to negate because the fractional\n"
307
             "            part of x * 2/pi is greater than 0.5)\n", determ);
308
#endif
309
310
      i = 1;
311
      if (determ)
312
        {
313
          /* The mantissa is >= 0.5. We want to subtract it
314
             from 1.0 by negating all the bits */
315
          *region = ((ltb >> 1) + 1) & 3;
316
          mant = ~(res[1]) & ((1L << (bitsper - resexp)) - 1);
317
          while (mant < 0x0000000000010000)
318
            {
319
              i++;
320
              mant = (mant << bitsper) | (~(res[i]) & mask);
321
            }
322
          nextbits = (~(res[i+1]) & mask);
323
        }
324
      else
325
        {
326
          *region = (ltb >> 1);
327
          mant = res[1] & ((1L << (bitsper - resexp)) - 1);
328
          while (mant < 0x0000000000010000)
329
            {
330
              i++;
331
              mant = (mant << bitsper) | res[i];
332
            }
333
          nextbits = res[i+1];
334
        }
335
336
#ifdef DEBUGGING_PRINT
337
      printf("First bits of mant = %s\n", d2b(mant, bitsper, -1));
338
#endif
339
340
      /* Normalize the mantissa. The shift value 6 here, determined by
341
         trial and error, seems to give optimal speed. */
342
      bc = 0;
343
      while (mant < 0x0000400000000000)
344
        {
345
          bc += 6;
346
          mant <<= 6;
347
        }
348
      while (mant < 0x0010000000000000)
349
        {
350
          bc++;
351
          mant <<= 1;
352
        }
353
      mant |= nextbits >> (bitsper - bc);
354
355
      rexp = 52 + resexp - bc - i * bitsper;
356
357
#ifdef DEBUGGING_PRINT
358
      printf("Normalised mantissa = 0x%016lx\n", mant);
359
      printf("Exponent to be inserted on mantissa = rexp = %d\n", rexp);
360
#endif
361
362
      /* Put the result exponent rexp onto the mantissa pattern */
363
      u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64;
364
      ux = (mant & MANTBITS_DP64) | u;
365
      if (determ)
366
        /* If we negated the mantissa we negate x too */
367
        ux |= SIGNBIT_DP64;
368
      PUT_BITS_DP64(ux, dx);
369
370
#ifdef DEBUGGING_PRINT
371
      printf("(x*2/pi) = %25.20e = %s\n", dx, double2hex(&dx));
372
#endif
373
374
      /* x is a double precision version of the fractional part of
375
         x * 2 / pi. Multiply x by pi/2 in double precision
376
         to get the reduced argument r. */
377
      *r = dx * piby2;
378
379
#ifdef DEBUGGING_PRINT
380
      printf(" r = frac(x*2/pi) * pi/2:\n");
381
      printf(" r = %25.20e = %s\n", *r, double2hex(r));
382
      printf("region = (number of pi/2 subtracted from x) mod 4 = %d\n",
383
             *region);
384
#endif
385
    }
386
}
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_remainderf.c.x86_64-new-libm (+188 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_NAN_WITH_FLAGS
13
#define USE_SCALEDOUBLE_1
14
#define USE_GET_FPSW_INLINE
15
#define USE_SET_FPSW_INLINE
16
#include "libm_inlines_amd.h"
17
#undef USE_NAN_WITH_FLAGS
18
#undef USE_SCALEDOUBLE_1
19
#undef USE_GET_FPSW_INLINE
20
#undef USE_SET_FPSW_INLINE
21
22
#if defined(COMPILING_FMOD)
23
float __fmodf(float x, float y)
24
#else
25
float __remainderf(float x, float y)
26
#endif
27
{
28
  double dx, dy, scale, w, t;
29
  int i, ntimes, xexp, yexp;
30
  unsigned long ux, uy, ax, ay;
31
32
  unsigned int sw;
33
34
  dx = x;
35
  dy = y;
36
37
  GET_BITS_DP64(dx, ux);
38
  GET_BITS_DP64(dy, uy);
39
  ax = ux & ~SIGNBIT_DP64;
40
  ay = uy & ~SIGNBIT_DP64;
41
  xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
42
  yexp = ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
43
44
  if (xexp < 1 || xexp > BIASEDEMAX_DP64 ||
45
      yexp < 1 || yexp > BIASEDEMAX_DP64)
46
    {
47
      /* x or y is zero, NaN or infinity (neither x nor y can be
48
         denormalized because we promoted from float to double) */
49
      if (xexp > BIASEDEMAX_DP64)
50
        {
51
          /* x is NaN or infinity */
52
          if (ux & MANTBITS_DP64)
53
            /* x is NaN */
54
            return dx + dx; /* Raise invalid if it is a signalling NaN */
55
          else
56
            /* x is infinity; result is NaN */
57
            return nan_with_flags(AMD_F_INVALID);
58
        }
59
      else if (yexp > BIASEDEMAX_DP64)
60
        {
61
          /* y is NaN or infinity */
62
          if (uy & MANTBITS_DP64)
63
            /* y is NaN */
64
            return dy + dy; /* Raise invalid if it is a signalling NaN */
65
          else
66
            /* y is infinity; result is x */
67
            return dx;
68
        }
69
      else if (xexp < 1)
70
        {
71
          /* x must be zero (cannot be denormalized) */
72
          if (yexp < 1)
73
            /* y must be zero (cannot be denormalized) */
74
            return nan_with_flags(AMD_F_INVALID);
75
          else
76
            return dx;
77
        }
78
      else
79
        /* y must be zero */
80
        return nan_with_flags(AMD_F_INVALID);
81
    }
82
  else if (ax == ay)
83
    {
84
      /* abs(x) == abs(y); return zero with the sign of x */
85
      PUT_BITS_DP64(ux & SIGNBIT_DP64, dx);
86
      return dx;
87
    }
88
89
  /* Set dx = abs(x), dy = abs(y) */
90
  PUT_BITS_DP64(ax, dx);
91
  PUT_BITS_DP64(ay, dy);
92
93
  if (ax < ay)
94
    {
95
      /* abs(x) < abs(y) */
96
#if !defined(COMPILING_FMOD)
97
      if (dx > 0.5*dy)
98
        dx -= dy;
99
#endif
100
      return x < 0.0? -dx : dx;
101
    }
102
103
  /* Save the current floating-point status word. We need
104
     to do this because the remainder function is always
105
     exact for finite arguments, but our algorithm causes
106
     the inexact flag to be raised. We therefore need to
107
     restore the entry status before exiting. */
108
  sw = get_fpsw_inline();
109
110
  /* Set ntimes to the number of times we need to do a
111
     partial remainder. If the exponent of x is an exact multiple
112
     of 24 larger than the exponent of y, and the mantissa of x is
113
     less than the mantissa of y, ntimes will be one too large
114
     but it doesn't matter - it just means that we'll go round
115
     the loop below one extra time. */
116
  if (xexp <= yexp)
117
    {
118
      ntimes = 0;
119
      w = dy;
120
      scale = 1.0;
121
    }
122
  else
123
    {
124
      ntimes = (xexp - yexp) / 24;
125
126
      /* Set w = y * 2^(24*ntimes) */
127
      PUT_BITS_DP64((unsigned long)(ntimes * 24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
128
                    scale);
129
      w = scale * dy;
130
      /* Set scale = 2^(-24) */
131
      PUT_BITS_DP64((unsigned long)(-24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
132
                    scale);
133
    }
134
135
  /* Each time round the loop we compute a partial remainder.
136
     This is done by subtracting a large multiple of w
137
     from x each time, where w is a scaled up version of y.
138
     The subtraction can be performed exactly when performed
139
     in double precision, and the result at each stage can
140
     fit exactly in a single precision number. */
141
  for (i = 0; i < ntimes; i++)
142
    {
143
      /* t is the integer multiple of w that we will subtract.
144
         We use a truncated value for t. */
145
      t = (double)((int)(dx / w));
146
      dx -= w * t;
147
      /* Scale w down by 2^(-24) for the next iteration */
148
      w *= scale;
149
    }
150
151
  /* One more time */
152
#if defined(COMPILING_FMOD)
153
  t = (double)((int)(dx / w));
154
  dx -= w * t;
155
#else
156
 {
157
  unsigned int todd;
158
  /* Variable todd says whether the integer t is odd or not */
159
  t = (double)((int)(dx / w));
160
  todd = ((int)(dx / w)) & 1;
161
  dx -= w * t;
162
163
  /* At this point, dx lies in the range [0,dy) */
164
  /* For the remainder function, we need to adjust dx
165
     so that it lies in the range (-y/2, y/2] by carefully
166
     subtracting w (== dy == y) if necessary. */
167
  if (dx > 0.5 * w || ((dx == 0.5 * w) && todd))
168
    dx -= w;
169
 }
170
#endif
171
172
  /* **** N.B. for some reason this breaks the 32 bit version
173
     of remainder when compiling with optimization. */
174
  /* Restore the entry status flags */
175
  set_fpsw_inline(sw);
176
177
  /* Set the result sign according to input argument x */
178
  return x < 0.0? -dx : dx;
179
180
}
181
182
/* Publish the function defined in this file under its public name and
   under the __ieee754_ name.  Which pair is emitted follows the same
   COMPILING_FMOD switch that selects the function's name.
   NOTE(review): weak_alias is defined outside this file (presumably
   glibc's weak symbol alias macro) -- confirm. */
#if defined(COMPILING_FMOD)
183
weak_alias (__fmodf, fmodf)
184
weak_alias (__fmodf,  __ieee754_fmodf)
185
#else
186
weak_alias (__remainderf, remainderf)
187
weak_alias (__remainderf,  __ieee754_remainderf)
188
#endif
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_sinh.c.x86_64-new-libm (+335 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SPLITEXP
13
#define USE_SCALEDOUBLE_1
14
#define USE_SCALEDOUBLE_2
15
#define USE_INFINITY_WITH_FLAGS
16
#define USE_VAL_WITH_FLAGS
17
#include "libm_inlines_amd.h"
18
#undef USE_SPLITEXP
19
#undef USE_SCALEDOUBLE_1
20
#undef USE_SCALEDOUBLE_2
21
#undef USE_INFINITY_WITH_FLAGS
22
#undef USE_VAL_WITH_FLAGS
23
24
/* Deal with errno for out-of-range result */
25
#include "libm_errno_amd.h"
26
static inline double retval_errno_erange(double x, int xneg)
27
{
28
  struct exception exc;
29
  exc.arg1 = x;
30
  exc.arg2 = x;
31
  exc.type = OVERFLOW;
32
  exc.name = (char *)"sinh";
33
  if (_LIB_VERSION == _SVID_)
34
    {
35
      if (xneg)
36
        exc.retval = -HUGE;
37
      else
38
        exc.retval = HUGE;
39
    }
40
  else
41
    {
42
      if (xneg)
43
        exc.retval = -infinity_with_flags(AMD_F_OVERFLOW);
44
      else
45
        exc.retval = infinity_with_flags(AMD_F_OVERFLOW);
46
    }
47
  if (_LIB_VERSION == _POSIX_)
48
    __set_errno(ERANGE);
49
  else if (!matherr(&exc))
50
    __set_errno(ERANGE);
51
  return exc.retval;
52
}
53
54
double __sinh(double x)
55
{
56
  /*
    Double-precision hyperbolic sine.

    After dealing with special cases (tiny |x|, NaN and infinity) the
    computation is split into regions as follows, with y = abs(x):

    y >= max_sinh_arg:
    sinh(x) = sign(x)*Inf, produced by retval_errno_erange().

    y >= small_threshold:
    sinh(x) = sign(x)*exp(y)/2 computed using the
    splitexp and scaleDouble functions as for exp_amd().

    y < small_threshold:
    split y into its integer part y0 and the increment dy = y - y0,
    then z = sinh(y0)*cosh(dy) + cosh(y0)*sinh(dy) using the tables
    below and polynomial approximations of sinh(dy) and cosh(dy).
    sinh(x) is then sign(x)*z.

    NOTE(review): this comment previously described the small region as
    p = exp(y) - 1, z = 0.5*(p+(p/(p+1.0))); the code below does not
    use that formula.
  */
70
71
  static const double
72
    max_sinh_arg = 7.10475860073943977113e+02, /* 0x408633ce8fb9f87e */
73
    thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
74
    log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
75
    log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
76
    small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
77
  /* small_threshold is 8*BASEDIGITS_DP64*log10(2): at or above it
     exp(-x) is insignificant compared with exp(x).
     NOTE(review): the tables below are indexed by (int)y for every
     y < small_threshold, so this value must stay below 37;
     BASEDIGITS_DP64 is defined outside this file -- confirm. */
78
79
  /* Lead and tail tabulated values of sinh(i) and cosh(i)
     for i = 0,...,36. The lead part has 26 leading bits. */
81
82
  static const double sinh_lead[   37] = {
83
    0.00000000000000000000e+00,  /* 0x0000000000000000 */
84
    1.17520117759704589844e+00,  /* 0x3ff2cd9fc0000000 */
85
    3.62686038017272949219e+00,  /* 0x400d03cf60000000 */
86
    1.00178747177124023438e+01,  /* 0x40240926e0000000 */
87
    2.72899169921875000000e+01,  /* 0x403b4a3800000000 */
88
    7.42032089233398437500e+01,  /* 0x40528d0160000000 */
89
    2.01713153839111328125e+02,  /* 0x406936d228000000 */
90
    5.48316116333007812500e+02,  /* 0x4081228768000000 */
91
    1.49047882080078125000e+03,  /* 0x409749ea50000000 */
92
    4.05154187011718750000e+03,  /* 0x40afa71570000000 */
93
    1.10132326660156250000e+04,  /* 0x40c5829dc8000000 */
94
    2.99370708007812500000e+04,  /* 0x40dd3c4488000000 */
95
    8.13773945312500000000e+04,  /* 0x40f3de1650000000 */
96
    2.21206695312500000000e+05,  /* 0x410b00b590000000 */
97
    6.01302140625000000000e+05,  /* 0x412259ac48000000 */
98
    1.63450865625000000000e+06,  /* 0x4138f0cca8000000 */
99
    4.44305525000000000000e+06,  /* 0x4150f2ebd0000000 */
100
    1.20774762500000000000e+07,  /* 0x4167093488000000 */
101
    3.28299845000000000000e+07,  /* 0x417f4f2208000000 */
102
    8.92411500000000000000e+07,  /* 0x419546d8f8000000 */
103
    2.42582596000000000000e+08,  /* 0x41aceb0888000000 */
104
    6.59407856000000000000e+08,  /* 0x41c3a6e1f8000000 */
105
    1.79245641600000000000e+09,  /* 0x41dab5adb8000000 */
106
    4.87240166400000000000e+09,  /* 0x41f226af30000000 */
107
    1.32445608960000000000e+10,  /* 0x4208ab7fb0000000 */
108
    3.60024494080000000000e+10,  /* 0x4220c3d390000000 */
109
    9.78648043520000000000e+10,  /* 0x4236c93268000000 */
110
    2.66024116224000000000e+11,  /* 0x424ef822f0000000 */
111
    7.23128516608000000000e+11,  /* 0x42650bba30000000 */
112
    1.96566712320000000000e+12,  /* 0x427c9aae40000000 */
113
    5.34323724288000000000e+12,  /* 0x4293704708000000 */
114
    1.45244246507520000000e+13,  /* 0x42aa6b7658000000 */
115
    3.94814795284480000000e+13,  /* 0x42c1f43fc8000000 */
116
    1.07321789251584000000e+14,  /* 0x42d866f348000000 */
117
    2.91730863685632000000e+14,  /* 0x42f0953e28000000 */
118
    7.93006722514944000000e+14,  /* 0x430689e220000000 */
119
    2.15561576592179200000e+15}; /* 0x431ea215a0000000 */
120
121
  static const double sinh_tail[   37] = {
122
    0.00000000000000000000e+00,  /* 0x0000000000000000 */
123
    1.60467555584448807892e-08,  /* 0x3e513ae6096a0092 */
124
    2.76742892754807136947e-08,  /* 0x3e5db70cfb79a640 */
125
    2.09697499555224576530e-07,  /* 0x3e8c2526b66dc067 */
126
    2.04940252448908240062e-07,  /* 0x3e8b81b18647f380 */
127
    1.65444891522700935932e-06,  /* 0x3ebbc1cdd1e1eb08 */
128
    3.53116789999998198721e-06,  /* 0x3ecd9f201534fb09 */
129
    6.94023870987375490695e-06,  /* 0x3edd1c064a4e9954 */
130
    4.98876893611587449271e-06,  /* 0x3ed4eca65d06ea74 */
131
    3.19656024605152215752e-05,  /* 0x3f00c259bcc0ecc5 */
132
    2.08687768377236501204e-04,  /* 0x3f2b5a6647cf9016 */
133
    4.84668088325403796299e-05,  /* 0x3f09691adefb0870 */
134
    1.17517985422733832468e-03,  /* 0x3f53410fc29cde38 */
135
    6.90830086959560562415e-04,  /* 0x3f46a31a50b6fb3c */
136
    1.45697262451506548420e-03,  /* 0x3f57defc71805c40 */
137
    2.99859023684906737806e-02,  /* 0x3f9eb49fd80e0bab */
138
    1.02538800507941396667e-02,  /* 0x3f84fffc7bcd5920 */
139
    1.26787628407699110022e-01,  /* 0x3fc03a93b6c63435 */
140
    6.86652479544033744752e-02,  /* 0x3fb1940bb255fd1c */
141
    4.81593627621056619148e-01,  /* 0x3fded26e14260b50 */
142
    1.70489513795397629181e+00,  /* 0x3ffb47401fc9f2a2 */
143
    1.12416073482258713767e+01,  /* 0x40267bb3f55634f1 */
144
    7.06579578070110514432e+00,  /* 0x401c435ff8194ddc */
145
    5.91244512999659974639e+01,  /* 0x404d8fee052ba63a */
146
    1.68921736147050694399e+02,  /* 0x40651d7edccde3f6 */
147
    2.60692936262073658327e+02,  /* 0x40704b1644557d1a */
148
    3.62419382134885609048e+02,  /* 0x4076a6b5ca0a9dc4 */
149
    4.07689930834187271103e+03,  /* 0x40afd9cc72249aba */
150
    1.55377375868385224749e+04,  /* 0x40ce58de693edab5 */
151
    2.53720210371943067003e+04,  /* 0x40d8c70158ac6363 */
152
    4.78822310734952334315e+04,  /* 0x40e7614764f43e20 */
153
    1.81871712615542812273e+05,  /* 0x4106337db36fc718 */
154
    5.62892347580489004031e+05,  /* 0x41212d98b1f611e2 */
155
    6.41374032312148716301e+05,  /* 0x412392bc108b37cc */
156
    7.57809544070145115256e+06,  /* 0x415ce87bdc3473dc */
157
    3.64177136406482197344e+06,  /* 0x414bc8d5ae99ad14 */
158
    7.63580561355670914054e+06}; /* 0x415d20d76744835c */
159
160
  static const double cosh_lead[   37] = {
161
    1.00000000000000000000e+00,  /* 0x3ff0000000000000 */
162
    1.54308062791824340820e+00,  /* 0x3ff8b07550000000 */
163
    3.76219564676284790039e+00,  /* 0x400e18fa08000000 */
164
    1.00676617622375488281e+01,  /* 0x402422a490000000 */
165
    2.73082327842712402344e+01,  /* 0x403b4ee858000000 */
166
    7.42099475860595703125e+01,  /* 0x40528d6fc8000000 */
167
    2.01715633392333984375e+02,  /* 0x406936e678000000 */
168
    5.48317031860351562500e+02,  /* 0x4081228948000000 */
169
    1.49047915649414062500e+03,  /* 0x409749eaa8000000 */
170
    4.05154199218750000000e+03,  /* 0x40afa71580000000 */
171
    1.10132329101562500000e+04,  /* 0x40c5829dd0000000 */
172
    2.99370708007812500000e+04,  /* 0x40dd3c4488000000 */
173
    8.13773945312500000000e+04,  /* 0x40f3de1650000000 */
174
    2.21206695312500000000e+05,  /* 0x410b00b590000000 */
175
    6.01302140625000000000e+05,  /* 0x412259ac48000000 */
176
    1.63450865625000000000e+06,  /* 0x4138f0cca8000000 */
177
    4.44305525000000000000e+06,  /* 0x4150f2ebd0000000 */
178
    1.20774762500000000000e+07,  /* 0x4167093488000000 */
179
    3.28299845000000000000e+07,  /* 0x417f4f2208000000 */
180
    8.92411500000000000000e+07,  /* 0x419546d8f8000000 */
181
    2.42582596000000000000e+08,  /* 0x41aceb0888000000 */
182
    6.59407856000000000000e+08,  /* 0x41c3a6e1f8000000 */
183
    1.79245641600000000000e+09,  /* 0x41dab5adb8000000 */
184
    4.87240166400000000000e+09,  /* 0x41f226af30000000 */
185
    1.32445608960000000000e+10,  /* 0x4208ab7fb0000000 */
186
    3.60024494080000000000e+10,  /* 0x4220c3d390000000 */
187
    9.78648043520000000000e+10,  /* 0x4236c93268000000 */
188
    2.66024116224000000000e+11,  /* 0x424ef822f0000000 */
189
    7.23128516608000000000e+11,  /* 0x42650bba30000000 */
190
    1.96566712320000000000e+12,  /* 0x427c9aae40000000 */
191
    5.34323724288000000000e+12,  /* 0x4293704708000000 */
192
    1.45244246507520000000e+13,  /* 0x42aa6b7658000000 */
193
    3.94814795284480000000e+13,  /* 0x42c1f43fc8000000 */
194
    1.07321789251584000000e+14,  /* 0x42d866f348000000 */
195
    2.91730863685632000000e+14,  /* 0x42f0953e28000000 */
196
    7.93006722514944000000e+14,  /* 0x430689e220000000 */
197
    2.15561576592179200000e+15}; /* 0x431ea215a0000000 */
198
199
  static const double cosh_tail[   37] = {
200
    0.00000000000000000000e+00,  /* 0x0000000000000000 */
201
    6.89700037027478056904e-09,  /* 0x3e3d9f5504c2bd28 */
202
    4.43207835591715833630e-08,  /* 0x3e67cb66f0a4c9fd */
203
    2.33540217013828929694e-07,  /* 0x3e8f58617928e588 */
204
    5.17452463948269748331e-08,  /* 0x3e6bc7d000c38d48 */
205
    9.38728274131605919153e-07,  /* 0x3eaf7f9d4e329998 */
206
    2.73012191010840495544e-06,  /* 0x3ec6e6e464885269 */
207
    3.29486051438996307950e-06,  /* 0x3ecba3a8b946c154 */
208
    4.75803746362771416375e-06,  /* 0x3ed3f4e76110d5a4 */
209
    3.33050940471947692369e-05,  /* 0x3f017622515a3e2b */
210
    9.94707313972136215365e-06,  /* 0x3ee4dc4b528af3d0 */
211
    6.51685096227860253398e-05,  /* 0x3f11156278615e10 */
212
    1.18132406658066663359e-03,  /* 0x3f535ad50ed821f5 */
213
    6.93090416366541877541e-04,  /* 0x3f46b61055f2935c */
214
    1.45780415323416845386e-03,  /* 0x3f57e2794a601240 */
215
    2.99862082708111758744e-02,  /* 0x3f9eb4b45f6aadd3 */
216
    1.02539925859688602072e-02,  /* 0x3f85000b967b3698 */
217
    1.26787669807076286421e-01,  /* 0x3fc03a940fadc092 */
218
    6.86652631843830962843e-02,  /* 0x3fb1940bf3bf874c */
219
    4.81593633223853068159e-01,  /* 0x3fded26e1a2a2110 */
220
    1.70489514001513020602e+00,  /* 0x3ffb4740205796d6 */
221
    1.12416073489841270572e+01,  /* 0x40267bb3f55cb85d */
222
    7.06579578098005001152e+00,  /* 0x401c435ff81e18ac */
223
    5.91244513000686140458e+01,  /* 0x404d8fee052bdea4 */
224
    1.68921736147088438429e+02,  /* 0x40651d7edccde926 */
225
    2.60692936262087528121e+02,  /* 0x40704b1644557e0e */
226
    3.62419382134890611269e+02,  /* 0x4076a6b5ca0a9e1c */
227
    4.07689930834187453002e+03,  /* 0x40afd9cc72249abe */
228
    1.55377375868385224749e+04,  /* 0x40ce58de693edab5 */
229
    2.53720210371943103382e+04,  /* 0x40d8c70158ac6364 */
230
    4.78822310734952334315e+04,  /* 0x40e7614764f43e20 */
231
    1.81871712615542812273e+05,  /* 0x4106337db36fc718 */
232
    5.62892347580489004031e+05,  /* 0x41212d98b1f611e2 */
233
    6.41374032312148716301e+05,  /* 0x412392bc108b37cc */
234
    7.57809544070145115256e+06,  /* 0x415ce87bdc3473dc */
235
    3.64177136406482197344e+06,  /* 0x414bc8d5ae99ad14 */
236
    7.63580561355670914054e+06}; /* 0x415d20d76744835c */
237
238
  unsigned long ux, aux, xneg;
239
  double y, z, z1, z2;
240
  int m;
241
242
  /* Special cases, decided on the raw bit pattern: ux holds the bits
     of x and aux the same bits with the sign cleared, i.e. abs(x). */
243
244
  GET_BITS_DP64(x, ux);
245
  aux = ux & ~SIGNBIT_DP64;
246
  if (aux < 0x3e30000000000000) /* |x| < 2^-28: small enough that sinh(x) = x */
247
    {
248
      if (aux == 0)
249
        /* x is +/-0: return it with no inexact */
250
        return x;
251
      else
252
        /* NOTE(review): val_with_flags is defined elsewhere; presumably
           it returns x while raising the inexact flag -- confirm. */
        return val_with_flags(x, AMD_F_INEXACT);
253
    }
254
  else if (aux >= 0x7ff0000000000000) /* |x| is NaN or Inf */
255
    return x + x;
256
257
  /* xneg is nonzero exactly when the sign bit of x was set. */
  xneg = (aux != ux);
258
259
  /* From here on work with y = abs(x); the sign is restored at the end. */
  y = x;
260
  if (xneg) y = -x;
261
262
  if (y >= max_sinh_arg)
263
    /* Return +/-infinity with overflow flag */
264
    return retval_errno_erange(x, xneg);
265
  else if (y >= small_threshold)
266
    {
267
      /* In this range y is large enough so that
         the negative exponential is negligible,
         so sinh(y) is approximated by sign(x)*exp(y)/2. The
         code below is an inlined version of that from
         exp() with two changes (it operates on
         y instead of x, and the division by 2 is
         done by reducing m by 1). */
274
275
      splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
276
               log2_by_32_tail, &m, &z1, &z2);
277
      m -= 1;
278
279
      /* Scale (z1+z2) by 2^m; scaleDouble_1 is used when m lies within
         [EMIN_DP64, EMAX_DP64] and scaleDouble_2 otherwise. */
      if (m >= EMIN_DP64 && m <= EMAX_DP64)
280
        z = scaleDouble_1((z1+z2),m);
281
      else
282
        z = scaleDouble_2((z1+z2),m);
283
    }
284
  else
285
    {
286
      /* In this range we find the integer part y0 of y
         and the increment dy = y - y0. We then compute

         z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)

         where sinh(y0) and cosh(y0) are tabulated above. */
292
293
      int ind;
294
      double dy, dy2, sdy, cdy, sdy1, sdy2;
295
296
      /* ind = y0, the table index; dy lies in [0,1). */
      ind = (int)y;
297
      dy = y - ind;
298
299
      /* sdy is an odd polynomial in dy starting at the dy^3 term, so
         that sinh(dy) is approximated by dy + sdy. */
      dy2 = dy*dy;
300
      sdy = dy*dy2*(0.166666666666666667013899e0 +
301
                    (0.833333333333329931873097e-2 +
302
                     (0.198412698413242405162014e-3 +
303
                      (0.275573191913636406057211e-5 +
304
                       (0.250521176994133472333666e-7 +
305
                        (0.160576793121939886190847e-9 +
306
                         0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
307
308
      /* cdy is an even polynomial in dy starting at the dy^2 term; the
         final sum below uses it as cosh(dy) - 1. */
      cdy = dy2*(0.500000000000000005911074e0 +
309
                 (0.416666666666660876512776e-1 +
310
                  (0.138888888889814854814536e-2 +
311
                   (0.248015872460622433115785e-4 +
312
                    (0.275573350756016588011357e-6 +
313
                     (0.208744349831471353536305e-8 +
314
                      0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
315
316
      /* At this point sinh(dy) is approximated by dy + sdy.
	 Shift some significant bits from dy to sdy: sdy1 is dy with
	 its low 27 bits cleared and sdy2 = sdy + (dy - sdy1) holds the
	 remainder, so sinh(dy) is approximated by sdy1 + sdy2. */
318
319
      GET_BITS_DP64(dy, ux);
320
      ux &= 0xfffffffff8000000;
321
      PUT_BITS_DP64(ux, sdy1);
322
      sdy2 = sdy + (dy - sdy1);
323
324
      /* Expand (sinh_lead+sinh_tail)*(1+cdy) + (cosh_lead+cosh_tail)*(sdy1+sdy2)
	 term by term, adding the tail products first and
	 sinh_lead[ind] last. */
      z = ((((((cosh_tail[ind]*sdy2 + sinh_tail[ind]*cdy) 
325
	       + cosh_tail[ind]*sdy1) + sinh_tail[ind])  
326
	     + cosh_lead[ind]*sdy2) + sinh_lead[ind]*cdy) 
327
	   + cosh_lead[ind]*sdy1) + sinh_lead[ind];
328
    }
329
330
  /* sinh is odd: restore the sign of the original argument. */
  if (xneg) z = - z;
331
  return z;
332
}
333
334
/* Publish __sinh under the public name sinh and under __ieee754_sinh.
   NOTE(review): weak_alias is defined outside this file (presumably
   glibc's weak symbol alias macro) -- confirm. */
weak_alias (__sinh, sinh)
335
weak_alias (__sinh, __ieee754_sinh)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/w_sinhf.c.x86_64-new-libm (+250 lines)
Line 0 Link Here
1
/*
2
(C) 2002 Advanced Micro Devices, Inc. 
3
** YOUR USE OF THIS LIBRARY IS SUBJECT TO THE TERMS 
4
    AND CONDITIONS OF THE GNU LESSER GENERAL PUBLIC 
5
    LICENSE FOUND IN THE "README" FILE THAT IS INCLUDED WITH 
6
    THIS LIBRARY**
7
*/
8
9
#include "libm_amd.h"
10
#include "libm_util_amd.h"
11
12
#define USE_SPLITEXP
13
#define USE_SCALEDOUBLE_1
14
#define USE_SCALEDOUBLE_2
15
#define USE_INFINITY_WITH_FLAGS
16
#include "libm_inlines_amd.h"
17
#undef USE_SPLITEXP
18
#undef USE_SCALEDOUBLE_1
19
#undef USE_SCALEDOUBLE_2
20
#undef USE_INFINITY_WITH_FLAGS
21
22
/* Deal with errno for out-of-range result */
23
#include "libm_errno_amd.h"
24
static inline float retval_errno_erange(float x, int xneg)
25
{
26
  struct exception exc;
27
  exc.arg1 = (double)x;
28
  exc.arg2 = (double)x;
29
  exc.type = OVERFLOW;
30
  exc.name = (char *)"sinh";
31
  if (_LIB_VERSION == _SVID_)
32
    {
33
      if (xneg)
34
        exc.retval = -HUGE;
35
      else
36
        exc.retval = HUGE;
37
    }
38
  else
39
    {
40
      if (xneg)
41
        exc.retval = -infinity_with_flags(AMD_F_OVERFLOW);
42
      else
43
        exc.retval = infinity_with_flags(AMD_F_OVERFLOW);
44
    }
45
  if (_LIB_VERSION == _POSIX_)
46
    __set_errno(ERANGE);
47
  else if (!matherr(&exc))
48
    __set_errno(ERANGE);
49
  return exc.retval;
50
}
51
52
float __sinhf(float fx)
53
{
54
  /*
55
    After dealing with special cases the computation is split into
56
    regions as follows:
57
58
    abs(x) >= max_sinh_arg:
59
    sinh(x) = sign(x)*Inf
60
61
    abs(x) >= small_threshold:
62
    sinh(x) = sign(x)*exp(abs(x))/2 computed using the
63
    splitexp and scaleDouble functions as for exp_amd().
64
65
    abs(x) < small_threshold:
66
    compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
67
    sinh(x) is then sign(x)*z.                             */
68
69
  static const double
70
    /* The max argument of sinhf, but stored as a double */
71
    max_sinh_arg = 8.94159862922329438106e+01, /* 0x40565a9f84f82e63 */
72
    thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
73
    log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
74
    log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
75
    small_threshold = 8*BASEDIGITS_DP64*0.30102999566398119521373889;
76
  /* (8*BASEDIGITS_DP64*log10of2) ' exp(-x) insignificant c.f. exp(x) */
77
78
  /* Tabulated values of sinh(i) and cosh(i) for i = 0,...,36. */
79
80
  static const double sinh_lead[   37] = {
81
    0.00000000000000000000e+00,  /* 0x0000000000000000 */
82
    1.17520119364380137839e+00,  /* 0x3ff2cd9fc44eb982 */
83
    3.62686040784701857476e+00,  /* 0x400d03cf63b6e19f */
84
    1.00178749274099008204e+01,  /* 0x40240926e70949ad */
85
    2.72899171971277496596e+01,  /* 0x403b4a3803703630 */
86
    7.42032105777887522891e+01,  /* 0x40528d0166f07374 */
87
    2.01713157370279219549e+02,  /* 0x406936d22f67c805 */
88
    5.48316123273246489589e+02,  /* 0x408122876ba380c9 */
89
    1.49047882578955000099e+03,  /* 0x409749ea514eca65 */
90
    4.05154190208278987484e+03,  /* 0x40afa7157430966f */
91
    1.10132328747033916443e+04,  /* 0x40c5829dced69991 */
92
    2.99370708492480553105e+04,  /* 0x40dd3c4488cb48d6 */
93
    8.13773957064298447222e+04,  /* 0x40f3de1654d043f0 */
94
    2.21206696003330085659e+05,  /* 0x410b00b5916a31a5 */
95
    6.01302142081972560845e+05,  /* 0x412259ac48bef7e3 */
96
    1.63450868623590236530e+06,  /* 0x4138f0ccafad27f6 */
97
    4.44305526025387924165e+06,  /* 0x4150f2ebd0a7ffe3 */
98
    1.20774763767876271158e+07,  /* 0x416709348c0ea4ed */
99
    3.28299845686652474105e+07,  /* 0x417f4f22091940bb */
100
    8.92411504815936237574e+07,  /* 0x419546d8f9ed26e1 */
101
    2.42582597704895108938e+08,  /* 0x41aceb088b68e803 */
102
    6.59407867241607308388e+08,  /* 0x41c3a6e1fd9eecfd */
103
    1.79245642306579566002e+09,  /* 0x41dab5adb9c435ff */
104
    4.87240172312445068359e+09,  /* 0x41f226af33b1fdc0 */
105
    1.32445610649217357635e+10,  /* 0x4208ab7fb5475fb7 */
106
    3.60024496686929321289e+10,  /* 0x4220c3d3920962c8 */
107
    9.78648047144193725586e+10,  /* 0x4236c932696a6b5c */
108
    2.66024120300899291992e+11,  /* 0x424ef822f7f6731c */
109
    7.23128532145737548828e+11,  /* 0x42650bba3796379a */
110
    1.96566714857202099609e+12,  /* 0x427c9aae4631c056 */
111
    5.34323729076223046875e+12,  /* 0x429370470aec28ec */
112
    1.45244248326237109375e+13,  /* 0x42aa6b765d8cdf6c */
113
    3.94814800913403437500e+13,  /* 0x42c1f43fcc4b662c */
114
    1.07321789892958031250e+14,  /* 0x42d866f34a725782 */
115
    2.91730871263727437500e+14,  /* 0x42f0953e2f3a1ef7 */
116
    7.93006726156715250000e+14,  /* 0x430689e221bc8d5a */
117
    2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
118
119
  static const double cosh_lead[   37] = {
120
    1.00000000000000000000e+00,  /* 0x3ff0000000000000 */
121
    1.54308063481524371241e+00,  /* 0x3ff8b07551d9f550 */
122
    3.76219569108363138810e+00,  /* 0x400e18fa0df2d9bc */
123
    1.00676619957777653269e+01,  /* 0x402422a497d6185e */
124
    2.73082328360164865444e+01,  /* 0x403b4ee858de3e80 */
125
    7.42099485247878334349e+01,  /* 0x40528d6fcbeff3a9 */
126
    2.01715636122455890700e+02,  /* 0x406936e67db9b919 */
127
    5.48317035155212010977e+02,  /* 0x4081228949ba3a8b */
128
    1.49047916125217807348e+03,  /* 0x409749eaa93f4e76 */
129
    4.05154202549259389343e+03,  /* 0x40afa715845d8894 */
130
    1.10132329201033226127e+04,  /* 0x40c5829dd053712d */
131
    2.99370708659497577173e+04,  /* 0x40dd3c4489115627 */
132
    8.13773957125740562333e+04,  /* 0x40f3de1654d6b543 */
133
    2.21206696005590405548e+05,  /* 0x410b00b5916b6105 */
134
    6.01302142082804115489e+05,  /* 0x412259ac48bf13ca */
135
    1.63450868623620807193e+06,  /* 0x4138f0ccafad2d17 */
136
    4.44305526025399193168e+06,  /* 0x4150f2ebd0a8005c */
137
    1.20774763767876680940e+07,  /* 0x416709348c0ea503 */
138
    3.28299845686652623117e+07,  /* 0x417f4f22091940bf */
139
    8.92411504815936237574e+07,  /* 0x419546d8f9ed26e1 */
140
    2.42582597704895138741e+08,  /* 0x41aceb088b68e804 */
141
    6.59407867241607308388e+08,  /* 0x41c3a6e1fd9eecfd */
142
    1.79245642306579566002e+09,  /* 0x41dab5adb9c435ff */
143
    4.87240172312445068359e+09,  /* 0x41f226af33b1fdc0 */
144
    1.32445610649217357635e+10,  /* 0x4208ab7fb5475fb7 */
145
    3.60024496686929321289e+10,  /* 0x4220c3d3920962c8 */
146
    9.78648047144193725586e+10,  /* 0x4236c932696a6b5c */
147
    2.66024120300899291992e+11,  /* 0x424ef822f7f6731c */
148
    7.23128532145737548828e+11,  /* 0x42650bba3796379a */
149
    1.96566714857202099609e+12,  /* 0x427c9aae4631c056 */
150
    5.34323729076223046875e+12,  /* 0x429370470aec28ec */
151
    1.45244248326237109375e+13,  /* 0x42aa6b765d8cdf6c */
152
    3.94814800913403437500e+13,  /* 0x42c1f43fcc4b662c */
153
    1.07321789892958031250e+14,  /* 0x42d866f34a725782 */
154
    2.91730871263727437500e+14,  /* 0x42f0953e2f3a1ef7 */
155
    7.93006726156715250000e+14,  /* 0x430689e221bc8d5a */
156
    2.15561577355759750000e+15}; /* 0x431ea215a1d20d76 */
157
158
  unsigned long ux, aux, xneg;
159
  double x = fx, y, z, z1, z2;
160
  int m;
161
162
  /* Special cases */
163
164
  GET_BITS_DP64(x, ux);
165
  aux = ux & ~SIGNBIT_DP64;
166
  if (aux < 0x3f10000000000000) /* |x| small enough that sinh(x) = x */
167
    {
168
      if (aux == 0) return x; /* with no inexact */
169
      if (LAMBDA_DP64 + x  > 1.0) return x; /* with inexact */
170
    }
171
  else if (aux >= 0x7ff0000000000000) /* |x| is NaN or Inf */
172
    return x + x;
173
174
  xneg = (aux != ux);
175
176
  y = x;
177
  if (xneg) y = -x;
178
179
  if (y >= max_sinh_arg)
180
    {
181
      /* Return infinity with overflow flag. */
182
#if 0
183
      /* This way handles non-POSIX behaviour but weirdly causes
184
         sinhf to run half as fast for all arguments on Hammer */
185
      return retval_errno_erange(fx, xneg);
186
#else
187
      /* This handles POSIX behaviour */
188
      __set_errno(ERANGE);
189
        z = infinity_with_flags(AMD_F_OVERFLOW);
190
#endif
191
    }
192
  else if (y >= small_threshold)
193
    {
194
      /* In this range y is large enough so that
195
         the negative exponential is negligible,
196
         so sinh(y) is approximated by sign(x)*exp(y)/2. The
197
         code below is an inlined version of that from
198
         exp() with two changes (it operates on
199
         y instead of x, and the division by 2 is
200
         done by reducing m by 1). */
201
202
      splitexp(y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
203
               log2_by_32_tail, &m, &z1, &z2);
204
      m -= 1;
205
      /* scaleDouble_1 is always safe because the argument x was
206
         float, rather than double */
207
      z = scaleDouble_1((z1+z2),m);
208
    }
209
  else
210
    {
211
      /* In this range we find the integer part y0 of y 
212
         and the increment dy = y - y0. We then compute
213
 
214
         z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
215
216
         where sinh(y0) and cosh(y0) are tabulated above. */
217
218
      int ind;
219
      double dy, dy2, sdy, cdy;
220
221
      ind = (int)y;
222
      dy = y - ind;
223
224
      dy2 = dy*dy;
225
226
      sdy = dy + dy*dy2*(0.166666666666666667013899e0 +
227
			 (0.833333333333329931873097e-2 +
228
			  (0.198412698413242405162014e-3 +
229
			   (0.275573191913636406057211e-5 +
230
			    (0.250521176994133472333666e-7 +
231
			     (0.160576793121939886190847e-9 +
232
			      0.7746188980094184251527126e-12*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
233
234
      cdy = 1 + dy2*(0.500000000000000005911074e0 +
235
		     (0.416666666666660876512776e-1 +
236
		      (0.138888888889814854814536e-2 +
237
		       (0.248015872460622433115785e-4 +
238
			(0.275573350756016588011357e-6 +
239
			 (0.208744349831471353536305e-8 +
240
			  0.1163921388172173692062032e-10*dy2)*dy2)*dy2)*dy2)*dy2)*dy2);
241
242
      z = sinh_lead[ind]*cdy + cosh_lead[ind]*sdy;
243
    }
244
245
  if (xneg) z = - z;
246
  return z;
247
}
248
249
weak_alias (__sinhf, sinhf)
250
weak_alias (__sinhf, __ieee754_sinhf)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_copysign.S.x86_64-new-libm (-52 lines)
Lines 1-52 Link Here
1
/* copy sign, double version.
2
   Copyright (C) 2002 Free Software Foundation, Inc.
3
   This file is part of the GNU C Library.
4
   Contributed by Andreas Jaeger <aj@suse.de>, 2002.
5
6
   The GNU C Library is free software; you can redistribute it and/or
7
   modify it under the terms of the GNU Lesser General Public
8
   License as published by the Free Software Foundation; either
9
   version 2.1 of the License, or (at your option) any later version.
10
11
   The GNU C Library is distributed in the hope that it will be useful,
12
   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
   Lesser General Public License for more details.
15
16
   You should have received a copy of the GNU Lesser General Public
17
   License along with the GNU C Library; if not, write to the Free
18
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19
   02111-1307 USA.  */
20
21
#include <machine/asm.h>
22
23
#ifdef __ELF__
	.section .rodata
#else
	.text
#endif

/* Two 16-byte bit masks used as memory operands of andpd in
   __copysign.  signmask is aligned with ALIGNARG(4) (presumably 16
   bytes -- confirm against the macro definition); othermask starts
   exactly 16 bytes after it.  Each symbol carries its own type and
   size annotation. */
	.align ALIGNARG(4)
	ASM_TYPE_DIRECTIVE(signmask,@object)
signmask:
	/* Low quadword: only the top bit (the sign bit of a double). */
	.byte 0, 0, 0, 0, 0, 0, 0, 0x80
	.byte 0, 0, 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(signmask)
	ASM_TYPE_DIRECTIVE(othermask,@object)
othermask:
	/* Low quadword: every bit except the top one. */
	.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f
	.byte 0, 0, 0, 0, 0, 0, 0, 0
	ASM_SIZE_DIRECTIVE(othermask)
38
39
/* MO(sym) spells the memory operand for a data symbol: the bare
   symbol in non-PIC builds, sym(%rip) when PIC is defined. */
#ifndef PIC
#define MO(op) op
#else
#define MO(op) op##(%rip)
#endif

/* copy sign, double version (presumably
   double __copysign (double x, double y) -- confirm against the header).
   In:    %xmm0 = x, %xmm1 = y
   Out:   %xmm0 = x with its sign bit replaced by the sign bit of y
   Clobb: %xmm1 */
ENTRY(__copysign)
	andpd MO(signmask),%xmm1	/* keep only the sign bit of y */
	andpd MO(othermask),%xmm0	/* drop the sign bit of x */
	orpd %xmm1,%xmm0		/* combine magnitude and sign */
	ret
END (__copysign)

weak_alias (__copysign, copysign)
(-)glibc-2.3.3/sysdeps/x86_64/fpu/s_copysignf.S.x86_64-new-libm (-49 lines)
Lines 1-49 Link Here
1
/* copy sign, float version.
2
   Copyright (C) 2002 Free Software Foundation, Inc.
3
   This file is part of the GNU C Library.
4
   Contributed by Andreas Jaeger <aj@suse.de>, 2002.
5
6
   The GNU C Library is free software; you can redistribute it and/or
7
   modify it under the terms of the GNU Lesser General Public
8
   License as published by the Free Software Foundation; either
9
   version 2.1 of the License, or (at your option) any later version.
10
11
   The GNU C Library is distributed in the hope that it will be useful,
12
   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
   Lesser General Public License for more details.
15
16
   You should have received a copy of the GNU Lesser General Public
17
   License along with the GNU C Library; if not, write to the Free
18
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19
   02111-1307 USA.  */
20
21
#include <machine/asm.h>
22
23
#ifndef __ELF__
	.text
#else
	.section .rodata
#endif

/* 32-bit mask with every bit set except the top one; loaded by
   __copysignf with movss. */
	.align ALIGNARG(4)
	ASM_TYPE_DIRECTIVE(mask,@object)
mask:
	.long 0x7fffffff	/* little-endian bytes: ff ff ff 7f */
	ASM_SIZE_DIRECTIVE(mask)
34
35
/* MO(sym) spells the memory operand for a data symbol: the bare
   symbol in non-PIC builds, sym(%rip) when PIC is defined. */
#ifndef PIC
#define MO(op) op
#else
#define MO(op) op##(%rip)
#endif

/* copy sign, float version (presumably
   float __copysignf (float x, float y) -- confirm against the header).
   In:    %xmm0 = x, %xmm1 = y
   Out:   %xmm0 = x with its sign bit replaced by the sign bit of y
   Clobb: %xmm3 */
ENTRY(__copysignf)
	movss	MO(mask),%xmm3		/* xmm3 = 0x7fffffff */
	andps	%xmm3,%xmm0		/* drop the sign bit of x */
	/* andnps overwrites xmm3, so it must come after the andps
	   that still needs the mask. */
	andnps	%xmm1,%xmm3		/* xmm3 = ~mask & y: sign bit of y */
	orps	%xmm3,%xmm0		/* combine magnitude and sign */
	ret
END (__copysignf)

weak_alias (__copysignf, copysignf)

Return to bug 100289