Diffstat (limited to 'libc/sysdeps')
-rw-r--r--  libc/sysdeps/generic/ldconfig.h | 39
-rw-r--r--  libc/sysdeps/generic/math_private.h | 6
-rw-r--r--  libc/sysdeps/generic/stackguard-macros.h | 8
-rw-r--r--  libc/sysdeps/i386/ffs.c | 1
-rw-r--r--  libc/sysdeps/i386/i686/ffs.c | 1
-rw-r--r--  libc/sysdeps/i386/stackguard-macros.h | 8
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/MathLib.h | 30
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/dbl2mpn.c | 8
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/dla.h | 86
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/dosincos.c | 254
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_acosh.c | 55
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_atan2.c | 27
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_cosh.c | 72
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_exp.c | 406
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_exp2.c | 8
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_fmod.c | 225
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_gamma_r.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_hypot.c | 157
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_ilogb.c | 53
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_j0.c | 394
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_j1.c | 366
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_jn.c | 478
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_log.c | 23
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_log10.c | 16
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_log2.c | 28
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_pow.c | 573
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_rem_pio2.c | 215
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_remainder.c | 180
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_sinh.c | 69
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_sqrt.c | 95
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/halfulp.c | 129
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c | 315
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mpa-arch.h | 20
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mpa.c | 29
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mpatan.c | 1
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mpn2dbl.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mptan.c | 1
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mydefs.h | 9
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_asinh.c | 54
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_atan.c | 21
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_cbrt.c | 19
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_ceil.c | 91
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_copysign.c | 13
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_erf.c | 523
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_expm1.c | 227
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_fabs.c | 11
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_finite.c | 9
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_floor.c | 91
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_frexp.c | 37
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_isinf.c | 10
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_isinf_ns.c | 6
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_isnan.c | 15
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_llround.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_log1p.c | 171
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_logb.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_lrint.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_modf.c | 84
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_nearbyint.c | 63
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_remquo.c | 16
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_rint.c | 46
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_scalbln.c | 61
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_scalbn.c | 61
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_sin.c | 445
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_sincos.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_tan.c | 10
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_tanh.c | 68
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/sincos32.c | 520
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/slowexp.c | 13
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/slowpow.c | 14
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128/printf_fphex.c | 14
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_acosl.c | 38
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_asinl.c | 28
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c | 14
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c | 6
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_coshl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c | 6
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_expl.c | 34
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c | 125
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c | 7
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c | 88
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c | 16
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_jnl.c | 32
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_log10l.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_log2l.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_logl.c | 32
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_powl.c | 136
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c | 7
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c | 18
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c | 51
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/ieee754.h | 98
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/k_cosl.c | 10
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c | 12
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/k_sinl.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/k_tanl.c | 34
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c | 46
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h | 206
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c | 72
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c | 42
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c | 5
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_atanl.c | 15
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_cosl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_erfl.c | 55
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c | 12
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c | 10
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_finitel.c | 12
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c | 5
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c | 19
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c | 15
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c | 17
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c | 14
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c | 7
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c | 11
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_logbl.c | 5
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_modfl.c | 27
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c | 62
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c | 57
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c | 13
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_remquol.c | 16
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c | 21
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c | 21
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_sinl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_tanl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/strtold_l.c | 10
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c | 26
-rw-r--r--  libc/sysdeps/ieee754/ldbl-96/printf_fphex.c | 8
-rw-r--r--  libc/sysdeps/mach/hurd/fork.c | 15
-rw-r--r--  libc/sysdeps/mach/hurd/i386/tls.h | 1
-rw-r--r--  libc/sysdeps/posix/dirstream.h | 9
-rw-r--r--  libc/sysdeps/posix/getaddrinfo.c | 103
-rw-r--r--  libc/sysdeps/powerpc/bits/fenv.h | 71
-rw-r--r--  libc/sysdeps/powerpc/bits/mathinline.h | 26
-rw-r--r--  libc/sysdeps/powerpc/ffs.c | 1
-rw-r--r--  libc/sysdeps/powerpc/fpu/e_sqrt.c | 2
-rw-r--r--  libc/sysdeps/powerpc/fpu/e_sqrtf.c | 2
-rw-r--r--  libc/sysdeps/powerpc/fpu/fclrexcpt.c | 4
-rw-r--r--  libc/sysdeps/powerpc/fpu/fedisblxcpt.c | 10
-rw-r--r--  libc/sysdeps/powerpc/fpu/feenablxcpt.c | 10
-rw-r--r--  libc/sysdeps/powerpc/fpu/fegetexcept.c | 10
-rw-r--r--  libc/sysdeps/powerpc/fpu/feholdexcpt.c | 5
-rw-r--r--  libc/sysdeps/powerpc/fpu/fenv_libc.h | 2
-rw-r--r--  libc/sysdeps/powerpc/fpu/fesetenv.c | 4
-rw-r--r--  libc/sysdeps/powerpc/fpu/feupdateenv.c | 6
-rw-r--r--  libc/sysdeps/powerpc/fpu/fgetexcptflg.c | 2
-rw-r--r--  libc/sysdeps/powerpc/fpu/fraiseexcpt.c | 12
-rw-r--r--  libc/sysdeps/powerpc/fpu/fsetexcptflg.c | 8
-rw-r--r--  libc/sysdeps/powerpc/fpu/ftestexcept.c | 2
-rw-r--r--  libc/sysdeps/powerpc/fpu/libm-test-ulps | 68
-rw-r--r--  libc/sysdeps/powerpc/fpu/s_float_bitwise.h | 54
-rw-r--r--  libc/sysdeps/powerpc/fpu/s_llround.c | 33
-rw-r--r--  libc/sysdeps/powerpc/fpu/s_llroundf.c | 33
-rw-r--r--  libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c | 87
-rw-r--r--  libc/sysdeps/powerpc/fpu_control.h | 69
-rw-r--r--  libc/sysdeps/powerpc/jmpbuf-offsets.h | 6
-rw-r--r--  libc/sysdeps/powerpc/longjmp.c | 6
-rw-r--r--  libc/sysdeps/powerpc/nofpu/Makefile | 30
-rw-r--r--  libc/sysdeps/powerpc/nofpu/Subdirs | 1
-rw-r--r--  libc/sysdeps/powerpc/nofpu/Versions | 20
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fclrexcpt.c | 37
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fedisblxcpt.c | 32
-rw-r--r--  libc/sysdeps/powerpc/nofpu/feenablxcpt.c | 32
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fegetenv.c | 48
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fegetexcept.c | 27
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fegetround.c | 28
-rw-r--r--  libc/sysdeps/powerpc/nofpu/feholdexcpt.c | 43
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fenv_const.c | 34
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fenv_libc.h | 28
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fesetenv.c | 42
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fesetround.c | 33
-rw-r--r--  libc/sysdeps/powerpc/nofpu/feupdateenv.c | 51
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fgetexcptflg.c | 37
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fraiseexcpt.c | 41
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fsetexcptflg.c | 38
-rw-r--r--  libc/sysdeps/powerpc/nofpu/ftestexcept.c | 28
-rw-r--r--  libc/sysdeps/powerpc/nofpu/get-rounding-mode.h | 35
-rw-r--r--  libc/sysdeps/powerpc/nofpu/libm-test-ulps | 7297
-rw-r--r--  libc/sysdeps/powerpc/nofpu/shlib-versions | 1
-rw-r--r--  libc/sysdeps/powerpc/nofpu/sim-full.c | 46
-rw-r--r--  libc/sysdeps/powerpc/nofpu/soft-supp.h | 48
-rw-r--r--  libc/sysdeps/powerpc/novmx-longjmp.c | 12
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/memcmp.S | 128
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/memcpy.S | 130
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/memset.S | 152
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/strcmp.S | 134
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/strcpy.S | 107
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/strlen.S | 75
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/strncmp.S | 128
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/440/Implies | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/464/Implies | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/476/Implies | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/476/memset.S | 152
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/Makefile | 10
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/__longjmp-common.S | 42
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/dl-machine.c | 14
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile | 9
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c | 53
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c (renamed from libc/sysdeps/x86_64/multiarch/strend-sse4.S) | 53
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c | 54
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c | 54
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c | 47
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c | 36
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c | 29
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c | 57
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c | 41
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h | 96
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c | 49
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c | 35
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c | 47
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c | 42
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c | 41
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c | 42
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c | 41
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c | 41
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c | 28
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c | 40
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c | 55
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c | 31
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S | 27
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c | 53
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S | 8
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S | 3
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S | 73
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/mcount.c | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S | 20
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h | 21
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/memcmp.S | 1064
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/memcpy.S | 58
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/memset.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/strncmp.S | 56
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/memcpy.S | 81
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/memset.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S | 5
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S | 7
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c | 8
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/memchr.S | 185
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/memcmp.S | 1626
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/memcpy.S | 24
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S | 28
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/memrchr.S | 187
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/memset.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S | 17
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/strchr.S | 51
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S | 27
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/strlen.S | 17
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/strncmp.S | 55
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/strnlen.S | 106
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/setjmp-common.S | 41
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/setjmp.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/stackguard-macros.h | 10
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/stpcpy.S | 18
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/strchr.S | 71
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/strcmp.S | 42
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/strcpy.S | 18
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/strlen.S | 69
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/strncmp.S | 56
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/__longjmp-common.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/dl-machine.h | 28
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S | 6
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/memcpy.S | 27
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/memset.S | 10
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/memcmp.S | 1041
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/memcpy.S | 61
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/memset.S | 30
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/strncmp.S | 63
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power6/memcpy.S | 329
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power6/memset.S | 10
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S | 6
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S | 5
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/memchr.S | 188
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/memcmp.S | 1613
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/memcpy.S | 704
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S | 26
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/memrchr.S | 192
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/memset.S | 6
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S | 17
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/strchr.S | 43
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S | 19
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/strlen.S | 17
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/strncmp.S | 61
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/strnlen.S | 111
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/setjmp-common.S | 72
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/setjmp.S | 12
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/stackguard-macros.h | 10
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/stpcpy.S | 18
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/strchr.S | 75
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/strcmp.S | 65
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/strcpy.S | 27
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/strlen.S | 75
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/strncmp.S | 63
-rw-r--r--  libc/sysdeps/powerpc/preconfigure | 11
-rw-r--r--  libc/sysdeps/powerpc/soft-fp/sfp-machine.h | 115
-rw-r--r--  libc/sysdeps/powerpc/sysdep.h | 15
-rw-r--r--  libc/sysdeps/s390/ffs.c | 1
-rw-r--r--  libc/sysdeps/s390/s390-32/stackguard-macros.h | 11
-rw-r--r--  libc/sysdeps/s390/s390-64/stackguard-macros.h | 14
-rw-r--r--  libc/sysdeps/sh/stackguard-macros.h | 6
-rw-r--r--  libc/sysdeps/sparc/fpu/libm-test-ulps | 125
-rw-r--r--  libc/sysdeps/sparc/sparc32/soft-fp/q_neg.c | 7
-rw-r--r--  libc/sysdeps/sparc/sparc32/stackguard-macros.h | 3
-rw-r--r--  libc/sysdeps/sparc/sparc64/stackguard-macros.h | 3
-rw-r--r--  libc/sysdeps/unix/Makefile | 6
-rw-r--r--  libc/sysdeps/unix/make-syscalls.sh | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h | 4
-rw-r--r--  libc/sysdeps/unix/sysv/linux/configure | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/configure.in | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/bits/sigstack.h | 54
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500/nofpu/Implies | 3
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S | 12
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S | 8
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/Implies | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/context-e500.h | 144
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/getcontext.S | 60
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data | 67
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/ld.abilist | 17
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libBrokenLocale.abilist | 3
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libanl.abilist | 6
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist | 2523
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libcrypt.abilist | 9
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libdl.abilist | 18
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libm.abilist | 519
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libnsl.abilist | 127
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libpthread.abilist | 277
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libresolv.abilist | 104
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/librt.abilist | 52
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libthread_db.abilist | 48
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libutil.abilist | 8
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/localplt.data | 41
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/setcontext.S | 60
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/swapcontext.S | 60
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S | 12
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S | 24
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S | 6
-rw-r--r--  libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h | 1
-rw-r--r--  libc/sysdeps/unix/sysv/linux/tst-fanotify.c | 17
-rw-r--r--  libc/sysdeps/x86_64/ffs.c | 1
-rw-r--r--  libc/sysdeps/x86_64/fpu/printf_fphex.c | 9
-rw-r--r--  libc/sysdeps/x86_64/memset.S | 10
-rw-r--r--  libc/sysdeps/x86_64/multiarch/Makefile | 4
-rw-r--r--  libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7
-rw-r--r--  libc/sysdeps/x86_64/multiarch/memcmp-sse4.S | 84
-rw-r--r--  libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S | 126
-rw-r--r--  libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S | 10
-rw-r--r--  libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S | 86
-rw-r--r--  libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S | 254
-rw-r--r--  libc/sysdeps/x86_64/multiarch/strchr.S | 127
-rw-r--r--  libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S | 9
-rw-r--r--  libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S | 555
-rw-r--r--  libc/sysdeps/x86_64/multiarch/strrchr.S | 288
-rw-r--r--  libc/sysdeps/x86_64/stackguard-macros.h | 5
-rw-r--r--  libc/sysdeps/x86_64/strchr.S | 188
-rw-r--r--  libc/sysdeps/x86_64/strchrnul.S | 41
-rw-r--r--  libc/sysdeps/x86_64/strrchr.S | 238
385 files changed, 26368 insertions, 9199 deletions
diff --git a/libc/sysdeps/generic/ldconfig.h b/libc/sysdeps/generic/ldconfig.h
index d96089d49..037f95995 100644
--- a/libc/sysdeps/generic/ldconfig.h
+++ b/libc/sysdeps/generic/ldconfig.h
@@ -21,24 +21,27 @@
#include <stdint.h>
-#define FLAG_ANY -1
-#define FLAG_TYPE_MASK 0x00ff
-#define FLAG_LIBC4 0x0000
-#define FLAG_ELF 0x0001
-#define FLAG_ELF_LIBC5 0x0002
-#define FLAG_ELF_LIBC6 0x0003
-#define FLAG_REQUIRED_MASK 0xff00
-#define FLAG_SPARC_LIB64 0x0100
-#define FLAG_IA64_LIB64 0x0200
-#define FLAG_X8664_LIB64 0x0300
-#define FLAG_S390_LIB64 0x0400
-#define FLAG_POWERPC_LIB64 0x0500
-#define FLAG_MIPS64_LIBN32 0x0600
-#define FLAG_MIPS64_LIBN64 0x0700
-#define FLAG_X8664_LIBX32 0x0800
-#define FLAG_ARM_LIBHF 0x0900
-#define FLAG_AARCH64_LIB64 0x0a00
-#define FLAG_ARM_LIBSF 0x0b00
+#define FLAG_ANY -1
+#define FLAG_TYPE_MASK 0x00ff
+#define FLAG_LIBC4 0x0000
+#define FLAG_ELF 0x0001
+#define FLAG_ELF_LIBC5 0x0002
+#define FLAG_ELF_LIBC6 0x0003
+#define FLAG_REQUIRED_MASK 0xff00
+#define FLAG_SPARC_LIB64 0x0100
+#define FLAG_IA64_LIB64 0x0200
+#define FLAG_X8664_LIB64 0x0300
+#define FLAG_S390_LIB64 0x0400
+#define FLAG_POWERPC_LIB64 0x0500
+#define FLAG_MIPS64_LIBN32 0x0600
+#define FLAG_MIPS64_LIBN64 0x0700
+#define FLAG_X8664_LIBX32 0x0800
+#define FLAG_ARM_LIBHF 0x0900
+#define FLAG_AARCH64_LIB64 0x0a00
+#define FLAG_ARM_LIBSF 0x0b00
+#define FLAG_MIPS_LIB32_NAN2008 0x0c00
+#define FLAG_MIPS64_LIBN32_NAN2008 0x0d00
+#define FLAG_MIPS64_LIBN64_NAN2008 0x0e00
/* Name of auxiliary cache. */
#define _PATH_LDCONFIG_AUX_CACHE "/var/cache/ldconfig/aux-cache"
diff --git a/libc/sysdeps/generic/math_private.h b/libc/sysdeps/generic/math_private.h
index c0fc03d38..9b881a3e7 100644
--- a/libc/sysdeps/generic/math_private.h
+++ b/libc/sysdeps/generic/math_private.h
@@ -356,10 +356,8 @@ extern void __dubcos (double __x, double __dx, double __v[]);
extern double __halfulp (double __x, double __y);
extern double __sin32 (double __x, double __res, double __res1);
extern double __cos32 (double __x, double __res, double __res1);
-extern double __mpsin (double __x, double __dx);
-extern double __mpcos (double __x, double __dx);
-extern double __mpsin1 (double __x);
-extern double __mpcos1 (double __x);
+extern double __mpsin (double __x, double __dx, bool __range_reduce);
+extern double __mpcos (double __x, double __dx, bool __range_reduce);
extern double __slowexp (double __x);
extern double __slowpow (double __x, double __y, double __z);
extern void __docos (double __x, double __dx, double __v[]);
diff --git a/libc/sysdeps/generic/stackguard-macros.h b/libc/sysdeps/generic/stackguard-macros.h
index ababf65d3..b4a6b23ff 100644
--- a/libc/sysdeps/generic/stackguard-macros.h
+++ b/libc/sysdeps/generic/stackguard-macros.h
@@ -2,3 +2,11 @@
extern uintptr_t __stack_chk_guard;
#define STACK_CHK_GUARD __stack_chk_guard
+
+#ifdef PTRGUARD_LOCAL
+extern uintptr_t __pointer_chk_guard_local;
+# define POINTER_CHK_GUARD __pointer_chk_guard_local
+#else
+extern uintptr_t __pointer_chk_guard;
+# define POINTER_CHK_GUARD __pointer_chk_guard
+#endif
diff --git a/libc/sysdeps/i386/ffs.c b/libc/sysdeps/i386/ffs.c
index 47496dcf7..77d2e60b7 100644
--- a/libc/sysdeps/i386/ffs.c
+++ b/libc/sysdeps/i386/ffs.c
@@ -41,6 +41,7 @@ __ffs (x)
return cnt;
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
#undef ffsl
weak_alias (__ffs, ffsl)
diff --git a/libc/sysdeps/i386/i686/ffs.c b/libc/sysdeps/i386/i686/ffs.c
index 5c97050e8..cde1c9956 100644
--- a/libc/sysdeps/i386/i686/ffs.c
+++ b/libc/sysdeps/i386/i686/ffs.c
@@ -39,6 +39,7 @@ __ffs (x)
return cnt + 1;
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
#undef ffsl
weak_alias (__ffs, ffsl)
diff --git a/libc/sysdeps/i386/stackguard-macros.h b/libc/sysdeps/i386/stackguard-macros.h
index 8c31e197e..039762927 100644
--- a/libc/sysdeps/i386/stackguard-macros.h
+++ b/libc/sysdeps/i386/stackguard-macros.h
@@ -2,3 +2,11 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("movl %%gs:0x14, %0" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("movl %%gs:%c1, %0" : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard))); \
+ x; \
+ })
diff --git a/libc/sysdeps/ieee754/dbl-64/MathLib.h b/libc/sysdeps/ieee754/dbl-64/MathLib.h
index 23f71980b..cf56606e3 100644
--- a/libc/sysdeps/ieee754/dbl-64/MathLib.h
+++ b/libc/sysdeps/ieee754/dbl-64/MathLib.h
@@ -33,14 +33,14 @@
/* It returns the original status of these modes. */
/* See further explanations of usage in DPChange.h */
/********************************************************************/
-unsigned short Init_Lib(void);
+unsigned short Init_Lib (void);
/********************************************************************/
/* Function that changes the precision and rounding modes to the */
/* specified by the argument received. See further explanations in */
/* DPChange.h */
/********************************************************************/
-void Exit_Lib(unsigned short);
+void Exit_Lib (unsigned short);
/* The asin() function calculates the arc sine of its argument. */
@@ -48,7 +48,7 @@ void Exit_Lib(unsigned short);
/* (between -PI/2 and PI/2). */
/* If the argument is greater than 1 or less than -1 it returns */
/* a NaN. */
-double uasin(double );
+double uasin (double);
/* The acos() function calculates the arc cosine of its argument. */
@@ -56,47 +56,45 @@ double uasin(double );
/* (between -PI/2 and PI/2). */
/* If the argument is greater than 1 or less than -1 it returns */
/* a NaN. */
-double uacos(double );
+double uacos (double);
/* The atan() function calculates the arctanget of its argument. */
/* The function returns the arc tangent in radians */
/* (between -PI/2 and PI/2). */
-double uatan(double );
+double uatan (double);
/* The uatan2() function calculates the arc tangent of the two arguments x */
/* and y (x is the right argument and y is the left one).The signs of both */
/* arguments are used to determine the quadrant of the result. */
/* The function returns the result in radians, which is between -PI and PI */
-double uatan2(double ,double );
+double uatan2 (double, double);
/* Compute log(x). The base of log is e (natural logarithm) */
-double ulog(double );
+double ulog (double);
/* Compute e raised to the power of argument x. */
-double uexp(double );
+double uexp (double);
/* Compute sin(x). The argument x is assumed to be given in radians.*/
-double usin(double );
+double usin (double);
/* Compute cos(x). The argument x is assumed to be given in radians.*/
-double ucos(double );
+double ucos (double);
/* Compute tan(x). The argument x is assumed to be given in radians.*/
-double utan(double );
+double utan (double);
/* Compute the square root of non-negative argument x. */
/* If x is negative the returned value is NaN. */
-double usqrt(double );
+double usqrt (double);
/* Compute x raised to the power of y, where x is the left argument */
/* and y is the right argument. The function returns a NaN if x<0. */
/* If x equals zero it returns -inf */
-double upow(double , double );
+double upow (double, double);
/* Computing x mod y, where x is the left argument and y is the */
/* right one. */
-double uremainder(double , double );
-
-
+double uremainder (double, double);
#endif
diff --git a/libc/sysdeps/ieee754/dbl-64/dbl2mpn.c b/libc/sysdeps/ieee754/dbl-64/dbl2mpn.c
index f2294de5a..087d64377 100644
--- a/libc/sysdeps/ieee754/dbl-64/dbl2mpn.c
+++ b/libc/sysdeps/ieee754/dbl-64/dbl2mpn.c
@@ -40,14 +40,14 @@ __mpn_extract_double (mp_ptr res_ptr, mp_size_t size,
#if BITS_PER_MP_LIMB == 32
res_ptr[0] = u.ieee.mantissa1; /* Low-order 32 bits of fraction. */
res_ptr[1] = u.ieee.mantissa0; /* High-order 20 bits. */
- #define N 2
+ # define N 2
#elif BITS_PER_MP_LIMB == 64
/* Hopefully the compiler will combine the two bitfield extracts
and this composition into just the original quadword extract. */
res_ptr[0] = ((mp_limb_t) u.ieee.mantissa0 << 32) | u.ieee.mantissa1;
- #define N 1
+ # define N 1
#else
- #error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
+ # error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
#endif
/* The format does not fill the last limb. There are some zeros. */
#define NUM_LEADING_ZEROS (BITS_PER_MP_LIMB \
@@ -73,7 +73,7 @@ __mpn_extract_double (mp_ptr res_ptr, mp_size_t size,
#if N == 2
res_ptr[N - 1] = res_ptr[1] << cnt
| (N - 1)
- * (res_ptr[0] >> (BITS_PER_MP_LIMB - cnt));
+ * (res_ptr[0] >> (BITS_PER_MP_LIMB - cnt));
res_ptr[0] <<= cnt;
#else
res_ptr[N - 1] <<= cnt;
diff --git a/libc/sysdeps/ieee754/dbl-64/dla.h b/libc/sysdeps/ieee754/dbl-64/dla.h
index b09f00ec7..6666982bf 100644
--- a/libc/sysdeps/ieee754/dbl-64/dla.h
+++ b/libc/sysdeps/ieee754/dbl-64/dla.h
@@ -61,13 +61,13 @@
/* storage variables of type double. */
#ifdef DLA_FMS
-# define EMULV(x,y,z,zz,p,hx,tx,hy,ty) \
- z=x*y; zz=DLA_FMS(x,y,z);
+# define EMULV(x, y, z, zz, p, hx, tx, hy, ty) \
+ z = x * y; zz = DLA_FMS (x, y, z);
#else
-# define EMULV(x,y,z,zz,p,hx,tx,hy,ty) \
- p=CN*(x); hx=((x)-p)+p; tx=(x)-hx; \
- p=CN*(y); hy=((y)-p)+p; ty=(y)-hy; \
- z=(x)*(y); zz=(((hx*hy-z)+hx*ty)+tx*hy)+tx*ty;
+# define EMULV(x, y, z, zz, p, hx, tx, hy, ty) \
+ p = CN * (x); hx = ((x) - p) + p; tx = (x) - hx; \
+ p = CN * (y); hy = ((y) - p) + p; ty = (y) - hy; \
+ z = (x) * (y); zz = (((hx * hy - z) + hx * ty) + tx * hy) + tx * ty;
#endif
@@ -93,11 +93,11 @@
/* are assumed to be double-length numbers. r,s are temporary */
/* storage variables of type double. */
-#define ADD2(x,xx,y,yy,z,zz,r,s) \
- r=(x)+(y); s=(ABS(x)>ABS(y)) ? \
- (((((x)-r)+(y))+(yy))+(xx)) : \
- (((((y)-r)+(x))+(xx))+(yy)); \
- z=r+s; zz=(r-z)+s;
+#define ADD2(x, xx, y, yy, z, zz, r, s) \
+ r = (x) + (y); s = (ABS (x) > ABS (y)) ? \
+ (((((x) - r) + (y)) + (yy)) + (xx)) : \
+ (((((y) - r) + (x)) + (xx)) + (yy)); \
+ z = r + s; zz = (r - z) + s;
/* Double-length subtraction, Dekker. The macro produces a double-length */
@@ -106,11 +106,11 @@
/* are assumed to be double-length numbers. r,s are temporary */
/* storage variables of type double. */
-#define SUB2(x,xx,y,yy,z,zz,r,s) \
- r=(x)-(y); s=(ABS(x)>ABS(y)) ? \
- (((((x)-r)-(y))-(yy))+(xx)) : \
- ((((x)-((y)+r))+(xx))-(yy)); \
- z=r+s; zz=(r-z)+s;
+#define SUB2(x, xx, y, yy, z, zz, r, s) \
+ r = (x) - (y); s = (ABS (x) > ABS (y)) ? \
+ (((((x) - r) - (y)) - (yy)) + (xx)) : \
+ ((((x) - ((y) + r)) + (xx)) - (yy)); \
+ z = r + s; zz = (r - z) + s;
/* Double-length multiplication, Dekker. The macro produces a double-length */
@@ -119,9 +119,9 @@
/* are assumed to be double-length numbers. p,hx,tx,hy,ty,q,c,cc are */
/* temporary storage variables of type double. */
-#define MUL2(x,xx,y,yy,z,zz,p,hx,tx,hy,ty,q,c,cc) \
- MUL12(x,y,c,cc,p,hx,tx,hy,ty,q) \
- cc=((x)*(yy)+(xx)*(y))+cc; z=c+cc; zz=(c-z)+cc;
+#define MUL2(x, xx, y, yy, z, zz, p, hx, tx, hy, ty, q, c, cc) \
+ MUL12 (x, y, c, cc, p, hx, tx, hy, ty, q) \
+ cc = ((x) * (yy) + (xx) * (y)) + cc; z = c + cc; zz = (c - z) + cc;
/* Double-length division, Dekker. The macro produces a double-length */
@@ -142,18 +142,18 @@
/* are assumed to be double-length numbers. r,rr,s,ss,u,uu,w */
/* are temporary storage variables of type double. */
-#define ADD2A(x,xx,y,yy,z,zz,r,rr,s,ss,u,uu,w) \
- r=(x)+(y); \
- if (ABS(x)>ABS(y)) { rr=((x)-r)+(y); s=(rr+(yy))+(xx); } \
- else { rr=((y)-r)+(x); s=(rr+(xx))+(yy); } \
- if (rr!=0.0) { \
- z=r+s; zz=(r-z)+s; } \
- else { \
- ss=(ABS(xx)>ABS(yy)) ? (((xx)-s)+(yy)) : (((yy)-s)+(xx)); \
- u=r+s; \
- uu=(ABS(r)>ABS(s)) ? ((r-u)+s) : ((s-u)+r) ; \
- w=uu+ss; z=u+w; \
- zz=(ABS(u)>ABS(w)) ? ((u-z)+w) : ((w-z)+u) ; }
+#define ADD2A(x, xx, y, yy, z, zz, r, rr, s, ss, u, uu, w) \
+ r = (x) + (y); \
+ if (ABS (x) > ABS (y)) { rr = ((x) - r) + (y); s = (rr + (yy)) + (xx); } \
+ else { rr = ((y) - r) + (x); s = (rr + (xx)) + (yy); } \
+ if (rr != 0.0) { \
+ z = r + s; zz = (r - z) + s; } \
+ else { \
+ ss = (ABS (xx) > ABS (yy)) ? (((xx) - s) + (yy)) : (((yy) - s) + (xx));\
+ u = r + s; \
+ uu = (ABS (r) > ABS (s)) ? ((r - u) + s) : ((s - u) + r); \
+ w = uu + ss; z = u + w; \
+ zz = (ABS (u) > ABS (w)) ? ((u - z) + w) : ((w - z) + u); }
/* Double-length subtraction, slower but more accurate than SUB2. */
@@ -163,15 +163,15 @@
/* are assumed to be double-length numbers. r,rr,s,ss,u,uu,w */
/* are temporary storage variables of type double. */
-#define SUB2A(x,xx,y,yy,z,zz,r,rr,s,ss,u,uu,w) \
- r=(x)-(y); \
- if (ABS(x)>ABS(y)) { rr=((x)-r)-(y); s=(rr-(yy))+(xx); } \
- else { rr=(x)-((y)+r); s=(rr+(xx))-(yy); } \
- if (rr!=0.0) { \
- z=r+s; zz=(r-z)+s; } \
- else { \
- ss=(ABS(xx)>ABS(yy)) ? (((xx)-s)-(yy)) : ((xx)-((yy)+s)); \
- u=r+s; \
- uu=(ABS(r)>ABS(s)) ? ((r-u)+s) : ((s-u)+r) ; \
- w=uu+ss; z=u+w; \
- zz=(ABS(u)>ABS(w)) ? ((u-z)+w) : ((w-z)+u) ; }
+#define SUB2A(x, xx, y, yy, z, zz, r, rr, s, ss, u, uu, w) \
+ r = (x) - (y); \
+ if (ABS (x) > ABS (y)) { rr = ((x) - r) - (y); s = (rr - (yy)) + (xx); } \
+ else { rr = (x) - ((y) + r); s = (rr + (xx)) - (yy); } \
+ if (rr != 0.0) { \
+ z = r + s; zz = (r - z) + s; } \
+ else { \
+ ss = (ABS (xx) > ABS (yy)) ? (((xx) - s) - (yy)) : ((xx) - ((yy) + s)); \
+ u = r + s; \
+ uu = (ABS (r) > ABS (s)) ? ((r - u) + s) : ((s - u) + r); \
+ w = uu + ss; z = u + w; \
+ zz = (ABS (u) > ABS (w)) ? ((u - z) + w) : ((w - z) + u); }
diff --git a/libc/sysdeps/ieee754/dbl-64/dosincos.c b/libc/sysdeps/ieee754/dbl-64/dosincos.c
index 00726285a..e1c8836b7 100644
--- a/libc/sysdeps/ieee754/dbl-64/dosincos.c
+++ b/libc/sysdeps/ieee754/dbl-64/dosincos.c
@@ -57,49 +57,52 @@ extern const union
void
SECTION
-__dubsin(double x, double dx, double v[]) {
- double r,s,c,cc,d,dd,d2,dd2,e,ee,
- sn,ssn,cs,ccs,ds,dss,dc,dcc;
+__dubsin (double x, double dx, double v[])
+{
+ double r, s, c, cc, d, dd, d2, dd2, e, ee,
+ sn, ssn, cs, ccs, ds, dss, dc, dcc;
#ifndef DLA_FMS
- double p,hx,tx,hy,ty,q;
+ double p, hx, tx, hy, ty, q;
#endif
mynumber u;
int4 k;
- u.x=x+big.x;
- k = u.i[LOW_HALF]<<2;
- x=x-(u.x-big.x);
- d=x+dx;
- dd=(x-d)+dx;
- /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
- MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
- sn=__sincostab.x[k]; /* */
- ssn=__sincostab.x[k+1]; /* sin(Xi) and cos(Xi) */
- cs=__sincostab.x[k+2]; /* */
- ccs=__sincostab.x[k+3]; /* */
- MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* Taylor */
- ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* series */
- ADD2(ds,dss,s3.x,ss3.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* for sin */
- MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,d,dd,ds,dss,r,s); /* ds=sin(t) */
-
- MUL2(d2,dd2,c8.x,cc8.x,dc,dcc,p,hx,tx,hy,ty,q,c,cc); ;/* Taylor */
- ADD2(dc,dcc,c6.x,cc6.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); /* series */
- ADD2(dc,dcc,c4.x,cc4.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); /* for cos */
- ADD2(dc,dcc,c2.x,cc2.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); /* dc=cos(t) */
-
- MUL2(cs,ccs,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc);
- MUL2(dc,dcc,sn,ssn,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- SUB2(e,ee,dc,dcc,e,ee,r,s);
- ADD2(e,ee,sn,ssn,e,ee,r,s); /* e+ee=sin(x+dx) */
-
- v[0]=e;
- v[1]=ee;
+ u.x = x + big.x;
+ k = u.i[LOW_HALF] << 2;
+ x = x - (u.x - big.x);
+ d = x + dx;
+ dd = (x - d) + dx;
+ /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
+ MUL2 (d, dd, d, dd, d2, dd2, p, hx, tx, hy, ty, q, c, cc);
+ sn = __sincostab.x[k]; /* */
+ ssn = __sincostab.x[k + 1]; /* sin(Xi) and cos(Xi) */
+ cs = __sincostab.x[k + 2]; /* */
+ ccs = __sincostab.x[k + 3]; /* */
+ /* Taylor series for sin ds=sin(t) */
+ MUL2 (d2, dd2, s7.x, ss7.x, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s5.x, ss5.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s3.x, ss3.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (d, dd, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, d, dd, ds, dss, r, s);
+
+ /* Taylor series for cos dc=cos(t) */
+ MUL2 (d2, dd2, c8.x, cc8.x, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c6.x, cc6.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c4.x, cc4.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c2.x, cc2.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+
+ MUL2 (cs, ccs, ds, dss, e, ee, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (dc, dcc, sn, ssn, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ SUB2 (e, ee, dc, dcc, e, ee, r, s);
+ ADD2 (e, ee, sn, ssn, e, ee, r, s); /* e+ee=sin(x+dx) */
+
+ v[0] = e;
+ v[1] = ee;
}
/**********************************************************************/
/* Routine receive Double-Length number (x+dx) and computes cos(x+dx) */
@@ -110,64 +113,65 @@ __dubsin(double x, double dx, double v[]) {
void
SECTION
-__dubcos(double x, double dx, double v[]) {
- double r,s,c,cc,d,dd,d2,dd2,e,ee,
- sn,ssn,cs,ccs,ds,dss,dc,dcc;
+__dubcos (double x, double dx, double v[])
+{
+ double r, s, c, cc, d, dd, d2, dd2, e, ee,
+ sn, ssn, cs, ccs, ds, dss, dc, dcc;
#ifndef DLA_FMS
- double p,hx,tx,hy,ty,q;
+ double p, hx, tx, hy, ty, q;
#endif
mynumber u;
int4 k;
- u.x=x+big.x;
- k = u.i[LOW_HALF]<<2;
- x=x-(u.x-big.x);
- d=x+dx;
- dd=(x-d)+dx; /* cos(x+dx)=cos(Xi+t)=cos(Xi)cos(t) - sin(Xi)sin(t) */
- MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
- sn=__sincostab.x[k]; /* */
- ssn=__sincostab.x[k+1]; /* sin(Xi) and cos(Xi) */
- cs=__sincostab.x[k+2]; /* */
- ccs=__sincostab.x[k+3]; /* */
- MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,s3.x,ss3.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,d,dd,ds,dss,r,s);
-
- MUL2(d2,dd2,c8.x,cc8.x,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c6.x,cc6.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c4.x,cc4.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c2.x,cc2.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
-
- MUL2(cs,ccs,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc);
- MUL2(dc,dcc,sn,ssn,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
-
- MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,s3.x,ss3.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,d,dd,ds,dss,r,s);
- MUL2(d2,dd2,c8.x,cc8.x,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c6.x,cc6.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c4.x,cc4.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c2.x,cc2.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- MUL2(sn,ssn,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc);
- MUL2(dc,dcc,cs,ccs,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(e,ee,dc,dcc,e,ee,r,s);
- SUB2(cs,ccs,e,ee,e,ee,r,s);
-
- v[0]=e;
- v[1]=ee;
+ u.x = x + big.x;
+ k = u.i[LOW_HALF] << 2;
+ x = x - (u.x - big.x);
+ d = x + dx;
+ dd = (x - d) + dx; /* cos(x+dx)=cos(Xi+t)=cos(Xi)cos(t) - sin(Xi)sin(t) */
+ MUL2 (d, dd, d, dd, d2, dd2, p, hx, tx, hy, ty, q, c, cc);
+ sn = __sincostab.x[k]; /* */
+ ssn = __sincostab.x[k + 1]; /* sin(Xi) and cos(Xi) */
+ cs = __sincostab.x[k + 2]; /* */
+ ccs = __sincostab.x[k + 3]; /* */
+ MUL2 (d2, dd2, s7.x, ss7.x, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s5.x, ss5.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s3.x, ss3.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (d, dd, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, d, dd, ds, dss, r, s);
+
+ MUL2 (d2, dd2, c8.x, cc8.x, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c6.x, cc6.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c4.x, cc4.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c2.x, cc2.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+
+ MUL2 (cs, ccs, ds, dss, e, ee, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (dc, dcc, sn, ssn, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+
+ MUL2 (d2, dd2, s7.x, ss7.x, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s5.x, ss5.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s3.x, ss3.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (d, dd, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, d, dd, ds, dss, r, s);
+ MUL2 (d2, dd2, c8.x, cc8.x, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c6.x, cc6.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c4.x, cc4.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c2.x, cc2.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (sn, ssn, ds, dss, e, ee, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (dc, dcc, cs, ccs, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (e, ee, dc, dcc, e, ee, r, s);
+ SUB2 (cs, ccs, e, ee, e, ee, r, s);
+
+ v[0] = e;
+ v[1] = ee;
}
/**********************************************************************/
/* Routine receive Double-Length number (x+dx) and computes cos(x+dx) */
@@ -175,29 +179,45 @@ __dubcos(double x, double dx, double v[]) {
/**********************************************************************/
void
SECTION
-__docos(double x, double dx, double v[]) {
- double y,yy,p,w[2];
- if (x>0) {y=x; yy=dx;}
- else {y=-x; yy=-dx;}
- if (y<0.5*hp0.x) /* y< PI/4 */
- {__dubcos(y,yy,w); v[0]=w[0]; v[1]=w[1];}
- else if (y<1.5*hp0.x) { /* y< 3/4 * PI */
- p=hp0.x-y; /* p = PI/2 - y */
- yy=hp1.x-yy;
- y=p+yy;
- yy=(p-y)+yy;
- if (y>0) {__dubsin(y,yy,w); v[0]=w[0]; v[1]=w[1];}
- /* cos(x) = sin ( 90 - x ) */
- else {__dubsin(-y,-yy,w); v[0]=-w[0]; v[1]=-w[1];
- }
- }
- else { /* y>= 3/4 * PI */
- p=2.0*hp0.x-y; /* p = PI- y */
- yy=2.0*hp1.x-yy;
- y=p+yy;
- yy=(p-y)+yy;
- __dubcos(y,yy,w);
- v[0]=-w[0];
- v[1]=-w[1];
- }
+__docos (double x, double dx, double v[])
+{
+ double y, yy, p, w[2];
+ if (x > 0)
+ {
+ y = x; yy = dx;
+ }
+ else
+ {
+ y = -x; yy = -dx;
+ }
+ if (y < 0.5 * hp0.x) /* y< PI/4 */
+ {
+ __dubcos (y, yy, w); v[0] = w[0]; v[1] = w[1];
+ }
+ else if (y < 1.5 * hp0.x) /* y< 3/4 * PI */
+ {
+ p = hp0.x - y; /* p = PI/2 - y */
+ yy = hp1.x - yy;
+ y = p + yy;
+ yy = (p - y) + yy;
+ if (y > 0)
+ {
+ __dubsin (y, yy, w); v[0] = w[0]; v[1] = w[1];
+ }
+ /* cos(x) = sin ( 90 - x ) */
+ else
+ {
+ __dubsin (-y, -yy, w); v[0] = -w[0]; v[1] = -w[1];
+ }
+ }
+ else /* y>= 3/4 * PI */
+ {
+ p = 2.0 * hp0.x - y; /* p = PI- y */
+ yy = 2.0 * hp1.x - yy;
+ y = p + yy;
+ yy = (p - y) + yy;
+ __dubcos (y, yy, w);
+ v[0] = -w[0];
+ v[1] = -w[1];
+ }
}
diff --git a/libc/sysdeps/ieee754/dbl-64/e_acosh.c b/libc/sysdeps/ieee754/dbl-64/e_acosh.c
index b24a6f645..c1f3590f7 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_acosh.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_acosh.c
@@ -28,31 +28,42 @@
#include <math_private.h>
static const double
-one = 1.0,
-ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
+ one = 1.0,
+ ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
double
-__ieee754_acosh(double x)
+__ieee754_acosh (double x)
{
- double t;
- int32_t hx;
- u_int32_t lx;
- EXTRACT_WORDS(hx,lx,x);
- if(hx<0x3ff00000) { /* x < 1 */
- return (x-x)/(x-x);
- } else if(hx >=0x41b00000) { /* x > 2**28 */
- if(hx >=0x7ff00000) { /* x is inf of NaN */
- return x+x;
- } else
- return __ieee754_log(x)+ln2; /* acosh(huge)=log(2x) */
- } else if(((hx-0x3ff00000)|lx)==0) {
- return 0.0; /* acosh(1) = 0 */
- } else if (hx > 0x40000000) { /* 2**28 > x > 2 */
- t=x*x;
- return __ieee754_log(2.0*x-one/(x+__ieee754_sqrt(t-one)));
- } else { /* 1<x<2 */
- t = x-one;
- return __log1p(t+__ieee754_sqrt(2.0*t+t*t));
+ double t;
+ int32_t hx;
+ u_int32_t lx;
+ EXTRACT_WORDS (hx, lx, x);
+ if (hx < 0x3ff00000) /* x < 1 */
+ {
+ return (x - x) / (x - x);
+ }
+ else if (hx >= 0x41b00000) /* x > 2**28 */
+ {
+ if (hx >= 0x7ff00000) /* x is inf of NaN */
+ {
+ return x + x;
}
+ else
+ return __ieee754_log (x) + ln2; /* acosh(huge)=log(2x) */
+ }
+ else if (((hx - 0x3ff00000) | lx) == 0)
+ {
+ return 0.0; /* acosh(1) = 0 */
+ }
+ else if (hx > 0x40000000) /* 2**28 > x > 2 */
+ {
+ t = x * x;
+ return __ieee754_log (2.0 * x - one / (x + __ieee754_sqrt (t - one)));
+ }
+ else /* 1<x<2 */
+ {
+ t = x - one;
+ return __log1p (t + __ieee754_sqrt (2.0 * t + t * t));
+ }
}
strong_alias (__ieee754_acosh, __acosh_finite)
diff --git a/libc/sysdeps/ieee754/dbl-64/e_atan2.c b/libc/sysdeps/ieee754/dbl-64/e_atan2.c
index 4ebe9c01f..e36305cda 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_atan2.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_atan2.c
@@ -42,6 +42,7 @@
#include "uatan.tbl"
#include "atnat2.h"
#include <math_private.h>
+#include <stap-probe.h>
#ifndef SECTION
# define SECTION
@@ -71,14 +72,14 @@ __ieee754_atan2 (double y, double x)
int i, de, ux, dx, uy, dy;
static const int pr[MM] = { 6, 8, 10, 20, 32 };
double ax, ay, u, du, u9, ua, v, vv, dv, t1, t2, t3, t7, t8,
- z, zz, cor, s1, ss1, s2, ss2;
+ z, zz, cor, s1, ss1, s2, ss2;
#ifndef DLA_FMS
double t4, t5, t6;
#endif
number num;
- static const int ep = 59768832, /* 57*16**5 */
- em = -59768832; /* -57*16**5 */
+ static const int ep = 59768832, /* 57*16**5 */
+ em = -59768832; /* -57*16**5 */
/* x=NaN or y=NaN */
num.d = x;
@@ -293,17 +294,17 @@ __ieee754_atan2 (double y, double x)
if (i < 112)
{
if (i < 48)
- u9 = u91.d; /* u < 1/4 */
+ u9 = u91.d; /* u < 1/4 */
else
u9 = u92.d;
- } /* 1/4 <= u < 1/2 */
+ } /* 1/4 <= u < 1/2 */
else
{
if (i < 176)
- u9 = u93.d; /* 1/2 <= u < 3/4 */
+ u9 = u93.d; /* 1/2 <= u < 3/4 */
else
u9 = u94.d;
- } /* 3/4 <= u <= 1 */
+ } /* 3/4 <= u <= 1 */
if ((z = t1 + (zz - u9 * t1)) == t1 + (zz + u9 * t1))
return signArctan2 (y, z);
@@ -311,9 +312,9 @@ __ieee754_atan2 (double y, double x)
EADD (t1, du, v, vv);
s1 = v * (hij[i][11].d
+ v * (hij[i][12].d
- + v * (hij[i][13].d
- + v * (hij[i][14].d
- + v * hij[i][15].d))));
+ + v * (hij[i][13].d
+ + v * (hij[i][14].d
+ + v * hij[i][15].d))));
ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
@@ -597,7 +598,11 @@ atan2Mp (double x, double y, const int pr[])
__mp_dbl (&mpz1, &z1, p);
__mp_dbl (&mpz2, &z2, p);
if (z1 == z2)
- return z1;
+ {
+ LIBC_PROBE (slowatan2, 4, &p, &x, &y, &z1);
+ return z1;
+ }
}
+ LIBC_PROBE (slowatan2_inexact, 4, &p, &x, &y, &z1);
return z1; /*if impossible to do exact computing */
}
diff --git a/libc/sysdeps/ieee754/dbl-64/e_cosh.c b/libc/sysdeps/ieee754/dbl-64/e_cosh.c
index 229d5a2fb..6caf943ed 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_cosh.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_cosh.c
@@ -34,49 +34,55 @@
#include <math.h>
#include <math_private.h>
-static const double one = 1.0, half=0.5, huge = 1.0e300;
+static const double one = 1.0, half = 0.5, huge = 1.0e300;
double
__ieee754_cosh (double x)
{
- double t,w;
- int32_t ix;
- u_int32_t lx;
+ double t, w;
+ int32_t ix;
+ u_int32_t lx;
- /* High word of |x|. */
- GET_HIGH_WORD(ix,x);
- ix &= 0x7fffffff;
+ /* High word of |x|. */
+ GET_HIGH_WORD (ix, x);
+ ix &= 0x7fffffff;
- /* |x| in [0,22] */
- if (ix < 0x40360000) {
- /* |x| in [0,0.5*ln2], return 1+expm1(|x|)^2/(2*exp(|x|)) */
- if(ix<0x3fd62e43) {
- t = __expm1(fabs(x));
- w = one+t;
- if (ix<0x3c800000) return w; /* cosh(tiny) = 1 */
- return one+(t*t)/(w+w);
- }
-
- /* |x| in [0.5*ln2,22], return (exp(|x|)+1/exp(|x|)/2; */
- t = __ieee754_exp(fabs(x));
- return half*t+half/t;
+ /* |x| in [0,22] */
+ if (ix < 0x40360000)
+ {
+ /* |x| in [0,0.5*ln2], return 1+expm1(|x|)^2/(2*exp(|x|)) */
+ if (ix < 0x3fd62e43)
+ {
+ t = __expm1 (fabs (x));
+ w = one + t;
+ if (ix < 0x3c800000)
+ return w; /* cosh(tiny) = 1 */
+ return one + (t * t) / (w + w);
}
- /* |x| in [22, log(maxdouble)] return half*exp(|x|) */
- if (ix < 0x40862e42) return half*__ieee754_exp(fabs(x));
+ /* |x| in [0.5*ln2,22], return (exp(|x|)+1/exp(|x|)/2; */
+ t = __ieee754_exp (fabs (x));
+ return half * t + half / t;
+ }
- /* |x| in [log(maxdouble), overflowthresold] */
- GET_LOW_WORD(lx,x);
- if (ix<0x408633ce || ((ix==0x408633ce)&&(lx<=(u_int32_t)0x8fb9f87d))) {
- w = __ieee754_exp(half*fabs(x));
- t = half*w;
- return t*w;
- }
+ /* |x| in [22, log(maxdouble)] return half*exp(|x|) */
+ if (ix < 0x40862e42)
+ return half * __ieee754_exp (fabs (x));
+
+ /* |x| in [log(maxdouble), overflowthresold] */
+ GET_LOW_WORD (lx, x);
+ if (ix < 0x408633ce || ((ix == 0x408633ce) && (lx <= (u_int32_t) 0x8fb9f87d)))
+ {
+ w = __ieee754_exp (half * fabs (x));
+ t = half * w;
+ return t * w;
+ }
- /* x is INF or NaN */
- if(ix>=0x7ff00000) return x*x;
+ /* x is INF or NaN */
+ if (ix >= 0x7ff00000)
+ return x * x;
- /* |x| > overflowthresold, cosh(x) overflow */
- return huge*huge;
+ /* |x| > overflowthresold, cosh(x) overflow */
+ return huge * huge;
}
strong_alias (__ieee754_cosh, __cosh_finite)
diff --git a/libc/sysdeps/ieee754/dbl-64/e_exp.c b/libc/sysdeps/ieee754/dbl-64/e_exp.c
index 07cc4a91b..df3aa5efa 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_exp.c
@@ -44,221 +44,299 @@
# define SECTION
#endif
-double __slowexp(double);
+double __slowexp (double);
-/***************************************************************************/
-/* An ultimate exp routine. Given an IEEE double machine number x */
-/* it computes the correctly rounded (to nearest) value of e^x */
-/***************************************************************************/
+/* An ultimate exp routine. Given an IEEE double machine number x it computes
+ the correctly rounded (to nearest) value of e^x. */
double
SECTION
-__ieee754_exp(double x) {
+__ieee754_exp (double x)
+{
double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
- mynumber junk1, junk2, binexp = {{0,0}};
- int4 i,j,m,n,ex;
+ mynumber junk1, junk2, binexp = {{0, 0}};
+ int4 i, j, m, n, ex;
double retval;
SET_RESTORE_ROUND (FE_TONEAREST);
junk1.x = x;
m = junk1.i[HIGH_HALF];
- n = m&hugeint;
-
- if (n > smallint && n < bigint) {
-
- y = x*log2e.x + three51.x;
- bexp = y - three51.x; /* multiply the result by 2**bexp */
-
- junk1.x = y;
-
- eps = bexp*ln_two2.x; /* x = bexp*ln(2) + t - eps */
- t = x - bexp*ln_two1.x;
-
- y = t + three33.x;
- base = y - three33.x; /* t rounded to a multiple of 2**-18 */
- junk2.x = y;
- del = (t - base) - eps; /* x = bexp*ln(2) + base + del */
- eps = del + del*del*(p3.x*del + p2.x);
-
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+1023)<<20;
-
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
-
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
-
- rem=(bet + bet*eps)+al*eps;
- res = al + rem;
- cor = (al - res) + rem;
- if (res == (res+cor*err_0)) { retval = res*binexp.x; goto ret; }
- else { retval = __slowexp(x); goto ret; } /*if error is over bound */
- }
+ n = m & hugeint;
+
+ if (n > smallint && n < bigint)
+ {
+ y = x * log2e.x + three51.x;
+ bexp = y - three51.x; /* multiply the result by 2**bexp */
+
+ junk1.x = y;
+
+ eps = bexp * ln_two2.x; /* x = bexp*ln(2) + t - eps */
+ t = x - bexp * ln_two1.x;
+
+ y = t + three33.x;
+ base = y - three33.x; /* t rounded to a multiple of 2**-18 */
+ junk2.x = y;
+ del = (t - base) - eps; /* x = bexp*ln(2) + base + del */
+ eps = del + del * del * (p3.x * del + p2.x);
+
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
+
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
+
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
+
+ rem = (bet + bet * eps) + al * eps;
+ res = al + rem;
+ cor = (al - res) + rem;
+ if (res == (res + cor * err_0))
+ {
+ retval = res * binexp.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ } /*if error is over bound */
+ }
- if (n <= smallint) { retval = 1.0; goto ret; }
+ if (n <= smallint)
+ {
+ retval = 1.0;
+ goto ret;
+ }
- if (n >= badint) {
- if (n > infint) { retval = x+x; goto ret; } /* x is NaN */
- if (n < infint) { retval = (x>0) ? (hhuge*hhuge) : (tiny*tiny); goto ret; }
- /* x is finite, cause either overflow or underflow */
- if (junk1.i[LOW_HALF] != 0) { retval = x+x; goto ret; } /* x is NaN */
- retval = (x>0)?inf.x:zero; /* |x| = inf; return either inf or 0 */
- goto ret;
- }
+ if (n >= badint)
+ {
+ if (n > infint)
+ {
+ retval = x + x;
+ goto ret;
+ } /* x is NaN */
+ if (n < infint)
+ {
+ retval = (x > 0) ? (hhuge * hhuge) : (tiny * tiny);
+ goto ret;
+ }
+ /* x is finite, cause either overflow or underflow */
+ if (junk1.i[LOW_HALF] != 0)
+ {
+ retval = x + x;
+ goto ret;
+ } /* x is NaN */
+ retval = (x > 0) ? inf.x : zero; /* |x| = inf; return either inf or 0 */
+ goto ret;
+ }
- y = x*log2e.x + three51.x;
+ y = x * log2e.x + three51.x;
bexp = y - three51.x;
junk1.x = y;
- eps = bexp*ln_two2.x;
- t = x - bexp*ln_two1.x;
+ eps = bexp * ln_two2.x;
+ t = x - bexp * ln_two1.x;
y = t + three33.x;
base = y - three33.x;
junk2.x = y;
del = (t - base) - eps;
- eps = del + del*del*(p3.x*del + p2.x);
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
- rem=(bet + bet*eps)+al*eps;
+ eps = del + del * del * (p3.x * del + p2.x);
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
+ rem = (bet + bet * eps) + al * eps;
res = al + rem;
cor = (al - res) + rem;
- if (m>>31) {
- ex=junk1.i[LOW_HALF];
- if (res < 1.0) {res+=res; cor+=cor; ex-=1;}
- if (ex >=-1022) {
- binexp.i[HIGH_HALF] = (1023+ex)<<20;
- if (res == (res+cor*err_0)) { retval = res*binexp.x; goto ret; }
- else { retval = __slowexp(x); goto ret; } /*if error is over bound */
+ if (m >> 31)
+ {
+ ex = junk1.i[LOW_HALF];
+ if (res < 1.0)
+ {
+ res += res;
+ cor += cor;
+ ex -= 1;
+ }
+ if (ex >= -1022)
+ {
+ binexp.i[HIGH_HALF] = (1023 + ex) << 20;
+ if (res == (res + cor * err_0))
+ {
+ retval = res * binexp.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ } /*if error is over bound */
+ }
+ ex = -(1022 + ex);
+ binexp.i[HIGH_HALF] = (1023 - ex) << 20;
+ res *= binexp.x;
+ cor *= binexp.x;
+ eps = 1.0000000001 + err_0 * binexp.x;
+ t = 1.0 + res;
+ y = ((1.0 - t) + res) + cor;
+ res = t + y;
+ cor = (t - res) + y;
+ if (res == (res + eps * cor))
+ {
+ binexp.i[HIGH_HALF] = 0x00100000;
+ retval = (res - 1.0) * binexp.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ } /* if error is over bound */
}
- ex = -(1022+ex);
- binexp.i[HIGH_HALF] = (1023-ex)<<20;
- res*=binexp.x;
- cor*=binexp.x;
- eps=1.0000000001+err_0*binexp.x;
- t=1.0+res;
- y = ((1.0-t)+res)+cor;
- res=t+y;
- cor = (t-res)+y;
- if (res == (res + eps*cor))
- { binexp.i[HIGH_HALF] = 0x00100000;
- retval = (res-1.0)*binexp.x;
- goto ret;
+ else
+ {
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
+ if (res == (res + cor * err_0))
+ {
+ retval = res * binexp.x * t256.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ }
}
- else { retval = __slowexp(x); goto ret; } /* if error is over bound */
- }
- else {
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+767)<<20;
- if (res == (res+cor*err_0)) { retval = res*binexp.x*t256.x; goto ret; }
- else { retval = __slowexp(x); goto ret; }
- }
- ret:
+ret:
return retval;
}
#ifndef __ieee754_exp
strong_alias (__ieee754_exp, __exp_finite)
#endif
-/************************************************************************/
-/* Compute e^(x+xx)(Double-Length number) .The routine also receive */
-/* bound of error of previous calculation .If after computing exp */
-/* error bigger than allows routine return non positive number */
-/*else return e^(x + xx) (always positive ) */
-/************************************************************************/
-
+/* Compute e^(x+xx). The routine also receives bound of error of previous
+ calculation. If after computing exp the error exceeds the allowed bounds,
+ the routine returns a non-positive number. Otherwise it returns the
+ computed result, which is always positive. */
double
SECTION
-__exp1(double x, double xx, double error) {
+__exp1 (double x, double xx, double error)
+{
double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
- mynumber junk1, junk2, binexp = {{0,0}};
- int4 i,j,m,n,ex;
+ mynumber junk1, junk2, binexp = {{0, 0}};
+ int4 i, j, m, n, ex;
junk1.x = x;
m = junk1.i[HIGH_HALF];
- n = m&hugeint; /* no sign */
-
- if (n > smallint && n < bigint) {
- y = x*log2e.x + three51.x;
- bexp = y - three51.x; /* multiply the result by 2**bexp */
+ n = m & hugeint; /* no sign */
- junk1.x = y;
+ if (n > smallint && n < bigint)
+ {
+ y = x * log2e.x + three51.x;
+ bexp = y - three51.x; /* multiply the result by 2**bexp */
- eps = bexp*ln_two2.x; /* x = bexp*ln(2) + t - eps */
- t = x - bexp*ln_two1.x;
+ junk1.x = y;
- y = t + three33.x;
- base = y - three33.x; /* t rounded to a multiple of 2**-18 */
- junk2.x = y;
- del = (t - base) + (xx-eps); /* x = bexp*ln(2) + base + del */
- eps = del + del*del*(p3.x*del + p2.x);
+ eps = bexp * ln_two2.x; /* x = bexp*ln(2) + t - eps */
+ t = x - bexp * ln_two1.x;
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+1023)<<20;
+ y = t + three33.x;
+ base = y - three33.x; /* t rounded to a multiple of 2**-18 */
+ junk2.x = y;
+ del = (t - base) + (xx - eps); /* x = bexp*ln(2) + base + del */
+ eps = del + del * del * (p3.x * del + p2.x);
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
- rem=(bet + bet*eps)+al*eps;
- res = al + rem;
- cor = (al - res) + rem;
- if (res == (res+cor*(1.0+error+err_1))) return res*binexp.x;
- else return -10.0;
- }
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
- if (n <= smallint) return 1.0; /* if x->0 e^x=1 */
+ rem = (bet + bet * eps) + al * eps;
+ res = al + rem;
+ cor = (al - res) + rem;
+ if (res == (res + cor * (1.0 + error + err_1)))
+ return res * binexp.x;
+ else
+ return -10.0;
+ }
- if (n >= badint) {
- if (n > infint) return(zero/zero); /* x is NaN, return invalid */
- if (n < infint) return ( (x>0) ? (hhuge*hhuge) : (tiny*tiny) );
- /* x is finite, cause either overflow or underflow */
- if (junk1.i[LOW_HALF] != 0) return (zero/zero); /* x is NaN */
- return ((x>0)?inf.x:zero ); /* |x| = inf; return either inf or 0 */
- }
+ if (n <= smallint)
+ return 1.0; /* if x->0 e^x=1 */
+
+ if (n >= badint)
+ {
+ if (n > infint)
+ return (zero / zero); /* x is NaN, return invalid */
+ if (n < infint)
+ return ((x > 0) ? (hhuge * hhuge) : (tiny * tiny));
+ /* x is finite, cause either overflow or underflow */
+ if (junk1.i[LOW_HALF] != 0)
+ return (zero / zero); /* x is NaN */
+ return ((x > 0) ? inf.x : zero); /* |x| = inf; return either inf or 0 */
+ }
- y = x*log2e.x + three51.x;
+ y = x * log2e.x + three51.x;
bexp = y - three51.x;
junk1.x = y;
- eps = bexp*ln_two2.x;
- t = x - bexp*ln_two1.x;
+ eps = bexp * ln_two2.x;
+ t = x - bexp * ln_two1.x;
y = t + three33.x;
base = y - three33.x;
junk2.x = y;
- del = (t - base) + (xx-eps);
- eps = del + del*del*(p3.x*del + p2.x);
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
- rem=(bet + bet*eps)+al*eps;
+ del = (t - base) + (xx - eps);
+ eps = del + del * del * (p3.x * del + p2.x);
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
+ rem = (bet + bet * eps) + al * eps;
res = al + rem;
cor = (al - res) + rem;
- if (m>>31) {
- ex=junk1.i[LOW_HALF];
- if (res < 1.0) {res+=res; cor+=cor; ex-=1;}
- if (ex >=-1022) {
- binexp.i[HIGH_HALF] = (1023+ex)<<20;
- if (res == (res+cor*(1.0+error+err_1))) return res*binexp.x;
- else return -10.0;
+ if (m >> 31)
+ {
+ ex = junk1.i[LOW_HALF];
+ if (res < 1.0)
+ {
+ res += res;
+ cor += cor;
+ ex -= 1;
+ }
+ if (ex >= -1022)
+ {
+ binexp.i[HIGH_HALF] = (1023 + ex) << 20;
+ if (res == (res + cor * (1.0 + error + err_1)))
+ return res * binexp.x;
+ else
+ return -10.0;
+ }
+ ex = -(1022 + ex);
+ binexp.i[HIGH_HALF] = (1023 - ex) << 20;
+ res *= binexp.x;
+ cor *= binexp.x;
+ eps = 1.00000000001 + (error + err_1) * binexp.x;
+ t = 1.0 + res;
+ y = ((1.0 - t) + res) + cor;
+ res = t + y;
+ cor = (t - res) + y;
+ if (res == (res + eps * cor))
+ {
+ binexp.i[HIGH_HALF] = 0x00100000;
+ return (res - 1.0) * binexp.x;
+ }
+ else
+ return -10.0;
+ }
+ else
+ {
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
+ if (res == (res + cor * (1.0 + error + err_1)))
+ return res * binexp.x * t256.x;
+ else
+ return -10.0;
}
- ex = -(1022+ex);
- binexp.i[HIGH_HALF] = (1023-ex)<<20;
- res*=binexp.x;
- cor*=binexp.x;
- eps=1.00000000001+(error+err_1)*binexp.x;
- t=1.0+res;
- y = ((1.0-t)+res)+cor;
- res=t+y;
- cor = (t-res)+y;
- if (res == (res + eps*cor))
- {binexp.i[HIGH_HALF] = 0x00100000; return (res-1.0)*binexp.x;}
- else return -10.0;
- }
- else {
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+767)<<20;
- if (res == (res+cor*(1.0+error+err_1)))
- return res*binexp.x*t256.x;
- else return -10.0;
- }
}
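The block above, though only re-indented here, is the accurate-tables exp scheme: x is split as bexp*ln(2) + base + del, e^base is looked up in the coar/fine tables, and e^del comes from a short polynomial in del. A minimal sketch of just that splitting step, assuming a plain Taylor polynomial and the standard fdlibm ln(2) constants rather than the glibc tables (illustrative only, nowhere near correctly rounded):

#include <math.h>
#include <stdio.h>

/* Sketch only: reduce x so that x = m*ln(2) + r with |r| <= ln(2)/2,
   then e^x = 2^m * e^r with e^r from a short polynomial.  This shows
   the splitting used above, not the table-driven glibc code.  */
static double
sketch_exp (double x)
{
  const double log2e = 1.4426950408889634074;      /* 1/ln(2) */
  const double ln2hi = 6.93147180369123816490e-01; /* fdlibm ln2 high part */
  const double ln2lo = 1.90821492927058770002e-10; /* fdlibm ln2 low part  */

  double m = floor (x * log2e + 0.5);   /* nearest multiple of ln(2) */
  double r = (x - m * ln2hi) - m * ln2lo;

  /* Degree-5 Taylor polynomial for e^r; a demonstration, not 0.5 ulp.  */
  double p = 1.0 + r * (1.0 + r * (0.5 + r * (1.0 / 6
              + r * (1.0 / 24 + r / 120))));

  return ldexp (p, (int) m);            /* scale by 2^m */
}

int
main (void)
{
  printf ("%.17g\n%.17g\n", sketch_exp (1.0), exp (1.0));
  return 0;
}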
diff --git a/libc/sysdeps/ieee754/dbl-64/e_exp2.c b/libc/sysdeps/ieee754/dbl-64/e_exp2.c
index 96ec735e3..e1ba940e6 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_exp2.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_exp2.c
@@ -46,7 +46,7 @@ __ieee754_exp2 (double x)
if (__builtin_expect (isless (x, himark), 1))
{
/* Exceptional cases: */
- if (__builtin_expect (! isgreaterequal (x, lomark), 0))
+ if (__builtin_expect (!isgreaterequal (x, lomark), 0))
{
if (__isinf (x))
/* e^-inf == 0, with no error. */
@@ -93,7 +93,7 @@ __ieee754_exp2 (double x)
/* 3. Compute ex2 = 2^(t/512+e+ex). */
ex2_u.d = exp2_accuratetable[tval & 511];
tval >>= 9;
- unsafe = abs(tval) >= -DBL_MIN_EXP - 1;
+ unsafe = abs (tval) >= -DBL_MIN_EXP - 1;
ex2_u.ieee.exponent += tval >> unsafe;
scale_u.d = 1.0;
scale_u.ieee.exponent += tval - (tval >> unsafe);
@@ -106,7 +106,7 @@ __ieee754_exp2 (double x)
* x + .055504110254308625)
* x + .240226506959100583)
* x + .69314718055994495) * ex2_u.d;
- math_opt_barrier (x22);
+ math_opt_barrier (x22);
}
/* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */
@@ -119,6 +119,6 @@ __ieee754_exp2 (double x)
}
else
/* Return x, if x is a NaN or Inf; or overflow, otherwise. */
- return TWO1023*x;
+ return TWO1023 * x;
}
strong_alias (__ieee754_exp2, __exp2_finite)
diff --git a/libc/sysdeps/ieee754/dbl-64/e_fmod.c b/libc/sysdeps/ieee754/dbl-64/e_fmod.c
index b8548fae4..c83c2aedb 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_fmod.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_fmod.c
@@ -18,111 +18,156 @@
#include <math.h>
#include <math_private.h>
-static const double one = 1.0, Zero[] = {0.0, -0.0,};
+static const double one = 1.0, Zero[] = { 0.0, -0.0, };
double
__ieee754_fmod (double x, double y)
{
- int32_t n,hx,hy,hz,ix,iy,sx,i;
- u_int32_t lx,ly,lz;
+ int32_t n, hx, hy, hz, ix, iy, sx, i;
+ u_int32_t lx, ly, lz;
- EXTRACT_WORDS(hx,lx,x);
- EXTRACT_WORDS(hy,ly,y);
- sx = hx&0x80000000; /* sign of x */
- hx ^=sx; /* |x| */
- hy &= 0x7fffffff; /* |y| */
+ EXTRACT_WORDS (hx, lx, x);
+ EXTRACT_WORDS (hy, ly, y);
+ sx = hx & 0x80000000; /* sign of x */
+ hx ^= sx; /* |x| */
+ hy &= 0x7fffffff; /* |y| */
- /* purge off exception values */
- if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */
- ((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */
- return (x*y)/(x*y);
- if(hx<=hy) {
- if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */
- if(lx==ly)
- return Zero[(u_int32_t)sx>>31]; /* |x|=|y| return x*0*/
- }
+ /* purge off exception values */
+ if ((hy | ly) == 0 || (hx >= 0x7ff00000) || /* y=0,or x not finite */
+ ((hy | ((ly | -ly) >> 31)) > 0x7ff00000)) /* or y is NaN */
+ return (x * y) / (x * y);
+ if (hx <= hy)
+ {
+ if ((hx < hy) || (lx < ly))
+ return x; /* |x|<|y| return x */
+ if (lx == ly)
+ return Zero[(u_int32_t) sx >> 31]; /* |x|=|y| return x*0*/
+ }
- /* determine ix = ilogb(x) */
- if(__builtin_expect(hx<0x00100000, 0)) { /* subnormal x */
- if(hx==0) {
- for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
- } else {
- for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
- }
- } else ix = (hx>>20)-1023;
+ /* determine ix = ilogb(x) */
+ if (__builtin_expect (hx < 0x00100000, 0)) /* subnormal x */
+ {
+ if (hx == 0)
+ {
+ for (ix = -1043, i = lx; i > 0; i <<= 1)
+ ix -= 1;
+ }
+ else
+ {
+ for (ix = -1022, i = (hx << 11); i > 0; i <<= 1)
+ ix -= 1;
+ }
+ }
+ else
+ ix = (hx >> 20) - 1023;
- /* determine iy = ilogb(y) */
- if(__builtin_expect(hy<0x00100000, 0)) { /* subnormal y */
- if(hy==0) {
- for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
- } else {
- for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
- }
- } else iy = (hy>>20)-1023;
+ /* determine iy = ilogb(y) */
+ if (__builtin_expect (hy < 0x00100000, 0)) /* subnormal y */
+ {
+ if (hy == 0)
+ {
+ for (iy = -1043, i = ly; i > 0; i <<= 1)
+ iy -= 1;
+ }
+ else
+ {
+ for (iy = -1022, i = (hy << 11); i > 0; i <<= 1)
+ iy -= 1;
+ }
+ }
+ else
+ iy = (hy >> 20) - 1023;
- /* set up {hx,lx}, {hy,ly} and align y to x */
- if(__builtin_expect(ix >= -1022, 1))
- hx = 0x00100000|(0x000fffff&hx);
- else { /* subnormal x, shift x to normal */
- n = -1022-ix;
- if(n<=31) {
- hx = (hx<<n)|(lx>>(32-n));
- lx <<= n;
- } else {
- hx = lx<<(n-32);
- lx = 0;
- }
+ /* set up {hx,lx}, {hy,ly} and align y to x */
+ if (__builtin_expect (ix >= -1022, 1))
+ hx = 0x00100000 | (0x000fffff & hx);
+ else /* subnormal x, shift x to normal */
+ {
+ n = -1022 - ix;
+ if (n <= 31)
+ {
+ hx = (hx << n) | (lx >> (32 - n));
+ lx <<= n;
+ }
+ else
+ {
+ hx = lx << (n - 32);
+ lx = 0;
+ }
+ }
+ if (__builtin_expect (iy >= -1022, 1))
+ hy = 0x00100000 | (0x000fffff & hy);
+ else /* subnormal y, shift y to normal */
+ {
+ n = -1022 - iy;
+ if (n <= 31)
+ {
+ hy = (hy << n) | (ly >> (32 - n));
+ ly <<= n;
}
- if(__builtin_expect(iy >= -1022, 1))
- hy = 0x00100000|(0x000fffff&hy);
- else { /* subnormal y, shift y to normal */
- n = -1022-iy;
- if(n<=31) {
- hy = (hy<<n)|(ly>>(32-n));
- ly <<= n;
- } else {
- hy = ly<<(n-32);
- ly = 0;
- }
+ else
+ {
+ hy = ly << (n - 32);
+ ly = 0;
}
+ }
- /* fix point fmod */
- n = ix - iy;
- while(n--) {
- hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
- if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;}
- else {
- if((hz|lz)==0) /* return sign(x)*0 */
- return Zero[(u_int32_t)sx>>31];
- hx = hz+hz+(lz>>31); lx = lz+lz;
- }
+ /* fix point fmod */
+ n = ix - iy;
+ while (n--)
+ {
+ hz = hx - hy; lz = lx - ly; if (lx < ly)
+ hz -= 1;
+ if (hz < 0)
+ {
+ hx = hx + hx + (lx >> 31); lx = lx + lx;
}
- hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
- if(hz>=0) {hx=hz;lx=lz;}
+ else
+ {
+ if ((hz | lz) == 0) /* return sign(x)*0 */
+ return Zero[(u_int32_t) sx >> 31];
+ hx = hz + hz + (lz >> 31); lx = lz + lz;
+ }
+ }
+ hz = hx - hy; lz = lx - ly; if (lx < ly)
+ hz -= 1;
+ if (hz >= 0)
+ {
+ hx = hz; lx = lz;
+ }
- /* convert back to floating value and restore the sign */
- if((hx|lx)==0) /* return sign(x)*0 */
- return Zero[(u_int32_t)sx>>31];
- while(hx<0x00100000) { /* normalize x */
- hx = hx+hx+(lx>>31); lx = lx+lx;
- iy -= 1;
+ /* convert back to floating value and restore the sign */
+ if ((hx | lx) == 0) /* return sign(x)*0 */
+ return Zero[(u_int32_t) sx >> 31];
+ while (hx < 0x00100000) /* normalize x */
+ {
+ hx = hx + hx + (lx >> 31); lx = lx + lx;
+ iy -= 1;
+ }
+ if (__builtin_expect (iy >= -1022, 1)) /* normalize output */
+ {
+ hx = ((hx - 0x00100000) | ((iy + 1023) << 20));
+ INSERT_WORDS (x, hx | sx, lx);
+ }
+ else /* subnormal output */
+ {
+ n = -1022 - iy;
+ if (n <= 20)
+ {
+ lx = (lx >> n) | ((u_int32_t) hx << (32 - n));
+ hx >>= n;
+ }
+ else if (n <= 31)
+ {
+ lx = (hx << (32 - n)) | (lx >> n); hx = sx;
}
- if(__builtin_expect(iy>= -1022, 1)) { /* normalize output */
- hx = ((hx-0x00100000)|((iy+1023)<<20));
- INSERT_WORDS(x,hx|sx,lx);
- } else { /* subnormal output */
- n = -1022 - iy;
- if(n<=20) {
- lx = (lx>>n)|((u_int32_t)hx<<(32-n));
- hx >>= n;
- } else if (n<=31) {
- lx = (hx<<(32-n))|(lx>>n); hx = sx;
- } else {
- lx = hx>>(n-32); hx = sx;
- }
- INSERT_WORDS(x,hx|sx,lx);
- x *= one; /* create necessary signal */
+ else
+ {
+ lx = hx >> (n - 32); hx = sx;
}
- return x; /* exact output */
+ INSERT_WORDS (x, hx | sx, lx);
+ x *= one; /* create necessary signal */
+ }
+ return x; /* exact output */
}
strong_alias (__ieee754_fmod, __fmod_finite)
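The re-bracketed loop above is a fixed-point long division: once the exponents are aligned, the mantissa of |y| is subtracted from the running remainder wherever it fits, one bit position per iteration, and the result is renormalized at the end. A rough integer analogue of what the split {hx,lx}/{hy,ly} words are doing, assuming positive operands below 2^63 and the GCC __builtin_clzll intrinsic:

#include <stdint.h>
#include <stdio.h>

/* Integer analogue of the fixed-point loop above: compute a mod b by
   aligning b under a and subtracting wherever it fits, one bit per step.
   Assumes a, b > 0 and a < 2^63 so the shifted divisor cannot overflow.  */
static uint64_t
shift_subtract_mod (uint64_t a, uint64_t b)
{
  int sa = 63 - __builtin_clzll (a);    /* position of a's leading bit */
  int sb = 63 - __builtin_clzll (b);    /* position of b's leading bit */
  int n = sa - sb;                      /* number of alignment steps   */

  while (n-- > 0)
    {
      uint64_t d = b << (n + 1);        /* divisor aligned under the remainder */
      if (a >= d)
        a -= d;
    }
  if (a >= b)                           /* final, unshifted subtraction */
    a -= b;
  return a;
}

int
main (void)
{
  printf ("%llu\n", (unsigned long long) shift_subtract_mod (1000003, 97));
  printf ("%llu\n", 1000003ULL % 97ULL);
  return 0;
}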
diff --git a/libc/sysdeps/ieee754/dbl-64/e_gamma_r.c b/libc/sysdeps/ieee754/dbl-64/e_gamma_r.c
index 5b17f7b5a..13e389d7c 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_gamma_r.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_gamma_r.c
@@ -135,7 +135,7 @@ __ieee754_gamma_r (double x, int *signgamp)
*signgamp = 0;
return (x - x) / (x - x);
}
- if (__builtin_expect ((unsigned int) hx == 0xfff00000 && lx==0, 0))
+ if (__builtin_expect ((unsigned int) hx == 0xfff00000 && lx == 0, 0))
{
/* x == -Inf. According to ISO this is NaN. */
*signgamp = 0;
diff --git a/libc/sysdeps/ieee754/dbl-64/e_hypot.c b/libc/sysdeps/ieee754/dbl-64/e_hypot.c
index 2dd681cf1..9f4321efe 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_hypot.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_hypot.c
@@ -46,76 +46,101 @@
#include <math_private.h>
double
-__ieee754_hypot(double x, double y)
+__ieee754_hypot (double x, double y)
{
- double a,b,t1,t2,y1,y2,w;
- int32_t j,k,ha,hb;
+ double a, b, t1, t2, y1, y2, w;
+ int32_t j, k, ha, hb;
- GET_HIGH_WORD(ha,x);
- ha &= 0x7fffffff;
- GET_HIGH_WORD(hb,y);
- hb &= 0x7fffffff;
- if(hb > ha) {a=y;b=x;j=ha; ha=hb;hb=j;} else {a=x;b=y;}
- SET_HIGH_WORD(a,ha); /* a <- |a| */
- SET_HIGH_WORD(b,hb); /* b <- |b| */
- if((ha-hb)>0x3c00000) {return a+b;} /* x/y > 2**60 */
- k=0;
- if(__builtin_expect(ha > 0x5f300000, 0)) { /* a>2**500 */
- if(ha >= 0x7ff00000) { /* Inf or NaN */
- u_int32_t low;
- w = a+b; /* for sNaN */
- GET_LOW_WORD(low,a);
- if(((ha&0xfffff)|low)==0) w = a;
- GET_LOW_WORD(low,b);
- if(((hb^0x7ff00000)|low)==0) w = b;
- return w;
- }
- /* scale a and b by 2**-600 */
- ha -= 0x25800000; hb -= 0x25800000; k += 600;
- SET_HIGH_WORD(a,ha);
- SET_HIGH_WORD(b,hb);
+ GET_HIGH_WORD (ha, x);
+ ha &= 0x7fffffff;
+ GET_HIGH_WORD (hb, y);
+ hb &= 0x7fffffff;
+ if (hb > ha)
+ {
+ a = y; b = x; j = ha; ha = hb; hb = j;
+ }
+ else
+ {
+ a = x; b = y;
+ }
+ SET_HIGH_WORD (a, ha); /* a <- |a| */
+ SET_HIGH_WORD (b, hb); /* b <- |b| */
+ if ((ha - hb) > 0x3c00000)
+ {
+ return a + b;
+ } /* x/y > 2**60 */
+ k = 0;
+ if (__builtin_expect (ha > 0x5f300000, 0)) /* a>2**500 */
+ {
+ if (ha >= 0x7ff00000) /* Inf or NaN */
+ {
+ u_int32_t low;
+ w = a + b; /* for sNaN */
+ GET_LOW_WORD (low, a);
+ if (((ha & 0xfffff) | low) == 0)
+ w = a;
+ GET_LOW_WORD (low, b);
+ if (((hb ^ 0x7ff00000) | low) == 0)
+ w = b;
+ return w;
}
- if(__builtin_expect(hb < 0x20b00000, 0)) { /* b < 2**-500 */
- if(hb <= 0x000fffff) { /* subnormal b or 0 */
- u_int32_t low;
- GET_LOW_WORD(low,b);
- if((hb|low)==0) return a;
- t1=0;
- SET_HIGH_WORD(t1,0x7fd00000); /* t1=2^1022 */
- b *= t1;
- a *= t1;
- k -= 1022;
- } else { /* scale a and b by 2^600 */
- ha += 0x25800000; /* a *= 2^600 */
- hb += 0x25800000; /* b *= 2^600 */
- k -= 600;
- SET_HIGH_WORD(a,ha);
- SET_HIGH_WORD(b,hb);
- }
+ /* scale a and b by 2**-600 */
+ ha -= 0x25800000; hb -= 0x25800000; k += 600;
+ SET_HIGH_WORD (a, ha);
+ SET_HIGH_WORD (b, hb);
+ }
+ if (__builtin_expect (hb < 0x20b00000, 0)) /* b < 2**-500 */
+ {
+ if (hb <= 0x000fffff) /* subnormal b or 0 */
+ {
+ u_int32_t low;
+ GET_LOW_WORD (low, b);
+ if ((hb | low) == 0)
+ return a;
+ t1 = 0;
+ SET_HIGH_WORD (t1, 0x7fd00000); /* t1=2^1022 */
+ b *= t1;
+ a *= t1;
+ k -= 1022;
}
- /* medium size a and b */
- w = a-b;
- if (w>b) {
- t1 = 0;
- SET_HIGH_WORD(t1,ha);
- t2 = a-t1;
- w = __ieee754_sqrt(t1*t1-(b*(-b)-t2*(a+t1)));
- } else {
- a = a+a;
- y1 = 0;
- SET_HIGH_WORD(y1,hb);
- y2 = b - y1;
- t1 = 0;
- SET_HIGH_WORD(t1,ha+0x00100000);
- t2 = a - t1;
- w = __ieee754_sqrt(t1*y1-(w*(-w)-(t1*y2+t2*b)));
+ else /* scale a and b by 2^600 */
+ {
+ ha += 0x25800000; /* a *= 2^600 */
+ hb += 0x25800000; /* b *= 2^600 */
+ k -= 600;
+ SET_HIGH_WORD (a, ha);
+ SET_HIGH_WORD (b, hb);
}
- if(k!=0) {
- u_int32_t high;
- t1 = 1.0;
- GET_HIGH_WORD(high,t1);
- SET_HIGH_WORD(t1,high+(k<<20));
- return t1*w;
- } else return w;
+ }
+ /* medium size a and b */
+ w = a - b;
+ if (w > b)
+ {
+ t1 = 0;
+ SET_HIGH_WORD (t1, ha);
+ t2 = a - t1;
+ w = __ieee754_sqrt (t1 * t1 - (b * (-b) - t2 * (a + t1)));
+ }
+ else
+ {
+ a = a + a;
+ y1 = 0;
+ SET_HIGH_WORD (y1, hb);
+ y2 = b - y1;
+ t1 = 0;
+ SET_HIGH_WORD (t1, ha + 0x00100000);
+ t2 = a - t1;
+ w = __ieee754_sqrt (t1 * y1 - (w * (-w) - (t1 * y2 + t2 * b)));
+ }
+ if (k != 0)
+ {
+ u_int32_t high;
+ t1 = 1.0;
+ GET_HIGH_WORD (high, t1);
+ SET_HIGH_WORD (t1, high + (k << 20));
+ return t1 * w;
+ }
+ else
+ return w;
}
strong_alias (__ieee754_hypot, __hypot_finite)
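Whitespace aside, __ieee754_hypot orders the operands so that |a| >= |b|, returns a + b when their ratio exceeds 2^60, and otherwise rescales both by a power of two so that neither square can overflow or underflow, undoing the scale at the end. A sketch of that scaling idea using frexp/ldexp instead of the SET_HIGH_WORD word surgery; it is an illustration only and makes no claim about the ulp bound of the real routine:

#include <math.h>
#include <stdio.h>

/* Sketch of the overflow/underflow-safe strategy above: pull out a power
   of two, do the naive computation near 1.0, then scale back.  */
static double
sketch_hypot (double x, double y)
{
  double a = fabs (x), b = fabs (y);
  if (a < b)
    {
      double t = a; a = b; b = t;       /* ensure a >= b */
    }
  if (b == 0.0)
    return a;

  int ea;
  (void) frexp (a, &ea);                /* a = m * 2^ea with 0.5 <= m < 1 */
  double as = ldexp (a, -ea);           /* scaled a, close to 1           */
  double bs = ldexp (b, -ea);           /* same factor keeps the ratio    */

  return ldexp (sqrt (as * as + bs * bs), ea);  /* undo the scaling */
}

int
main (void)
{
  printf ("%.17g\n", sketch_hypot (3e300, 4e300));    /* naive form overflows  */
  printf ("%.17g\n", sketch_hypot (3e-300, 4e-300));  /* naive form underflows */
  return 0;
}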
diff --git a/libc/sysdeps/ieee754/dbl-64/e_ilogb.c b/libc/sysdeps/ieee754/dbl-64/e_ilogb.c
index 0452a71fb..1e338a59c 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_ilogb.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_ilogb.c
@@ -25,30 +25,39 @@ static char rcsid[] = "$NetBSD: s_ilogb.c,v 1.9 1995/05/10 20:47:28 jtc Exp $";
#include <math.h>
#include <math_private.h>
-int __ieee754_ilogb(double x)
+int
+__ieee754_ilogb (double x)
{
- int32_t hx,lx,ix;
+ int32_t hx, lx, ix;
- GET_HIGH_WORD(hx,x);
- hx &= 0x7fffffff;
- if(hx<0x00100000) {
- GET_LOW_WORD(lx,x);
- if((hx|lx)==0)
- return FP_ILOGB0; /* ilogb(0) = FP_ILOGB0 */
- else /* subnormal x */
- if(hx==0) {
- for (ix = -1043; lx>0; lx<<=1) ix -=1;
- } else {
- for (ix = -1022,hx<<=11; hx>0; hx<<=1) ix -=1;
- }
- return ix;
+ GET_HIGH_WORD (hx, x);
+ hx &= 0x7fffffff;
+ if (hx < 0x00100000)
+ {
+ GET_LOW_WORD (lx, x);
+ if ((hx | lx) == 0)
+ return FP_ILOGB0; /* ilogb(0) = FP_ILOGB0 */
+ else /* subnormal x */
+ if (hx == 0)
+ {
+ for (ix = -1043; lx > 0; lx <<= 1)
+ ix -= 1;
}
- else if (hx<0x7ff00000) return (hx>>20)-1023;
- else if (FP_ILOGBNAN != INT_MAX) {
- /* ISO C99 requires ilogb(+-Inf) == INT_MAX. */
- GET_LOW_WORD(lx,x);
- if(((hx^0x7ff00000)|lx) == 0)
- return INT_MAX;
+ else
+ {
+ for (ix = -1022, hx <<= 11; hx > 0; hx <<= 1)
+ ix -= 1;
}
- return FP_ILOGBNAN;
+ return ix;
+ }
+ else if (hx < 0x7ff00000)
+ return (hx >> 20) - 1023;
+ else if (FP_ILOGBNAN != INT_MAX)
+ {
+ /* ISO C99 requires ilogb(+-Inf) == INT_MAX. */
+ GET_LOW_WORD (lx, x);
+ if (((hx ^ 0x7ff00000) | lx) == 0)
+ return INT_MAX;
+ }
+ return FP_ILOGBNAN;
}
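The fast path of __ieee754_ilogb is just the biased exponent field read out of the high word, (hx >> 20) - 1023; zero, subnormals, infinities and NaNs take the special returns above. The same extraction on the whole 64-bit pattern, assuming the usual IEEE 754 binary64 layout:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Read the unbiased exponent of a normal double from its bit pattern,
   i.e. the (hx >> 20) - 1023 step above done on the full 64-bit word.  */
static int
sketch_ilogb_normal (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);              /* well-defined reinterpretation */
  int biased = (int) ((bits >> 52) & 0x7ff);    /* 11 exponent bits */
  return biased - 1023;                         /* remove the IEEE bias */
}

int
main (void)
{
  printf ("%d %d %d\n",
          sketch_ilogb_normal (1.0),    /* 0  */
          sketch_ilogb_normal (10.0),   /* 3  */
          sketch_ilogb_normal (0.75));  /* -1 */
  return 0;
}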
diff --git a/libc/sysdeps/ieee754/dbl-64/e_j0.c b/libc/sysdeps/ieee754/dbl-64/e_j0.c
index d641a0914..d165e8092 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_j0.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_j0.c
@@ -11,7 +11,7 @@
*/
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/26,
for performance improvement on pipelined processors.
-*/
+ */
/* __ieee754_j0(x), __ieee754_y0(x)
* Bessel function of the first and second kinds of order zero.
@@ -61,154 +61,166 @@
#include <math.h>
#include <math_private.h>
-static double pzero(double), qzero(double);
+static double pzero (double), qzero (double);
static const double
-huge = 1e300,
-one = 1.0,
-invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
-tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
- /* R0/S0 on [0, 2.00] */
-R[] = {0.0, 0.0, 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */
- -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */
- 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */
- -4.61832688532103189199e-09}, /* 0xBE33D5E7, 0x73D63FCE */
-S[] = {0.0, 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */
- 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */
- 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */
- 1.16614003333790000205e-09}; /* 0x3E1408BC, 0xF4745D8F */
+ huge = 1e300,
+ one = 1.0,
+ invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
+ tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+/* R0/S0 on [0, 2.00] */
+ R[] = { 0.0, 0.0, 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */
+ -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */
+ 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */
+ -4.61832688532103189199e-09 }, /* 0xBE33D5E7, 0x73D63FCE */
+ S[] = { 0.0, 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */
+ 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */
+ 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */
+ 1.16614003333790000205e-09 }; /* 0x3E1408BC, 0xF4745D8F */
static const double zero = 0.0;
double
-__ieee754_j0(double x)
+__ieee754_j0 (double x)
{
- double z, s,c,ss,cc,r,u,v,r1,r2,s1,s2,z2,z4;
- int32_t hx,ix;
+ double z, s, c, ss, cc, r, u, v, r1, r2, s1, s2, z2, z4;
+ int32_t hx, ix;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- if(ix>=0x7ff00000) return one/(x*x);
- x = fabs(x);
- if(ix >= 0x40000000) { /* |x| >= 2.0 */
- __sincos (x, &s, &c);
- ss = s-c;
- cc = s+c;
- if(ix<0x7fe00000) { /* make sure x+x not overflow */
- z = -__cos(x+x);
- if ((s*c)<zero) cc = z/ss;
- else ss = z/cc;
- }
- /*
- * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
- * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
- */
- if(ix>0x48000000) z = (invsqrtpi*cc)/__ieee754_sqrt(x);
- else {
- u = pzero(x); v = qzero(x);
- z = invsqrtpi*(u*cc-v*ss)/__ieee754_sqrt(x);
- }
- return z;
- }
- if(ix<0x3f200000) { /* |x| < 2**-13 */
- math_force_eval(huge+x); /* raise inexact if x != 0 */
- if(ix<0x3e400000) return one; /* |x|<2**-27 */
- else return one - 0.25*x*x;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ if (ix >= 0x7ff00000)
+ return one / (x * x);
+ x = fabs (x);
+ if (ix >= 0x40000000) /* |x| >= 2.0 */
+ {
+ __sincos (x, &s, &c);
+ ss = s - c;
+ cc = s + c;
+ if (ix < 0x7fe00000) /* make sure x+x not overflow */
+ {
+ z = -__cos (x + x);
+ if ((s * c) < zero)
+ cc = z / ss;
+ else
+ ss = z / cc;
}
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- r = z*(R02+z*(R03+z*(R04+z*R05)));
- s = one+z*(S01+z*(S02+z*(S03+z*S04)));
-#else
- r1 = z*R[2]; z2=z*z;
- r2 = R[3]+z*R[4]; z4=z2*z2;
- r = r1 + z2*r2 + z4*R[5];
- s1 = one+z*S[1];
- s2 = S[2]+z*S[3];
- s = s1 + z2*s2 + z4*S[4];
-#endif
- if(ix < 0x3FF00000) { /* |x| < 1.00 */
- return one + z*(-0.25+(r/s));
- } else {
- u = 0.5*x;
- return((one+u)*(one-u)+z*(r/s));
+ /*
+ * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
+ * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
+ */
+ if (ix > 0x48000000)
+ z = (invsqrtpi * cc) / __ieee754_sqrt (x);
+ else
+ {
+ u = pzero (x); v = qzero (x);
+ z = invsqrtpi * (u * cc - v * ss) / __ieee754_sqrt (x);
}
+ return z;
+ }
+ if (ix < 0x3f200000) /* |x| < 2**-13 */
+ {
+ math_force_eval (huge + x); /* raise inexact if x != 0 */
+ if (ix < 0x3e400000)
+ return one; /* |x|<2**-27 */
+ else
+ return one - 0.25 * x * x;
+ }
+ z = x * x;
+ r1 = z * R[2]; z2 = z * z;
+ r2 = R[3] + z * R[4]; z4 = z2 * z2;
+ r = r1 + z2 * r2 + z4 * R[5];
+ s1 = one + z * S[1];
+ s2 = S[2] + z * S[3];
+ s = s1 + z2 * s2 + z4 * S[4];
+ if (ix < 0x3FF00000) /* |x| < 1.00 */
+ {
+ return one + z * (-0.25 + (r / s));
+ }
+ else
+ {
+ u = 0.5 * x;
+ return ((one + u) * (one - u) + z * (r / s));
+ }
}
strong_alias (__ieee754_j0, __j0_finite)
static const double
-U[] = {-7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */
- 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */
- -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */
- 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */
- -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */
- 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */
- -3.98205194132103398453e-11}, /* 0xBDC5E43D, 0x693FB3C8 */
-V[] = {1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */
- 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */
- 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */
- 4.41110311332675467403e-10}; /* 0x3DFE5018, 0x3BD6D9EF */
+U[] = { -7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */
+ 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */
+ -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */
+ 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */
+ -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */
+ 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */
+ -3.98205194132103398453e-11 }, /* 0xBDC5E43D, 0x693FB3C8 */
+V[] = { 1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */
+ 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */
+ 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */
+ 4.41110311332675467403e-10 }; /* 0x3DFE5018, 0x3BD6D9EF */
double
-__ieee754_y0(double x)
+__ieee754_y0 (double x)
{
- double z, s,c,ss,cc,u,v,z2,z4,z6,u1,u2,u3,v1,v2;
- int32_t hx,ix,lx;
+ double z, s, c, ss, cc, u, v, z2, z4, z6, u1, u2, u3, v1, v2;
+ int32_t hx, ix, lx;
- EXTRACT_WORDS(hx,lx,x);
- ix = 0x7fffffff&hx;
- /* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0, y0(0) is -inf. */
- if(ix>=0x7ff00000) return one/(x+x*x);
- if((ix|lx)==0) return -HUGE_VAL+x; /* -inf and overflow exception. */
- if(hx<0) return zero/(zero*x);
- if(ix >= 0x40000000) { /* |x| >= 2.0 */
- /* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
- * where x0 = x-pi/4
- * Better formula:
- * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
- * = 1/sqrt(2) * (sin(x) + cos(x))
- * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
- * = 1/sqrt(2) * (sin(x) - cos(x))
- * To avoid cancellation, use
- * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
- * to compute the worse one.
- */
- __sincos (x, &s, &c);
- ss = s-c;
- cc = s+c;
- /*
- * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
- * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
- */
- if(ix<0x7fe00000) { /* make sure x+x not overflow */
- z = -__cos(x+x);
- if ((s*c)<zero) cc = z/ss;
- else ss = z/cc;
- }
- if(ix>0x48000000) z = (invsqrtpi*ss)/__ieee754_sqrt(x);
- else {
- u = pzero(x); v = qzero(x);
- z = invsqrtpi*(u*ss+v*cc)/__ieee754_sqrt(x);
- }
- return z;
+ EXTRACT_WORDS (hx, lx, x);
+ ix = 0x7fffffff & hx;
+ /* Y0(NaN) is NaN, y0(-inf) is NaN, y0(inf) is 0, y0(0) is -inf. */
+ if (ix >= 0x7ff00000)
+ return one / (x + x * x);
+ if ((ix | lx) == 0)
+ return -HUGE_VAL + x; /* -inf and overflow exception. */
+ if (hx < 0)
+ return zero / (zero * x);
+ if (ix >= 0x40000000) /* |x| >= 2.0 */
+ { /* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
+ * where x0 = x-pi/4
+ * Better formula:
+ * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
+ * = 1/sqrt(2) * (sin(x) + cos(x))
+ * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
+ * = 1/sqrt(2) * (sin(x) - cos(x))
+ * To avoid cancellation, use
+ * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+ * to compute the worse one.
+ */
+ __sincos (x, &s, &c);
+ ss = s - c;
+ cc = s + c;
+ /*
+ * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
+ * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
+ */
+ if (ix < 0x7fe00000) /* make sure x+x not overflow */
+ {
+ z = -__cos (x + x);
+ if ((s * c) < zero)
+ cc = z / ss;
+ else
+ ss = z / cc;
}
- if(ix<=0x3e400000) { /* x < 2**-27 */
- return(U[0] + tpi*__ieee754_log(x));
+ if (ix > 0x48000000)
+ z = (invsqrtpi * ss) / __ieee754_sqrt (x);
+ else
+ {
+ u = pzero (x); v = qzero (x);
+ z = invsqrtpi * (u * ss + v * cc) / __ieee754_sqrt (x);
}
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06)))));
- v = one+z*(v01+z*(v02+z*(v03+z*v04)));
-#else
- u1 = U[0]+z*U[1]; z2=z*z;
- u2 = U[2]+z*U[3]; z4=z2*z2;
- u3 = U[4]+z*U[5]; z6=z4*z2;
- u = u1 + z2*u2 + z4*u3 + z6*U[6];
- v1 = one+z*V[0];
- v2 = V[1]+z*V[2];
- v = v1 + z2*v2 + z4*V[3];
-#endif
- return(u/v + tpi*(__ieee754_j0(x)*__ieee754_log(x)));
+ return z;
+ }
+ if (ix <= 0x3e400000) /* x < 2**-27 */
+ {
+ return (U[0] + tpi * __ieee754_log (x));
+ }
+ z = x * x;
+ u1 = U[0] + z * U[1]; z2 = z * z;
+ u2 = U[2] + z * U[3]; z4 = z2 * z2;
+ u3 = U[4] + z * U[5]; z6 = z4 * z2;
+ u = u1 + z2 * u2 + z4 * u3 + z6 * U[6];
+ v1 = one + z * V[0];
+ v2 = V[1] + z * V[2];
+ v = v1 + z2 * v2 + z4 * V[3];
+ return (u / v + tpi * (__ieee754_j0 (x) * __ieee754_log (x)));
}
strong_alias (__ieee754_y0, __y0_finite)
@@ -286,33 +298,43 @@ static const double pS2[5] = {
};
static double
-pzero(double x)
+pzero (double x)
{
- const double *p,*q;
- double z,r,s,z2,z4,r1,r2,r3,s1,s2,s3;
- int32_t ix;
- GET_HIGH_WORD(ix,x);
- ix &= 0x7fffffff;
- if (ix>=0x41b00000) {return one;}
- else if(ix>=0x40200000){p = pR8; q= pS8;}
- else if(ix>=0x40122E8B){p = pR5; q= pS5;}
- else if(ix>=0x4006DB6D){p = pR3; q= pS3;}
- else if(ix>=0x40000000){p = pR2; q= pS2;}
- z = one/(x*x);
-#ifdef DO_NOT_USE_THIS
- r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
- s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
-#else
- r1 = p[0]+z*p[1]; z2=z*z;
- r2 = p[2]+z*p[3]; z4=z2*z2;
- r3 = p[4]+z*p[5];
- r = r1 + z2*r2 + z4*r3;
- s1 = one+z*q[0];
- s2 = q[1]+z*q[2];
- s3 = q[3]+z*q[4];
- s = s1 + z2*s2 + z4*s3;
-#endif
- return one+ r/s;
+ const double *p, *q;
+ double z, r, s, z2, z4, r1, r2, r3, s1, s2, s3;
+ int32_t ix;
+ GET_HIGH_WORD (ix, x);
+ ix &= 0x7fffffff;
+ if (ix >= 0x41b00000)
+ {
+ return one;
+ }
+ else if (ix >= 0x40200000)
+ {
+ p = pR8; q = pS8;
+ }
+ else if (ix >= 0x40122E8B)
+ {
+ p = pR5; q = pS5;
+ }
+ else if (ix >= 0x4006DB6D)
+ {
+ p = pR3; q = pS3;
+ }
+ else if (ix >= 0x40000000)
+ {
+ p = pR2; q = pS2;
+ }
+ z = one / (x * x);
+ r1 = p[0] + z * p[1]; z2 = z * z;
+ r2 = p[2] + z * p[3]; z4 = z2 * z2;
+ r3 = p[4] + z * p[5];
+ r = r1 + z2 * r2 + z4 * r3;
+ s1 = one + z * q[0];
+ s2 = q[1] + z * q[2];
+ s3 = q[3] + z * q[4];
+ s = s1 + z2 * s2 + z4 * s3;
+ return one + r / s;
}
@@ -394,31 +416,41 @@ static const double qS2[6] = {
};
static double
-qzero(double x)
+qzero (double x)
{
- const double *p,*q;
- double s,r,z,z2,z4,z6,r1,r2,r3,s1,s2,s3;
- int32_t ix;
- GET_HIGH_WORD(ix,x);
- ix &= 0x7fffffff;
- if (ix>=0x41b00000) {return -.125/x;}
- else if(ix>=0x40200000){p = qR8; q= qS8;}
- else if(ix>=0x40122E8B){p = qR5; q= qS5;}
- else if(ix>=0x4006DB6D){p = qR3; q= qS3;}
- else if(ix>=0x40000000){p = qR2; q= qS2;}
- z = one/(x*x);
-#ifdef DO_NOT_USE_THIS
- r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
- s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
-#else
- r1 = p[0]+z*p[1]; z2=z*z;
- r2 = p[2]+z*p[3]; z4=z2*z2;
- r3 = p[4]+z*p[5]; z6=z4*z2;
- r= r1 + z2*r2 + z4*r3;
- s1 = one+z*q[0];
- s2 = q[1]+z*q[2];
- s3 = q[3]+z*q[4];
- s = s1 + z2*s2 + z4*s3 +z6*q[5];
-#endif
- return (-.125 + r/s)/x;
+ const double *p, *q;
+ double s, r, z, z2, z4, z6, r1, r2, r3, s1, s2, s3;
+ int32_t ix;
+ GET_HIGH_WORD (ix, x);
+ ix &= 0x7fffffff;
+ if (ix >= 0x41b00000)
+ {
+ return -.125 / x;
+ }
+ else if (ix >= 0x40200000)
+ {
+ p = qR8; q = qS8;
+ }
+ else if (ix >= 0x40122E8B)
+ {
+ p = qR5; q = qS5;
+ }
+ else if (ix >= 0x4006DB6D)
+ {
+ p = qR3; q = qS3;
+ }
+ else if (ix >= 0x40000000)
+ {
+ p = qR2; q = qS2;
+ }
+ z = one / (x * x);
+ r1 = p[0] + z * p[1]; z2 = z * z;
+ r2 = p[2] + z * p[3]; z4 = z2 * z2;
+ r3 = p[4] + z * p[5]; z6 = z4 * z2;
+ r = r1 + z2 * r2 + z4 * r3;
+ s1 = one + z * q[0];
+ s2 = q[1] + z * q[2];
+ s3 = q[3] + z * q[4];
+ s = s1 + z2 * s2 + z4 * s3 + z6 * q[5];
+ return (-.125 + r / s) / x;
}
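The deleted DO_NOT_USE_THIS branches held the straight Horner forms of the rational approximations; the surviving code evaluates the same polynomials as r1 + z2*r2 + z4*r3 so that the partial products are independent and can overlap on a pipelined FPU, which is the Shimizu change noted in the file header. The two forms agree up to rounding, as this toy comparison shows; the coefficients are arbitrary demo values, not the pR/pS tables:

#include <stdio.h>

/* The same degree-5 polynomial two ways: plain Horner versus the
   r1 + z2*r2 + z4*r3 split used in pzero/qzero above.  Coefficients
   are arbitrary demo values, not the glibc tables.  */
int
main (void)
{
  const double c[6] = { 1.0, -0.5, 0.25, -0.125, 0.0625, -0.03125 };
  double z = 0.37;

  double horner = c[0] + z * (c[1] + z * (c[2] + z * (c[3]
                   + z * (c[4] + z * c[5]))));

  double z2 = z * z, z4 = z2 * z2;
  double r1 = c[0] + z * c[1];          /* the three pairs are independent */
  double r2 = c[2] + z * c[3];
  double r3 = c[4] + z * c[5];
  double split = r1 + z2 * r2 + z4 * r3;

  printf ("%.17g\n%.17g\n", horner, split);
  return 0;
}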
diff --git a/libc/sysdeps/ieee754/dbl-64/e_j1.c b/libc/sysdeps/ieee754/dbl-64/e_j1.c
index cca5f20b4..ab754c6ee 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_j1.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_j1.c
@@ -11,7 +11,7 @@
*/
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/26,
for performance improvement on pipelined processors.
-*/
+ */
/* __ieee754_j1(x), __ieee754_y1(x)
* Bessel function of the first and second kinds of order zero.
@@ -61,76 +61,81 @@
#include <math.h>
#include <math_private.h>
-static double pone(double), qone(double);
+static double pone (double), qone (double);
static const double
-huge = 1e300,
-one = 1.0,
-invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
-tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
- /* R0/S0 on [0,2] */
-R[] = {-6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */
- 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */
- -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */
- 4.96727999609584448412e-08}, /* 0x3E6AAAFA, 0x46CA0BD9 */
-S[] = {0.0, 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */
- 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */
- 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */
- 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */
- 1.23542274426137913908e-11}; /* 0x3DAB2ACF, 0xCFB97ED8 */
+ huge = 1e300,
+ one = 1.0,
+ invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
+ tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+/* R0/S0 on [0,2] */
+ R[] = { -6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */
+ 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */
+ -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */
+ 4.96727999609584448412e-08 }, /* 0x3E6AAAFA, 0x46CA0BD9 */
+ S[] = { 0.0, 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */
+ 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */
+ 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */
+ 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */
+ 1.23542274426137913908e-11 }; /* 0x3DAB2ACF, 0xCFB97ED8 */
-static const double zero = 0.0;
+static const double zero = 0.0;
double
-__ieee754_j1(double x)
+__ieee754_j1 (double x)
{
- double z, s,c,ss,cc,r,u,v,y,r1,r2,s1,s2,s3,z2,z4;
- int32_t hx,ix;
+ double z, s, c, ss, cc, r, u, v, y, r1, r2, s1, s2, s3, z2, z4;
+ int32_t hx, ix;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- if(__builtin_expect(ix>=0x7ff00000, 0)) return one/x;
- y = fabs(x);
- if(ix >= 0x40000000) { /* |x| >= 2.0 */
- __sincos (y, &s, &c);
- ss = -s-c;
- cc = s-c;
- if(ix<0x7fe00000) { /* make sure y+y not overflow */
- z = __cos(y+y);
- if ((s*c)>zero) cc = z/ss;
- else ss = z/cc;
- }
- /*
- * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
- * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
- */
- if(ix>0x48000000) z = (invsqrtpi*cc)/__ieee754_sqrt(y);
- else {
- u = pone(y); v = qone(y);
- z = invsqrtpi*(u*cc-v*ss)/__ieee754_sqrt(y);
- }
- if(hx<0) return -z;
- else return z;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ if (__builtin_expect (ix >= 0x7ff00000, 0))
+ return one / x;
+ y = fabs (x);
+ if (ix >= 0x40000000) /* |x| >= 2.0 */
+ {
+ __sincos (y, &s, &c);
+ ss = -s - c;
+ cc = s - c;
+ if (ix < 0x7fe00000) /* make sure y+y not overflow */
+ {
+ z = __cos (y + y);
+ if ((s * c) > zero)
+ cc = z / ss;
+ else
+ ss = z / cc;
}
- if(__builtin_expect(ix<0x3e400000, 0)) { /* |x|<2**-27 */
- if(huge+x>one) return 0.5*x;/* inexact if x!=0 necessary */
+ /*
+ * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
+ * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
+ */
+ if (ix > 0x48000000)
+ z = (invsqrtpi * cc) / __ieee754_sqrt (y);
+ else
+ {
+ u = pone (y); v = qone (y);
+ z = invsqrtpi * (u * cc - v * ss) / __ieee754_sqrt (y);
}
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- r = z*(r00+z*(r01+z*(r02+z*r03)));
- s = one+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
- r *= x;
-#else
- r1 = z*R[0]; z2=z*z;
- r2 = R[1]+z*R[2]; z4=z2*z2;
- r = r1 + z2*r2 + z4*R[3];
- r *= x;
- s1 = one+z*S[1];
- s2 = S[2]+z*S[3];
- s3 = S[4]+z*S[5];
- s = s1 + z2*s2 + z4*s3;
-#endif
- return(x*0.5+r/s);
+ if (hx < 0)
+ return -z;
+ else
+ return z;
+ }
+ if (__builtin_expect (ix < 0x3e400000, 0)) /* |x|<2**-27 */
+ {
+ if (huge + x > one)
+ return 0.5 * x; /* inexact if x!=0 necessary */
+ }
+ z = x * x;
+ r1 = z * R[0]; z2 = z * z;
+ r2 = R[1] + z * R[2]; z4 = z2 * z2;
+ r = r1 + z2 * r2 + z4 * R[3];
+ r *= x;
+ s1 = one + z * S[1];
+ s2 = S[2] + z * S[3];
+ s3 = S[4] + z * S[5];
+ s = s1 + z2 * s2 + z4 * s3;
+ return (x * 0.5 + r / s);
}
strong_alias (__ieee754_j1, __j1_finite)
@@ -150,62 +155,67 @@ static const double V0[5] = {
};
double
-__ieee754_y1(double x)
+__ieee754_y1 (double x)
{
- double z, s,c,ss,cc,u,v,u1,u2,v1,v2,v3,z2,z4;
- int32_t hx,ix,lx;
+ double z, s, c, ss, cc, u, v, u1, u2, v1, v2, v3, z2, z4;
+ int32_t hx, ix, lx;
- EXTRACT_WORDS(hx,lx,x);
- ix = 0x7fffffff&hx;
- /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */
- if(__builtin_expect(ix>=0x7ff00000, 0)) return one/(x+x*x);
- if(__builtin_expect((ix|lx)==0, 0))
- return -HUGE_VAL+x; /* -inf and overflow exception. */;
- if(__builtin_expect(hx<0, 0)) return zero/(zero*x);
- if(ix >= 0x40000000) { /* |x| >= 2.0 */
- __sincos (x, &s, &c);
- ss = -s-c;
- cc = s-c;
- if(ix<0x7fe00000) { /* make sure x+x not overflow */
- z = __cos(x+x);
- if ((s*c)>zero) cc = z/ss;
- else ss = z/cc;
- }
- /* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
- * where x0 = x-3pi/4
- * Better formula:
- * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
- * = 1/sqrt(2) * (sin(x) - cos(x))
- * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
- * = -1/sqrt(2) * (cos(x) + sin(x))
- * To avoid cancellation, use
- * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
- * to compute the worse one.
- */
- if(ix>0x48000000) z = (invsqrtpi*ss)/__ieee754_sqrt(x);
- else {
- u = pone(x); v = qone(x);
- z = invsqrtpi*(u*ss+v*cc)/__ieee754_sqrt(x);
- }
- return z;
+ EXTRACT_WORDS (hx, lx, x);
+ ix = 0x7fffffff & hx;
+ /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */
+ if (__builtin_expect (ix >= 0x7ff00000, 0))
+ return one / (x + x * x);
+ if (__builtin_expect ((ix | lx) == 0, 0))
+ return -HUGE_VAL + x;
+ /* -inf and overflow exception. */;
+ if (__builtin_expect (hx < 0, 0))
+ return zero / (zero * x);
+ if (ix >= 0x40000000) /* |x| >= 2.0 */
+ {
+ __sincos (x, &s, &c);
+ ss = -s - c;
+ cc = s - c;
+ if (ix < 0x7fe00000) /* make sure x+x not overflow */
+ {
+ z = __cos (x + x);
+ if ((s * c) > zero)
+ cc = z / ss;
+ else
+ ss = z / cc;
}
- if(__builtin_expect(ix<=0x3c900000, 0)) { /* x < 2**-54 */
- return(-tpi/x);
+ /* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
+ * where x0 = x-3pi/4
+ * Better formula:
+ * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
+ * = 1/sqrt(2) * (sin(x) - cos(x))
+ * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
+ * = -1/sqrt(2) * (cos(x) + sin(x))
+ * To avoid cancellation, use
+ * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+ * to compute the worse one.
+ */
+ if (ix > 0x48000000)
+ z = (invsqrtpi * ss) / __ieee754_sqrt (x);
+ else
+ {
+ u = pone (x); v = qone (x);
+ z = invsqrtpi * (u * ss + v * cc) / __ieee754_sqrt (x);
}
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
- v = one+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4]))));
-#else
- u1 = U0[0]+z*U0[1];z2=z*z;
- u2 = U0[2]+z*U0[3];z4=z2*z2;
- u = u1 + z2*u2 + z4*U0[4];
- v1 = one+z*V0[0];
- v2 = V0[1]+z*V0[2];
- v3 = V0[3]+z*V0[4];
- v = v1 + z2*v2 + z4*v3;
-#endif
- return(x*(u/v) + tpi*(__ieee754_j1(x)*__ieee754_log(x)-one/x));
+ return z;
+ }
+ if (__builtin_expect (ix <= 0x3c900000, 0)) /* x < 2**-54 */
+ {
+ return (-tpi / x);
+ }
+ z = x * x;
+ u1 = U0[0] + z * U0[1]; z2 = z * z;
+ u2 = U0[2] + z * U0[3]; z4 = z2 * z2;
+ u = u1 + z2 * u2 + z4 * U0[4];
+ v1 = one + z * V0[0];
+ v2 = V0[1] + z * V0[2];
+ v3 = V0[3] + z * V0[4];
+ v = v1 + z2 * v2 + z4 * v3;
+ return (x * (u / v) + tpi * (__ieee754_j1 (x) * __ieee754_log (x) - one / x));
}
strong_alias (__ieee754_y1, __y1_finite)
@@ -267,7 +277,7 @@ static const double ps3[5] = {
1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */
};
-static const double pr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
+static const double pr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */
1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */
2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */
@@ -284,33 +294,43 @@ static const double ps2[5] = {
};
static double
-pone(double x)
+pone (double x)
{
- const double *p,*q;
- double z,r,s,r1,r2,r3,s1,s2,s3,z2,z4;
- int32_t ix;
- GET_HIGH_WORD(ix,x);
- ix &= 0x7fffffff;
- if (ix>=0x41b00000) {return one;}
- else if(ix>=0x40200000){p = pr8; q= ps8;}
- else if(ix>=0x40122E8B){p = pr5; q= ps5;}
- else if(ix>=0x4006DB6D){p = pr3; q= ps3;}
- else if(ix>=0x40000000){p = pr2; q= ps2;}
- z = one/(x*x);
-#ifdef DO_NOT_USE_THIS
- r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
- s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
-#else
- r1 = p[0]+z*p[1]; z2=z*z;
- r2 = p[2]+z*p[3]; z4=z2*z2;
- r3 = p[4]+z*p[5];
- r = r1 + z2*r2 + z4*r3;
- s1 = one+z*q[0];
- s2 = q[1]+z*q[2];
- s3 = q[3]+z*q[4];
- s = s1 + z2*s2 + z4*s3;
-#endif
- return one+ r/s;
+ const double *p, *q;
+ double z, r, s, r1, r2, r3, s1, s2, s3, z2, z4;
+ int32_t ix;
+ GET_HIGH_WORD (ix, x);
+ ix &= 0x7fffffff;
+ if (ix >= 0x41b00000)
+ {
+ return one;
+ }
+ else if (ix >= 0x40200000)
+ {
+ p = pr8; q = ps8;
+ }
+ else if (ix >= 0x40122E8B)
+ {
+ p = pr5; q = ps5;
+ }
+ else if (ix >= 0x4006DB6D)
+ {
+ p = pr3; q = ps3;
+ }
+ else if (ix >= 0x40000000)
+ {
+ p = pr2; q = ps2;
+ }
+ z = one / (x * x);
+ r1 = p[0] + z * p[1]; z2 = z * z;
+ r2 = p[2] + z * p[3]; z4 = z2 * z2;
+ r3 = p[4] + z * p[5];
+ r = r1 + z2 * r2 + z4 * r3;
+ s1 = one + z * q[0];
+ s2 = q[1] + z * q[2];
+ s3 = q[3] + z * q[4];
+ s = s1 + z2 * s2 + z4 * s3;
+ return one + r / s;
}
@@ -375,7 +395,7 @@ static const double qs3[6] = {
-1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */
};
-static const double qr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
+static const double qr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
-1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */
-1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */
-2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */
@@ -393,31 +413,41 @@ static const double qs2[6] = {
};
static double
-qone(double x)
+qone (double x)
{
- const double *p,*q;
- double s,r,z,r1,r2,r3,s1,s2,s3,z2,z4,z6;
- int32_t ix;
- GET_HIGH_WORD(ix,x);
- ix &= 0x7fffffff;
- if (ix>=0x41b00000) {return .375/x;}
- else if(ix>=0x40200000){p = qr8; q= qs8;}
- else if(ix>=0x40122E8B){p = qr5; q= qs5;}
- else if(ix>=0x4006DB6D){p = qr3; q= qs3;}
- else if(ix>=0x40000000){p = qr2; q= qs2;}
- z = one/(x*x);
-#ifdef DO_NOT_USE_THIS
- r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
- s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
-#else
- r1 = p[0]+z*p[1]; z2=z*z;
- r2 = p[2]+z*p[3]; z4=z2*z2;
- r3 = p[4]+z*p[5]; z6=z4*z2;
- r = r1 + z2*r2 + z4*r3;
- s1 = one+z*q[0];
- s2 = q[1]+z*q[2];
- s3 = q[3]+z*q[4];
- s = s1 + z2*s2 + z4*s3 + z6*q[5];
-#endif
- return (.375 + r/s)/x;
+ const double *p, *q;
+ double s, r, z, r1, r2, r3, s1, s2, s3, z2, z4, z6;
+ int32_t ix;
+ GET_HIGH_WORD (ix, x);
+ ix &= 0x7fffffff;
+ if (ix >= 0x41b00000)
+ {
+ return .375 / x;
+ }
+ else if (ix >= 0x40200000)
+ {
+ p = qr8; q = qs8;
+ }
+ else if (ix >= 0x40122E8B)
+ {
+ p = qr5; q = qs5;
+ }
+ else if (ix >= 0x4006DB6D)
+ {
+ p = qr3; q = qs3;
+ }
+ else if (ix >= 0x40000000)
+ {
+ p = qr2; q = qs2;
+ }
+ z = one / (x * x);
+ r1 = p[0] + z * p[1]; z2 = z * z;
+ r2 = p[2] + z * p[3]; z4 = z2 * z2;
+ r3 = p[4] + z * p[5]; z6 = z4 * z2;
+ r = r1 + z2 * r2 + z4 * r3;
+ s1 = one + z * q[0];
+ s2 = q[1] + z * q[2];
+ s3 = q[3] + z * q[4];
+ s = s1 + z2 * s2 + z4 * s3 + z6 * q[5];
+ return (.375 + r / s) / x;
}
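The comment blocks kept above rely on the identity sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)), which follows from (s + c)(s - c) = -cos(2x), to recompute whichever combination loses bits to cancellation. A short numerical check of that identity in plain C, nothing glibc-specific:

#include <math.h>
#include <stdio.h>

/* Check sin(x) + cos(x) == -cos(2x) / (sin(x) - cos(x)), the cancellation
   workaround quoted in the comments above: (s + c)(s - c) = -cos(2x).  */
int
main (void)
{
  double x = 123456.789;                /* arbitrary argument */
  double s = sin (x), c = cos (x);

  double direct = s + c;                       /* may lose bits when s is near -c */
  double via_identity = -cos (x + x) / (s - c);

  printf ("%.17g\n%.17g\n", direct, via_identity);
  return 0;
}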
diff --git a/libc/sysdeps/ieee754/dbl-64/e_jn.c b/libc/sysdeps/ieee754/dbl-64/e_jn.c
index 0d2a24c93..f48e43a0d 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_jn.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_jn.c
@@ -41,246 +41,284 @@
#include <math_private.h>
static const double
-invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
-two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
-one = 1.00000000000000000000e+00; /* 0x3FF00000, 0x00000000 */
+ invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
+ two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
+ one = 1.00000000000000000000e+00; /* 0x3FF00000, 0x00000000 */
-static const double zero = 0.00000000000000000000e+00;
+static const double zero = 0.00000000000000000000e+00;
double
-__ieee754_jn(int n, double x)
+__ieee754_jn (int n, double x)
{
- int32_t i,hx,ix,lx, sgn;
- double a, b, temp, di;
- double z, w;
+ int32_t i, hx, ix, lx, sgn;
+ double a, b, temp, di;
+ double z, w;
- /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
- * Thus, J(-n,x) = J(n,-x)
- */
- EXTRACT_WORDS(hx,lx,x);
- ix = 0x7fffffff&hx;
- /* if J(n,NaN) is NaN */
- if(__builtin_expect((ix|((u_int32_t)(lx|-lx))>>31)>0x7ff00000, 0))
- return x+x;
- if(n<0){
- n = -n;
- x = -x;
- hx ^= 0x80000000;
+ /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
+ * Thus, J(-n,x) = J(n,-x)
+ */
+ EXTRACT_WORDS (hx, lx, x);
+ ix = 0x7fffffff & hx;
+ /* if J(n,NaN) is NaN */
+ if (__builtin_expect ((ix | ((u_int32_t) (lx | -lx)) >> 31) > 0x7ff00000, 0))
+ return x + x;
+ if (n < 0)
+ {
+ n = -n;
+ x = -x;
+ hx ^= 0x80000000;
+ }
+ if (n == 0)
+ return (__ieee754_j0 (x));
+ if (n == 1)
+ return (__ieee754_j1 (x));
+ sgn = (n & 1) & (hx >> 31); /* even n -- 0, odd n -- sign(x) */
+ x = fabs (x);
+ if (__builtin_expect ((ix | lx) == 0 || ix >= 0x7ff00000, 0))
+ /* if x is 0 or inf */
+ b = zero;
+ else if ((double) n <= x)
+ {
+ /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */
+ if (ix >= 0x52D00000) /* x > 2**302 */
+ { /* (x >> n**2)
+ * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
+ * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
+ * Let s=sin(x), c=cos(x),
+ * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
+ *
+ * n sin(xn)*sqt2 cos(xn)*sqt2
+ * ----------------------------------
+ * 0 s-c c+s
+ * 1 -s-c -c+s
+ * 2 -s+c -c-s
+ * 3 s+c c-s
+ */
+ double s;
+ double c;
+ __sincos (x, &s, &c);
+ switch (n & 3)
+ {
+ case 0: temp = c + s; break;
+ case 1: temp = -c + s; break;
+ case 2: temp = -c - s; break;
+ case 3: temp = c - s; break;
+ }
+ b = invsqrtpi * temp / __ieee754_sqrt (x);
}
- if(n==0) return(__ieee754_j0(x));
- if(n==1) return(__ieee754_j1(x));
- sgn = (n&1)&(hx>>31); /* even n -- 0, odd n -- sign(x) */
- x = fabs(x);
- if(__builtin_expect((ix|lx)==0||ix>=0x7ff00000,0))
- /* if x is 0 or inf */
- b = zero;
- else if((double)n<=x) {
- /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */
- if(ix>=0x52D00000) { /* x > 2**302 */
- /* (x >> n**2)
- * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
- * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
- * Let s=sin(x), c=cos(x),
- * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
- *
- * n sin(xn)*sqt2 cos(xn)*sqt2
- * ----------------------------------
- * 0 s-c c+s
- * 1 -s-c -c+s
- * 2 -s+c -c-s
- * 3 s+c c-s
- */
- double s;
- double c;
- __sincos (x, &s, &c);
- switch(n&3) {
- case 0: temp = c + s; break;
- case 1: temp = -c + s; break;
- case 2: temp = -c - s; break;
- case 3: temp = c - s; break;
- }
- b = invsqrtpi*temp/__ieee754_sqrt(x);
- } else {
- a = __ieee754_j0(x);
- b = __ieee754_j1(x);
- for(i=1;i<n;i++){
- temp = b;
- b = b*((double)(i+i)/x) - a; /* avoid underflow */
- a = temp;
- }
+ else
+ {
+ a = __ieee754_j0 (x);
+ b = __ieee754_j1 (x);
+ for (i = 1; i < n; i++)
+ {
+ temp = b;
+ b = b * ((double) (i + i) / x) - a; /* avoid underflow */
+ a = temp;
}
- } else {
- if(ix<0x3e100000) { /* x < 2**-29 */
- /* x is tiny, return the first Taylor expansion of J(n,x)
- * J(n,x) = 1/n!*(x/2)^n - ...
- */
- if(n>33) /* underflow */
- b = zero;
- else {
- temp = x*0.5; b = temp;
- for (a=one,i=2;i<=n;i++) {
- a *= (double)i; /* a = n! */
- b *= temp; /* b = (x/2)^n */
- }
- b = b/a;
+ }
+ }
+ else
+ {
+ if (ix < 0x3e100000) /* x < 2**-29 */
+ { /* x is tiny, return the first Taylor expansion of J(n,x)
+ * J(n,x) = 1/n!*(x/2)^n - ...
+ */
+ if (n > 33) /* underflow */
+ b = zero;
+ else
+ {
+ temp = x * 0.5; b = temp;
+ for (a = one, i = 2; i <= n; i++)
+ {
+ a *= (double) i; /* a = n! */
+ b *= temp; /* b = (x/2)^n */
}
- } else {
- /* use backward recurrence */
- /* x x^2 x^2
- * J(n,x)/J(n-1,x) = ---- ------ ------ .....
- * 2n - 2(n+1) - 2(n+2)
- *
- * 1 1 1
- * (for large x) = ---- ------ ------ .....
- * 2n 2(n+1) 2(n+2)
- * -- - ------ - ------ -
- * x x x
- *
- * Let w = 2n/x and h=2/x, then the above quotient
- * is equal to the continued fraction:
- * 1
- * = -----------------------
- * 1
- * w - -----------------
- * 1
- * w+h - ---------
- * w+2h - ...
- *
- * To determine how many terms needed, let
- * Q(0) = w, Q(1) = w(w+h) - 1,
- * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
- * When Q(k) > 1e4 good for single
- * When Q(k) > 1e9 good for double
- * When Q(k) > 1e17 good for quadruple
- */
- /* determine k */
- double t,v;
- double q0,q1,h,tmp; int32_t k,m;
- w = (n+n)/(double)x; h = 2.0/(double)x;
- q0 = w; z = w+h; q1 = w*z - 1.0; k=1;
- while(q1<1.0e9) {
- k += 1; z += h;
- tmp = z*q1 - q0;
- q0 = q1;
- q1 = tmp;
+ b = b / a;
+ }
+ }
+ else
+ {
+ /* use backward recurrence */
+ /* x x^2 x^2
+ * J(n,x)/J(n-1,x) = ---- ------ ------ .....
+ * 2n - 2(n+1) - 2(n+2)
+ *
+ * 1 1 1
+ * (for large x) = ---- ------ ------ .....
+ * 2n 2(n+1) 2(n+2)
+ * -- - ------ - ------ -
+ * x x x
+ *
+ * Let w = 2n/x and h=2/x, then the above quotient
+ * is equal to the continued fraction:
+ * 1
+ * = -----------------------
+ * 1
+ * w - -----------------
+ * 1
+ * w+h - ---------
+ * w+2h - ...
+ *
+ * To determine how many terms needed, let
+ * Q(0) = w, Q(1) = w(w+h) - 1,
+ * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
+ * When Q(k) > 1e4 good for single
+ * When Q(k) > 1e9 good for double
+ * When Q(k) > 1e17 good for quadruple
+ */
+ /* determine k */
+ double t, v;
+ double q0, q1, h, tmp; int32_t k, m;
+ w = (n + n) / (double) x; h = 2.0 / (double) x;
+ q0 = w; z = w + h; q1 = w * z - 1.0; k = 1;
+ while (q1 < 1.0e9)
+ {
+ k += 1; z += h;
+ tmp = z * q1 - q0;
+ q0 = q1;
+ q1 = tmp;
+ }
+ m = n + n;
+ for (t = zero, i = 2 * (n + k); i >= m; i -= 2)
+ t = one / (i / x - t);
+ a = t;
+ b = one;
+ /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
+ * Hence, if n*(log(2n/x)) > ...
+ * single 8.8722839355e+01
+ * double 7.09782712893383973096e+02
+ * long double 1.1356523406294143949491931077970765006170e+04
+ * then recurrent value may overflow and the result is
+ * likely underflow to zero
+ */
+ tmp = n;
+ v = two / x;
+ tmp = tmp * __ieee754_log (fabs (v * tmp));
+ if (tmp < 7.09782712893383973096e+02)
+ {
+ for (i = n - 1, di = (double) (i + i); i > 0; i--)
+ {
+ temp = b;
+ b *= di;
+ b = b / x - a;
+ a = temp;
+ di -= two;
}
- m = n+n;
- for(t=zero, i = 2*(n+k); i>=m; i -= 2) t = one/(i/x-t);
- a = t;
- b = one;
- /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
- * Hence, if n*(log(2n/x)) > ...
- * single 8.8722839355e+01
- * double 7.09782712893383973096e+02
- * long double 1.1356523406294143949491931077970765006170e+04
- * then recurrent value may overflow and the result is
- * likely underflow to zero
- */
- tmp = n;
- v = two/x;
- tmp = tmp*__ieee754_log(fabs(v*tmp));
- if(tmp<7.09782712893383973096e+02) {
- for(i=n-1,di=(double)(i+i);i>0;i--){
- temp = b;
- b *= di;
- b = b/x - a;
- a = temp;
- di -= two;
- }
- } else {
- for(i=n-1,di=(double)(i+i);i>0;i--){
- temp = b;
- b *= di;
- b = b/x - a;
- a = temp;
- di -= two;
- /* scale b to avoid spurious overflow */
- if(b>1e100) {
- a /= b;
- t /= b;
- b = one;
- }
+ }
+ else
+ {
+ for (i = n - 1, di = (double) (i + i); i > 0; i--)
+ {
+ temp = b;
+ b *= di;
+ b = b / x - a;
+ a = temp;
+ di -= two;
+ /* scale b to avoid spurious overflow */
+ if (b > 1e100)
+ {
+ a /= b;
+ t /= b;
+ b = one;
}
}
- /* j0() and j1() suffer enormous loss of precision at and
- * near zero; however, we know that their zero points never
- * coincide, so just choose the one further away from zero.
- */
- z = __ieee754_j0 (x);
- w = __ieee754_j1 (x);
- if (fabs (z) >= fabs (w))
- b = (t * z / b);
- else
- b = (t * w / a);
}
+ /* j0() and j1() suffer enormous loss of precision at and
+ * near zero; however, we know that their zero points never
+ * coincide, so just choose the one further away from zero.
+ */
+ z = __ieee754_j0 (x);
+ w = __ieee754_j1 (x);
+ if (fabs (z) >= fabs (w))
+ b = (t * z / b);
+ else
+ b = (t * w / a);
}
- if(sgn==1) return -b; else return b;
+ }
+ if (sgn == 1)
+ return -b;
+ else
+ return b;
}
strong_alias (__ieee754_jn, __jn_finite)
double
-__ieee754_yn(int n, double x)
+__ieee754_yn (int n, double x)
{
- int32_t i,hx,ix,lx;
- int32_t sign;
- double a, b, temp;
+ int32_t i, hx, ix, lx;
+ int32_t sign;
+ double a, b, temp;
- EXTRACT_WORDS(hx,lx,x);
- ix = 0x7fffffff&hx;
- /* if Y(n,NaN) is NaN */
- if(__builtin_expect((ix|((u_int32_t)(lx|-lx))>>31)>0x7ff00000,0))
- return x+x;
- if(__builtin_expect((ix|lx)==0, 0))
- return -HUGE_VAL+x; /* -inf and overflow exception. */;
- if(__builtin_expect(hx<0, 0)) return zero/(zero*x);
- sign = 1;
- if(n<0){
- n = -n;
- sign = 1 - ((n&1)<<1);
+ EXTRACT_WORDS (hx, lx, x);
+ ix = 0x7fffffff & hx;
+ /* if Y(n,NaN) is NaN */
+ if (__builtin_expect ((ix | ((u_int32_t) (lx | -lx)) >> 31) > 0x7ff00000, 0))
+ return x + x;
+ if (__builtin_expect ((ix | lx) == 0, 0))
+ return -HUGE_VAL + x;
+ /* -inf and overflow exception. */;
+ if (__builtin_expect (hx < 0, 0))
+ return zero / (zero * x);
+ sign = 1;
+ if (n < 0)
+ {
+ n = -n;
+ sign = 1 - ((n & 1) << 1);
+ }
+ if (n == 0)
+ return (__ieee754_y0 (x));
+ if (n == 1)
+ return (sign * __ieee754_y1 (x));
+ if (__builtin_expect (ix == 0x7ff00000, 0))
+ return zero;
+ if (ix >= 0x52D00000) /* x > 2**302 */
+ { /* (x >> n**2)
+ * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
+ * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
+ * Let s=sin(x), c=cos(x),
+ * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
+ *
+ * n sin(xn)*sqt2 cos(xn)*sqt2
+ * ----------------------------------
+ * 0 s-c c+s
+ * 1 -s-c -c+s
+ * 2 -s+c -c-s
+ * 3 s+c c-s
+ */
+ double c;
+ double s;
+ __sincos (x, &s, &c);
+ switch (n & 3)
+ {
+ case 0: temp = s - c; break;
+ case 1: temp = -s - c; break;
+ case 2: temp = -s + c; break;
+ case 3: temp = s + c; break;
}
- if(n==0) return(__ieee754_y0(x));
- if(n==1) return(sign*__ieee754_y1(x));
- if(__builtin_expect(ix==0x7ff00000, 0)) return zero;
- if(ix>=0x52D00000) { /* x > 2**302 */
- /* (x >> n**2)
- * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
- * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
- * Let s=sin(x), c=cos(x),
- * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
- *
- * n sin(xn)*sqt2 cos(xn)*sqt2
- * ----------------------------------
- * 0 s-c c+s
- * 1 -s-c -c+s
- * 2 -s+c -c-s
- * 3 s+c c-s
- */
- double c;
- double s;
- __sincos (x, &s, &c);
- switch(n&3) {
- case 0: temp = s - c; break;
- case 1: temp = -s - c; break;
- case 2: temp = -s + c; break;
- case 3: temp = s + c; break;
- }
- b = invsqrtpi*temp/__ieee754_sqrt(x);
- } else {
- u_int32_t high;
- a = __ieee754_y0(x);
- b = __ieee754_y1(x);
- /* quit if b is -inf */
- GET_HIGH_WORD(high,b);
- for(i=1;i<n&&high!=0xfff00000;i++){
- temp = b;
- b = ((double)(i+i)/x)*b - a;
- GET_HIGH_WORD(high,b);
- a = temp;
- }
- /* If B is +-Inf, set up errno accordingly. */
- if (! __finite (b))
- __set_errno (ERANGE);
+ b = invsqrtpi * temp / __ieee754_sqrt (x);
+ }
+ else
+ {
+ u_int32_t high;
+ a = __ieee754_y0 (x);
+ b = __ieee754_y1 (x);
+ /* quit if b is -inf */
+ GET_HIGH_WORD (high, b);
+ for (i = 1; i < n && high != 0xfff00000; i++)
+ {
+ temp = b;
+ b = ((double) (i + i) / x) * b - a;
+ GET_HIGH_WORD (high, b);
+ a = temp;
}
- if(sign>0) return b; else return -b;
+ /* If B is +-Inf, set up errno accordingly. */
+ if (!__finite (b))
+ __set_errno (ERANGE);
+ }
+ if (sign > 0)
+ return b;
+ else
+ return -b;
}
strong_alias (__ieee754_yn, __yn_finite)
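The yn loop above is the classic three-term forward recurrence Y(i+1) = (2i/x)*Y(i) - Y(i-1), which is stable in the increasing direction for Y; the jn half of this file runs the analogous recurrence backwards, with the 1e100 rescaling guard, because the forward direction is unstable for J. A minimal sketch of the idea, using the public y0()/y1() (XSI extensions in <math.h>) rather than the internal __ieee754_* entry points:

    #include <math.h>

    /* Hedged sketch, not the glibc implementation: forward recurrence for
       Y_n(x), assuming n >= 1, x > 0 and that y0()/y1() are available.
       Link with -lm.  */
    static double
    yn_forward (int n, double x)
    {
      double a = y0 (x);
      double b = y1 (x);
      for (int i = 1; i < n; i++)
        {
          double t = b;
          b = ((double) (i + i) / x) * b - a;  /* Y(i+1) = (2i/x)*Y(i) - Y(i-1) */
          a = t;
        }
      return b;
    }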
diff --git a/libc/sysdeps/ieee754/dbl-64/e_log.c b/libc/sysdeps/ieee754/dbl-64/e_log.c
index f9300f9ce..a7ab54435 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_log.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_log.c
@@ -39,6 +39,7 @@
#include "mpa.h"
#include "MathLib.h"
#include <math_private.h>
+#include <stap-probe.h>
#ifndef SECTION
# define SECTION
@@ -55,12 +56,12 @@ SECTION
__ieee754_log (double x)
{
#define M 4
- static const int pr[M] = {8, 10, 18, 32};
+ static const int pr[M] = { 8, 10, 18, 32 };
int i, j, n, ux, dx, p;
double dbl_n, u, p0, q, r0, w, nln2a, luai, lubi, lvaj, lvbj,
- sij, ssij, ttij, A, B, B0, y, y1, y2, polI, polII, sa, sb,
- t1, t2, t7, t8, t, ra, rb, ww,
- a0, aa0, s1, s2, ss2, s3, ss3, a1, aa1, a, aa, b, bb, c;
+ sij, ssij, ttij, A, B, B0, y, y1, y2, polI, polII, sa, sb,
+ t1, t2, t7, t8, t, ra, rb, ww,
+ a0, aa0, s1, s2, ss2, s3, ss3, a1, aa1, a, aa, b, bb, c;
#ifndef DLA_FMS
double t3, t4, t5, t6;
#endif
@@ -79,15 +80,15 @@ __ieee754_log (double x)
if (__builtin_expect (ux < 0x00100000, 0))
{
if (__builtin_expect (((ux & 0x7fffffff) | dx) == 0, 0))
- return MHALF / 0.0; /* return -INF */
+ return MHALF / 0.0; /* return -INF */
if (__builtin_expect (ux < 0, 0))
- return (x - x) / 0.0; /* return NaN */
+ return (x - x) / 0.0; /* return NaN */
n -= 54;
- x *= two54.d; /* scale x */
+ x *= two54.d; /* scale x */
num.d = x;
}
if (__builtin_expect (ux >= 0x7ff00000, 0))
- return x + x; /* INF or NaN */
+ return x + x; /* INF or NaN */
/* Regular values of x */
@@ -242,8 +243,12 @@ stage_n:
__mp_dbl (&mpy1, &y1, p);
__mp_dbl (&mpy2, &y2, p);
if (y1 == y2)
- return y1;
+ {
+ LIBC_PROBE (slowlog, 3, &p, &x, &y1);
+ return y1;
+ }
}
+ LIBC_PROBE (slowlog_inexact, 3, &p, &x, &y1);
return y1;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/e_log10.c b/libc/sysdeps/ieee754/dbl-64/e_log10.c
index ab5069e58..c3d465a4a 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_log10.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_log10.c
@@ -46,10 +46,10 @@
#include <math.h>
#include <math_private.h>
-static const double two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
-static const double ivln10 = 4.34294481903251816668e-01; /* 0x3FDBCB7B, 0x1526E50E */
-static const double log10_2hi = 3.01029995663611771306e-01; /* 0x3FD34413, 0x509F6000 */
-static const double log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
+static const double two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
+static const double ivln10 = 4.34294481903251816668e-01; /* 0x3FDBCB7B, 0x1526E50E */
+static const double log10_2hi = 3.01029995663611771306e-01; /* 0x3FD34413, 0x509F6000 */
+static const double log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
double
__ieee754_log10 (double x)
@@ -62,13 +62,13 @@ __ieee754_log10 (double x)
k = 0;
if (hx < 0x00100000)
- { /* x < 2**-1022 */
+ { /* x < 2**-1022 */
if (__builtin_expect (((hx & 0x7fffffff) | lx) == 0, 0))
- return -two54 / (x - x); /* log(+-0)=-inf */
+ return -two54 / (x - x); /* log(+-0)=-inf */
if (__builtin_expect (hx < 0, 0))
- return (x - x) / (x - x); /* log(-#) = NaN */
+ return (x - x) / (x - x); /* log(-#) = NaN */
k -= 54;
- x *= two54; /* subnormal number, scale up x */
+ x *= two54; /* subnormal number, scale up x */
GET_HIGH_WORD (hx, x);
}
if (__builtin_expect (hx >= 0x7ff00000, 0))
diff --git a/libc/sysdeps/ieee754/dbl-64/e_log2.c b/libc/sysdeps/ieee754/dbl-64/e_log2.c
index 4d5cab0ed..890a4a2bd 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_log2.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_log2.c
@@ -58,14 +58,14 @@
#include <math_private.h>
static const double ln2 = 0.69314718055994530942;
-static const double two54 = 1.80143985094819840000e+16; /* 43500000 00000000 */
-static const double Lg1 = 6.666666666666735130e-01; /* 3FE55555 55555593 */
-static const double Lg2 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */
-static const double Lg3 = 2.857142874366239149e-01; /* 3FD24924 94229359 */
-static const double Lg4 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */
-static const double Lg5 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */
-static const double Lg6 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
-static const double Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
+static const double two54 = 1.80143985094819840000e+16; /* 43500000 00000000 */
+static const double Lg1 = 6.666666666666735130e-01; /* 3FE55555 55555593 */
+static const double Lg2 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */
+static const double Lg3 = 2.857142874366239149e-01; /* 3FD24924 94229359 */
+static const double Lg4 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */
+static const double Lg5 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */
+static const double Lg6 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
+static const double Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
static const double zero = 0.0;
@@ -80,13 +80,13 @@ __ieee754_log2 (double x)
k = 0;
if (hx < 0x00100000)
- { /* x < 2**-1022 */
+ { /* x < 2**-1022 */
if (__builtin_expect (((hx & 0x7fffffff) | lx) == 0, 0))
- return -two54 / (x - x); /* log(+-0)=-inf */
+ return -two54 / (x - x); /* log(+-0)=-inf */
if (__builtin_expect (hx < 0, 0))
- return (x - x) / (x - x); /* log(-#) = NaN */
+ return (x - x) / (x - x); /* log(-#) = NaN */
k -= 54;
- x *= two54; /* subnormal number, scale up x */
+ x *= two54; /* subnormal number, scale up x */
GET_HIGH_WORD (hx, x);
}
if (__builtin_expect (hx >= 0x7ff00000, 0))
@@ -94,12 +94,12 @@ __ieee754_log2 (double x)
k += (hx >> 20) - 1023;
hx &= 0x000fffff;
i = (hx + 0x95f64) & 0x100000;
- SET_HIGH_WORD (x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
+ SET_HIGH_WORD (x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
k += (i >> 20);
dk = (double) k;
f = x - 1.0;
if ((0x000fffff & (2 + hx)) < 3)
- { /* |f| < 2**-20 */
+ { /* |f| < 2**-20 */
if (f == zero)
return dk;
R = f * f * (0.5 - 0.33333333333333333 * f);
diff --git a/libc/sysdeps/ieee754/dbl-64/e_pow.c b/libc/sysdeps/ieee754/dbl-64/e_pow.c
index 9a766e722..9cf230917 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_pow.c
@@ -49,354 +49,407 @@
static const double huge = 1.0e300, tiny = 1.0e-300;
-double __exp1(double x, double xx, double error);
-static double log1(double x, double *delta, double *error);
-static double my_log2(double x, double *delta, double *error);
-double __slowpow(double x, double y,double z);
-static double power1(double x, double y);
-static int checkint(double x);
+double __exp1 (double x, double xx, double error);
+static double log1 (double x, double *delta, double *error);
+static double my_log2 (double x, double *delta, double *error);
+double __slowpow (double x, double y, double z);
+static double power1 (double x, double y);
+static int checkint (double x);
-/***************************************************************************/
-/* An ultimate power routine. Given two IEEE double machine numbers y,x */
-/* it computes the correctly rounded (to nearest) value of X^y. */
-/***************************************************************************/
+/* An ultimate power routine. Given two IEEE double machine numbers y, x it
+ computes the correctly rounded (to nearest) value of X^y. */
double
SECTION
-__ieee754_pow(double x, double y) {
- double z,a,aa,error, t,a1,a2,y1,y2;
- mynumber u,v;
+__ieee754_pow (double x, double y)
+{
+ double z, a, aa, error, t, a1, a2, y1, y2;
+ mynumber u, v;
int k;
- int4 qx,qy;
- v.x=y;
- u.x=x;
- if (v.i[LOW_HALF] == 0) { /* of y */
- qx = u.i[HIGH_HALF]&0x7fffffff;
- /* Is x a NaN? */
- if (((qx == 0x7ff00000) && (u.i[LOW_HALF] != 0)) || (qx > 0x7ff00000))
- return x;
- if (y == 1.0) return x;
- if (y == 2.0) return x*x;
- if (y == -1.0) return 1.0/x;
- if (y == 0) return 1.0;
- }
+ int4 qx, qy;
+ v.x = y;
+ u.x = x;
+ if (v.i[LOW_HALF] == 0)
+ { /* of y */
+ qx = u.i[HIGH_HALF] & 0x7fffffff;
+ /* Is x a NaN? */
+ if (((qx == 0x7ff00000) && (u.i[LOW_HALF] != 0)) || (qx > 0x7ff00000))
+ return x;
+ if (y == 1.0)
+ return x;
+ if (y == 2.0)
+ return x * x;
+ if (y == -1.0)
+ return 1.0 / x;
+ if (y == 0)
+ return 1.0;
+ }
/* else */
- if(((u.i[HIGH_HALF]>0 && u.i[HIGH_HALF]<0x7ff00000)|| /* x>0 and not x->0 */
- (u.i[HIGH_HALF]==0 && u.i[LOW_HALF]!=0)) &&
- /* 2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
- (v.i[HIGH_HALF]&0x7fffffff) < 0x4ff00000) { /* if y<-1 or y>1 */
- double retval;
+ if (((u.i[HIGH_HALF] > 0 && u.i[HIGH_HALF] < 0x7ff00000) || /* x>0 and not x->0 */
+ (u.i[HIGH_HALF] == 0 && u.i[LOW_HALF] != 0)) &&
+ /* 2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
+ (v.i[HIGH_HALF] & 0x7fffffff) < 0x4ff00000)
+ { /* if y<-1 or y>1 */
+ double retval;
- SET_RESTORE_ROUND (FE_TONEAREST);
+ SET_RESTORE_ROUND (FE_TONEAREST);
- /* Avoid internal underflow for tiny y. The exact value of y does
- not matter if |y| <= 2**-64. */
- if (ABS (y) < 0x1p-64)
- y = y < 0 ? -0x1p-64 : 0x1p-64;
- z = log1(x,&aa,&error); /* x^y =e^(y log (X)) */
- t = y*CN;
- y1 = t - (t-y);
- y2 = y - y1;
- t = z*CN;
- a1 = t - (t-z);
- a2 = (z - a1)+aa;
- a = y1*a1;
- aa = y2*a1 + y*a2;
- a1 = a+aa;
- a2 = (a-a1)+aa;
- error = error*ABS(y);
- t = __exp1(a1,a2,1.9e16*error); /* return -10 or 0 if wasn't computed exactly */
- retval = (t>0)?t:power1(x,y);
+ /* Avoid internal underflow for tiny y. The exact value of y does
+ not matter if |y| <= 2**-64. */
+ if (ABS (y) < 0x1p-64)
+ y = y < 0 ? -0x1p-64 : 0x1p-64;
+ z = log1 (x, &aa, &error); /* x^y =e^(y log (X)) */
+ t = y * CN;
+ y1 = t - (t - y);
+ y2 = y - y1;
+ t = z * CN;
+ a1 = t - (t - z);
+ a2 = (z - a1) + aa;
+ a = y1 * a1;
+ aa = y2 * a1 + y * a2;
+ a1 = a + aa;
+ a2 = (a - a1) + aa;
+ error = error * ABS (y);
+ t = __exp1 (a1, a2, 1.9e16 * error); /* return -10 or 0 if wasn't computed exactly */
+ retval = (t > 0) ? t : power1 (x, y);
- return retval;
- }
+ return retval;
+ }
- if (x == 0) {
- if (((v.i[HIGH_HALF] & 0x7fffffff) == 0x7ff00000 && v.i[LOW_HALF] != 0)
- || (v.i[HIGH_HALF] & 0x7fffffff) > 0x7ff00000) /* NaN */
- return y;
- if (ABS(y) > 1.0e20) return (y>0)?0:1.0/0.0;
- k = checkint(y);
- if (k == -1)
- return y < 0 ? 1.0/x : x;
- else
- return y < 0 ? 1.0/0.0 : 0.0; /* return 0 */
- }
+ if (x == 0)
+ {
+ if (((v.i[HIGH_HALF] & 0x7fffffff) == 0x7ff00000 && v.i[LOW_HALF] != 0)
+ || (v.i[HIGH_HALF] & 0x7fffffff) > 0x7ff00000) /* NaN */
+ return y;
+ if (ABS (y) > 1.0e20)
+ return (y > 0) ? 0 : 1.0 / 0.0;
+ k = checkint (y);
+ if (k == -1)
+ return y < 0 ? 1.0 / x : x;
+ else
+ return y < 0 ? 1.0 / 0.0 : 0.0; /* return 0 */
+ }
- qx = u.i[HIGH_HALF]&0x7fffffff; /* no sign */
- qy = v.i[HIGH_HALF]&0x7fffffff; /* no sign */
+ qx = u.i[HIGH_HALF] & 0x7fffffff; /* no sign */
+ qy = v.i[HIGH_HALF] & 0x7fffffff; /* no sign */
- if (qx >= 0x7ff00000 && (qx > 0x7ff00000 || u.i[LOW_HALF] != 0)) /* NaN */
+ if (qx >= 0x7ff00000 && (qx > 0x7ff00000 || u.i[LOW_HALF] != 0)) /* NaN */
return x;
- if (qy >= 0x7ff00000 && (qy > 0x7ff00000 || v.i[LOW_HALF] != 0)) /* NaN */
+ if (qy >= 0x7ff00000 && (qy > 0x7ff00000 || v.i[LOW_HALF] != 0)) /* NaN */
return x == 1.0 ? 1.0 : y;
/* if x<0 */
- if (u.i[HIGH_HALF] < 0) {
- k = checkint(y);
- if (k==0) {
- if (qy == 0x7ff00000) {
- if (x == -1.0) return 1.0;
- else if (x > -1.0) return v.i[HIGH_HALF] < 0 ? INF.x : 0.0;
- else return v.i[HIGH_HALF] < 0 ? 0.0 : INF.x;
- }
+ if (u.i[HIGH_HALF] < 0)
+ {
+ k = checkint (y);
+ if (k == 0)
+ {
+ if (qy == 0x7ff00000)
+ {
+ if (x == -1.0)
+ return 1.0;
+ else if (x > -1.0)
+ return v.i[HIGH_HALF] < 0 ? INF.x : 0.0;
+ else
+ return v.i[HIGH_HALF] < 0 ? 0.0 : INF.x;
+ }
+ else if (qx == 0x7ff00000)
+ return y < 0 ? 0.0 : INF.x;
+ return (x - x) / (x - x); /* y not integer and x<0 */
+ }
else if (qx == 0x7ff00000)
- return y < 0 ? 0.0 : INF.x;
- return (x - x) / (x - x); /* y not integer and x<0 */
+ {
+ if (k < 0)
+ return y < 0 ? nZERO.x : nINF.x;
+ else
+ return y < 0 ? 0.0 : INF.x;
+ }
+ /* if y even or odd */
+ return (k == 1) ? __ieee754_pow (-x, y) : -__ieee754_pow (-x, y);
}
- else if (qx == 0x7ff00000)
- {
- if (k < 0)
- return y < 0 ? nZERO.x : nINF.x;
- else
- return y < 0 ? 0.0 : INF.x;
- }
- return (k==1)?__ieee754_pow(-x,y):-__ieee754_pow(-x,y); /* if y even or odd */
- }
/* x>0 */
- if (qx == 0x7ff00000) /* x= 2^-0x3ff */
+ if (qx == 0x7ff00000) /* x= 2^-0x3ff */
return y > 0 ? x : 0;
- if (qy > 0x45f00000 && qy < 0x7ff00000) {
- if (x == 1.0) return 1.0;
- if (y>0) return (x>1.0)?huge*huge:tiny*tiny;
- if (y<0) return (x<1.0)?huge*huge:tiny*tiny;
- }
+ if (qy > 0x45f00000 && qy < 0x7ff00000)
+ {
+ if (x == 1.0)
+ return 1.0;
+ if (y > 0)
+ return (x > 1.0) ? huge * huge : tiny * tiny;
+ if (y < 0)
+ return (x < 1.0) ? huge * huge : tiny * tiny;
+ }
- if (x == 1.0) return 1.0;
- if (y>0) return (x>1.0)?INF.x:0;
- if (y<0) return (x<1.0)?INF.x:0;
- return 0; /* unreachable, to make the compiler happy */
+ if (x == 1.0)
+ return 1.0;
+ if (y > 0)
+ return (x > 1.0) ? INF.x : 0;
+ if (y < 0)
+ return (x < 1.0) ? INF.x : 0;
+ return 0; /* unreachable, to make the compiler happy */
}
+
#ifndef __ieee754_pow
strong_alias (__ieee754_pow, __pow_finite)
#endif
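The y1/y2 and a1/a2 pairs in __ieee754_pow above come from a Veltkamp-style split: multiplying by CN and subtracting leaves a high part with a shortened significand plus an exact low tail, so the leading product y1*a1 is exact and the remaining cross terms carry the rest. A minimal sketch of that split, assuming CN plays the usual role of 2^27 + 1 (its actual definition lives in upow.h, outside this hunk):

    /* Hedged sketch; SPLIT_C = 2^27 + 1 is an assumption about what CN is.  */
    static void
    split (double x, double *hi, double *lo)
    {
      static const double SPLIT_C = 134217729.0;  /* 2^27 + 1 */
      double t = x * SPLIT_C;
      *hi = t - (t - x);  /* high part, at most 26 significand bits */
      *lo = x - *hi;      /* exact low tail */
    }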
-/**************************************************************************/
-/* Computing x^y using more accurate but more slow log routine */
-/**************************************************************************/
+/* Compute x^y using the more accurate but slower log routine.  */
static double
SECTION
-power1(double x, double y) {
- double z,a,aa,error, t,a1,a2,y1,y2;
- z = my_log2(x,&aa,&error);
- t = y*CN;
- y1 = t - (t-y);
+power1 (double x, double y)
+{
+ double z, a, aa, error, t, a1, a2, y1, y2;
+ z = my_log2 (x, &aa, &error);
+ t = y * CN;
+ y1 = t - (t - y);
y2 = y - y1;
- t = z*CN;
- a1 = t - (t-z);
+ t = z * CN;
+ a1 = t - (t - z);
a2 = z - a1;
- a = y*z;
- aa = ((y1*a1-a)+y1*a2+y2*a1)+y2*a2+aa*y;
- a1 = a+aa;
- a2 = (a-a1)+aa;
- error = error*ABS(y);
- t = __exp1(a1,a2,1.9e16*error);
- return (t >= 0)?t:__slowpow(x,y,z);
+ a = y * z;
+ aa = ((y1 * a1 - a) + y1 * a2 + y2 * a1) + y2 * a2 + aa * y;
+ a1 = a + aa;
+ a2 = (a - a1) + aa;
+ error = error * ABS (y);
+ t = __exp1 (a1, a2, 1.9e16 * error);
+ return (t >= 0) ? t : __slowpow (x, y, z);
}
-/****************************************************************************/
-/* Computing log(x) (x is left argument). The result is the returned double */
-/* + the parameter delta. */
-/* The result is bounded by error (rightmost argument) */
-/****************************************************************************/
+/* Compute log(x) (x is left argument). The result is the returned double + the
+   parameter DELTA.  The error of the result is bounded by ERROR.  */
static double
SECTION
-log1(double x, double *delta, double *error) {
- int i,j,m;
- double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
- mynumber u,v;
+log1 (double x, double *delta, double *error)
+{
+ int i, j, m;
+ double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0;
+ mynumber u, v;
#ifdef BIG_ENDI
- mynumber
-/**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
+ mynumber /**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
#else
-#ifdef LITTLE_ENDI
- mynumber
-/**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
-#endif
+# ifdef LITTLE_ENDI
+ mynumber /**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
+# endif
#endif
u.x = x;
m = u.i[HIGH_HALF];
*error = 0;
*delta = 0;
- if (m < 0x00100000) /* 1<x<2^-1007 */
- { x = x*t52.x; add = -52.0; u.x = x; m = u.i[HIGH_HALF];}
+ if (m < 0x00100000) /* 1<x<2^-1007 */
+ {
+ x = x * t52.x;
+ add = -52.0;
+ u.x = x;
+ m = u.i[HIGH_HALF];
+ }
- if ((m&0x000fffff) < 0x0006a09e)
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3ff00000; two52.i[LOW_HALF]=(m>>20); }
+ if ((m & 0x000fffff) < 0x0006a09e)
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000;
+ two52.i[LOW_HALF] = (m >> 20);
+ }
else
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3fe00000; two52.i[LOW_HALF]=(m>>20)+1; }
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000;
+ two52.i[LOW_HALF] = (m >> 20) + 1;
+ }
v.x = u.x + bigu.x;
uu = v.x - bigu.x;
- i = (v.i[LOW_HALF]&0x000003ff)<<2;
- if (two52.i[LOW_HALF] == 1023) /* nx = 0 */
- {
- if (i > 1192 && i < 1208) /* |x-1| < 1.5*2**-10 */
- {
+ i = (v.i[LOW_HALF] & 0x000003ff) << 2;
+ if (two52.i[LOW_HALF] == 1023) /* nx = 0 */
+ {
+ if (i > 1192 && i < 1208) /* |x-1| < 1.5*2**-10 */
+ {
t = x - 1.0;
- t1 = (t+5.0e6)-5.0e6;
- t2 = t-t1;
- e1 = t - 0.5*t1*t1;
- e2 = t*t*t*(r3+t*(r4+t*(r5+t*(r6+t*(r7+t*r8)))))-0.5*t2*(t+t1);
- res = e1+e2;
- *error = 1.0e-21*ABS(t);
- *delta = (e1-res)+e2;
+ t1 = (t + 5.0e6) - 5.0e6;
+ t2 = t - t1;
+ e1 = t - 0.5 * t1 * t1;
+ e2 = (t * t * t * (r3 + t * (r4 + t * (r5 + t * (r6 + t
+ * (r7 + t * r8)))))
+ - 0.5 * t2 * (t + t1));
+ res = e1 + e2;
+ *error = 1.0e-21 * ABS (t);
+ *delta = (e1 - res) + e2;
return res;
- } /* |x-1| < 1.5*2**-10 */
+ } /* |x-1| < 1.5*2**-10 */
else
- {
- v.x = u.x*(ui.x[i]+ui.x[i+1])+bigv.x;
- vv = v.x-bigv.x;
- j = v.i[LOW_HALF]&0x0007ffff;
- j = j+j+j;
- eps = u.x - uu*vv;
- e1 = eps*ui.x[i];
- e2 = eps*(ui.x[i+1]+vj.x[j]*(ui.x[i]+ui.x[i+1]));
- e = e1+e2;
- e2 = ((e1-e)+e2);
- t=ui.x[i+2]+vj.x[j+1];
- t1 = t+e;
- t2 = (((t-t1)+e)+(ui.x[i+3]+vj.x[j+2]))+e2+e*e*(p2+e*(p3+e*p4));
- res=t1+t2;
+ {
+ v.x = u.x * (ui.x[i] + ui.x[i + 1]) + bigv.x;
+ vv = v.x - bigv.x;
+ j = v.i[LOW_HALF] & 0x0007ffff;
+ j = j + j + j;
+ eps = u.x - uu * vv;
+ e1 = eps * ui.x[i];
+ e2 = eps * (ui.x[i + 1] + vj.x[j] * (ui.x[i] + ui.x[i + 1]));
+ e = e1 + e2;
+ e2 = ((e1 - e) + e2);
+ t = ui.x[i + 2] + vj.x[j + 1];
+ t1 = t + e;
+ t2 = ((((t - t1) + e) + (ui.x[i + 3] + vj.x[j + 2])) + e2 + e * e
+ * (p2 + e * (p3 + e * p4)));
+ res = t1 + t2;
*error = 1.0e-24;
- *delta = (t1-res)+t2;
+ *delta = (t1 - res) + t2;
return res;
- }
- } /* nx = 0 */
- else /* nx != 0 */
- {
+ }
+ } /* nx = 0 */
+ else /* nx != 0 */
+ {
eps = u.x - uu;
- nx = (two52.x - two52e.x)+add;
- e1 = eps*ui.x[i];
- e2 = eps*ui.x[i+1];
- e=e1+e2;
- e2 = (e1-e)+e2;
- t=nx*ln2a.x+ui.x[i+2];
- t1=t+e;
- t2=(((t-t1)+e)+nx*ln2b.x+ui.x[i+3]+e2)+e*e*(q2+e*(q3+e*(q4+e*(q5+e*q6))));
- res = t1+t2;
+ nx = (two52.x - two52e.x) + add;
+ e1 = eps * ui.x[i];
+ e2 = eps * ui.x[i + 1];
+ e = e1 + e2;
+ e2 = (e1 - e) + e2;
+ t = nx * ln2a.x + ui.x[i + 2];
+ t1 = t + e;
+ t2 = ((((t - t1) + e) + nx * ln2b.x + ui.x[i + 3] + e2) + e * e
+ * (q2 + e * (q3 + e * (q4 + e * (q5 + e * q6)))));
+ res = t1 + t2;
*error = 1.0e-21;
- *delta = (t1-res)+t2;
+ *delta = (t1 - res) + t2;
return res;
- } /* nx != 0 */
+ } /* nx != 0 */
}
-/****************************************************************************/
-/* More slow but more accurate routine of log */
-/* Computing log(x)(x is left argument).The result is return double + delta.*/
-/* The result is bounded by error (right argument) */
-/****************************************************************************/
+/* Slower but more accurate log routine.  The returned result is the double +
+   DELTA.  The error of the result is bounded by ERROR.  */
static double
SECTION
-my_log2(double x, double *delta, double *error) {
- int i,j,m;
- double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
- double ou1,ou2,lu1,lu2,ov,lv1,lv2,a,a1,a2;
- double y,yy,z,zz,j1,j2,j7,j8;
+my_log2 (double x, double *delta, double *error)
+{
+ int i, j, m;
+ double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0;
+ double ou1, ou2, lu1, lu2, ov, lv1, lv2, a, a1, a2;
+ double y, yy, z, zz, j1, j2, j7, j8;
#ifndef DLA_FMS
- double j3,j4,j5,j6;
+ double j3, j4, j5, j6;
#endif
- mynumber u,v;
+ mynumber u, v;
#ifdef BIG_ENDI
- mynumber
-/**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
+ mynumber /**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
#else
-#ifdef LITTLE_ENDI
- mynumber
-/**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
-#endif
+# ifdef LITTLE_ENDI
+ mynumber /**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
+# endif
#endif
u.x = x;
m = u.i[HIGH_HALF];
*error = 0;
*delta = 0;
- add=0;
- if (m<0x00100000) { /* x < 2^-1022 */
- x = x*t52.x; add = -52.0; u.x = x; m = u.i[HIGH_HALF]; }
+ add = 0;
+ if (m < 0x00100000)
+ { /* x < 2^-1022 */
+ x = x * t52.x;
+ add = -52.0;
+ u.x = x;
+ m = u.i[HIGH_HALF];
+ }
- if ((m&0x000fffff) < 0x0006a09e)
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3ff00000; two52.i[LOW_HALF]=(m>>20); }
+ if ((m & 0x000fffff) < 0x0006a09e)
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000;
+ two52.i[LOW_HALF] = (m >> 20);
+ }
else
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3fe00000; two52.i[LOW_HALF]=(m>>20)+1; }
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000;
+ two52.i[LOW_HALF] = (m >> 20) + 1;
+ }
v.x = u.x + bigu.x;
uu = v.x - bigu.x;
- i = (v.i[LOW_HALF]&0x000003ff)<<2;
+ i = (v.i[LOW_HALF] & 0x000003ff) << 2;
/*------------------------------------- |x-1| < 2**-11------------------------------- */
- if ((two52.i[LOW_HALF] == 1023) && (i == 1200))
- {
+ if ((two52.i[LOW_HALF] == 1023) && (i == 1200))
+ {
t = x - 1.0;
- EMULV(t,s3,y,yy,j1,j2,j3,j4,j5);
- ADD2(-0.5,0,y,yy,z,zz,j1,j2);
- MUL2(t,0,z,zz,y,yy,j1,j2,j3,j4,j5,j6,j7,j8);
- MUL2(t,0,y,yy,z,zz,j1,j2,j3,j4,j5,j6,j7,j8);
+ EMULV (t, s3, y, yy, j1, j2, j3, j4, j5);
+ ADD2 (-0.5, 0, y, yy, z, zz, j1, j2);
+ MUL2 (t, 0, z, zz, y, yy, j1, j2, j3, j4, j5, j6, j7, j8);
+ MUL2 (t, 0, y, yy, z, zz, j1, j2, j3, j4, j5, j6, j7, j8);
- e1 = t+z;
- e2 = (((t-e1)+z)+zz)+t*t*t*(ss3+t*(s4+t*(s5+t*(s6+t*(s7+t*s8)))));
- res = e1+e2;
- *error = 1.0e-25*ABS(t);
- *delta = (e1-res)+e2;
+ e1 = t + z;
+ e2 = ((((t - e1) + z) + zz) + t * t * t
+ * (ss3 + t * (s4 + t * (s5 + t * (s6 + t * (s7 + t * s8))))));
+ res = e1 + e2;
+ *error = 1.0e-25 * ABS (t);
+ *delta = (e1 - res) + e2;
return res;
- }
+ }
/*----------------------------- |x-1| > 2**-11 -------------------------- */
else
- { /*Computing log(x) according to log table */
- nx = (two52.x - two52e.x)+add;
+ { /*Computing log(x) according to log table */
+ nx = (two52.x - two52e.x) + add;
ou1 = ui.x[i];
- ou2 = ui.x[i+1];
- lu1 = ui.x[i+2];
- lu2 = ui.x[i+3];
- v.x = u.x*(ou1+ou2)+bigv.x;
- vv = v.x-bigv.x;
- j = v.i[LOW_HALF]&0x0007ffff;
- j = j+j+j;
- eps = u.x - uu*vv;
- ov = vj.x[j];
- lv1 = vj.x[j+1];
- lv2 = vj.x[j+2];
- a = (ou1+ou2)*(1.0+ov);
- a1 = (a+1.0e10)-1.0e10;
- a2 = a*(1.0-a1*uu*vv);
- e1 = eps*a1;
- e2 = eps*a2;
- e = e1+e2;
- e2 = (e1-e)+e2;
- t=nx*ln2a.x+lu1+lv1;
- t1 = t+e;
- t2 = (((t-t1)+e)+(lu2+lv2+nx*ln2b.x+e2))+e*e*(p2+e*(p3+e*p4));
- res=t1+t2;
+ ou2 = ui.x[i + 1];
+ lu1 = ui.x[i + 2];
+ lu2 = ui.x[i + 3];
+ v.x = u.x * (ou1 + ou2) + bigv.x;
+ vv = v.x - bigv.x;
+ j = v.i[LOW_HALF] & 0x0007ffff;
+ j = j + j + j;
+ eps = u.x - uu * vv;
+ ov = vj.x[j];
+ lv1 = vj.x[j + 1];
+ lv2 = vj.x[j + 2];
+ a = (ou1 + ou2) * (1.0 + ov);
+ a1 = (a + 1.0e10) - 1.0e10;
+ a2 = a * (1.0 - a1 * uu * vv);
+ e1 = eps * a1;
+ e2 = eps * a2;
+ e = e1 + e2;
+ e2 = (e1 - e) + e2;
+ t = nx * ln2a.x + lu1 + lv1;
+ t1 = t + e;
+ t2 = ((((t - t1) + e) + (lu2 + lv2 + nx * ln2b.x + e2)) + e * e
+ * (p2 + e * (p3 + e * p4)));
+ res = t1 + t2;
*error = 1.0e-27;
- *delta = (t1-res)+t2;
+ *delta = (t1 - res) + t2;
return res;
- }
+ }
}
-/**********************************************************************/
-/* Routine receives a double x and checks if it is an integer. If not */
-/* it returns 0, else it returns 1 if even or -1 if odd. */
-/**********************************************************************/
+/* This function receives a double x and checks if it is an integer. If not,
+ it returns 0, else it returns 1 if even or -1 if odd. */
static int
SECTION
-checkint(double x) {
- union {int4 i[2]; double x;} u;
- int k,m,n;
+checkint (double x)
+{
+ union
+ {
+ int4 i[2];
+ double x;
+ } u;
+ int k, m, n;
u.x = x;
- m = u.i[HIGH_HALF]&0x7fffffff; /* no sign */
- if (m >= 0x7ff00000) return 0; /* x is +/-inf or NaN */
- if (m >= 0x43400000) return 1; /* |x| >= 2**53 */
- if (m < 0x40000000) return 0; /* |x| < 2, can not be 0 or 1 */
+ m = u.i[HIGH_HALF] & 0x7fffffff; /* no sign */
+ if (m >= 0x7ff00000)
+ return 0; /* x is +/-inf or NaN */
+ if (m >= 0x43400000)
+ return 1; /* |x| >= 2**53 */
+ if (m < 0x40000000)
+ return 0; /* |x| < 2, can not be 0 or 1 */
n = u.i[LOW_HALF];
- k = (m>>20)-1023; /* 1 <= k <= 52 */
- if (k == 52) return (n&1)? -1:1; /* odd or even*/
- if (k>20) {
- if (n<<(k-20)) return 0; /* if not integer */
- return (n<<(k-21))?-1:1;
- }
- if (n) return 0; /*if not integer*/
- if (k == 20) return (m&1)? -1:1;
- if (m<<(k+12)) return 0;
- return (m<<(k+11))?-1:1;
+ k = (m >> 20) - 1023; /* 1 <= k <= 52 */
+ if (k == 52)
+ return (n & 1) ? -1 : 1; /* odd or even */
+ if (k > 20)
+ {
+ if (n << (k - 20))
+ return 0; /* if not integer */
+ return (n << (k - 21)) ? -1 : 1;
+ }
+ if (n)
+ return 0; /*if not integer */
+ if (k == 20)
+ return (m & 1) ? -1 : 1;
+ if (m << (k + 12))
+ return 0;
+ return (m << (k + 11)) ? -1 : 1;
}
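checkint() above answers three questions at once with word inspection: it returns 0 for anything it does not treat as an integer (NaN, +/-inf, |x| < 2, or a genuine non-integer), 1 for an even integer and -1 for an odd one, counting every |x| >= 2^53 as even since all such doubles are even integers. A hedged reference version with <math.h> calls instead of bit tests, matching those conventions:

    #include <math.h>

    /* Hedged reference, not the glibc code: same return convention as
       checkint() above, written for readability rather than speed.  */
    static int
    checkint_ref (double x)
    {
      double a = fabs (x);
      if (!isfinite (x) || a < 2.0)
        return 0;                            /* NaN, +/-inf, or |x| < 2 */
      if (a >= 0x1p53)
        return 1;                            /* every such double is an even integer */
      if (rint (x) != x)
        return 0;                            /* not an integer */
      return fmod (a, 2.0) == 0.0 ? 1 : -1;  /* even : odd */
    }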
diff --git a/libc/sysdeps/ieee754/dbl-64/e_rem_pio2.c b/libc/sysdeps/ieee754/dbl-64/e_rem_pio2.c
index 4478be0b0..2f55ca294 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_rem_pio2.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_rem_pio2.c
@@ -57,110 +57,137 @@ static const int32_t npio2_hw[] = {
*/
static const double
-zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
-half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
-two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
-invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
-pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
-pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
-pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
-pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
-pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
-pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
+ zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
+ half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+ two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
+ invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+ pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
+ pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
+ pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
+ pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
+ pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
+ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
int32_t
-__ieee754_rem_pio2(double x, double *y)
+__ieee754_rem_pio2 (double x, double *y)
{
- double z,w,t,r,fn;
- double tx[3];
- int32_t e0,i,j,nx,n,ix,hx;
- u_int32_t low;
+ double z, w, t, r, fn;
+ double tx[3];
+ int32_t e0, i, j, nx, n, ix, hx;
+ u_int32_t low;
- GET_HIGH_WORD(hx,x); /* high word of x */
- ix = hx&0x7fffffff;
- if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
- {y[0] = x; y[1] = 0; return 0;}
- if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
- if(hx>0) {
- z = x - pio2_1;
- if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
- y[0] = z - pio2_1t;
- y[1] = (z-y[0])-pio2_1t;
- } else { /* near pi/2, use 33+33+53 bit pi */
- z -= pio2_2;
- y[0] = z - pio2_2t;
- y[1] = (z-y[0])-pio2_2t;
- }
- return 1;
- } else { /* negative x */
- z = x + pio2_1;
- if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
- y[0] = z + pio2_1t;
- y[1] = (z-y[0])+pio2_1t;
- } else { /* near pi/2, use 33+33+53 bit pi */
- z += pio2_2;
- y[0] = z + pio2_2t;
- y[1] = (z-y[0])+pio2_2t;
- }
- return -1;
+ GET_HIGH_WORD (hx, x); /* high word of x */
+ ix = hx & 0x7fffffff;
+ if (ix <= 0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
+ {
+ y[0] = x; y[1] = 0; return 0;
+ }
+ if (ix < 0x4002d97c) /* |x| < 3pi/4, special case with n=+-1 */
+ {
+ if (hx > 0)
+ {
+ z = x - pio2_1;
+ if (ix != 0x3ff921fb) /* 33+53 bit pi is good enough */
+ {
+ y[0] = z - pio2_1t;
+ y[1] = (z - y[0]) - pio2_1t;
+ }
+ else /* near pi/2, use 33+33+53 bit pi */
+ {
+ z -= pio2_2;
+ y[0] = z - pio2_2t;
+ y[1] = (z - y[0]) - pio2_2t;
}
+ return 1;
}
- if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
- t = fabs(x);
- n = (int32_t) (t*invpio2+half);
- fn = (double)n;
- r = t-fn*pio2_1;
- w = fn*pio2_1t; /* 1st round good to 85 bit */
- if(n<32&&ix!=npio2_hw[n-1]) {
- y[0] = r-w; /* quick check no cancellation */
- } else {
- u_int32_t high;
- j = ix>>20;
- y[0] = r-w;
- GET_HIGH_WORD(high,y[0]);
- i = j-((high>>20)&0x7ff);
- if(i>16) { /* 2nd iteration needed, good to 118 */
- t = r;
- w = fn*pio2_2;
- r = t-w;
- w = fn*pio2_2t-((t-r)-w);
- y[0] = r-w;
- GET_HIGH_WORD(high,y[0]);
- i = j-((high>>20)&0x7ff);
- if(i>49) { /* 3rd iteration need, 151 bits acc */
- t = r; /* will cover all possible cases */
- w = fn*pio2_3;
- r = t-w;
- w = fn*pio2_3t-((t-r)-w);
- y[0] = r-w;
- }
- }
+ else /* negative x */
+ {
+ z = x + pio2_1;
+ if (ix != 0x3ff921fb) /* 33+53 bit pi is good enough */
+ {
+ y[0] = z + pio2_1t;
+ y[1] = (z - y[0]) + pio2_1t;
+ }
+ else /* near pi/2, use 33+33+53 bit pi */
+ {
+ z += pio2_2;
+ y[0] = z + pio2_2t;
+ y[1] = (z - y[0]) + pio2_2t;
}
- y[1] = (r-y[0])-w;
- if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
- else return n;
+ return -1;
}
- /*
- * all other (large) arguments
- */
- if(ix>=0x7ff00000) { /* x is inf or NaN */
- y[0]=y[1]=x-x; return 0;
+ }
+ if (ix <= 0x413921fb) /* |x| ~<= 2^19*(pi/2), medium size */
+ {
+ t = fabs (x);
+ n = (int32_t) (t * invpio2 + half);
+ fn = (double) n;
+ r = t - fn * pio2_1;
+ w = fn * pio2_1t; /* 1st round good to 85 bit */
+ if (n < 32 && ix != npio2_hw[n - 1])
+ {
+ y[0] = r - w; /* quick check no cancellation */
}
- /* set z = scalbn(|x|,ilogb(x)-23) */
- GET_LOW_WORD(low,x);
- SET_LOW_WORD(z,low);
- e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */
- SET_HIGH_WORD(z, ix - ((int32_t)(e0<<20)));
- for(i=0;i<2;i++) {
- tx[i] = (double)((int32_t)(z));
- z = (z-tx[i])*two24;
+ else
+ {
+ u_int32_t high;
+ j = ix >> 20;
+ y[0] = r - w;
+ GET_HIGH_WORD (high, y[0]);
+ i = j - ((high >> 20) & 0x7ff);
+ if (i > 16) /* 2nd iteration needed, good to 118 */
+ {
+ t = r;
+ w = fn * pio2_2;
+ r = t - w;
+ w = fn * pio2_2t - ((t - r) - w);
+ y[0] = r - w;
+ GET_HIGH_WORD (high, y[0]);
+ i = j - ((high >> 20) & 0x7ff);
+	      if (i > 49)	/* 3rd iteration needed, 151 bits acc */
+ {
+ t = r; /* will cover all possible cases */
+ w = fn * pio2_3;
+ r = t - w;
+ w = fn * pio2_3t - ((t - r) - w);
+ y[0] = r - w;
+ }
+ }
}
- tx[2] = z;
- nx = 3;
- while(tx[nx-1]==zero) nx--; /* skip zero term */
- n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
- if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
+ y[1] = (r - y[0]) - w;
+ if (hx < 0)
+ {
+ y[0] = -y[0]; y[1] = -y[1]; return -n;
+ }
+ else
return n;
+ }
+ /*
+ * all other (large) arguments
+ */
+ if (ix >= 0x7ff00000) /* x is inf or NaN */
+ {
+ y[0] = y[1] = x - x; return 0;
+ }
+ /* set z = scalbn(|x|,ilogb(x)-23) */
+ GET_LOW_WORD (low, x);
+ SET_LOW_WORD (z, low);
+ e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
+ SET_HIGH_WORD (z, ix - ((int32_t) (e0 << 20)));
+ for (i = 0; i < 2; i++)
+ {
+ tx[i] = (double) ((int32_t) (z));
+ z = (z - tx[i]) * two24;
+ }
+ tx[2] = z;
+ nx = 3;
+ while (tx[nx - 1] == zero)
+ nx--; /* skip zero term */
+ n = __kernel_rem_pio2 (tx, y, e0, nx, 2, two_over_pi);
+ if (hx < 0)
+ {
+ y[0] = -y[0]; y[1] = -y[1]; return -n;
+ }
+ return n;
}
-
#endif
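The medium-size branch of __ieee754_rem_pio2 above is a Cody-Waite reduction: n is the integer nearest |x|*2/pi, and n*pi/2 is subtracted in pieces (pio2_1 plus pio2_1t, escalating to pio2_2/pio2_2t and pio2_3/pio2_3t when the high words show heavy cancellation). A one-round sketch of the idea, with the constants copied from the table above:

    #include <math.h>

    /* Hedged sketch, first round only; the real code adds two more rounds
       and the __kernel_rem_pio2 fallback for huge arguments.  */
    static double
    reduce_once (double t, int *n)  /* t = |x|, assumed medium-sized */
    {
      static const double invpio2 = 6.36619772367581382433e-01;
      static const double pio2_1  = 1.57079632673412561417e+00;  /* 33 bits of pi/2 */
      static const double pio2_1t = 6.07710050650619224932e-11;  /* pi/2 - pio2_1 */
      double fn = floor (t * invpio2 + 0.5);
      *n = (int) fn;
      double r = t - fn * pio2_1;  /* exact: 33-bit pio2_1 times a ~20-bit fn */
      double w = fn * pio2_1t;
      return r - w;                /* "1st round good to 85 bit" above */
    }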
diff --git a/libc/sysdeps/ieee754/dbl-64/e_remainder.c b/libc/sysdeps/ieee754/dbl-64/e_remainder.c
index 2d20bb1df..c6a1901b1 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_remainder.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_remainder.c
@@ -39,89 +39,111 @@
/* An ultimate remainder routine. Given two IEEE double machine numbers x */
/* ,y it computes the correctly rounded (to nearest) value of remainder */
/**************************************************************************/
-double __ieee754_remainder(double x, double y)
+double
+__ieee754_remainder (double x, double y)
{
- double z,d,xx;
- int4 kx,ky,n,nn,n1,m1,l;
- mynumber u,t,w={{0,0}},v={{0,0}},ww={{0,0}},r;
- u.x=x;
- t.x=y;
- kx=u.i[HIGH_HALF]&0x7fffffff; /* no sign for x*/
- t.i[HIGH_HALF]&=0x7fffffff; /*no sign for y */
- ky=t.i[HIGH_HALF];
+ double z, d, xx;
+ int4 kx, ky, n, nn, n1, m1, l;
+ mynumber u, t, w = { { 0, 0 } }, v = { { 0, 0 } }, ww = { { 0, 0 } }, r;
+ u.x = x;
+ t.x = y;
+ kx = u.i[HIGH_HALF] & 0x7fffffff; /* no sign for x*/
+ t.i[HIGH_HALF] &= 0x7fffffff; /*no sign for y */
+ ky = t.i[HIGH_HALF];
/*------ |x| < 2^1023 and 2^-970 < |y| < 2^1024 ------------------*/
- if (kx<0x7fe00000 && ky<0x7ff00000 && ky>=0x03500000) {
- SET_RESTORE_ROUND_NOEX (FE_TONEAREST);
- if (kx+0x00100000<ky) return x;
- if ((kx-0x01500000)<ky) {
- z=x/t.x;
- v.i[HIGH_HALF]=t.i[HIGH_HALF];
- d=(z+big.x)-big.x;
- xx=(x-d*v.x)-d*(t.x-v.x);
- if (d-z!=0.5&&d-z!=-0.5) return (xx!=0)?xx:((x>0)?ZERO.x:nZERO.x);
- else {
- if (ABS(xx)>0.5*t.x) return (z>d)?xx-t.x:xx+t.x;
- else return xx;
- }
- } /* (kx<(ky+0x01500000)) */
- else {
- r.x=1.0/t.x;
- n=t.i[HIGH_HALF];
- nn=(n&0x7ff00000)+0x01400000;
- w.i[HIGH_HALF]=n;
- ww.x=t.x-w.x;
- l=(kx-nn)&0xfff00000;
- n1=ww.i[HIGH_HALF];
- m1=r.i[HIGH_HALF];
- while (l>0) {
- r.i[HIGH_HALF]=m1-l;
- z=u.x*r.x;
- w.i[HIGH_HALF]=n+l;
- ww.i[HIGH_HALF]=(n1)?n1+l:n1;
- d=(z+big.x)-big.x;
- u.x=(u.x-d*w.x)-d*ww.x;
- l=(u.i[HIGH_HALF]&0x7ff00000)-nn;
- }
- r.i[HIGH_HALF]=m1;
- w.i[HIGH_HALF]=n;
- ww.i[HIGH_HALF]=n1;
- z=u.x*r.x;
- d=(z+big.x)-big.x;
- u.x=(u.x-d*w.x)-d*ww.x;
- if (ABS(u.x)<0.5*t.x) return (u.x!=0)?u.x:((x>0)?ZERO.x:nZERO.x);
+ if (kx < 0x7fe00000 && ky < 0x7ff00000 && ky >= 0x03500000)
+ {
+ SET_RESTORE_ROUND_NOEX (FE_TONEAREST);
+ if (kx + 0x00100000 < ky)
+ return x;
+ if ((kx - 0x01500000) < ky)
+ {
+ z = x / t.x;
+ v.i[HIGH_HALF] = t.i[HIGH_HALF];
+ d = (z + big.x) - big.x;
+ xx = (x - d * v.x) - d * (t.x - v.x);
+ if (d - z != 0.5 && d - z != -0.5)
+ return (xx != 0) ? xx : ((x > 0) ? ZERO.x : nZERO.x);
+ else
+ {
+ if (ABS (xx) > 0.5 * t.x)
+ return (z > d) ? xx - t.x : xx + t.x;
+ else
+ return xx;
+ }
+ } /* (kx<(ky+0x01500000)) */
else
- if (ABS(u.x)>0.5*t.x) return (d>z)?u.x+t.x:u.x-t.x;
- else
- {z=u.x/t.x; d=(z+big.x)-big.x; return ((u.x-d*w.x)-d*ww.x);}
- }
-
- } /* (kx<0x7fe00000&&ky<0x7ff00000&&ky>=0x03500000) */
- else {
- if (kx<0x7fe00000&&ky<0x7ff00000&&(ky>0||t.i[LOW_HALF]!=0)) {
- y=ABS(y)*t128.x;
- z=__ieee754_remainder(x,y)*t128.x;
- z=__ieee754_remainder(z,y)*tm128.x;
- return z;
- }
- else {
- if ((kx&0x7ff00000)==0x7fe00000&&ky<0x7ff00000&&(ky>0||t.i[LOW_HALF]!=0)) {
- y=ABS(y);
- z=2.0*__ieee754_remainder(0.5*x,y);
- d = ABS(z);
- if (d <= ABS(d-y)) return z;
- else return (z>0)?z-y:z+y;
- }
- else { /* if x is too big */
- if (ky==0 && t.i[LOW_HALF] == 0) /* y = 0 */
- return (x * y) / (x * y);
- else if (kx >= 0x7ff00000 /* x not finite */
- || (ky>0x7ff00000 /* y is NaN */
- || (ky == 0x7ff00000 && t.i[LOW_HALF] != 0)))
- return (x * y) / (x * y);
+ {
+ r.x = 1.0 / t.x;
+ n = t.i[HIGH_HALF];
+ nn = (n & 0x7ff00000) + 0x01400000;
+ w.i[HIGH_HALF] = n;
+ ww.x = t.x - w.x;
+ l = (kx - nn) & 0xfff00000;
+ n1 = ww.i[HIGH_HALF];
+ m1 = r.i[HIGH_HALF];
+ while (l > 0)
+ {
+ r.i[HIGH_HALF] = m1 - l;
+ z = u.x * r.x;
+ w.i[HIGH_HALF] = n + l;
+ ww.i[HIGH_HALF] = (n1) ? n1 + l : n1;
+ d = (z + big.x) - big.x;
+ u.x = (u.x - d * w.x) - d * ww.x;
+ l = (u.i[HIGH_HALF] & 0x7ff00000) - nn;
+ }
+ r.i[HIGH_HALF] = m1;
+ w.i[HIGH_HALF] = n;
+ ww.i[HIGH_HALF] = n1;
+ z = u.x * r.x;
+ d = (z + big.x) - big.x;
+ u.x = (u.x - d * w.x) - d * ww.x;
+ if (ABS (u.x) < 0.5 * t.x)
+ return (u.x != 0) ? u.x : ((x > 0) ? ZERO.x : nZERO.x);
+ else
+ if (ABS (u.x) > 0.5 * t.x)
+ return (d > z) ? u.x + t.x : u.x - t.x;
+ else
+ {
+ z = u.x / t.x; d = (z + big.x) - big.x;
+ return ((u.x - d * w.x) - d * ww.x);
+ }
+ }
+ } /* (kx<0x7fe00000&&ky<0x7ff00000&&ky>=0x03500000) */
+ else
+ {
+ if (kx < 0x7fe00000 && ky < 0x7ff00000 && (ky > 0 || t.i[LOW_HALF] != 0))
+ {
+ y = ABS (y) * t128.x;
+ z = __ieee754_remainder (x, y) * t128.x;
+ z = __ieee754_remainder (z, y) * tm128.x;
+ return z;
+ }
else
- return x;
+ {
+ if ((kx & 0x7ff00000) == 0x7fe00000 && ky < 0x7ff00000 &&
+ (ky > 0 || t.i[LOW_HALF] != 0))
+ {
+ y = ABS (y);
+ z = 2.0 * __ieee754_remainder (0.5 * x, y);
+ d = ABS (z);
+ if (d <= ABS (d - y))
+ return z;
+ else
+ return (z > 0) ? z - y : z + y;
+ }
+ else /* if x is too big */
+ {
+ if (ky == 0 && t.i[LOW_HALF] == 0) /* y = 0 */
+ return (x * y) / (x * y);
+ else if (kx >= 0x7ff00000 /* x not finite */
+ || (ky > 0x7ff00000 /* y is NaN */
+ || (ky == 0x7ff00000 && t.i[LOW_HALF] != 0)))
+ return (x * y) / (x * y);
+ else
+ return x;
+ }
+ }
}
- }
- }
}
strong_alias (__ieee754_remainder, __remainder_finite)
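Several steps of __ieee754_remainder above round a quotient to the nearest integer with d = (z + big.x) - big.x: adding a constant around 1.5*2^52 pushes the fraction bits out of the significand, so the subsequent subtraction leaves the rounded integer, which is why the function pins FE_TONEAREST first. A hedged sketch, treating the value 1.5*2^52 as an assumption about big.x (its definition is in a header not shown in this hunk):

    /* Hedged sketch: round to the nearest integer via the big-constant
       trick; valid only for |z| < 2^51 and only under round-to-nearest
       (ties resolve to even).  */
    static double
    round_nearest (double z)
    {
      static const double BIG = 6755399441055744.0;  /* assumed 1.5 * 2^52 */
      return (z + BIG) - BIG;
    }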
diff --git a/libc/sysdeps/ieee754/dbl-64/e_sinh.c b/libc/sysdeps/ieee754/dbl-64/e_sinh.c
index b954100ba..851b510aa 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_sinh.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_sinh.c
@@ -38,43 +38,50 @@ static char rcsid[] = "$NetBSD: e_sinh.c,v 1.7 1995/05/10 20:46:13 jtc Exp $";
static const double one = 1.0, shuge = 1.0e307;
double
-__ieee754_sinh(double x)
+__ieee754_sinh (double x)
{
- double t,w,h;
- int32_t ix,jx;
- u_int32_t lx;
+ double t, w, h;
+ int32_t ix, jx;
+ u_int32_t lx;
- /* High word of |x|. */
- GET_HIGH_WORD(jx,x);
- ix = jx&0x7fffffff;
+ /* High word of |x|. */
+ GET_HIGH_WORD (jx, x);
+ ix = jx & 0x7fffffff;
- /* x is INF or NaN */
- if(__builtin_expect(ix>=0x7ff00000, 0)) return x+x;
+ /* x is INF or NaN */
+ if (__builtin_expect (ix >= 0x7ff00000, 0))
+ return x + x;
- h = 0.5;
- if (jx<0) h = -h;
- /* |x| in [0,22], return sign(x)*0.5*(E+E/(E+1))) */
- if (ix < 0x40360000) { /* |x|<22 */
- if (__builtin_expect(ix<0x3e300000, 0)) /* |x|<2**-28 */
- if(shuge+x>one)
- return x;/* sinh(tiny) = tiny with inexact */
- t = __expm1(fabs(x));
- if(ix<0x3ff00000) return h*(2.0*t-t*t/(t+one));
- return h*(t+t/(t+one));
- }
+ h = 0.5;
+ if (jx < 0)
+ h = -h;
+ /* |x| in [0,22], return sign(x)*0.5*(E+E/(E+1))) */
+ if (ix < 0x40360000) /* |x|<22 */
+ {
+ if (__builtin_expect (ix < 0x3e300000, 0)) /* |x|<2**-28 */
+ if (shuge + x > one)
+ return x;
+ /* sinh(tiny) = tiny with inexact */
+ t = __expm1 (fabs (x));
+ if (ix < 0x3ff00000)
+ return h * (2.0 * t - t * t / (t + one));
+ return h * (t + t / (t + one));
+ }
- /* |x| in [22, log(maxdouble)] return 0.5*exp(|x|) */
- if (ix < 0x40862e42) return h*__ieee754_exp(fabs(x));
+ /* |x| in [22, log(maxdouble)] return 0.5*exp(|x|) */
+ if (ix < 0x40862e42)
+ return h * __ieee754_exp (fabs (x));
- /* |x| in [log(maxdouble), overflowthresold] */
- GET_LOW_WORD(lx,x);
- if (ix<0x408633ce || ((ix==0x408633ce)&&(lx<=(u_int32_t)0x8fb9f87d))) {
- w = __ieee754_exp(0.5*fabs(x));
- t = h*w;
- return t*w;
- }
+ /* |x| in [log(maxdouble), overflowthresold] */
+ GET_LOW_WORD (lx, x);
+ if (ix < 0x408633ce || ((ix == 0x408633ce) && (lx <= (u_int32_t) 0x8fb9f87d)))
+ {
+ w = __ieee754_exp (0.5 * fabs (x));
+ t = h * w;
+ return t * w;
+ }
- /* |x| > overflowthresold, sinh(x) overflow */
- return x*shuge;
+ /* |x| > overflowthresold, sinh(x) overflow */
+ return x * shuge;
}
strong_alias (__ieee754_sinh, __sinh_finite)
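The |x| < 22 branch of __ieee754_sinh above rests on the identity sinh(x) = sign(x)*0.5*(E + E/(E+1)) with E = expm1(|x|), since e^|x| = E+1 and e^-|x| = 1/(E+1); the |x| < 1 variant 0.5*(2E - E*E/(E+1)) is the same quantity rearranged so nothing cancels when E is tiny. A hedged reference version of just that branch:

    #include <math.h>

    /* Hedged reference for the |x| < 22 branch only; no overflow, NaN or
       tiny-argument handling.  */
    static double
    sinh_small_ref (double x)
    {
      double h = (x < 0.0) ? -0.5 : 0.5;
      double t = expm1 (fabs (x));
      if (fabs (x) < 1.0)
        return h * (2.0 * t - t * t / (t + 1.0));
      return h * (t + t / (t + 1.0));
    }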
diff --git a/libc/sysdeps/ieee754/dbl-64/e_sqrt.c b/libc/sysdeps/ieee754/dbl-64/e_sqrt.c
index 54610eeea..854ae38c4 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_sqrt.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_sqrt.c
@@ -44,58 +44,67 @@
/* it computes the correctly rounded (to nearest) value of square */
/* root of x. */
/*********************************************************************/
-double __ieee754_sqrt(double x) {
+double
+__ieee754_sqrt (double x)
+{
#include "uroot.h"
static const double
rt0 = 9.99999999859990725855365213134618E-01,
rt1 = 4.99999999495955425917856814202739E-01,
rt2 = 3.75017500867345182581453026130850E-01,
rt3 = 3.12523626554518656309172508769531E-01;
- static const double big = 134217728.0;
- double y,t,del,res,res1,hy,z,zz,p,hx,tx,ty,s;
- mynumber a,c={{0,0}};
+ static const double big = 134217728.0;
+ double y, t, del, res, res1, hy, z, zz, p, hx, tx, ty, s;
+ mynumber a, c = { { 0, 0 } };
int4 k;
- a.x=x;
- k=a.i[HIGH_HALF];
- a.i[HIGH_HALF]=(k&0x001fffff)|0x3fe00000;
- t=inroot[(k&0x001fffff)>>14];
- s=a.x;
+ a.x = x;
+ k = a.i[HIGH_HALF];
+ a.i[HIGH_HALF] = (k & 0x001fffff) | 0x3fe00000;
+ t = inroot[(k & 0x001fffff) >> 14];
+ s = a.x;
/*----------------- 2^-1022 <= | x |< 2^1024 -----------------*/
- if (k>0x000fffff && k<0x7ff00000) {
- fenv_t env;
- libc_feholdexcept (&env);
- double ret;
- y=1.0-t*(t*s);
- t=t*(rt0+y*(rt1+y*(rt2+y*rt3)));
- c.i[HIGH_HALF]=0x20000000+((k&0x7fe00000)>>1);
- y=t*s;
- hy=(y+big)-big;
- del=0.5*t*((s-hy*hy)-(y-hy)*(y+hy));
- res=y+del;
- if (res == (res+1.002*((y-res)+del))) ret = res*c.x;
- else {
- res1=res+1.5*((y-res)+del);
- EMULV(res,res1,z,zz,p,hx,tx,hy,ty); /* (z+zz)=res*res1 */
- ret = ((((z-s)+zz)<0)?max(res,res1):min(res,res1))*c.x;
+ if (k > 0x000fffff && k < 0x7ff00000)
+ {
+ fenv_t env;
+ libc_feholdexcept (&env);
+ double ret;
+ y = 1.0 - t * (t * s);
+ t = t * (rt0 + y * (rt1 + y * (rt2 + y * rt3)));
+ c.i[HIGH_HALF] = 0x20000000 + ((k & 0x7fe00000) >> 1);
+ y = t * s;
+ hy = (y + big) - big;
+ del = 0.5 * t * ((s - hy * hy) - (y - hy) * (y + hy));
+ res = y + del;
+ if (res == (res + 1.002 * ((y - res) + del)))
+ ret = res * c.x;
+ else
+ {
+ res1 = res + 1.5 * ((y - res) + del);
+ EMULV (res, res1, z, zz, p, hx, tx, hy, ty); /* (z+zz)=res*res1 */
+ ret = ((((z - s) + zz) < 0) ? max (res, res1) :
+ min (res, res1)) * c.x;
+ }
+ math_force_eval (ret);
+ libc_fesetenv (&env);
+ if (x / ret != ret)
+ {
+ double force_inexact = 1.0 / 3.0;
+ math_force_eval (force_inexact);
+ }
+ /* Otherwise (x / ret == ret), either the square root was exact or
+ the division was inexact. */
+ return ret;
+ }
+ else
+ {
+ if ((k & 0x7ff00000) == 0x7ff00000)
+ return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+ if (x == 0)
+ return x; /* sqrt(+0)=+0, sqrt(-0)=-0 */
+ if (k < 0)
+ return (x - x) / (x - x); /* sqrt(-ve)=sNaN */
+ return tm256.x * __ieee754_sqrt (x * t512.x);
}
- math_force_eval (ret);
- libc_fesetenv (&env);
- if (x / ret != ret)
- {
- double force_inexact = 1.0 / 3.0;
- math_force_eval (force_inexact);
- }
- /* Otherwise (x / ret == ret), either the square root was exact or
- the division was inexact. */
- return ret;
- }
- else {
- if ((k & 0x7ff00000) == 0x7ff00000)
- return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
- if (x==0) return x; /* sqrt(+0)=+0, sqrt(-0)=-0 */
- if (k<0) return (x-x)/(x-x); /* sqrt(-ve)=sNaN */
- return tm256.x*__ieee754_sqrt(x*t512.x);
- }
}
strong_alias (__ieee754_sqrt, __sqrt_finite)
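The fast path of __ieee754_sqrt above starts from a table seed t ~ 1/sqrt(s), applies one polynomial correction in y = 1 - t*t*s, and takes t*s as the square-root approximation before the del/res compensation and the rounding test. The refinement step in isolation, with the coefficients copied from the function:

    /* Hedged sketch of the refinement only; the real code follows this with
       a compensation term and a correctly-rounded-or-retry check.  */
    static double
    sqrt_refine (double s, double t)  /* t: table seed for 1/sqrt(s) */
    {
      static const double rt0 = 9.99999999859990725855365213134618E-01,
                          rt1 = 4.99999999495955425917856814202739E-01,
                          rt2 = 3.75017500867345182581453026130850E-01,
                          rt3 = 3.12523626554518656309172508769531E-01;
      double y = 1.0 - t * (t * s);
      t = t * (rt0 + y * (rt1 + y * (rt2 + y * rt3)));
      return t * s;
    }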
diff --git a/libc/sysdeps/ieee754/dbl-64/halfulp.c b/libc/sysdeps/ieee754/dbl-64/halfulp.c
index 8c5a9312a..382ad7aad 100644
--- a/libc/sysdeps/ieee754/dbl-64/halfulp.c
+++ b/libc/sysdeps/ieee754/dbl-64/halfulp.c
@@ -44,86 +44,109 @@
#endif
static const int4 tab54[32] = {
- 262143, 11585, 1782, 511, 210, 107, 63, 42,
- 30, 22, 17, 14, 12, 10, 9, 7,
- 7, 6, 5, 5, 5, 4, 4, 4,
- 3, 3, 3, 3, 3, 3, 3, 3 };
+ 262143, 11585, 1782, 511, 210, 107, 63, 42,
+ 30, 22, 17, 14, 12, 10, 9, 7,
+ 7, 6, 5, 5, 5, 4, 4, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3
+};
double
SECTION
-__halfulp(double x, double y)
+__halfulp (double x, double y)
{
mynumber v;
- double z,u,uu;
+ double z, u, uu;
#ifndef DLA_FMS
- double j1,j2,j3,j4,j5;
+ double j1, j2, j3, j4, j5;
#endif
- int4 k,l,m,n;
- if (y <= 0) { /*if power is negative or zero */
- v.x = y;
- if (v.i[LOW_HALF] != 0) return -10.0;
- v.x = x;
- if (v.i[LOW_HALF] != 0) return -10.0;
- if ((v.i[HIGH_HALF]&0x000fffff) != 0) return -10; /* if x =2 ^ n */
- k = ((v.i[HIGH_HALF]&0x7fffffff)>>20)-1023; /* find this n */
- z = (double) k;
- return (z*y == -1075.0)?0: -10.0;
- }
- /* if y > 0 */
+ int4 k, l, m, n;
+ if (y <= 0) /*if power is negative or zero */
+ {
+ v.x = y;
+ if (v.i[LOW_HALF] != 0)
+ return -10.0;
+ v.x = x;
+ if (v.i[LOW_HALF] != 0)
+ return -10.0;
+ if ((v.i[HIGH_HALF] & 0x000fffff) != 0)
+ return -10; /* if x =2 ^ n */
+ k = ((v.i[HIGH_HALF] & 0x7fffffff) >> 20) - 1023; /* find this n */
+ z = (double) k;
+ return (z * y == -1075.0) ? 0 : -10.0;
+ }
+ /* if y > 0 */
v.x = y;
- if (v.i[LOW_HALF] != 0) return -10.0;
+ if (v.i[LOW_HALF] != 0)
+ return -10.0;
- v.x=x;
- /* case where x = 2**n for some integer n */
- if (((v.i[HIGH_HALF]&0x000fffff)|v.i[LOW_HALF]) == 0) {
- k=(v.i[HIGH_HALF]>>20)-1023;
- return (((double) k)*y == -1075.0)?0:-10.0;
- }
+ v.x = x;
+ /* case where x = 2**n for some integer n */
+ if (((v.i[HIGH_HALF] & 0x000fffff) | v.i[LOW_HALF]) == 0)
+ {
+ k = (v.i[HIGH_HALF] >> 20) - 1023;
+ return (((double) k) * y == -1075.0) ? 0 : -10.0;
+ }
v.x = y;
k = v.i[HIGH_HALF];
- m = k<<12;
+ m = k << 12;
l = 0;
while (m)
- {m = m<<1; l++; }
- n = (k&0x000fffff)|0x00100000;
- n = n>>(20-l); /* n is the odd integer of y */
- k = ((k>>20) -1023)-l; /* y = n*2**k */
- if (k>5) return -10.0;
- if (k>0) for (;k>0;k--) n *= 2;
- if (n > 34) return -10.0;
+ {
+ m = m << 1; l++;
+ }
+ n = (k & 0x000fffff) | 0x00100000;
+ n = n >> (20 - l); /* n is the odd integer of y */
+ k = ((k >> 20) - 1023) - l; /* y = n*2**k */
+ if (k > 5)
+ return -10.0;
+ if (k > 0)
+ for (; k > 0; k--)
+ n *= 2;
+ if (n > 34)
+ return -10.0;
k = -k;
- if (k>5) return -10.0;
+ if (k > 5)
+ return -10.0;
- /* now treat x */
- while (k>0) {
- z = __ieee754_sqrt(x);
- EMULV(z,z,u,uu,j1,j2,j3,j4,j5);
- if (((u-x)+uu) != 0) break;
- x = z;
- k--;
- }
- if (k) return -10.0;
+ /* now treat x */
+ while (k > 0)
+ {
+ z = __ieee754_sqrt (x);
+ EMULV (z, z, u, uu, j1, j2, j3, j4, j5);
+ if (((u - x) + uu) != 0)
+ break;
+ x = z;
+ k--;
+ }
+ if (k)
+ return -10.0;
/* it is impossible that n == 2, so the mantissa of x must be short */
v.x = x;
- if (v.i[LOW_HALF]) return -10.0;
+ if (v.i[LOW_HALF])
+ return -10.0;
k = v.i[HIGH_HALF];
- m = k<<12;
+ m = k << 12;
l = 0;
- while (m) {m = m<<1; l++; }
- m = (k&0x000fffff)|0x00100000;
- m = m>>(20-l); /* m is the odd integer of x */
+ while (m)
+ {
+ m = m << 1; l++;
+ }
+ m = (k & 0x000fffff) | 0x00100000;
+ m = m >> (20 - l); /* m is the odd integer of x */
- /* now check whether the length of m**n is at most 54 bits */
+ /* now check whether the length of m**n is at most 54 bits */
- if (m > tab54[n-3]) return -10.0;
+ if (m > tab54[n - 3])
+ return -10.0;
- /* yes, it is - now compute x**n by simple multiplications */
+ /* yes, it is - now compute x**n by simple multiplications */
u = x;
- for (k=1;k<n;k++) u = u*x;
+ for (k = 1; k < n; k++)
+ u = u * x;
return u;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c b/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c
index ec4b4cf60..047c6c288 100644
--- a/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c
+++ b/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c
@@ -147,166 +147,215 @@ static const double PIo2[] = {
};
static const double
-zero = 0.0,
-one = 1.0,
-two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
-twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
+ zero = 0.0,
+ one = 1.0,
+ two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
+ twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
-int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int32_t *ipio2)
+int
+__kernel_rem_pio2 (double *x, double *y, int e0, int nx, int prec,
+ const int32_t *ipio2)
{
- int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
- double z,fw,f[20],fq[20],q[20];
+ int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
+ double z, fw, f[20], fq[20], q[20];
- /* initialize jk*/
- jk = init_jk[prec];
- jp = jk;
+ /* initialize jk*/
+ jk = init_jk[prec];
+ jp = jk;
- /* determine jx,jv,q0, note that 3>q0 */
- jx = nx-1;
- jv = (e0-3)/24; if(jv<0) jv=0;
- q0 = e0-24*(jv+1);
+ /* determine jx,jv,q0, note that 3>q0 */
+ jx = nx - 1;
+ jv = (e0 - 3) / 24; if (jv < 0)
+ jv = 0;
+ q0 = e0 - 24 * (jv + 1);
- /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
- j = jv-jx; m = jx+jk;
- for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];
+ /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
+ j = jv - jx; m = jx + jk;
+ for (i = 0; i <= m; i++, j++)
+ f[i] = (j < 0) ? zero : (double) ipio2[j];
- /* compute q[0],q[1],...q[jk] */
- for (i=0;i<=jk;i++) {
- for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
- }
+ /* compute q[0],q[1],...q[jk] */
+ for (i = 0; i <= jk; i++)
+ {
+ for (j = 0, fw = 0.0; j <= jx; j++)
+ fw += x[j] * f[jx + i - j];
+ q[i] = fw;
+ }
- jz = jk;
+ jz = jk;
recompute:
- /* distill q[] into iq[] reversingly */
- for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
- fw = (double)((int32_t)(twon24* z));
- iq[i] = (int32_t)(z-two24*fw);
- z = q[j-1]+fw;
- }
+ /* distill q[] into iq[] reversingly */
+ for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
+ {
+ fw = (double) ((int32_t) (twon24 * z));
+ iq[i] = (int32_t) (z - two24 * fw);
+ z = q[j - 1] + fw;
+ }
- /* compute n */
- z = __scalbn(z,q0); /* actual value of z */
- z -= 8.0*__floor(z*0.125); /* trim off integer >= 8 */
- n = (int32_t) z;
- z -= (double)n;
- ih = 0;
- if(q0>0) { /* need iq[jz-1] to determine n */
- i = (iq[jz-1]>>(24-q0)); n += i;
- iq[jz-1] -= i<<(24-q0);
- ih = iq[jz-1]>>(23-q0);
- }
- else if(q0==0) ih = iq[jz-1]>>23;
- else if(z>=0.5) ih=2;
+ /* compute n */
+ z = __scalbn (z, q0); /* actual value of z */
+ z -= 8.0 * __floor (z * 0.125); /* trim off integer >= 8 */
+ n = (int32_t) z;
+ z -= (double) n;
+ ih = 0;
+ if (q0 > 0) /* need iq[jz-1] to determine n */
+ {
+ i = (iq[jz - 1] >> (24 - q0)); n += i;
+ iq[jz - 1] -= i << (24 - q0);
+ ih = iq[jz - 1] >> (23 - q0);
+ }
+ else if (q0 == 0)
+ ih = iq[jz - 1] >> 23;
+ else if (z >= 0.5)
+ ih = 2;
- if(ih>0) { /* q > 0.5 */
- n += 1; carry = 0;
- for(i=0;i<jz ;i++) { /* compute 1-q */
- j = iq[i];
- if(carry==0) {
- if(j!=0) {
- carry = 1; iq[i] = 0x1000000- j;
- }
- } else iq[i] = 0xffffff - j;
- }
- if(q0>0) { /* rare case: chance is 1 in 12 */
- switch(q0) {
- case 1:
- iq[jz-1] &= 0x7fffff; break;
- case 2:
- iq[jz-1] &= 0x3fffff; break;
- }
+ if (ih > 0) /* q > 0.5 */
+ {
+ n += 1; carry = 0;
+ for (i = 0; i < jz; i++) /* compute 1-q */
+ {
+ j = iq[i];
+ if (carry == 0)
+ {
+ if (j != 0)
+ {
+ carry = 1; iq[i] = 0x1000000 - j;
+ }
}
- if(ih==2) {
- z = one - z;
- if(carry!=0) z -= __scalbn(one,q0);
+ else
+ iq[i] = 0xffffff - j;
+ }
+ if (q0 > 0) /* rare case: chance is 1 in 12 */
+ {
+ switch (q0)
+ {
+ case 1:
+ iq[jz - 1] &= 0x7fffff; break;
+ case 2:
+ iq[jz - 1] &= 0x3fffff; break;
}
}
+ if (ih == 2)
+ {
+ z = one - z;
+ if (carry != 0)
+ z -= __scalbn (one, q0);
+ }
+ }
- /* check if recomputation is needed */
- if(z==zero) {
- j = 0;
- for (i=jz-1;i>=jk;i--) j |= iq[i];
- if(j==0) { /* need recomputation */
- for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
+ /* check if recomputation is needed */
+ if (z == zero)
+ {
+ j = 0;
+ for (i = jz - 1; i >= jk; i--)
+ j |= iq[i];
+ if (j == 0) /* need recomputation */
+ {
+ for (k = 1; iq[jk - k] == 0; k++)
+ ; /* k = no. of terms needed */
- for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
- f[jx+i] = (double) ipio2[jv+i];
- for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
- q[i] = fw;
- }
- jz += k;
- goto recompute;
+ for (i = jz + 1; i <= jz + k; i++) /* add q[jz+1] to q[jz+k] */
+ {
+ f[jx + i] = (double) ipio2[jv + i];
+ for (j = 0, fw = 0.0; j <= jx; j++)
+ fw += x[j] * f[jx + i - j];
+ q[i] = fw;
}
+ jz += k;
+ goto recompute;
}
+ }
- /* chop off zero terms */
- if(z==0.0) {
- jz -= 1; q0 -= 24;
- while(iq[jz]==0) { jz--; q0-=24;}
- } else { /* break z into 24-bit if necessary */
- z = __scalbn(z,-q0);
- if(z>=two24) {
- fw = (double)((int32_t)(twon24*z));
- iq[jz] = (int32_t)(z-two24*fw);
- jz += 1; q0 += 24;
- iq[jz] = (int32_t) fw;
- } else iq[jz] = (int32_t) z ;
+ /* chop off zero terms */
+ if (z == 0.0)
+ {
+ jz -= 1; q0 -= 24;
+ while (iq[jz] == 0)
+ {
+ jz--; q0 -= 24;
}
-
- /* convert integer "bit" chunk to floating-point value */
- fw = __scalbn(one,q0);
- for(i=jz;i>=0;i--) {
- q[i] = fw*(double)iq[i]; fw*=twon24;
+ }
+ else /* break z into 24-bit if necessary */
+ {
+ z = __scalbn (z, -q0);
+ if (z >= two24)
+ {
+ fw = (double) ((int32_t) (twon24 * z));
+ iq[jz] = (int32_t) (z - two24 * fw);
+ jz += 1; q0 += 24;
+ iq[jz] = (int32_t) fw;
}
+ else
+ iq[jz] = (int32_t) z;
+ }
- /* compute PIo2[0,...,jp]*q[jz,...,0] */
- for(i=jz;i>=0;i--) {
- for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
- fq[jz-i] = fw;
- }
+ /* convert integer "bit" chunk to floating-point value */
+ fw = __scalbn (one, q0);
+ for (i = jz; i >= 0; i--)
+ {
+ q[i] = fw * (double) iq[i]; fw *= twon24;
+ }
- /* compress fq[] into y[] */
- switch(prec) {
- case 0:
- fw = 0.0;
- for (i=jz;i>=0;i--) fw += fq[i];
- y[0] = (ih==0)? fw: -fw;
- break;
- case 1:
- case 2:;
+ /* compute PIo2[0,...,jp]*q[jz,...,0] */
+ for (i = jz; i >= 0; i--)
+ {
+ for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
+ fw += PIo2[k] * q[i + k];
+ fq[jz - i] = fw;
+ }
+
+ /* compress fq[] into y[] */
+ switch (prec)
+ {
+ case 0:
+ fw = 0.0;
+ for (i = jz; i >= 0; i--)
+ fw += fq[i];
+ y[0] = (ih == 0) ? fw : -fw;
+ break;
+ case 1:
+ case 2:;
#if __FLT_EVAL_METHOD__ != 0
- volatile
+ volatile
#endif
- double fv = 0.0;
- for (i=jz;i>=0;i--) fv += fq[i];
- y[0] = (ih==0)? fv: -fv;
- fv = fq[0]-fv;
- for (i=1;i<=jz;i++) fv += fq[i];
- y[1] = (ih==0)? fv: -fv;
- break;
- case 3: /* painful */
- for (i=jz;i>0;i--) {
+ double fv = 0.0;
+ for (i = jz; i >= 0; i--)
+ fv += fq[i];
+ y[0] = (ih == 0) ? fv : -fv;
+ fv = fq[0] - fv;
+ for (i = 1; i <= jz; i++)
+ fv += fq[i];
+ y[1] = (ih == 0) ? fv : -fv;
+ break;
+ case 3: /* painful */
+ for (i = jz; i > 0; i--)
+ {
#if __FLT_EVAL_METHOD__ != 0
- volatile
+ volatile
#endif
- double fv = (double)(fq[i-1]+fq[i]);
- fq[i] += fq[i-1]-fv;
- fq[i-1] = fv;
- }
- for (i=jz;i>1;i--) {
+ double fv = (double) (fq[i - 1] + fq[i]);
+ fq[i] += fq[i - 1] - fv;
+ fq[i - 1] = fv;
+ }
+ for (i = jz; i > 1; i--)
+ {
#if __FLT_EVAL_METHOD__ != 0
- volatile
+ volatile
#endif
- double fv = (double)(fq[i-1]+fq[i]);
- fq[i] += fq[i-1]-fv;
- fq[i-1] = fv;
- }
- for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
- if(ih==0) {
- y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
- } else {
- y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
- }
+ double fv = (double) (fq[i - 1] + fq[i]);
+ fq[i] += fq[i - 1] - fv;
+ fq[i - 1] = fv;
+ }
+ for (fw = 0.0, i = jz; i >= 2; i--)
+ fw += fq[i];
+ if (ih == 0)
+ {
+ y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
+ }
+ else
+ {
+ y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
}
- return n&7;
+ }
+ return n & 7;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/mpa-arch.h b/libc/sysdeps/ieee754/dbl-64/mpa-arch.h
index 7de9d51ae..779a43d9b 100644
--- a/libc/sysdeps/ieee754/dbl-64/mpa-arch.h
+++ b/libc/sysdeps/ieee754/dbl-64/mpa-arch.h
@@ -22,15 +22,15 @@ typedef int64_t mantissa_store_t;
#define TWOPOW(i) (1L << i)
#define RADIX_EXP 24
-#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */
+#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */
/* Divide D by RADIX and put the remainder in R. D must be a non-negative
integral value. */
#define DIV_RADIX(d, r) \
- ({ \
- r = d & (RADIX - 1); \
- d >>= RADIX_EXP; \
- })
+ ({ \
+ r = d & (RADIX - 1); \
+ d >>= RADIX_EXP; \
+ })
/* Put the integer component of a double X in R and retain the fraction in
X. This is used in extracting mantissa digits for MP_NO by using the
@@ -38,10 +38,10 @@ typedef int64_t mantissa_store_t;
digit and then scaling by RADIX to get the next mantissa digit in the same
manner. */
#define INTEGER_OF(x, i) \
- ({ \
- i = (mantissa_t) x; \
- x -= i; \
- })
+ ({ \
+ i = (mantissa_t) x; \
+ x -= i; \
+ })
/* Align IN down to F. The code assumes that F is a power of two. */
-#define ALIGN_DOWN_TO(in, f) ((in) & -(f))
+#define ALIGN_DOWN_TO(in, f) ((in) & - (f))
diff --git a/libc/sysdeps/ieee754/dbl-64/mpa.c b/libc/sysdeps/ieee754/dbl-64/mpa.c
index a3feb175e..190e8a637 100644
--- a/libc/sysdeps/ieee754/dbl-64/mpa.c
+++ b/libc/sysdeps/ieee754/dbl-64/mpa.c
@@ -50,8 +50,8 @@
#endif
#ifndef NO__CONST
-const mp_no mpone = {1, {1.0, 1.0}};
-const mp_no mptwo = {1, {1.0, 2.0}};
+const mp_no mpone = { 1, { 1.0, 1.0 } };
+const mp_no mptwo = { 1, { 1.0, 2.0 } };
#endif
#ifndef NO___ACR
@@ -123,7 +123,7 @@ __cpy (const mp_no *x, mp_no *y, int p)
static void
norm (const mp_no *x, double *y, int p)
{
-#define R RADIXI
+# define R RADIXI
long i;
double c;
mantissa_t a, u, v, z[5];
@@ -140,7 +140,7 @@ norm (const mp_no *x, double *y, int p)
}
else
{
- for (a = 1, z[1] = X[1]; z[1] < TWO23;)
+ for (a = 1, z[1] = X[1]; z[1] < TWO23; )
{
a *= 2;
z[1] *= 2;
@@ -188,7 +188,7 @@ norm (const mp_no *x, double *y, int p)
c *= RADIXI;
*y = c;
-#undef R
+# undef R
}
/* Convert a multiple precision number *X into a double precision
@@ -201,7 +201,7 @@ denorm (const mp_no *x, double *y, int p)
double c;
mantissa_t u, z[5];
-#define R RADIXI
+# define R RADIXI
if (EX < -44 || (EX == -44 && X[1] < TWO5))
{
*y = 0;
@@ -298,7 +298,7 @@ denorm (const mp_no *x, double *y, int p)
c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
*y = c * TWOM1032;
-#undef R
+# undef R
}
/* Convert multiple precision number *X into double precision number *Y. The
@@ -394,7 +394,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = 1;
}
else
- {
+ {
Z[k--] = zk;
zk = 0;
}
@@ -409,7 +409,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = 1;
}
else
- {
+ {
Z[k--] = zk;
zk = 0;
}
@@ -471,7 +471,7 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = -1;
}
else
- {
+ {
Z[k--] = zk;
zk = 0;
}
@@ -487,18 +487,19 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = -1;
}
else
- {
+ {
Z[k--] = zk;
zk = 0;
}
}
/* Normalize. */
- for (i = 1; Z[i] == 0; i++);
+ for (i = 1; Z[i] == 0; i++)
+ ;
EZ = EZ - i + 1;
- for (k = 1; i <= p2 + 1;)
+ for (k = 1; i <= p2 + 1; )
Z[k++] = Z[i++];
- for (; k <= p2;)
+ for (; k <= p2; )
Z[k++] = 0;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/mpatan.c b/libc/sysdeps/ieee754/dbl-64/mpatan.c
index 807b16a9b..a6ae61195 100644
--- a/libc/sysdeps/ieee754/dbl-64/mpatan.c
+++ b/libc/sysdeps/ieee754/dbl-64/mpatan.c
@@ -43,7 +43,6 @@ void
SECTION
__mpatan (mp_no *x, mp_no *y, int p)
{
-
int i, m, n;
double dx;
mp_no mptwoim1 =
diff --git a/libc/sysdeps/ieee754/dbl-64/mpn2dbl.c b/libc/sysdeps/ieee754/dbl-64/mpn2dbl.c
index 8a2f45b9d..0fada79a0 100644
--- a/libc/sysdeps/ieee754/dbl-64/mpn2dbl.c
+++ b/libc/sysdeps/ieee754/dbl-64/mpn2dbl.c
@@ -40,7 +40,7 @@ __mpn_construct_double (mp_srcptr frac_ptr, int expt, int negative)
u.ieee.mantissa0 = (frac_ptr[0] >> 32) & (((mp_limb_t) 1
<< (DBL_MANT_DIG - 32)) - 1);
#else
- #error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
+ # error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
#endif
return u.d;
diff --git a/libc/sysdeps/ieee754/dbl-64/mptan.c b/libc/sysdeps/ieee754/dbl-64/mptan.c
index 281bfca1c..a5c3fb36c 100644
--- a/libc/sysdeps/ieee754/dbl-64/mptan.c
+++ b/libc/sysdeps/ieee754/dbl-64/mptan.c
@@ -44,7 +44,6 @@ void
SECTION
__mptan (double x, mp_no *mpy, int p)
{
-
int n;
mp_no mpw, mpc, mps;
diff --git a/libc/sysdeps/ieee754/dbl-64/mydefs.h b/libc/sysdeps/ieee754/dbl-64/mydefs.h
index 89ca965c6..a430397c6 100644
--- a/libc/sysdeps/ieee754/dbl-64/mydefs.h
+++ b/libc/sysdeps/ieee754/dbl-64/mydefs.h
@@ -28,10 +28,9 @@
#define MY_H
typedef int int4;
-typedef union {int4 i[2]; double x;} mynumber;
-
-#define ABS(x) (((x)>0)?(x):-(x))
-#define max(x,y) (((y)>(x))?(y):(x))
-#define min(x,y) (((y)<(x))?(y):(x))
+typedef union { int4 i[2]; double x; } mynumber;
+#define ABS(x) (((x) > 0) ? (x) : -(x))
+#define max(x, y) (((y) > (x)) ? (y) : (x))
+#define min(x, y) (((y) < (x)) ? (y) : (x))
#endif
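
The reformatted max and min here are ordinary function-like macros, so each argument can be evaluated more than once. A small reminder of that pitfall (an illustration of the general hazard, not a claim about the call sites in this tree):

#include <stdio.h>

#define max(x, y) (((y) > (x)) ? (y) : (x))

int
main (void)
{
  int i = 5;
  int m = max (0, i++);   /* expands to ((i++) > (0)) ? (i++) : (0) */
  printf ("m = %d, i = %d\n", m, i);   /* m = 6, i = 7: i++ ran twice */
  return 0;
}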
diff --git a/libc/sysdeps/ieee754/dbl-64/s_asinh.c b/libc/sysdeps/ieee754/dbl-64/s_asinh.c
index 68e854f5f..550074684 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_asinh.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_asinh.c
@@ -25,33 +25,43 @@
#include <math_private.h>
static const double
-one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
-ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
-huge= 1.00000000000000000000e+300;
+ one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+ ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
+ huge = 1.00000000000000000000e+300;
double
-__asinh(double x)
+__asinh (double x)
{
- double w;
- int32_t hx,ix;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- if(__builtin_expect(ix< 0x3e300000, 0)) { /* |x|<2**-28 */
- if(huge+x>one) return x; /* return x inexact except 0 */
+ double w;
+ int32_t hx, ix;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ if (__builtin_expect (ix < 0x3e300000, 0)) /* |x|<2**-28 */
+ {
+ if (huge + x > one)
+ return x; /* return x inexact except 0 */
+ }
+ if (__builtin_expect (ix > 0x41b00000, 0)) /* |x| > 2**28 */
+ {
+ if (ix >= 0x7ff00000)
+ return x + x; /* x is inf or NaN */
+ w = __ieee754_log (fabs (x)) + ln2;
+ }
+ else
+ {
+ double xa = fabs (x);
+ if (ix > 0x40000000) /* 2**28 > |x| > 2.0 */
+ {
+ w = __ieee754_log (2.0 * xa + one / (__ieee754_sqrt (xa * xa + one) +
+ xa));
}
- if(__builtin_expect(ix>0x41b00000, 0)) { /* |x| > 2**28 */
- if(ix>=0x7ff00000) return x+x; /* x is inf or NaN */
- w = __ieee754_log(fabs(x))+ln2;
- } else {
- double xa = fabs(x);
- if (ix>0x40000000) { /* 2**28 > |x| > 2.0 */
- w = __ieee754_log(2.0*xa+one/(__ieee754_sqrt(xa*xa+one)+xa));
- } else { /* 2.0 > |x| > 2**-28 */
- double t = xa*xa;
- w =__log1p(xa+t/(one+__ieee754_sqrt(one+t)));
- }
+ else /* 2.0 > |x| > 2**-28 */
+ {
+ double t = xa * xa;
+ w = __log1p (xa + t / (one + __ieee754_sqrt (one + t)));
}
- return __copysign(w, x);
+ }
+ return __copysign (w, x);
}
weak_alias (__asinh, asinh)
#ifdef NO_LONG_DOUBLE
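
The reformatted __asinh keeps the usual three ranges: asinh(x) ~ x for tiny x, log|x| + ln2 for huge x, and a log/log1p form in between, with the sign restored at the end. A rough standalone sketch using standard libm calls instead of the internal __ieee754_* entry points (illustration only, not the glibc implementation):

#include <math.h>
#include <stdio.h>

static const double ln2 = 6.93147180559945286227e-01;

static double
asinh_sketch (double x)
{
  double ax = fabs (x), w;

  if (ax < 0x1p-28)                  /* tiny: asinh(x) ~= x */
    w = ax;
  else if (ax > 0x1p28)              /* huge: sqrt(x*x+1) ~= |x| */
    w = log (ax) + ln2;
  else if (ax > 2.0)                 /* large */
    w = log (2.0 * ax + 1.0 / (sqrt (ax * ax + 1.0) + ax));
  else                               /* moderate: go through log1p */
    {
      double t = ax * ax;
      w = log1p (ax + t / (1.0 + sqrt (1.0 + t)));
    }
  return copysign (w, x);
}

int
main (void)
{
  printf ("%.17g %.17g\n", asinh_sketch (0.5), asinh (0.5));
  printf ("%.17g %.17g\n", asinh_sketch (-10.0), asinh (-10.0));
  return 0;
}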
diff --git a/libc/sysdeps/ieee754/dbl-64/s_atan.c b/libc/sysdeps/ieee754/dbl-64/s_atan.c
index 7b6c83ffb..744e96146 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_atan.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_atan.c
@@ -42,6 +42,7 @@
#include "uatan.tbl"
#include "atnat.h"
#include <math.h>
+#include <stap-probe.h>
void __mpatan (mp_no *, mp_no *, int); /* see definition in mpatan.c */
static double atanMp (double, const int[]);
@@ -60,7 +61,7 @@ double
atan (double x)
{
double cor, s1, ss1, s2, ss2, t1, t2, t3, t7, t8, t9, t10, u, u2, u3,
- v, vv, w, ww, y, yy, z, zz;
+ v, vv, w, ww, y, yy, z, zz;
#ifndef DLA_FMS
double t4, t5, t6;
#endif
@@ -190,17 +191,17 @@ atan (double x)
yy = cij[i][4].d + z * yy;
yy = cij[i][3].d + z * yy;
yy = cij[i][2].d + z * yy;
- yy = HPI1 - z * yy;
+ yy = HPI1 - z * yy;
t1 = HPI - cij[i][1].d;
if (i < 112)
- u3 = U31; /* w < 1/2 */
+ u3 = U31; /* w < 1/2 */
else
- u3 = U32; /* w >= 1/2 */
+ u3 = U32; /* w >= 1/2 */
if ((y = t1 + (yy - u3)) == t1 + (yy + u3))
return __signArctan (x, y);
- DIV2 (1 , 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8, t9,
+ DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8, t9,
t10);
t1 = w - hij[i][0].d;
EADD (t1, ww, z, zz);
@@ -229,7 +230,7 @@ atan (double x)
else
{
if (u < E)
- { /* D <= u < E */
+ { /* D <= u < E */
w = 1 / u;
v = w * w;
EMULV (w, u, t1, t2, t3, t4, t5, t6, t7);
@@ -247,7 +248,7 @@ atan (double x)
if ((y = t3 + (yy - U4)) == t3 + (yy + U4))
return __signArctan (x, y);
- DIV2 (1 , 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8,
+ DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8,
t9, t10);
MUL2 (w, ww, w, ww, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
@@ -306,8 +307,12 @@ atanMp (double x, const int pr[])
__mp_dbl (&mpy1, &y1, p);
__mp_dbl (&mpy2, &y2, p);
if (y1 == y2)
- return y1;
+ {
+ LIBC_PROBE (slowatan, 3, &p, &x, &y1);
+ return y1;
+ }
}
+ LIBC_PROBE (slowatan_inexact, 3, &p, &x, &y1);
return y1; /*if impossible to do exact computing */
}
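
The repeated pattern (y = t1 + (yy - u3)) == t1 + (yy + u3) in atan accepts the fast result only when shifting the low part by the error bound in both directions still rounds to the same double; otherwise control falls through to the multiple-precision atanMp path. A sketch of that test in isolation (names are my own, illustration only):

#include <stdbool.h>
#include <stdio.h>

static bool
rounds_identically (double hi, double lo, double err, double *result)
{
  double down = hi + (lo - err);
  double up = hi + (lo + err);
  *result = down;
  return down == up;       /* both ends of the error interval round alike */
}

int
main (void)
{
  double y;
  if (rounds_identically (1.0, 0x1p-30, 0x1p-60, &y))
    printf ("fast path accepted: %a\n", y);
  else
    printf ("would take the multiple-precision fallback\n");
  return 0;
}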
diff --git a/libc/sysdeps/ieee754/dbl-64/s_cbrt.c b/libc/sysdeps/ieee754/dbl-64/s_cbrt.c
index a7120e1e1..208a369a6 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_cbrt.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_cbrt.c
@@ -51,16 +51,17 @@ __cbrt (double x)
if (xe == 0 && fpclassify (x) <= FP_ZERO)
return x + x;
- u = (0.354895765043919860
- + ((1.50819193781584896
- + ((-2.11499494167371287
- + ((2.44693122563534430
- + ((-1.83469277483613086
- + (0.784932344976639262 - 0.145263899385486377 * xm) * xm)
+ u = (0.354895765043919860
+ + ((1.50819193781584896
+ + ((-2.11499494167371287
+ + ((2.44693122563534430
+ + ((-1.83469277483613086
+ + (0.784932344976639262 - 0.145263899385486377 * xm)
+ * xm)
+ * xm))
* xm))
- * xm))
- * xm))
- * xm));
+ * xm))
+ * xm));
t2 = u * u * u;
diff --git a/libc/sysdeps/ieee754/dbl-64/s_ceil.c b/libc/sysdeps/ieee754/dbl-64/s_ceil.c
index e048c81c2..b2154b407 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_ceil.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_ceil.c
@@ -25,44 +25,67 @@
static const double huge = 1.0e300;
double
-__ceil(double x)
+__ceil (double x)
{
- int32_t i0,i1,j0;
- u_int32_t i,j;
- EXTRACT_WORDS(i0,i1,x);
- j0 = ((i0>>20)&0x7ff)-0x3ff;
- if(j0<20) {
- if(j0<0) { /* raise inexact if x != 0 */
- math_force_eval(huge+x);
- /* return 0*sign(x) if |x|<1 */
- if(i0<0) {i0=0x80000000;i1=0;}
- else if((i0|i1)!=0) { i0=0x3ff00000;i1=0;}
- } else {
- i = (0x000fffff)>>j0;
- if(((i0&i)|i1)==0) return x; /* x is integral */
- math_force_eval(huge+x); /* raise inexact flag */
- if(i0>0) i0 += (0x00100000)>>j0;
- i0 &= (~i); i1=0;
+ int32_t i0, i1, j0;
+ u_int32_t i, j;
+ EXTRACT_WORDS (i0, i1, x);
+ j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
+ if (j0 < 20)
+ {
+ if (j0 < 0) /* raise inexact if x != 0 */
+ {
+ math_force_eval (huge + x);
+ /* return 0*sign(x) if |x|<1 */
+ if (i0 < 0)
+ {
+ i0 = 0x80000000; i1 = 0;
}
- } else if (j0>51) {
- if(j0==0x400) return x+x; /* inf or NaN */
- else return x; /* x is integral */
- } else {
- i = ((u_int32_t)(0xffffffff))>>(j0-20);
- if((i1&i)==0) return x; /* x is integral */
- math_force_eval(huge+x); /* raise inexact flag */
- if(i0>0) {
- if(j0==20) i0+=1;
- else {
- j = i1 + (1<<(52-j0));
- if(j<i1) i0+=1; /* got a carry */
- i1 = j;
- }
+ else if ((i0 | i1) != 0)
+ {
+ i0 = 0x3ff00000; i1 = 0;
}
- i1 &= (~i);
}
- INSERT_WORDS(x,i0,i1);
- return x;
+ else
+ {
+ i = (0x000fffff) >> j0;
+ if (((i0 & i) | i1) == 0)
+ return x; /* x is integral */
+ math_force_eval (huge + x); /* raise inexact flag */
+ if (i0 > 0)
+ i0 += (0x00100000) >> j0;
+ i0 &= (~i); i1 = 0;
+ }
+ }
+ else if (j0 > 51)
+ {
+ if (j0 == 0x400)
+ return x + x; /* inf or NaN */
+ else
+ return x; /* x is integral */
+ }
+ else
+ {
+ i = ((u_int32_t) (0xffffffff)) >> (j0 - 20);
+ if ((i1 & i) == 0)
+ return x; /* x is integral */
+ math_force_eval (huge + x); /* raise inexact flag */
+ if (i0 > 0)
+ {
+ if (j0 == 20)
+ i0 += 1;
+ else
+ {
+ j = i1 + (1 << (52 - j0));
+ if (j < i1)
+ i0 += 1; /* got a carry */
+ i1 = j;
+ }
+ }
+ i1 &= (~i);
+ }
+ INSERT_WORDS (x, i0, i1);
+ return x;
}
#ifndef __ceil
weak_alias (__ceil, ceil)
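
__ceil, like most files in this directory, works directly on the two 32-bit halves of the double via EXTRACT_WORDS and rebuilds the result with INSERT_WORDS. A freestanding sketch of that access pattern and of the unbiased exponent j0 it derives (illustration only, assuming IEEE 754 binary64 with matching integer endianness):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static void
extract_words (int32_t *hi, uint32_t *lo, double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  *hi = (int32_t) (bits >> 32);   /* sign, exponent, top mantissa bits */
  *lo = (uint32_t) bits;          /* low 32 mantissa bits */
}

int
main (void)
{
  int32_t i0;
  uint32_t i1;
  extract_words (&i0, &i1, 6.5);

  /* Unbiased exponent, as in j0 = ((i0 >> 20) & 0x7ff) - 0x3ff above.  */
  int j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
  printf ("high=%08x low=%08x exponent=%d\n", (unsigned) i0, i1, j0);
  return 0;                       /* 6.5 = 1.625 * 2^2, so exponent = 2 */
}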
diff --git a/libc/sysdeps/ieee754/dbl-64/s_copysign.c b/libc/sysdeps/ieee754/dbl-64/s_copysign.c
index a541ceb05..9caf24e8f 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_copysign.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_copysign.c
@@ -23,13 +23,14 @@ static char rcsid[] = "$NetBSD: s_copysign.c,v 1.8 1995/05/10 20:46:57 jtc Exp $
#include <math.h>
#include <math_private.h>
-double __copysign(double x, double y)
+double
+__copysign (double x, double y)
{
- u_int32_t hx,hy;
- GET_HIGH_WORD(hx,x);
- GET_HIGH_WORD(hy,y);
- SET_HIGH_WORD(x,(hx&0x7fffffff)|(hy&0x80000000));
- return x;
+ u_int32_t hx, hy;
+ GET_HIGH_WORD (hx, x);
+ GET_HIGH_WORD (hy, y);
+ SET_HIGH_WORD (x, (hx & 0x7fffffff) | (hy & 0x80000000));
+ return x;
}
weak_alias (__copysign, copysign)
#ifdef NO_LONG_DOUBLE
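
The same word-level idea, applied to the sign bit alone, is what __copysign does. A self-contained equivalent on the full 64-bit pattern (illustration only):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static double
copysign_sketch (double x, double y)
{
  uint64_t ux, uy;
  memcpy (&ux, &x, sizeof ux);
  memcpy (&uy, &y, sizeof uy);
  ux = (ux & UINT64_C (0x7fffffffffffffff))    /* magnitude of x */
       | (uy & UINT64_C (0x8000000000000000)); /* sign of y */
  memcpy (&x, &ux, sizeof x);
  return x;
}

int
main (void)
{
  printf ("%g %g\n", copysign_sketch (3.0, -0.0),
          copysign_sketch (-2.5, 1.0));        /* -3 2.5 */
  return 0;
}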
diff --git a/libc/sysdeps/ieee754/dbl-64/s_erf.c b/libc/sysdeps/ieee754/dbl-64/s_erf.c
index e25e28d9d..aab4ed593 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_erf.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_erf.c
@@ -116,181 +116,176 @@ static char rcsid[] = "$NetBSD: s_erf.c,v 1.8 1995/05/10 20:47:05 jtc Exp $";
#include <math_private.h>
static const double
-tiny = 1e-300,
-half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
-one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
-two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
- /* c = (float)0.84506291151 */
-erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
+ tiny = 1e-300,
+ half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+ one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+ two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
+/* c = (float)0.84506291151 */
+ erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
-efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
-efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
-pp[] = {1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
- -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
- -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
- -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
- -2.37630166566501626084e-05}, /* 0xBEF8EAD6, 0x120016AC */
-qq[] = {0.0, 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
- 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
- 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
- 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
- -3.96022827877536812320e-06}, /* 0xBED09C43, 0x42A26120 */
+ efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
+ efx8 = 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
+ pp[] = { 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
+ -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
+ -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
+ -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
+ -2.37630166566501626084e-05 }, /* 0xBEF8EAD6, 0x120016AC */
+ qq[] = { 0.0, 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
+ 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
+ 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
+ 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
+ -3.96022827877536812320e-06 }, /* 0xBED09C43, 0x42A26120 */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
-pa[] = {-2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
- 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
- -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
- 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
- -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
- 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
- -2.16637559486879084300e-03}, /* 0xBF61BF38, 0x0A96073F */
-qa[] = {0.0, 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
- 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
- 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
- 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
- 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
- 1.19844998467991074170e-02}, /* 0x3F888B54, 0x5735151D */
+ pa[] = { -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
+ 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
+ -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
+ 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
+ -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
+ 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
+ -2.16637559486879084300e-03 }, /* 0xBF61BF38, 0x0A96073F */
+ qa[] = { 0.0, 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
+ 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
+ 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
+ 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
+ 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
+ 1.19844998467991074170e-02 }, /* 0x3F888B54, 0x5735151D */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
-ra[] = {-9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
- -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
- -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
- -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
- -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
- -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
- -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
- -9.81432934416914548592e+00}, /* 0xC023A0EF, 0xC69AC25C */
-sa[] = {0.0,1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
- 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
- 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
- 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
- 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
- 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
- 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
- -6.04244152148580987438e-02}, /* 0xBFAEEFF2, 0xEE749A62 */
+ ra[] = { -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
+ -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
+ -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
+ -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
+ -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
+ -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
+ -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
+ -9.81432934416914548592e+00 }, /* 0xC023A0EF, 0xC69AC25C */
+ sa[] = { 0.0, 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
+ 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
+ 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
+ 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
+ 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
+ 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
+ 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
+ -6.04244152148580987438e-02 }, /* 0xBFAEEFF2, 0xEE749A62 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
-rb[] = {-9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
- -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
- -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
- -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
- -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
- -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
- -4.83519191608651397019e+02}, /* 0xC07E384E, 0x9BDC383F */
-sb[] = {0.0,3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
- 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
- 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
- 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
- 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
- 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
- -2.24409524465858183362e+01}; /* 0xC03670E2, 0x42712D62 */
+ rb[] = { -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
+ -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
+ -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
+ -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
+ -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
+ -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
+ -4.83519191608651397019e+02 }, /* 0xC07E384E, 0x9BDC383F */
+ sb[] = { 0.0, 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
+ 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
+ 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
+ 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
+ 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
+ 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
+ -2.24409524465858183362e+01 }; /* 0xC03670E2, 0x42712D62 */
-double __erf(double x)
+double
+__erf (double x)
{
- int32_t hx,ix,i;
- double R,S,P,Q,s,y,z,r;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- if(ix>=0x7ff00000) { /* erf(nan)=nan */
- i = ((u_int32_t)hx>>31)<<1;
- return (double)(1-i)+one/x; /* erf(+-inf)=+-1 */
- }
+ int32_t hx, ix, i;
+ double R, S, P, Q, s, y, z, r;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ if (ix >= 0x7ff00000) /* erf(nan)=nan */
+ {
+ i = ((u_int32_t) hx >> 31) << 1;
+ return (double) (1 - i) + one / x; /* erf(+-inf)=+-1 */
+ }
- if(ix < 0x3feb0000) { /* |x|<0.84375 */
- double r1,r2,s1,s2,s3,z2,z4;
- if(ix < 0x3e300000) { /* |x|<2**-28 */
- if (ix < 0x00800000)
- return 0.125*(8.0*x+efx8*x); /*avoid underflow */
- return x + efx*x;
- }
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
- s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
-#else
- r1 = pp[0]+z*pp[1]; z2=z*z;
- r2 = pp[2]+z*pp[3]; z4=z2*z2;
- s1 = one+z*qq[1];
- s2 = qq[2]+z*qq[3];
- s3 = qq[4]+z*qq[5];
- r = r1 + z2*r2 + z4*pp[4];
- s = s1 + z2*s2 + z4*s3;
-#endif
- y = r/s;
- return x + x*y;
- }
- if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */
- double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
- s = fabs(x)-one;
-#ifdef DO_NOT_USE_THIS
- P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
- Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
-#else
- P1 = pa[0]+s*pa[1]; s2=s*s;
- Q1 = one+s*qa[1]; s4=s2*s2;
- P2 = pa[2]+s*pa[3]; s6=s4*s2;
- Q2 = qa[2]+s*qa[3];
- P3 = pa[4]+s*pa[5];
- Q3 = qa[4]+s*qa[5];
- P4 = pa[6];
- Q4 = qa[6];
- P = P1 + s2*P2 + s4*P3 + s6*P4;
- Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
-#endif
- if(hx>=0) return erx + P/Q; else return -erx - P/Q;
- }
- if (ix >= 0x40180000) { /* inf>|x|>=6 */
- if(hx>=0) return one-tiny; else return tiny-one;
- }
- x = fabs(x);
- s = one/(x*x);
- if(ix< 0x4006DB6E) { /* |x| < 1/0.35 */
-#ifdef DO_NOT_USE_THIS
- R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
- ra5+s*(ra6+s*ra7))))));
- S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
- sa5+s*(sa6+s*(sa7+s*sa8)))))));
-#else
- double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
- R1 = ra[0]+s*ra[1];s2 = s*s;
- S1 = one+s*sa[1]; s4 = s2*s2;
- R2 = ra[2]+s*ra[3];s6 = s4*s2;
- S2 = sa[2]+s*sa[3];s8 = s4*s4;
- R3 = ra[4]+s*ra[5];
- S3 = sa[4]+s*sa[5];
- R4 = ra[6]+s*ra[7];
- S4 = sa[6]+s*sa[7];
- R = R1 + s2*R2 + s4*R3 + s6*R4;
- S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
-#endif
- } else { /* |x| >= 1/0.35 */
-#ifdef DO_NOT_USE_THIS
- R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
- rb5+s*rb6)))));
- S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
- sb5+s*(sb6+s*sb7))))));
-#else
- double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
- R1 = rb[0]+s*rb[1];s2 = s*s;
- S1 = one+s*sb[1]; s4 = s2*s2;
- R2 = rb[2]+s*rb[3];s6 = s4*s2;
- S2 = sb[2]+s*sb[3];
- R3 = rb[4]+s*rb[5];
- S3 = sb[4]+s*sb[5];
- S4 = sb[6]+s*sb[7];
- R = R1 + s2*R2 + s4*R3 + s6*rb[6];
- S = S1 + s2*S2 + s4*S3 + s6*S4;
-#endif
+ if (ix < 0x3feb0000) /* |x|<0.84375 */
+ {
+ double r1, r2, s1, s2, s3, z2, z4;
+ if (ix < 0x3e300000) /* |x|<2**-28 */
+ {
+ if (ix < 0x00800000)
+ return 0.125 * (8.0 * x + efx8 * x); /*avoid underflow */
+ return x + efx * x;
}
- z = x;
- SET_LOW_WORD(z,0);
- r = __ieee754_exp(-z*z-0.5625)*__ieee754_exp((z-x)*(z+x)+R/S);
- if(hx>=0) return one-r/x; else return r/x-one;
+ z = x * x;
+ r1 = pp[0] + z * pp[1]; z2 = z * z;
+ r2 = pp[2] + z * pp[3]; z4 = z2 * z2;
+ s1 = one + z * qq[1];
+ s2 = qq[2] + z * qq[3];
+ s3 = qq[4] + z * qq[5];
+ r = r1 + z2 * r2 + z4 * pp[4];
+ s = s1 + z2 * s2 + z4 * s3;
+ y = r / s;
+ return x + x * y;
+ }
+ if (ix < 0x3ff40000) /* 0.84375 <= |x| < 1.25 */
+ {
+ double s2, s4, s6, P1, P2, P3, P4, Q1, Q2, Q3, Q4;
+ s = fabs (x) - one;
+ P1 = pa[0] + s * pa[1]; s2 = s * s;
+ Q1 = one + s * qa[1]; s4 = s2 * s2;
+ P2 = pa[2] + s * pa[3]; s6 = s4 * s2;
+ Q2 = qa[2] + s * qa[3];
+ P3 = pa[4] + s * pa[5];
+ Q3 = qa[4] + s * qa[5];
+ P4 = pa[6];
+ Q4 = qa[6];
+ P = P1 + s2 * P2 + s4 * P3 + s6 * P4;
+ Q = Q1 + s2 * Q2 + s4 * Q3 + s6 * Q4;
+ if (hx >= 0)
+ return erx + P / Q;
+ else
+ return -erx - P / Q;
+ }
+ if (ix >= 0x40180000) /* inf>|x|>=6 */
+ {
+ if (hx >= 0)
+ return one - tiny;
+ else
+ return tiny - one;
+ }
+ x = fabs (x);
+ s = one / (x * x);
+ if (ix < 0x4006DB6E) /* |x| < 1/0.35 */
+ {
+ double R1, R2, R3, R4, S1, S2, S3, S4, s2, s4, s6, s8;
+ R1 = ra[0] + s * ra[1]; s2 = s * s;
+ S1 = one + s * sa[1]; s4 = s2 * s2;
+ R2 = ra[2] + s * ra[3]; s6 = s4 * s2;
+ S2 = sa[2] + s * sa[3]; s8 = s4 * s4;
+ R3 = ra[4] + s * ra[5];
+ S3 = sa[4] + s * sa[5];
+ R4 = ra[6] + s * ra[7];
+ S4 = sa[6] + s * sa[7];
+ R = R1 + s2 * R2 + s4 * R3 + s6 * R4;
+ S = S1 + s2 * S2 + s4 * S3 + s6 * S4 + s8 * sa[8];
+ }
+ else /* |x| >= 1/0.35 */
+ {
+ double R1, R2, R3, S1, S2, S3, S4, s2, s4, s6;
+ R1 = rb[0] + s * rb[1]; s2 = s * s;
+ S1 = one + s * sb[1]; s4 = s2 * s2;
+ R2 = rb[2] + s * rb[3]; s6 = s4 * s2;
+ S2 = sb[2] + s * sb[3];
+ R3 = rb[4] + s * rb[5];
+ S3 = sb[4] + s * sb[5];
+ S4 = sb[6] + s * sb[7];
+ R = R1 + s2 * R2 + s4 * R3 + s6 * rb[6];
+ S = S1 + s2 * S2 + s4 * S3 + s6 * S4;
+ }
+ z = x;
+ SET_LOW_WORD (z, 0);
+ r = __ieee754_exp (-z * z - 0.5625) *
+ __ieee754_exp ((z - x) * (z + x) + R / S);
+ if (hx >= 0)
+ return one - r / x;
+ else
+ return r / x - one;
}
weak_alias (__erf, erf)
#ifdef NO_LONG_DOUBLE
@@ -298,117 +293,115 @@ strong_alias (__erf, __erfl)
weak_alias (__erf, erfl)
#endif
-double __erfc(double x)
+double
+__erfc (double x)
{
- int32_t hx,ix;
- double R,S,P,Q,s,y,z,r;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- if(ix>=0x7ff00000) { /* erfc(nan)=nan */
- /* erfc(+-inf)=0,2 */
- return (double)(((u_int32_t)hx>>31)<<1)+one/x;
- }
+ int32_t hx, ix;
+ double R, S, P, Q, s, y, z, r;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ if (ix >= 0x7ff00000) /* erfc(nan)=nan */
+ { /* erfc(+-inf)=0,2 */
+ return (double) (((u_int32_t) hx >> 31) << 1) + one / x;
+ }
- if(ix < 0x3feb0000) { /* |x|<0.84375 */
- double r1,r2,s1,s2,s3,z2,z4;
- if(ix < 0x3c700000) /* |x|<2**-56 */
- return one-x;
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
- s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
-#else
- r1 = pp[0]+z*pp[1]; z2=z*z;
- r2 = pp[2]+z*pp[3]; z4=z2*z2;
- s1 = one+z*qq[1];
- s2 = qq[2]+z*qq[3];
- s3 = qq[4]+z*qq[5];
- r = r1 + z2*r2 + z4*pp[4];
- s = s1 + z2*s2 + z4*s3;
-#endif
- y = r/s;
- if(hx < 0x3fd00000) { /* x<1/4 */
- return one-(x+x*y);
- } else {
- r = x*y;
- r += (x-half);
- return half - r ;
- }
+ if (ix < 0x3feb0000) /* |x|<0.84375 */
+ {
+ double r1, r2, s1, s2, s3, z2, z4;
+ if (ix < 0x3c700000) /* |x|<2**-56 */
+ return one - x;
+ z = x * x;
+ r1 = pp[0] + z * pp[1]; z2 = z * z;
+ r2 = pp[2] + z * pp[3]; z4 = z2 * z2;
+ s1 = one + z * qq[1];
+ s2 = qq[2] + z * qq[3];
+ s3 = qq[4] + z * qq[5];
+ r = r1 + z2 * r2 + z4 * pp[4];
+ s = s1 + z2 * s2 + z4 * s3;
+ y = r / s;
+ if (hx < 0x3fd00000) /* x<1/4 */
+ {
+ return one - (x + x * y);
}
- if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */
- double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
- s = fabs(x)-one;
-#ifdef DO_NOT_USE_THIS
- P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
- Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
-#else
- P1 = pa[0]+s*pa[1]; s2=s*s;
- Q1 = one+s*qa[1]; s4=s2*s2;
- P2 = pa[2]+s*pa[3]; s6=s4*s2;
- Q2 = qa[2]+s*qa[3];
- P3 = pa[4]+s*pa[5];
- Q3 = qa[4]+s*qa[5];
- P4 = pa[6];
- Q4 = qa[6];
- P = P1 + s2*P2 + s4*P3 + s6*P4;
- Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
-#endif
- if(hx>=0) {
- z = one-erx; return z - P/Q;
- } else {
- z = erx+P/Q; return one+z;
- }
+ else
+ {
+ r = x * y;
+ r += (x - half);
+ return half - r;
}
- if (ix < 0x403c0000) { /* |x|<28 */
- x = fabs(x);
- s = one/(x*x);
- if(ix< 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143*/
-#ifdef DO_NOT_USE_THIS
- R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
- ra5+s*(ra6+s*ra7))))));
- S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
- sa5+s*(sa6+s*(sa7+s*sa8)))))));
-#else
- double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
- R1 = ra[0]+s*ra[1];s2 = s*s;
- S1 = one+s*sa[1]; s4 = s2*s2;
- R2 = ra[2]+s*ra[3];s6 = s4*s2;
- S2 = sa[2]+s*sa[3];s8 = s4*s4;
- R3 = ra[4]+s*ra[5];
- S3 = sa[4]+s*sa[5];
- R4 = ra[6]+s*ra[7];
- S4 = sa[6]+s*sa[7];
- R = R1 + s2*R2 + s4*R3 + s6*R4;
- S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
-#endif
- } else { /* |x| >= 1/.35 ~ 2.857143 */
- double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
- if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */
-#ifdef DO_NOT_USE_THIS
- R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
- rb5+s*rb6)))));
- S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
- sb5+s*(sb6+s*sb7))))));
-#else
- R1 = rb[0]+s*rb[1];s2 = s*s;
- S1 = one+s*sb[1]; s4 = s2*s2;
- R2 = rb[2]+s*rb[3];s6 = s4*s2;
- S2 = sb[2]+s*sb[3];
- R3 = rb[4]+s*rb[5];
- S3 = sb[4]+s*sb[5];
- S4 = sb[6]+s*sb[7];
- R = R1 + s2*R2 + s4*R3 + s6*rb[6];
- S = S1 + s2*S2 + s4*S3 + s6*S4;
-#endif
- }
- z = x;
- SET_LOW_WORD(z,0);
- r = __ieee754_exp(-z*z-0.5625)*
- __ieee754_exp((z-x)*(z+x)+R/S);
- if(hx>0) return r/x; else return two-r/x;
- } else {
- if(hx>0) return tiny*tiny; else return two-tiny;
+ }
+ if (ix < 0x3ff40000) /* 0.84375 <= |x| < 1.25 */
+ {
+ double s2, s4, s6, P1, P2, P3, P4, Q1, Q2, Q3, Q4;
+ s = fabs (x) - one;
+ P1 = pa[0] + s * pa[1]; s2 = s * s;
+ Q1 = one + s * qa[1]; s4 = s2 * s2;
+ P2 = pa[2] + s * pa[3]; s6 = s4 * s2;
+ Q2 = qa[2] + s * qa[3];
+ P3 = pa[4] + s * pa[5];
+ Q3 = qa[4] + s * qa[5];
+ P4 = pa[6];
+ Q4 = qa[6];
+ P = P1 + s2 * P2 + s4 * P3 + s6 * P4;
+ Q = Q1 + s2 * Q2 + s4 * Q3 + s6 * Q4;
+ if (hx >= 0)
+ {
+ z = one - erx; return z - P / Q;
+ }
+ else
+ {
+ z = erx + P / Q; return one + z;
+ }
+ }
+ if (ix < 0x403c0000) /* |x|<28 */
+ {
+ x = fabs (x);
+ s = one / (x * x);
+ if (ix < 0x4006DB6D) /* |x| < 1/.35 ~ 2.857143*/
+ {
+ double R1, R2, R3, R4, S1, S2, S3, S4, s2, s4, s6, s8;
+ R1 = ra[0] + s * ra[1]; s2 = s * s;
+ S1 = one + s * sa[1]; s4 = s2 * s2;
+ R2 = ra[2] + s * ra[3]; s6 = s4 * s2;
+ S2 = sa[2] + s * sa[3]; s8 = s4 * s4;
+ R3 = ra[4] + s * ra[5];
+ S3 = sa[4] + s * sa[5];
+ R4 = ra[6] + s * ra[7];
+ S4 = sa[6] + s * sa[7];
+ R = R1 + s2 * R2 + s4 * R3 + s6 * R4;
+ S = S1 + s2 * S2 + s4 * S3 + s6 * S4 + s8 * sa[8];
+ }
+ else /* |x| >= 1/.35 ~ 2.857143 */
+ {
+ double R1, R2, R3, S1, S2, S3, S4, s2, s4, s6;
+ if (hx < 0 && ix >= 0x40180000)
+ return two - tiny; /* x < -6 */
+ R1 = rb[0] + s * rb[1]; s2 = s * s;
+ S1 = one + s * sb[1]; s4 = s2 * s2;
+ R2 = rb[2] + s * rb[3]; s6 = s4 * s2;
+ S2 = sb[2] + s * sb[3];
+ R3 = rb[4] + s * rb[5];
+ S3 = sb[4] + s * sb[5];
+ S4 = sb[6] + s * sb[7];
+ R = R1 + s2 * R2 + s4 * R3 + s6 * rb[6];
+ S = S1 + s2 * S2 + s4 * S3 + s6 * S4;
}
+ z = x;
+ SET_LOW_WORD (z, 0);
+ r = __ieee754_exp (-z * z - 0.5625) *
+ __ieee754_exp ((z - x) * (z + x) + R / S);
+ if (hx > 0)
+ return r / x;
+ else
+ return two - r / x;
+ }
+ else
+ {
+ if (hx > 0)
+ return tiny * tiny;
+ else
+ return two - tiny;
+ }
}
weak_alias (__erfc, erfc)
#ifdef NO_LONG_DOUBLE
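
The erf and erfc rational approximations are evaluated in paired terms (R1 + s2*R2 + s4*R3 + ...) rather than as one long nested Horner chain, which shortens the dependency chain on pipelined processors. Both forms compute the same polynomial up to rounding; a sketch using the pp[] numerator coefficients above (illustration only):

#include <stdio.h>

static const double pp[] = {
  1.28379167095512558561e-01,
  -3.25042107247001499370e-01,
  -2.84817495755985104766e-02,
  -5.77027029648944159157e-03,
  -2.37630166566501626084e-05
};

static double
poly_horner (double z)
{
  return pp[0] + z * (pp[1] + z * (pp[2] + z * (pp[3] + z * pp[4])));
}

static double
poly_split (double z)
{
  double z2 = z * z, z4 = z2 * z2;
  double r1 = pp[0] + z * pp[1];    /* independent partial sums */
  double r2 = pp[2] + z * pp[3];
  return r1 + z2 * r2 + z4 * pp[4];
}

int
main (void)
{
  double z = 0.25;
  printf ("horner=%.17g split=%.17g\n", poly_horner (z), poly_split (z));
  return 0;
}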
diff --git a/libc/sysdeps/ieee754/dbl-64/s_expm1.c b/libc/sysdeps/ieee754/dbl-64/s_expm1.c
index 1a4bcd979..9c9bb3455 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_expm1.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_expm1.c
@@ -11,7 +11,7 @@
*/
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
-*/
+ */
/* expm1(x)
* Returns exp(x)-1, the exponential of x minus 1.
@@ -113,120 +113,145 @@
#include <math_private.h>
#define one Q[0]
static const double
-huge = 1.0e+300,
-tiny = 1.0e-300,
-o_threshold = 7.09782712893383973096e+02,/* 0x40862E42, 0xFEFA39EF */
-ln2_hi = 6.93147180369123816490e-01,/* 0x3fe62e42, 0xfee00000 */
-ln2_lo = 1.90821492927058770002e-10,/* 0x3dea39ef, 0x35793c76 */
-invln2 = 1.44269504088896338700e+00,/* 0x3ff71547, 0x652b82fe */
- /* scaled coefficients related to expm1 */
-Q[] = {1.0, -3.33333333333331316428e-02, /* BFA11111 111110F4 */
- 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
- -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
- 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
- -2.01099218183624371326e-07}; /* BE8AFDB7 6E09C32D */
+ huge = 1.0e+300,
+ tiny = 1.0e-300,
+ o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
+ ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
+ ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
+ invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
+/* scaled coefficients related to expm1 */
+ Q[] = { 1.0, -3.33333333333331316428e-02, /* BFA11111 111110F4 */
+ 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
+ -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
+ 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
+ -2.01099218183624371326e-07 }; /* BE8AFDB7 6E09C32D */
double
-__expm1(double x)
+__expm1 (double x)
{
- double y,hi,lo,c,t,e,hxs,hfx,r1,h2,h4,R1,R2,R3;
- int32_t k,xsb;
- u_int32_t hx;
+ double y, hi, lo, c, t, e, hxs, hfx, r1, h2, h4, R1, R2, R3;
+ int32_t k, xsb;
+ u_int32_t hx;
- GET_HIGH_WORD(hx,x);
- xsb = hx&0x80000000; /* sign bit of x */
- if(xsb==0) y=x; else y= -x; /* y = |x| */
- hx &= 0x7fffffff; /* high word of |x| */
+ GET_HIGH_WORD (hx, x);
+ xsb = hx & 0x80000000; /* sign bit of x */
+ if (xsb == 0)
+ y = x;
+ else
+ y = -x; /* y = |x| */
+ hx &= 0x7fffffff; /* high word of |x| */
- /* filter out huge and non-finite argument */
- if(hx >= 0x4043687A) { /* if |x|>=56*ln2 */
- if(hx >= 0x40862E42) { /* if |x|>=709.78... */
- if(hx>=0x7ff00000) {
- u_int32_t low;
- GET_LOW_WORD(low,x);
- if(((hx&0xfffff)|low)!=0)
- return x+x; /* NaN */
- else return (xsb==0)? x:-1.0;/* exp(+-inf)={inf,-1} */
- }
- if(x > o_threshold) {
- __set_errno (ERANGE);
- return huge*huge; /* overflow */
- }
+ /* filter out huge and non-finite argument */
+ if (hx >= 0x4043687A) /* if |x|>=56*ln2 */
+ {
+ if (hx >= 0x40862E42) /* if |x|>=709.78... */
+ {
+ if (hx >= 0x7ff00000)
+ {
+ u_int32_t low;
+ GET_LOW_WORD (low, x);
+ if (((hx & 0xfffff) | low) != 0)
+ return x + x; /* NaN */
+ else
+ return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
}
- if(xsb!=0) { /* x < -56*ln2, return -1.0 with inexact */
- math_force_eval(x+tiny); /* raise inexact */
- return tiny-one; /* return -1 */
+ if (x > o_threshold)
+ {
+ __set_errno (ERANGE);
+ return huge * huge; /* overflow */
}
}
+ if (xsb != 0) /* x < -56*ln2, return -1.0 with inexact */
+ {
+ math_force_eval (x + tiny); /* raise inexact */
+ return tiny - one; /* return -1 */
+ }
+ }
- /* argument reduction */
- if(hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
- if(hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
- if(xsb==0)
- {hi = x - ln2_hi; lo = ln2_lo; k = 1;}
- else
- {hi = x + ln2_hi; lo = -ln2_lo; k = -1;}
- } else {
- k = invln2*x+((xsb==0)?0.5:-0.5);
- t = k;
- hi = x - t*ln2_hi; /* t*ln2_hi is exact here */
- lo = t*ln2_lo;
+ /* argument reduction */
+ if (hx > 0x3fd62e42) /* if |x| > 0.5 ln2 */
+ {
+ if (hx < 0x3FF0A2B2) /* and |x| < 1.5 ln2 */
+ {
+ if (xsb == 0)
+ {
+ hi = x - ln2_hi; lo = ln2_lo; k = 1;
+ }
+ else
+ {
+ hi = x + ln2_hi; lo = -ln2_lo; k = -1;
}
- x = hi - lo;
- c = (hi-x)-lo;
}
- else if(hx < 0x3c900000) { /* when |x|<2**-54, return x */
- t = huge+x; /* return x with inexact flags when x!=0 */
- return x - (t-(huge+x));
+ else
+ {
+ k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
+ t = k;
+ hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
+ lo = t * ln2_lo;
}
- else k = 0;
+ x = hi - lo;
+ c = (hi - x) - lo;
+ }
+ else if (hx < 0x3c900000) /* when |x|<2**-54, return x */
+ {
+ t = huge + x; /* return x with inexact flags when x!=0 */
+ return x - (t - (huge + x));
+ }
+ else
+ k = 0;
- /* x is now in primary range */
- hfx = 0.5*x;
- hxs = x*hfx;
-#ifdef DO_NOT_USE_THIS
- r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))));
-#else
- R1 = one+hxs*Q[1]; h2 = hxs*hxs;
- R2 = Q[2]+hxs*Q[3]; h4 = h2*h2;
- R3 = Q[4]+hxs*Q[5];
- r1 = R1 + h2*R2 + h4*R3;
-#endif
- t = 3.0-r1*hfx;
- e = hxs*((r1-t)/(6.0 - x*t));
- if(k==0) return x - (x*e-hxs); /* c is 0 */
- else {
- e = (x*(e-c)-c);
- e -= hxs;
- if(k== -1) return 0.5*(x-e)-0.5;
- if(k==1) {
- if(x < -0.25) return -2.0*(e-(x+0.5));
- else return one+2.0*(x-e);
- }
- if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */
- u_int32_t high;
- y = one-(e-x);
- GET_HIGH_WORD(high,y);
- SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
- return y-one;
- }
- t = one;
- if(k<20) {
- u_int32_t high;
- SET_HIGH_WORD(t,0x3ff00000 - (0x200000>>k)); /* t=1-2^-k */
- y = t-(e-x);
- GET_HIGH_WORD(high,y);
- SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
- } else {
- u_int32_t high;
- SET_HIGH_WORD(t,((0x3ff-k)<<20)); /* 2^-k */
- y = x-(e+t);
- y += one;
- GET_HIGH_WORD(high,y);
- SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
- }
+ /* x is now in primary range */
+ hfx = 0.5 * x;
+ hxs = x * hfx;
+ R1 = one + hxs * Q[1]; h2 = hxs * hxs;
+ R2 = Q[2] + hxs * Q[3]; h4 = h2 * h2;
+ R3 = Q[4] + hxs * Q[5];
+ r1 = R1 + h2 * R2 + h4 * R3;
+ t = 3.0 - r1 * hfx;
+ e = hxs * ((r1 - t) / (6.0 - x * t));
+ if (k == 0)
+ return x - (x * e - hxs); /* c is 0 */
+ else
+ {
+ e = (x * (e - c) - c);
+ e -= hxs;
+ if (k == -1)
+ return 0.5 * (x - e) - 0.5;
+ if (k == 1)
+ {
+ if (x < -0.25)
+ return -2.0 * (e - (x + 0.5));
+ else
+ return one + 2.0 * (x - e);
+ }
+ if (k <= -2 || k > 56) /* suffice to return exp(x)-1 */
+ {
+ u_int32_t high;
+ y = one - (e - x);
+ GET_HIGH_WORD (high, y);
+ SET_HIGH_WORD (y, high + (k << 20)); /* add k to y's exponent */
+ return y - one;
+ }
+ t = one;
+ if (k < 20)
+ {
+ u_int32_t high;
+ SET_HIGH_WORD (t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */
+ y = t - (e - x);
+ GET_HIGH_WORD (high, y);
+ SET_HIGH_WORD (y, high + (k << 20)); /* add k to y's exponent */
+ }
+ else
+ {
+ u_int32_t high;
+ SET_HIGH_WORD (t, ((0x3ff - k) << 20)); /* 2^-k */
+ y = x - (e + t);
+ y += one;
+ GET_HIGH_WORD (high, y);
+ SET_HIGH_WORD (y, high + (k << 20)); /* add k to y's exponent */
}
- return y;
+ }
+ return y;
}
weak_alias (__expm1, expm1)
#ifdef NO_LONG_DOUBLE
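
The argument reduction in __expm1 relies on ln2 being split into ln2_hi, whose low bits are zero, and a tiny ln2_lo, so that k * ln2_hi is exact for the small integer k chosen. A standalone sketch of that reduction (illustration only; the real code picks k with invln2 and handles signs, overflow, and the tiny-argument cases):

#include <math.h>
#include <stdio.h>

static const double ln2_hi = 6.93147180369123816490e-01; /* 0x3fe62e42 fee00000 */
static const double ln2_lo = 1.90821492927058770002e-10; /* 0x3dea39ef 35793c76 */

int
main (void)
{
  double x = 10.0;
  int k = (int) (x / (ln2_hi + ln2_lo) + 0.5);  /* nearest multiple of ln 2 */
  double hi = x - k * ln2_hi;                   /* k * ln2_hi is exact here */
  double lo = k * ln2_lo;
  double r = hi - lo;                           /* reduced argument */

  /* exp(x) = 2^k * exp(r); compare against the library value.  */
  printf ("k=%d r=%.17g check=%.17g exp=%.17g\n",
          k, r, ldexp (exp (r), k), exp (x));
  return 0;
}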
diff --git a/libc/sysdeps/ieee754/dbl-64/s_fabs.c b/libc/sysdeps/ieee754/dbl-64/s_fabs.c
index 86f1d52be..c82c4210e 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_fabs.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_fabs.c
@@ -21,12 +21,13 @@ static char rcsid[] = "$NetBSD: s_fabs.c,v 1.7 1995/05/10 20:47:13 jtc Exp $";
#include <math.h>
#include <math_private.h>
-double __fabs(double x)
+double
+__fabs (double x)
{
- u_int32_t high;
- GET_HIGH_WORD(high,x);
- SET_HIGH_WORD(x,high&0x7fffffff);
- return x;
+ u_int32_t high;
+ GET_HIGH_WORD (high, x);
+ SET_HIGH_WORD (x, high & 0x7fffffff);
+ return x;
}
weak_alias (__fabs, fabs)
#ifdef NO_LONG_DOUBLE
diff --git a/libc/sysdeps/ieee754/dbl-64/s_finite.c b/libc/sysdeps/ieee754/dbl-64/s_finite.c
index 47dad5df2..5dc398efc 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_finite.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_finite.c
@@ -23,11 +23,12 @@ static char rcsid[] = "$NetBSD: s_finite.c,v 1.8 1995/05/10 20:47:17 jtc Exp $";
#include <math_private.h>
#undef __finite
-int __finite(double x)
+int
+__finite (double x)
{
- int32_t hx;
- GET_HIGH_WORD(hx,x);
- return (int)((u_int32_t)((hx&0x7fffffff)-0x7ff00000)>>31);
+ int32_t hx;
+ GET_HIGH_WORD (hx, x);
+ return (int) ((u_int32_t) ((hx & 0x7fffffff) - 0x7ff00000) >> 31);
}
hidden_def (__finite)
weak_alias (__finite, finite)
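
The return expression in __finite is branch-free: for finite x the masked high word is below 0x7ff00000, so the unsigned subtraction wraps around and sets bit 31; for Inf and NaN it does not. A sketch with the same expression written out portably (illustration only):

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <math.h>

static int
finite_sketch (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  uint32_t hx = (uint32_t) (bits >> 32);
  return (int) (((hx & 0x7fffffffu) - 0x7ff00000u) >> 31);
}

int
main (void)
{
  printf ("%d %d %d %d\n",
          finite_sketch (1.0), finite_sketch (-0.0),
          finite_sketch (INFINITY), finite_sketch (NAN));
  /* expected: 1 1 0 0 */
  return 0;
}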
diff --git a/libc/sysdeps/ieee754/dbl-64/s_floor.c b/libc/sysdeps/ieee754/dbl-64/s_floor.c
index 5c7297842..bd6afa72e 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_floor.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_floor.c
@@ -24,44 +24,67 @@
static const double huge = 1.0e300;
-double __floor(double x)
+double
+__floor (double x)
{
- int32_t i0,i1,j0;
- u_int32_t i,j;
- EXTRACT_WORDS(i0,i1,x);
- j0 = ((i0>>20)&0x7ff)-0x3ff;
- if(j0<20) {
- if(j0<0) { /* raise inexact if x != 0 */
- math_force_eval(huge+x);/* return 0*sign(x) if |x|<1 */
- if(i0>=0) {i0=i1=0;}
- else if(((i0&0x7fffffff)|i1)!=0)
- { i0=0xbff00000;i1=0;}
- } else {
- i = (0x000fffff)>>j0;
- if(((i0&i)|i1)==0) return x; /* x is integral */
- math_force_eval(huge+x); /* raise inexact flag */
- if(i0<0) i0 += (0x00100000)>>j0;
- i0 &= (~i); i1=0;
+ int32_t i0, i1, j0;
+ u_int32_t i, j;
+ EXTRACT_WORDS (i0, i1, x);
+ j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
+ if (j0 < 20)
+ {
+ if (j0 < 0) /* raise inexact if x != 0 */
+ {
+ math_force_eval (huge + x); /* return 0*sign(x) if |x|<1 */
+ if (i0 >= 0)
+ {
+ i0 = i1 = 0;
}
- } else if (j0>51) {
- if(j0==0x400) return x+x; /* inf or NaN */
- else return x; /* x is integral */
- } else {
- i = ((u_int32_t)(0xffffffff))>>(j0-20);
- if((i1&i)==0) return x; /* x is integral */
- math_force_eval(huge+x); /* raise inexact flag */
- if(i0<0) {
- if(j0==20) i0+=1;
- else {
- j = i1+(1<<(52-j0));
- if(j<i1) i0 +=1 ; /* got a carry */
- i1=j;
- }
+ else if (((i0 & 0x7fffffff) | i1) != 0)
+ {
+ i0 = 0xbff00000; i1 = 0;
}
- i1 &= (~i);
}
- INSERT_WORDS(x,i0,i1);
- return x;
+ else
+ {
+ i = (0x000fffff) >> j0;
+ if (((i0 & i) | i1) == 0)
+ return x; /* x is integral */
+ math_force_eval (huge + x); /* raise inexact flag */
+ if (i0 < 0)
+ i0 += (0x00100000) >> j0;
+ i0 &= (~i); i1 = 0;
+ }
+ }
+ else if (j0 > 51)
+ {
+ if (j0 == 0x400)
+ return x + x; /* inf or NaN */
+ else
+ return x; /* x is integral */
+ }
+ else
+ {
+ i = ((u_int32_t) (0xffffffff)) >> (j0 - 20);
+ if ((i1 & i) == 0)
+ return x; /* x is integral */
+ math_force_eval (huge + x); /* raise inexact flag */
+ if (i0 < 0)
+ {
+ if (j0 == 20)
+ i0 += 1;
+ else
+ {
+ j = i1 + (1 << (52 - j0));
+ if (j < i1)
+ i0 += 1; /* got a carry */
+ i1 = j;
+ }
+ }
+ i1 &= (~i);
+ }
+ INSERT_WORDS (x, i0, i1);
+ return x;
}
#ifndef __floor
weak_alias (__floor, floor)
diff --git a/libc/sysdeps/ieee754/dbl-64/s_frexp.c b/libc/sysdeps/ieee754/dbl-64/s_frexp.c
index 516f561a1..1b8d8500b 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_frexp.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_frexp.c
@@ -28,25 +28,28 @@ static char rcsid[] = "$NetBSD: s_frexp.c,v 1.9 1995/05/10 20:47:24 jtc Exp $";
#include <math_private.h>
static const double
-two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
+ two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
-double __frexp(double x, int *eptr)
+double
+__frexp (double x, int *eptr)
{
- int32_t hx, ix, lx;
- EXTRACT_WORDS(hx,lx,x);
- ix = 0x7fffffff&hx;
- *eptr = 0;
- if(ix>=0x7ff00000||((ix|lx)==0)) return x; /* 0,inf,nan */
- if (ix<0x00100000) { /* subnormal */
- x *= two54;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- *eptr = -54;
- }
- *eptr += (ix>>20)-1022;
- hx = (hx&0x800fffff)|0x3fe00000;
- SET_HIGH_WORD(x,hx);
- return x;
+ int32_t hx, ix, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ ix = 0x7fffffff & hx;
+ *eptr = 0;
+ if (ix >= 0x7ff00000 || ((ix | lx) == 0))
+ return x; /* 0,inf,nan */
+ if (ix < 0x00100000) /* subnormal */
+ {
+ x *= two54;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ *eptr = -54;
+ }
+ *eptr += (ix >> 20) - 1022;
+ hx = (hx & 0x800fffff) | 0x3fe00000;
+ SET_HIGH_WORD (x, hx);
+ return x;
}
weak_alias (__frexp, frexp)
#ifdef NO_LONG_DOUBLE
diff --git a/libc/sysdeps/ieee754/dbl-64/s_isinf.c b/libc/sysdeps/ieee754/dbl-64/s_isinf.c
index 886b346f5..46a7266e7 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_isinf.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_isinf.c
@@ -19,11 +19,11 @@ static char rcsid[] = "$NetBSD: s_isinf.c,v 1.3 1995/05/11 23:20:14 jtc Exp $";
int
__isinf (double x)
{
- int32_t hx,lx;
- EXTRACT_WORDS(hx,lx,x);
- lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
- lx |= -lx;
- return ~(lx >> 31) & (hx >> 30);
+ int32_t hx, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
+ lx |= -lx;
+ return ~(lx >> 31) & (hx >> 30);
}
hidden_def (__isinf)
weak_alias (__isinf, isinf)
diff --git a/libc/sysdeps/ieee754/dbl-64/s_isinf_ns.c b/libc/sysdeps/ieee754/dbl-64/s_isinf_ns.c
index 0ce50352c..d8142bcf6 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_isinf_ns.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_isinf_ns.c
@@ -14,7 +14,7 @@
int
__isinf_ns (double x)
{
- int32_t hx,lx;
- EXTRACT_WORDS(hx,lx,x);
- return !(lx | ((hx & 0x7fffffff) ^ 0x7ff00000));
+ int32_t hx, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ return !(lx | ((hx & 0x7fffffff) ^ 0x7ff00000));
}
diff --git a/libc/sysdeps/ieee754/dbl-64/s_isnan.c b/libc/sysdeps/ieee754/dbl-64/s_isnan.c
index f8958dcbb..5d9f31be2 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_isnan.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_isnan.c
@@ -23,14 +23,15 @@ static char rcsid[] = "$NetBSD: s_isnan.c,v 1.8 1995/05/10 20:47:36 jtc Exp $";
#include <math_private.h>
#undef __isnan
-int __isnan(double x)
+int
+__isnan (double x)
{
- int32_t hx,lx;
- EXTRACT_WORDS(hx,lx,x);
- hx &= 0x7fffffff;
- hx |= (u_int32_t)(lx|(-lx))>>31;
- hx = 0x7ff00000 - hx;
- return (int)(((u_int32_t)hx)>>31);
+ int32_t hx, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ hx &= 0x7fffffff;
+ hx |= (u_int32_t) (lx | (-lx)) >> 31;
+ hx = 0x7ff00000 - hx;
+ return (int) (((u_int32_t) hx) >> 31);
}
hidden_def (__isnan)
weak_alias (__isnan, isnan)
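
__isnan folds the low word into bit 0 of the masked high word, so the combined value exceeds 0x7ff00000 exactly when the exponent field is all ones and the mantissa is nonzero. A standalone sketch of the same branch-free test (illustration only):

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <math.h>

static int
isnan_sketch (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  uint32_t hx = (uint32_t) (bits >> 32) & 0x7fffffffu;
  uint32_t lx = (uint32_t) bits;
  hx |= (lx | (0u - lx)) >> 31;     /* 1 if any low mantissa bit is set */
  return (int) ((0x7ff00000u - hx) >> 31);
}

int
main (void)
{
  printf ("%d %d %d\n", isnan_sketch (NAN), isnan_sketch (INFINITY),
          isnan_sketch (42.0));
  /* expected: 1 0 0 */
  return 0;
}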
diff --git a/libc/sysdeps/ieee754/dbl-64/s_llround.c b/libc/sysdeps/ieee754/dbl-64/s_llround.c
index e8c2232e9..7d50a57dd 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_llround.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_llround.c
@@ -66,7 +66,7 @@ __llround (double x)
else
{
/* The number is too large. It is left implementation defined
- what happens. */
+ what happens. */
return (long long int) x;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/s_log1p.c b/libc/sysdeps/ieee754/dbl-64/s_log1p.c
index e3e686096..ea1dc6ca7 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_log1p.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_log1p.c
@@ -11,7 +11,7 @@
*/
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
-*/
+ */
/* double log1p(double x)
*
@@ -82,91 +82,112 @@
#include <math_private.h>
static const double
-ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
-ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
-two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
-Lp[] = {0.0, 6.666666666666735130e-01, /* 3FE55555 55555593 */
- 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
- 2.857142874366239149e-01, /* 3FD24924 94229359 */
- 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
- 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
- 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
- 1.479819860511658591e-01}; /* 3FC2F112 DF3E5244 */
+ ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
+ ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
+ two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
+ Lp[] = { 0.0, 6.666666666666735130e-01, /* 3FE55555 55555593 */
+ 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
+ 2.857142874366239149e-01, /* 3FD24924 94229359 */
+ 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
+ 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
+ 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
+ 1.479819860511658591e-01 }; /* 3FC2F112 DF3E5244 */
static const double zero = 0.0;
double
-__log1p(double x)
+__log1p (double x)
{
- double hfsq,f,c,s,z,R,u,z2,z4,z6,R1,R2,R3,R4;
- int32_t k,hx,hu,ax;
+ double hfsq, f, c, s, z, R, u, z2, z4, z6, R1, R2, R3, R4;
+ int32_t k, hx, hu, ax;
- GET_HIGH_WORD(hx,x);
- ax = hx&0x7fffffff;
+ GET_HIGH_WORD (hx, x);
+ ax = hx & 0x7fffffff;
- k = 1;
- if (hx < 0x3FDA827A) { /* x < 0.41422 */
- if(__builtin_expect(ax>=0x3ff00000, 0)) { /* x <= -1.0 */
- if(x==-1.0) return -two54/(x-x);/* log1p(-1)=+inf */
- else return (x-x)/(x-x); /* log1p(x<-1)=NaN */
- }
- if(__builtin_expect(ax<0x3e200000, 0)) { /* |x| < 2**-29 */
- math_force_eval(two54+x); /* raise inexact */
- if (ax<0x3c900000) /* |x| < 2**-54 */
- return x;
- else
- return x - x*x*0.5;
- }
- if(hx>0||hx<=((int32_t)0xbfd2bec3)) {
- k=0;f=x;hu=1;} /* -0.2929<x<0.41422 */
+ k = 1;
+ if (hx < 0x3FDA827A) /* x < 0.41422 */
+ {
+ if (__builtin_expect (ax >= 0x3ff00000, 0)) /* x <= -1.0 */
+ {
+ if (x == -1.0)
+ return -two54 / (x - x); /* log1p(-1)=+inf */
+ else
+ return (x - x) / (x - x); /* log1p(x<-1)=NaN */
}
- else if (__builtin_expect(hx >= 0x7ff00000, 0)) return x+x;
- if(k!=0) {
- if(hx<0x43400000) {
- u = 1.0+x;
- GET_HIGH_WORD(hu,u);
- k = (hu>>20)-1023;
- c = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */
- c /= u;
- } else {
- u = x;
- GET_HIGH_WORD(hu,u);
- k = (hu>>20)-1023;
- c = 0;
- }
- hu &= 0x000fffff;
- if(hu<0x6a09e) {
- SET_HIGH_WORD(u,hu|0x3ff00000); /* normalize u */
- } else {
- k += 1;
- SET_HIGH_WORD(u,hu|0x3fe00000); /* normalize u/2 */
- hu = (0x00100000-hu)>>2;
- }
- f = u-1.0;
+ if (__builtin_expect (ax < 0x3e200000, 0)) /* |x| < 2**-29 */
+ {
+ math_force_eval (two54 + x); /* raise inexact */
+ if (ax < 0x3c900000) /* |x| < 2**-54 */
+ return x;
+ else
+ return x - x * x * 0.5;
}
- hfsq=0.5*f*f;
- if(hu==0) { /* |f| < 2**-20 */
- if(f==zero) {
- if(k==0) return zero;
- else {c += k*ln2_lo; return k*ln2_hi+c;}
+ if (hx > 0 || hx <= ((int32_t) 0xbfd2bec3))
+ {
+ k = 0; f = x; hu = 1;
+ } /* -0.2929<x<0.41422 */
+ }
+ else if (__builtin_expect (hx >= 0x7ff00000, 0))
+ return x + x;
+ if (k != 0)
+ {
+ if (hx < 0x43400000)
+ {
+ u = 1.0 + x;
+ GET_HIGH_WORD (hu, u);
+ k = (hu >> 20) - 1023;
+ c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
+ c /= u;
+ }
+ else
+ {
+ u = x;
+ GET_HIGH_WORD (hu, u);
+ k = (hu >> 20) - 1023;
+ c = 0;
+ }
+ hu &= 0x000fffff;
+ if (hu < 0x6a09e)
+ {
+ SET_HIGH_WORD (u, hu | 0x3ff00000); /* normalize u */
+ }
+ else
+ {
+ k += 1;
+ SET_HIGH_WORD (u, hu | 0x3fe00000); /* normalize u/2 */
+ hu = (0x00100000 - hu) >> 2;
+ }
+ f = u - 1.0;
+ }
+ hfsq = 0.5 * f * f;
+ if (hu == 0) /* |f| < 2**-20 */
+ {
+ if (f == zero)
+ {
+ if (k == 0)
+ return zero;
+ else
+ {
+ c += k * ln2_lo; return k * ln2_hi + c;
}
- R = hfsq*(1.0-0.66666666666666666*f);
- if(k==0) return f-R; else
- return k*ln2_hi-((R-(k*ln2_lo+c))-f);
}
- s = f/(2.0+f);
- z = s*s;
-#ifdef DO_NOT_USE_THIS
- R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
-#else
- R1 = z*Lp[1]; z2=z*z;
- R2 = Lp[2]+z*Lp[3]; z4=z2*z2;
- R3 = Lp[4]+z*Lp[5]; z6=z4*z2;
- R4 = Lp[6]+z*Lp[7];
- R = R1 + z2*R2 + z4*R3 + z6*R4;
-#endif
- if(k==0) return f-(hfsq-s*(hfsq+R)); else
- return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
+ R = hfsq * (1.0 - 0.66666666666666666 * f);
+ if (k == 0)
+ return f - R;
+ else
+ return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
+ }
+ s = f / (2.0 + f);
+ z = s * s;
+ R1 = z * Lp[1]; z2 = z * z;
+ R2 = Lp[2] + z * Lp[3]; z4 = z2 * z2;
+ R3 = Lp[4] + z * Lp[5]; z6 = z4 * z2;
+ R4 = Lp[6] + z * Lp[7];
+ R = R1 + z2 * R2 + z4 * R3 + z6 * R4;
+ if (k == 0)
+ return f - (hfsq - s * (hfsq + R));
+ else
+ return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
}
weak_alias (__log1p, log1p)
#ifdef NO_LONG_DOUBLE
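
The correction term in __log1p, c = 1.0 - (u - x) for k > 0, recovers exactly what rounding discarded when forming u = 1 + x; it is folded back in later together with k * ln2_lo. A tiny demonstration of that identity (illustration only):

#include <stdio.h>

int
main (void)
{
  double x = 3.0 + 0x1p-51;       /* the low bit is lost when 1 is added */
  double u = 1.0 + x;             /* rounds to 4.0 */
  double c = 1.0 - (u - x);       /* recovers the lost 2^-51 exactly */
  printf ("u=%g c=%a\n", u, c);   /* u=4 c=0x1p-51 */
  return 0;
}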
diff --git a/libc/sysdeps/ieee754/dbl-64/s_logb.c b/libc/sysdeps/ieee754/dbl-64/s_logb.c
index 17aa94b75..c065826dd 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_logb.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_logb.c
@@ -25,7 +25,7 @@ __logb (double x)
int32_t lx, ix, rix;
EXTRACT_WORDS (ix, lx, x);
- ix &= 0x7fffffff; /* high |x| */
+ ix &= 0x7fffffff; /* high |x| */
if ((ix | lx) == 0)
return -1.0 / fabs (x);
if (ix >= 0x7ff00000)
diff --git a/libc/sysdeps/ieee754/dbl-64/s_lrint.c b/libc/sysdeps/ieee754/dbl-64/s_lrint.c
index a68c97e59..1c3037364 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_lrint.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_lrint.c
@@ -33,7 +33,7 @@ long int
__lrint (double x)
{
int32_t j0;
- u_int32_t i0,i1;
+ u_int32_t i0, i1;
volatile double w;
double t;
long int result;
diff --git a/libc/sysdeps/ieee754/dbl-64/s_modf.c b/libc/sysdeps/ieee754/dbl-64/s_modf.c
index b9911c1af..1dce6381a 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_modf.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_modf.c
@@ -25,45 +25,59 @@
static const double one = 1.0;
double
-__modf(double x, double *iptr)
+__modf (double x, double *iptr)
{
- int32_t i0,i1,j0;
- u_int32_t i;
- EXTRACT_WORDS(i0,i1,x);
- j0 = ((i0>>20)&0x7ff)-0x3ff; /* exponent of x */
- if(j0<20) { /* integer part in high x */
- if(j0<0) { /* |x|<1 */
- INSERT_WORDS(*iptr,i0&0x80000000,0); /* *iptr = +-0 */
- return x;
- } else {
- i = (0x000fffff)>>j0;
- if(((i0&i)|i1)==0) { /* x is integral */
- *iptr = x;
- INSERT_WORDS(x,i0&0x80000000,0); /* return +-0 */
- return x;
- } else {
- INSERT_WORDS(*iptr,i0&(~i),0);
- return x - *iptr;
- }
+ int32_t i0, i1, j0;
+ u_int32_t i;
+ EXTRACT_WORDS (i0, i1, x);
+ j0 = ((i0 >> 20) & 0x7ff) - 0x3ff; /* exponent of x */
+ if (j0 < 20) /* integer part in high x */
+ {
+ if (j0 < 0) /* |x|<1 */
+ {
+ INSERT_WORDS (*iptr, i0 & 0x80000000, 0); /* *iptr = +-0 */
+ return x;
+ }
+ else
+ {
+ i = (0x000fffff) >> j0;
+ if (((i0 & i) | i1) == 0) /* x is integral */
+ {
+ *iptr = x;
+ INSERT_WORDS (x, i0 & 0x80000000, 0); /* return +-0 */
+ return x;
}
- } else if (__builtin_expect(j0>51, 0)) { /* no fraction part */
- *iptr = x*one;
- /* We must handle NaNs separately. */
- if (j0 == 0x400 && ((i0 & 0xfffff) | i1))
- return x*one;
- INSERT_WORDS(x,i0&0x80000000,0); /* return +-0 */
- return x;
- } else { /* fraction part in low x */
- i = ((u_int32_t)(0xffffffff))>>(j0-20);
- if((i1&i)==0) { /* x is integral */
- *iptr = x;
- INSERT_WORDS(x,i0&0x80000000,0); /* return +-0 */
- return x;
- } else {
- INSERT_WORDS(*iptr,i0,i1&(~i));
- return x - *iptr;
+ else
+ {
+ INSERT_WORDS (*iptr, i0 & (~i), 0);
+ return x - *iptr;
}
}
+ }
+ else if (__builtin_expect (j0 > 51, 0)) /* no fraction part */
+ {
+ *iptr = x * one;
+ /* We must handle NaNs separately. */
+ if (j0 == 0x400 && ((i0 & 0xfffff) | i1))
+ return x * one;
+ INSERT_WORDS (x, i0 & 0x80000000, 0); /* return +-0 */
+ return x;
+ }
+ else /* fraction part in low x */
+ {
+ i = ((u_int32_t) (0xffffffff)) >> (j0 - 20);
+ if ((i1 & i) == 0) /* x is integral */
+ {
+ *iptr = x;
+ INSERT_WORDS (x, i0 & 0x80000000, 0); /* return +-0 */
+ return x;
+ }
+ else
+ {
+ INSERT_WORDS (*iptr, i0, i1 & (~i));
+ return x - *iptr;
+ }
+ }
}
weak_alias (__modf, modf)
#ifdef NO_LONG_DOUBLE
diff --git a/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c b/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c
index 5017f471d..dec0c5d6e 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c
@@ -29,40 +29,47 @@ static char rcsid[] = "$NetBSD: s_rint.c,v 1.8 1995/05/10 20:48:04 jtc Exp $";
#include <math_private.h>
static const double
-TWO52[2]={
+ TWO52[2] = {
4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
-4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
};
-double __nearbyint(double x)
+double
+__nearbyint (double x)
{
- fenv_t env;
- int32_t i0,j0,sx;
- double w,t;
- GET_HIGH_WORD(i0,x);
- sx = (i0>>31)&1;
- j0 = ((i0>>20)&0x7ff)-0x3ff;
- if(j0<52) {
- if(j0<0) {
- libc_feholdexcept (&env);
- w = TWO52[sx]+x;
- t = w-TWO52[sx];
- math_force_eval (t);
- libc_fesetenv (&env);
- GET_HIGH_WORD(i0,t);
- SET_HIGH_WORD(t,(i0&0x7fffffff)|(sx<<31));
- return t;
- }
- } else {
- if(j0==0x400) return x+x; /* inf or NaN */
- else return x; /* x is integral */
+ fenv_t env;
+ int32_t i0, j0, sx;
+ double w, t;
+ GET_HIGH_WORD (i0, x);
+ sx = (i0 >> 31) & 1;
+ j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
+ if (j0 < 52)
+ {
+ if (j0 < 0)
+ {
+ libc_feholdexcept (&env);
+ w = TWO52[sx] + x;
+ t = w - TWO52[sx];
+ math_force_eval (t);
+ libc_fesetenv (&env);
+ GET_HIGH_WORD (i0, t);
+ SET_HIGH_WORD (t, (i0 & 0x7fffffff) | (sx << 31));
+ return t;
}
- libc_feholdexcept (&env);
- w = TWO52[sx]+x;
- t = w-TWO52[sx];
- math_force_eval (t);
- libc_fesetenv (&env);
- return t;
+ }
+ else
+ {
+ if (j0 == 0x400)
+ return x + x; /* inf or NaN */
+ else
+ return x; /* x is integral */
+ }
+ libc_feholdexcept (&env);
+ w = TWO52[sx] + x;
+ t = w - TWO52[sx];
+ math_force_eval (t);
+ libc_fesetenv (&env);
+ return t;
}
weak_alias (__nearbyint, nearbyint)
#ifdef NO_LONG_DOUBLE
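
__nearbyint and __rint round by adding and then subtracting TWO52: once the value sits next to 2^52, the fraction bits fall off the significand and the hardware rounding mode does the work with no bit fiddling; math_force_eval and the fenv save/restore in __nearbyint keep the compiler honest and suppress the inexact flag. A sketch of the core trick for small non-negative x (illustration only; the real code also handles the sign via TWO52[sx] and large exponents):

#include <stdio.h>

static const double TWO52 = 4503599627370496.0;   /* 2^52 */

static double
rint_sketch (double x)          /* valid for 0 <= x < 2^52 */
{
  volatile double w = TWO52 + x;   /* fraction bits are rounded away here */
  return w - TWO52;
}

int
main (void)
{
  printf ("%g %g %g\n", rint_sketch (2.5), rint_sketch (3.5),
          rint_sketch (0.49999999999999994));
  /* round-to-nearest-even: 2 4 0 */
  return 0;
}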
diff --git a/libc/sysdeps/ieee754/dbl-64/s_remquo.c b/libc/sysdeps/ieee754/dbl-64/s_remquo.c
index 642581ede..c40615004 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_remquo.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_remquo.c
@@ -28,8 +28,8 @@ static const double zero = 0.0;
double
__remquo (double x, double y, int *quo)
{
- int32_t hx,hy;
- u_int32_t sx,lx,ly;
+ int32_t hx, hy;
+ u_int32_t sx, lx, ly;
int cquo, qs;
EXTRACT_WORDS (hx, lx, x);
@@ -41,14 +41,14 @@ __remquo (double x, double y, int *quo)
/* Purge off exception values. */
if ((hy | ly) == 0)
- return (x * y) / (x * y); /* y = 0 */
- if ((hx >= 0x7ff00000) /* x not finite */
- || ((hy >= 0x7ff00000) /* p is NaN */
+ return (x * y) / (x * y); /* y = 0 */
+ if ((hx >= 0x7ff00000) /* x not finite */
+ || ((hy >= 0x7ff00000) /* p is NaN */
&& (((hy - 0x7ff00000) | ly) != 0)))
return (x * y) / (x * y);
if (hy <= 0x7fbfffff)
- x = __ieee754_fmod (x, 8 * y); /* now x < 8y */
+ x = __ieee754_fmod (x, 8 * y); /* now x < 8y */
if (((hx - hy) | (lx - ly)) == 0)
{
@@ -56,8 +56,8 @@ __remquo (double x, double y, int *quo)
return zero * x;
}
- x = fabs (x);
- y = fabs (y);
+ x = fabs (x);
+ y = fabs (y);
cquo = 0;
if (x >= 4 * y)
diff --git a/libc/sysdeps/ieee754/dbl-64/s_rint.c b/libc/sysdeps/ieee754/dbl-64/s_rint.c
index 845890916..a9c0d2784 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_rint.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_rint.c
@@ -24,33 +24,39 @@
#include <math_private.h>
static const double
-TWO52[2]={
+ TWO52[2] = {
4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
-4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
};
double
-__rint(double x)
+__rint (double x)
{
- int32_t i0,j0,sx;
- double w,t;
- GET_HIGH_WORD(i0,x);
- sx = (i0>>31)&1;
- j0 = ((i0>>20)&0x7ff)-0x3ff;
- if(j0<52) {
- if(j0<0) {
- w = TWO52[sx]+x;
- t = w-TWO52[sx];
- GET_HIGH_WORD(i0,t);
- SET_HIGH_WORD(t,(i0&0x7fffffff)|(sx<<31));
- return t;
- }
- } else {
- if(j0==0x400) return x+x; /* inf or NaN */
- else return x; /* x is integral */
+ int32_t i0, j0, sx;
+ double w, t;
+ GET_HIGH_WORD (i0, x);
+ sx = (i0 >> 31) & 1;
+ j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
+ if (j0 < 52)
+ {
+ if (j0 < 0)
+ {
+ w = TWO52[sx] + x;
+ t = w - TWO52[sx];
+ GET_HIGH_WORD (i0, t);
+ SET_HIGH_WORD (t, (i0 & 0x7fffffff) | (sx << 31));
+ return t;
}
- w = TWO52[sx]+x;
- return w-TWO52[sx];
+ }
+ else
+ {
+ if (j0 == 0x400)
+ return x + x; /* inf or NaN */
+ else
+ return x; /* x is integral */
+ }
+ w = TWO52[sx] + x;
+ return w - TWO52[sx];
}
#ifndef __rint
weak_alias (__rint, rint)
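__rint above uses the same TWO52 trick but deliberately skips the fenv save/restore: rint is supposed to raise FE_INEXACT when it changes the value, nearbyint is not. A quick check of that difference through the standard <fenv.h> interface; illustrative only, with the volatile there to keep the compiler from folding the calls at compile time.

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int
main (void)
{
  volatile double x = 1.5;          /* non-integral, so rounding is inexact */

  feclearexcept (FE_INEXACT);
  (void) nearbyint (x);
  printf ("nearbyint raised FE_INEXACT: %d\n", fetestexcept (FE_INEXACT) != 0); /* 0 */

  feclearexcept (FE_INEXACT);
  (void) rint (x);
  printf ("rint raised FE_INEXACT:      %d\n", fetestexcept (FE_INEXACT) != 0); /* non-zero */
  return 0;
}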
diff --git a/libc/sysdeps/ieee754/dbl-64/s_scalbln.c b/libc/sysdeps/ieee754/dbl-64/s_scalbln.c
index 271a24c3e..6402927d2 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_scalbln.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_scalbln.c
@@ -20,38 +20,43 @@
#include <math_private.h>
static const double
-two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
-huge = 1.0e+300,
-tiny = 1.0e-300;
+ two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
+ twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
+ huge = 1.0e+300,
+ tiny = 1.0e-300;
double
__scalbln (double x, long int n)
{
- int32_t k,hx,lx;
- EXTRACT_WORDS(hx,lx,x);
- k = (hx&0x7ff00000)>>20; /* extract exponent */
- if (__builtin_expect(k==0, 0)) { /* 0 or subnormal x */
- if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
- x *= two54;
- GET_HIGH_WORD(hx,x);
- k = ((hx&0x7ff00000)>>20) - 54;
- }
- if (__builtin_expect(k==0x7ff, 0)) return x+x; /* NaN or Inf */
- if (__builtin_expect(n< -50000, 0))
- return tiny*__copysign(tiny,x); /*underflow*/
- if (__builtin_expect(n> 50000 || k+n > 0x7fe, 0))
- return huge*__copysign(huge,x); /* overflow */
- /* Now k and n are bounded we know that k = k+n does not
- overflow. */
- k = k+n;
- if (__builtin_expect(k > 0, 1)) /* normal result */
- {SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x;}
- if (k <= -54)
- return tiny*__copysign(tiny,x); /*underflow*/
- k += 54; /* subnormal result */
- SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20));
- return x*twom54;
+ int32_t k, hx, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ k = (hx & 0x7ff00000) >> 20; /* extract exponent */
+ if (__builtin_expect (k == 0, 0)) /* 0 or subnormal x */
+ {
+ if ((lx | (hx & 0x7fffffff)) == 0)
+ return x; /* +-0 */
+ x *= two54;
+ GET_HIGH_WORD (hx, x);
+ k = ((hx & 0x7ff00000) >> 20) - 54;
+ }
+ if (__builtin_expect (k == 0x7ff, 0))
+ return x + x; /* NaN or Inf */
+ if (__builtin_expect (n < -50000, 0))
+ return tiny * __copysign (tiny, x); /*underflow*/
+ if (__builtin_expect (n > 50000 || k + n > 0x7fe, 0))
+ return huge * __copysign (huge, x); /* overflow */
+ /* Now k and n are bounded we know that k = k+n does not
+ overflow. */
+ k = k + n;
+ if (__builtin_expect (k > 0, 1)) /* normal result */
+ {
+ SET_HIGH_WORD (x, (hx & 0x800fffff) | (k << 20)); return x;
+ }
+ if (k <= -54)
+ return tiny * __copysign (tiny, x); /*underflow*/
+ k += 54; /* subnormal result */
+ SET_HIGH_WORD (x, (hx & 0x800fffff) | (k << 20));
+ return x * twom54;
}
weak_alias (__scalbln, scalbln)
#ifdef NO_LONG_DOUBLE
diff --git a/libc/sysdeps/ieee754/dbl-64/s_scalbn.c b/libc/sysdeps/ieee754/dbl-64/s_scalbn.c
index 1f302557e..6e7d5ad21 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_scalbn.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_scalbn.c
@@ -20,38 +20,43 @@
#include <math_private.h>
static const double
-two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
-huge = 1.0e+300,
-tiny = 1.0e-300;
+ two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
+ twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
+ huge = 1.0e+300,
+ tiny = 1.0e-300;
double
__scalbn (double x, int n)
{
- int32_t k,hx,lx;
- EXTRACT_WORDS(hx,lx,x);
- k = (hx&0x7ff00000)>>20; /* extract exponent */
- if (__builtin_expect(k==0, 0)) { /* 0 or subnormal x */
- if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
- x *= two54;
- GET_HIGH_WORD(hx,x);
- k = ((hx&0x7ff00000)>>20) - 54;
- }
- if (__builtin_expect(k==0x7ff, 0)) return x+x; /* NaN or Inf */
- if (__builtin_expect(n< -50000, 0))
- return tiny*__copysign(tiny,x); /*underflow*/
- if (__builtin_expect(n> 50000 || k+n > 0x7fe, 0))
- return huge*__copysign(huge,x); /* overflow */
- /* Now k and n are bounded we know that k = k+n does not
- overflow. */
- k = k+n;
- if (__builtin_expect(k > 0, 1)) /* normal result */
- {SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x;}
- if (k <= -54)
- return tiny*__copysign(tiny,x); /*underflow*/
- k += 54; /* subnormal result */
- SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20));
- return x*twom54;
+ int32_t k, hx, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ k = (hx & 0x7ff00000) >> 20; /* extract exponent */
+ if (__builtin_expect (k == 0, 0)) /* 0 or subnormal x */
+ {
+ if ((lx | (hx & 0x7fffffff)) == 0)
+ return x; /* +-0 */
+ x *= two54;
+ GET_HIGH_WORD (hx, x);
+ k = ((hx & 0x7ff00000) >> 20) - 54;
+ }
+ if (__builtin_expect (k == 0x7ff, 0))
+ return x + x; /* NaN or Inf */
+ if (__builtin_expect (n < -50000, 0))
+ return tiny * __copysign (tiny, x); /*underflow*/
+ if (__builtin_expect (n > 50000 || k + n > 0x7fe, 0))
+ return huge * __copysign (huge, x); /* overflow */
+ /* Now k and n are bounded we know that k = k+n does not
+ overflow. */
+ k = k + n;
+ if (__builtin_expect (k > 0, 1)) /* normal result */
+ {
+ SET_HIGH_WORD (x, (hx & 0x800fffff) | (k << 20)); return x;
+ }
+ if (k <= -54)
+ return tiny * __copysign (tiny, x); /*underflow*/
+ k += 54; /* subnormal result */
+ SET_HIGH_WORD (x, (hx & 0x800fffff) | (k << 20));
+ return x * twom54;
}
weak_alias (__scalbn, scalbn)
#ifdef NO_LONG_DOUBLE
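After the special cases, both __scalbln and __scalbn above reduce to adding n to the 11-bit biased exponent stored in bits 62..52 of the double. A stripped-down sketch of just that fast path; the helper name is hypothetical and it handles none of the cases the real code deals with first (zeros, subnormals, NaN/Inf, overflow, underflow).

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified sketch: scale a normal double with a normal result by 2^n
   by rewriting the biased exponent field.  */
static double
scalbn_normal_sketch (double x, int n)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);                 /* like EXTRACT_WORDS */
  int k = (int) ((bits >> 52) & 0x7ff);            /* biased exponent    */
  k += n;                                          /* assume 0 < k < 0x7ff stays true */
  bits = (bits & ~(0x7ffULL << 52)) | ((uint64_t) k << 52);
  memcpy (&x, &bits, sizeof x);                    /* like SET_HIGH_WORD */
  return x;
}

int
main (void)
{
  printf ("%g %g\n", scalbn_normal_sketch (1.5, 10), scalbn (1.5, 10)); /* 1536 1536 */
  return 0;
}

The real routine pre-scales subnormal inputs by 2^54 (and post-scales by 2^-54) so that the same exponent arithmetic still applies.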
diff --git a/libc/sysdeps/ieee754/dbl-64/s_sin.c b/libc/sysdeps/ieee754/dbl-64/s_sin.c
index 5c388c8b9..53eef6002 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_sin.c
@@ -55,6 +55,59 @@
#include <math_private.h>
#include <fenv.h>
+/* Helper macros to compute sin of the input values. */
+#define POLYNOMIAL2(xx) ((((s5.x * (xx) + s4.x) * (xx) + s3.x) * (xx) + s2.x) \
+ * (xx))
+
+#define POLYNOMIAL(xx) (POLYNOMIAL2 (xx) + s1.x)
+
+/* The computed polynomial is a variation of the Taylor series expansion for
+ sin(a):
+
+ a - a^3/3! + a^5/5! - a^7/7! + a^9/9! + (1 - a^2) * da / 2
+
+ The constants s1, s2, s3, etc. are pre-computed values of 1/3!, 1/5! and so
+ on. The result is returned to LHS and correction in COR. */
+#define TAYLOR_SINCOS(xx, a, da, cor) \
+({ \
+ double t = ((POLYNOMIAL (xx) * (a) - 0.5 * (da)) * (xx) + (da)); \
+ double res = (a) + t; \
+ (cor) = ((a) - res) + t; \
+ res; \
+})
+
+/* This is again a variation of the Taylor series expansion with the term
+ x^3/3! expanded into the following for better accuracy:
+
+ bb * x ^ 3 + 3 * aa * x * x1 * x2 + aa * x1 ^ 3 + aa * x2 ^ 3
+
+ The correction term is dx and bb + aa = -1/3!
+ */
+#define TAYLOR_SLOW(x0, dx, cor) \
+({ \
+ static const double th2_36 = 206158430208.0; /* 1.5*2**37 */ \
+ double xx = (x0) * (x0); \
+ double x1 = ((x0) + th2_36) - th2_36; \
+ double y = aa.x * x1 * x1 * x1; \
+ double r = (x0) + y; \
+ double x2 = ((x0) - x1) + (dx); \
+ double t = (((POLYNOMIAL2 (xx) + bb.x) * xx + 3.0 * aa.x * x1 * x2) \
+ * (x0) + aa.x * x2 * x2 * x2 + (dx)); \
+ t = (((x0) - r) + y) + t; \
+ double res = r + t; \
+ (cor) = (r - res) + t; \
+ res; \
+})
+
+#define SINCOS_TABLE_LOOKUP(u, sn, ssn, cs, ccs) \
+({ \
+ int4 k = u.i[LOW_HALF] << 2; \
+ sn = __sincostab.x[k]; \
+ ssn = __sincostab.x[k + 1]; \
+ cs = __sincostab.x[k + 2]; \
+ ccs = __sincostab.x[k + 3]; \
+})
+
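The (res, cor) pairs produced by TAYLOR_SINCOS and TAYLOR_SLOW follow the usual fast-two-sum idiom: res is the rounded sum, cor the exact rounding error, and the later tests of the form res == res + 1.02 * cor ask whether that error could possibly move the correctly rounded result, falling back to a slower path when it could. A tiny self-contained sketch of the idiom, with illustrative names only:

#include <stdio.h>

/* Fast two-sum: if |a| >= |t|, then res + cor equals a + t exactly,
   so cor is the rounding error of the fast-path result.  */
static double
fast_two_sum (double a, double t, double *cor)
{
  double res = a + t;
  *cor = (a - res) + t;
  return res;
}

int
main (void)
{
  double cor;
  double res = fast_two_sum (1.0, 1e-17, &cor);
  printf ("res = %.17g, cor = %.17g\n", res, cor);   /* res = 1, cor = 1e-17 */
  printf ("fast path ok: %d\n", res == res + 1.02 * cor);
  return 0;
}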
#ifndef SECTION
# define SECTION
#endif
@@ -74,10 +127,8 @@ static const double
void __dubsin (double x, double dx, double w[]);
void __docos (double x, double dx, double w[]);
-double __mpsin (double x, double dx);
-double __mpcos (double x, double dx);
-double __mpsin1 (double x);
-double __mpcos1 (double x);
+double __mpsin (double x, double dx, bool reduce_range);
+double __mpcos (double x, double dx, bool reduce_range);
static double slow (double x);
static double slow1 (double x);
static double slow2 (double x);
@@ -93,6 +144,39 @@ static double csloww (double x, double dx, double orig);
static double csloww1 (double x, double dx, double orig);
static double csloww2 (double x, double dx, double orig, int n);
+/* Reduce range of X and compute sin of a + da. K is the amount by which to
+ rotate the quadrants. This allows us to use the same routine to compute cos
+ by simply rotating the quadrants by 1. */
+static inline double
+__always_inline
+reduce_and_compute (double x, double a, double da, unsigned int k)
+{
+ double retval = 0;
+ unsigned int n = __branred (x, &a, &da);
+ k = (n + k) % 4;
+ switch (k)
+ {
+ case 0:
+ if (a * a < 0.01588)
+ retval = bsloww (a, da, x, n);
+ else
+ retval = bsloww1 (a, da, x, n);
+ break;
+ case 2:
+ if (a * a < 0.01588)
+ retval = bsloww (-a, -da, x, n);
+ else
+ retval = bsloww1 (-a, -da, x, n);
+ break;
+
+ case 1:
+ case 3:
+ retval = bsloww2 (a, da, x, n);
+ break;
+ }
+ return retval;
+}
+
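reduce_and_compute works because writing x = n * pi/2 + a with |a| <= pi/4 turns sin(x) into one of +-sin(a) or +-cos(a) selected by n mod 4, and cos(x) is obtained from the same machinery by rotating the quadrant index by one. A naive double-precision illustration of that convention; it uses plain libm calls and a crude reduction, nothing like __branred's precision, and every name is hypothetical.

#include <math.h>
#include <stdio.h>

static double
sin_by_quadrant (double x, unsigned int k)
{
  static const double hp = 1.5707963267948966;   /* pi/2                     */
  double n = nearbyint (x / hp);                 /* nearest multiple of pi/2 */
  double a = x - n * hp;                         /* crude reduction          */
  switch (((unsigned int) (long) n + k) & 3)
    {
    case 0:  return  sin (a);
    case 1:  return  cos (a);
    case 2:  return -sin (a);
    default: return -cos (a);
    }
}

int
main (void)
{
  double x = 2.0;
  printf ("%.15g %.15g\n", sin_by_quadrant (x, 0), sin (x));
  printf ("%.15g %.15g\n", sin_by_quadrant (x, 1), cos (x));
  return 0;
}

Passing k = 1 here reproduces cos (x), which is exactly how the __cos path below reuses the routine.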
/*******************************************************************/
/* An ultimate sin routine. Given an IEEE double machine number x */
/* it computes the correctly rounded (to nearest) value of sin(x) */
@@ -113,21 +197,16 @@ __sin (double x)
m = u.i[HIGH_HALF];
k = 0x7fffffff & m; /* no sign */
if (k < 0x3e500000) /* if x->0 =>sin(x)=x */
- {
- retval = x;
- goto ret;
- }
+ retval = x;
/*---------------------------- 2^-26 < |x|< 0.25 ----------------------*/
else if (k < 0x3fd00000)
{
xx = x * x;
- /*Taylor series. */
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + s1.x)
- * (xx * x));
+ /* Taylor series. */
+ t = POLYNOMIAL (xx) * (xx * x);
res = x + t;
cor = (x - res) + t;
retval = (res == res + 1.07 * cor) ? res : slow (x);
- goto ret;
} /* else if (k < 0x3fd00000) */
/*---------------------------- 0.25<|x|< 0.855469---------------------- */
else if (k < 0x3feb6000)
@@ -137,16 +216,16 @@ __sin (double x)
xx = y * y;
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = (m > 0) ? __sincostab.x[k] : -__sincostab.x[k];
- ssn = (m > 0) ? __sincostab.x[k + 1] : -__sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
+ if (m <= 0)
+ {
+ sn = -sn;
+ ssn = -ssn;
+ }
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
retval = (res == res + 1.096 * cor) ? res : slow1 (x);
- goto ret;
} /* else if (k < 0x3feb6000) */
/*----------------------- 0.855469 <|x|<2.426265 ----------------------*/
@@ -167,16 +246,11 @@ __sin (double x)
xx = y * y;
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ccs - s * ssn - cs * c) - sn * s;
res = cs + cor;
cor = (cs - res) + cor;
retval = (res == res + 1.020 * cor) ? ((m > 0) ? res : -res) : slow2 (x);
- goto ret;
} /* else if (k < 0x400368fd) */
/*-------------------------- 2.426265<|x|< 105414350 ----------------------*/
@@ -204,14 +278,10 @@ __sin (double x)
}
if (xx < 0.01588)
{
- /*Taylor series */
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx
- + s1.x) * a - 0.5 * da) * xx + da;
- res = a + t;
- cor = (a - res) + t;
+ /* Taylor series. */
+ res = TAYLOR_SINCOS (xx, a, da, cor);
cor = (cor > 0) ? 1.02 * cor + eps : 1.02 * cor - eps;
retval = (res == res + cor) ? res : sloww (a, da, x);
- goto ret;
}
else
{
@@ -232,18 +302,13 @@ __sin (double x)
xx = y * y;
s = y + (db + y * xx * (sn3 + xx * sn5));
c = y * db + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
cor = (cor > 0) ? 1.035 * cor + eps : 1.035 * cor - eps;
retval = ((res == res + cor) ? ((m) ? res : -res)
: sloww1 (a, da, x));
- goto ret;
}
break;
@@ -257,11 +322,7 @@ __sin (double x)
u.x = big.x + a;
y = a - (u.x - big.x) + da;
xx = y * y;
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
cor = (ccs - s * ssn - cs * c) - sn * s;
@@ -270,11 +331,8 @@ __sin (double x)
cor = (cor > 0) ? 1.025 * cor + eps : 1.025 * cor - eps;
retval = ((res == res + cor) ? ((n & 2) ? -res : res)
: sloww2 (a, da, x, n));
- goto ret;
-
break;
}
-
} /* else if (k < 0x419921FB ) */
/*---------------------105414350 <|x|< 281474976710656 --------------------*/
@@ -307,14 +365,10 @@ __sin (double x)
}
if (xx < 0.01588)
{
- /* Taylor series */
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx
- + s1.x) * a - 0.5 * da) * xx + da;
- res = a + t;
- cor = (a - res) + t;
+ /* Taylor series. */
+ res = TAYLOR_SINCOS (xx, a, da, cor);
cor = (cor > 0) ? 1.02 * cor + eps : 1.02 * cor - eps;
retval = (res == res + cor) ? res : bsloww (a, da, x, n);
- goto ret;
}
else
{
@@ -335,18 +389,13 @@ __sin (double x)
xx = y * y;
s = y + (db + y * xx * (sn3 + xx * sn5));
c = y * db + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
cor = (cor > 0) ? 1.035 * cor + eps : 1.035 * cor - eps;
retval = ((res == res + cor) ? ((m) ? res : -res)
: bsloww1 (a, da, x, n));
- goto ret;
}
break;
@@ -360,11 +409,7 @@ __sin (double x)
u.x = big.x + a;
y = a - (u.x - big.x) + da;
xx = y * y;
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
cor = (ccs - s * ssn - cs * c) - sn * s;
@@ -373,40 +418,13 @@ __sin (double x)
cor = (cor > 0) ? 1.025 * cor + eps : 1.025 * cor - eps;
retval = ((res == res + cor) ? ((n & 2) ? -res : res)
: bsloww2 (a, da, x, n));
- goto ret;
-
break;
}
} /* else if (k < 0x42F00000 ) */
/* -----------------281474976710656 <|x| <2^1024----------------------------*/
else if (k < 0x7ff00000)
- {
- n = __branred (x, &a, &da);
- switch (n)
- {
- case 0:
- if (a * a < 0.01588)
- retval = bsloww (a, da, x, n);
- else
- retval = bsloww1 (a, da, x, n);
- goto ret;
- break;
- case 2:
- if (a * a < 0.01588)
- retval = bsloww (-a, -da, x, n);
- else
- retval = bsloww1 (-a, -da, x, n);
- goto ret;
- break;
-
- case 1:
- case 3:
- retval = bsloww2 (a, da, x, n);
- goto ret;
- break;
- }
- } /* else if (k < 0x7ff00000 ) */
+ retval = reduce_and_compute (x, a, da, 0);
/*--------------------- |x| > 2^1024 ----------------------------------*/
else
@@ -414,10 +432,8 @@ __sin (double x)
if (k == 0x7ff00000 && u.i[LOW_HALF] == 0)
__set_errno (EDOM);
retval = x / x;
- goto ret;
}
-ret:
return retval;
}
@@ -444,11 +460,9 @@ __cos (double x)
m = u.i[HIGH_HALF];
k = 0x7fffffff & m;
+ /* |x|<2^-27 => cos(x)=1 */
if (k < 0x3e400000)
- {
- retval = 1.0;
- goto ret;
- } /* |x|<2^-27 => cos(x)=1 */
+ retval = 1.0;
else if (k < 0x3feb6000)
{ /* 2^-27 < |x| < 0.855469 */
@@ -458,16 +472,11 @@ __cos (double x)
xx = y * y;
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ccs - s * ssn - cs * c) - sn * s;
res = cs + cor;
cor = (cs - res) + cor;
retval = (res == res + 1.020 * cor) ? res : cslow2 (x);
- goto ret;
} /* else if (k < 0x3feb6000) */
else if (k < 0x400368fd)
@@ -478,13 +487,9 @@ __cos (double x)
xx = a * a;
if (xx < 0.01588)
{
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + s1.x)
- * a - 0.5 * da) * xx + da;
- res = a + t;
- cor = (a - res) + t;
+ res = TAYLOR_SINCOS (xx, a, da, cor);
cor = (cor > 0) ? 1.02 * cor + 1.0e-31 : 1.02 * cor - 1.0e-31;
retval = (res == res + cor) ? res : csloww (a, da, x);
- goto ret;
}
else
{
@@ -505,18 +510,13 @@ __cos (double x)
xx = y * y;
s = y + (db + y * xx * (sn3 + xx * sn5));
c = y * db + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
cor = (cor > 0) ? 1.035 * cor + 1.0e-31 : 1.035 * cor - 1.0e-31;
retval = ((res == res + cor) ? ((m) ? res : -res)
: csloww1 (a, da, x));
- goto ret;
}
} /* else if (k < 0x400368fd) */
@@ -546,13 +546,9 @@ __cos (double x)
}
if (xx < 0.01588)
{
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx
- + s1.x) * a - 0.5 * da) * xx + da;
- res = a + t;
- cor = (a - res) + t;
+ res = TAYLOR_SINCOS (xx, a, da, cor);
cor = (cor > 0) ? 1.02 * cor + eps : 1.02 * cor - eps;
retval = (res == res + cor) ? res : csloww (a, da, x);
- goto ret;
}
else
{
@@ -573,18 +569,13 @@ __cos (double x)
xx = y * y;
s = y + (db + y * xx * (sn3 + xx * sn5));
c = y * db + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
cor = (cor > 0) ? 1.035 * cor + eps : 1.035 * cor - eps;
retval = ((res == res + cor) ? ((m) ? res : -res)
: csloww1 (a, da, x));
- goto ret;
}
break;
@@ -598,11 +589,7 @@ __cos (double x)
u.x = big.x + a;
y = a - (u.x - big.x) + da;
xx = y * y;
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
cor = (ccs - s * ssn - cs * c) - sn * s;
@@ -611,8 +598,6 @@ __cos (double x)
cor = (cor > 0) ? 1.025 * cor + eps : 1.025 * cor - eps;
retval = ((res == res + cor) ? ((n) ? -res : res)
: csloww2 (a, da, x, n));
- goto ret;
-
break;
}
} /* else if (k < 0x419921FB ) */
@@ -646,13 +631,9 @@ __cos (double x)
}
if (xx < 0.01588)
{
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx
- + s1.x) * a - 0.5 * da) * xx + da;
- res = a + t;
- cor = (a - res) + t;
+ res = TAYLOR_SINCOS (xx, a, da, cor);
cor = (cor > 0) ? 1.02 * cor + eps : 1.02 * cor - eps;
retval = (res == res + cor) ? res : bsloww (a, da, x, n);
- goto ret;
}
else
{
@@ -673,18 +654,13 @@ __cos (double x)
xx = y * y;
s = y + (db + y * xx * (sn3 + xx * sn5));
c = y * db + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
cor = (cor > 0) ? 1.035 * cor + eps : 1.035 * cor - eps;
retval = ((res == res + cor) ? ((m) ? res : -res)
: bsloww1 (a, da, x, n));
- goto ret;
}
break;
@@ -698,11 +674,7 @@ __cos (double x)
u.x = big.x + a;
y = a - (u.x - big.x) + da;
xx = y * y;
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
cor = (ccs - s * ssn - cs * c) - sn * s;
@@ -711,49 +683,21 @@ __cos (double x)
cor = (cor > 0) ? 1.025 * cor + eps : 1.025 * cor - eps;
retval = ((res == res + cor) ? ((n) ? -res : res)
: bsloww2 (a, da, x, n));
- goto ret;
break;
}
} /* else if (k < 0x42F00000 ) */
+ /* 281474976710656 <|x| <2^1024 */
else if (k < 0x7ff00000)
- { /* 281474976710656 <|x| <2^1024 */
-
- n = __branred (x, &a, &da);
- switch (n)
- {
- case 1:
- if (a * a < 0.01588)
- retval = bsloww (-a, -da, x, n);
- else
- retval = bsloww1 (-a, -da, x, n);
- goto ret;
- break;
- case 3:
- if (a * a < 0.01588)
- retval = bsloww (a, da, x, n);
- else
- retval = bsloww1 (a, da, x, n);
- goto ret;
- break;
-
- case 0:
- case 2:
- retval = bsloww2 (a, da, x, n);
- goto ret;
- break;
- }
- } /* else if (k < 0x7ff00000 ) */
+ retval = reduce_and_compute (x, a, da, 1);
else
{
if (k == 0x7ff00000 && u.i[LOW_HALF] == 0)
__set_errno (EDOM);
retval = x / x; /* |x| > 2^1024 */
- goto ret;
}
-ret:
return retval;
}
@@ -766,18 +710,8 @@ static double
SECTION
slow (double x)
{
- static const double th2_36 = 206158430208.0; /* 1.5*2**37 */
- double y, x1, x2, xx, r, t, res, cor, w[2];
- x1 = (x + th2_36) - th2_36;
- y = aa.x * x1 * x1 * x1;
- r = x + y;
- x2 = x - x1;
- xx = x * x;
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + bb.x) * xx
- + 3.0 * aa.x * x1 * x2) * x + aa.x * x2 * x2 * x2;
- t = ((x - r) + y) + t;
- res = r + t;
- cor = (r - res) + t;
+ double res, cor, w[2];
+ res = TAYLOR_SLOW (x, 0, cor);
if (res == res + 1.0007 * cor)
return res;
else
@@ -786,12 +720,12 @@ slow (double x)
if (w[0] == w[0] + 1.000000001 * w[1])
return (x > 0) ? w[0] : -w[0];
else
- return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+ return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
}
}
/*******************************************************************************/
-/* Routine compute sin(x) for 0.25<|x|< 0.855469 by __sincostab.tbl and Taylor */
+/* Routine compute sin(x) for 0.25<|x|< 0.855469 by __sincostab.tbl and Taylor */
/* and if result still doesn't accurate enough by mpsin or dubsin */
/*******************************************************************************/
@@ -802,18 +736,13 @@ slow1 (double x)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, c1, c2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
y = y - (u.x - big.x);
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k]; /* Data */
- ssn = __sincostab.x[k + 1]; /* from */
- cs = __sincostab.x[k + 2]; /* tables */
- ccs = __sincostab.x[k + 3]; /* __sincostab.tbl */
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = y - y1;
c1 = (cs + t22) - t22;
@@ -831,7 +760,7 @@ slow1 (double x)
if (w[0] == w[0] + 1.000000005 * w[1])
return (x > 0) ? w[0] : -w[0];
else
- return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+ return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
}
}
@@ -846,7 +775,6 @@ slow2 (double x)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, e1, e2, xx, cor, res, del;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
y = hp0.x - y;
if (y >= 0)
@@ -864,11 +792,7 @@ slow2 (double x)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = y * del + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + del;
e1 = (sn + t22) - t22;
@@ -889,7 +813,7 @@ slow2 (double x)
if (w[0] == w[0] + 1.000000005 * w[1])
return (x > 0) ? w[0] : -w[0];
else
- return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+ return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
}
}
@@ -905,24 +829,14 @@ static double
SECTION
sloww (double x, double dx, double orig)
{
- static const double th2_36 = 206158430208.0; /* 1.5*2**37 */
- double y, x1, x2, xx, r, t, res, cor, w[2], a, da, xn;
+ double y, t, res, cor, w[2], a, da, xn;
union
{
int4 i[2];
double x;
} v;
int4 n;
- x1 = (x + th2_36) - th2_36;
- y = aa.x * x1 * x1 * x1;
- r = x + y;
- x2 = (x - x1) + dx;
- xx = x * x;
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + bb.x) * xx
- + 3.0 * aa.x * x1 * x2) * x + aa.x * x2 * x2 * x2 + dx;
- t = ((x - r) + y) + t;
- res = r + t;
- cor = (r - res) + t;
+ res = TAYLOR_SLOW (x, dx, cor);
cor =
(cor >
0) ? 1.0005 * cor + ABS (orig) * 3.1e-30 : 1.0005 * cor -
@@ -966,7 +880,7 @@ sloww (double x, double dx, double orig)
if (w[0] == w[0] + cor)
return (a > 0) ? w[0] : -w[0];
else
- return __mpsin1 (orig);
+ return __mpsin (orig, 0, true);
}
}
}
@@ -985,7 +899,6 @@ sloww1 (double x, double dx, double orig)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, c1, c2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -994,11 +907,7 @@ sloww1 (double x, double dx, double orig)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
c1 = (cs + t22) - t22;
@@ -1028,7 +937,7 @@ sloww1 (double x, double dx, double orig)
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return __mpsin1 (orig);
+ return __mpsin (orig, 0, true);
}
}
@@ -1046,7 +955,6 @@ sloww2 (double x, double dx, double orig, int n)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, e1, e2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1055,11 +963,7 @@ sloww2 (double x, double dx, double orig, int n)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = y * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
@@ -1090,7 +994,7 @@ sloww2 (double x, double dx, double orig, int n)
if (w[0] == w[0] + cor)
return (n & 2) ? -w[0] : w[0];
else
- return __mpsin1 (orig);
+ return __mpsin (orig, 0, true);
}
}
@@ -1106,19 +1010,9 @@ static double
SECTION
bsloww (double x, double dx, double orig, int n)
{
- static const double th2_36 = 206158430208.0; /* 1.5*2**37 */
- double y, x1, x2, xx, r, t, res, cor, w[2];
-
- x1 = (x + th2_36) - th2_36;
- y = aa.x * x1 * x1 * x1;
- r = x + y;
- x2 = (x - x1) + dx;
- xx = x * x;
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + bb.x) * xx
- + 3.0 * aa.x * x1 * x2) * x + aa.x * x2 * x2 * x2 + dx;
- t = ((x - r) + y) + t;
- res = r + t;
- cor = (r - res) + t;
+ double res, cor, w[2];
+
+ res = TAYLOR_SLOW (x, dx, cor);
cor = (cor > 0) ? 1.0005 * cor + 1.1e-24 : 1.0005 * cor - 1.1e-24;
if (res == res + cor)
return res;
@@ -1132,7 +1026,7 @@ bsloww (double x, double dx, double orig, int n)
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return (n & 1) ? __mpcos1 (orig) : __mpsin1 (orig);
+ return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
}
}
@@ -1150,7 +1044,6 @@ bsloww1 (double x, double dx, double orig, int n)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, c1, c2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1159,11 +1052,7 @@ bsloww1 (double x, double dx, double orig, int n)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
c1 = (cs + t22) - t22;
@@ -1188,7 +1077,7 @@ bsloww1 (double x, double dx, double orig, int n)
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return (n & 1) ? __mpcos1 (orig) : __mpsin1 (orig);
+ return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
}
}
@@ -1206,7 +1095,6 @@ bsloww2 (double x, double dx, double orig, int n)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, e1, e2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1215,11 +1103,7 @@ bsloww2 (double x, double dx, double orig, int n)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = y * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
@@ -1245,7 +1129,7 @@ bsloww2 (double x, double dx, double orig, int n)
if (w[0] == w[0] + cor)
return (n & 2) ? -w[0] : w[0];
else
- return (n & 1) ? __mpsin1 (orig) : __mpcos1 (orig);
+ return (n & 1) ? __mpsin (orig, 0, true) : __mpcos (orig, 0, true);
}
}
@@ -1261,7 +1145,6 @@ cslow2 (double x)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, e1, e2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1269,11 +1152,7 @@ cslow2 (double x)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = y - y1;
e1 = (sn + t22) - t22;
@@ -1292,7 +1171,7 @@ cslow2 (double x)
if (w[0] == w[0] + 1.000000005 * w[1])
return w[0];
else
- return __mpcos (x, 0);
+ return __mpcos (x, 0, false);
}
}
@@ -1308,8 +1187,7 @@ static double
SECTION
csloww (double x, double dx, double orig)
{
- static const double th2_36 = 206158430208.0; /* 1.5*2**37 */
- double y, x1, x2, xx, r, t, res, cor, w[2], a, da, xn;
+ double y, t, res, cor, w[2], a, da, xn;
union
{
int4 i[2];
@@ -1317,17 +1195,8 @@ csloww (double x, double dx, double orig)
} v;
int4 n;
- x1 = (x + th2_36) - th2_36;
- y = aa.x * x1 * x1 * x1;
- r = x + y;
- x2 = (x - x1) + dx;
- xx = x * x;
/* Taylor series */
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + bb.x) * xx
- + 3.0 * aa.x * x1 * x2) * x + aa.x * x2 * x2 * x2 + dx;
- t = ((x - r) + y) + t;
- res = r + t;
- cor = (r - res) + t;
+ t = TAYLOR_SLOW (x, dx, cor);
if (cor > 0)
cor = 1.0005 * cor + ABS (orig) * 3.1e-30;
@@ -1375,7 +1244,7 @@ csloww (double x, double dx, double orig)
if (w[0] == w[0] + cor)
return (a > 0) ? w[0] : -w[0];
else
- return __mpcos1 (orig);
+ return __mpcos (orig, 0, true);
}
}
}
@@ -1394,7 +1263,6 @@ csloww1 (double x, double dx, double orig)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, c1, c2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1403,11 +1271,7 @@ csloww1 (double x, double dx, double orig)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
c1 = (cs + t22) - t22;
@@ -1435,7 +1299,7 @@ csloww1 (double x, double dx, double orig)
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return __mpcos1 (orig);
+ return __mpcos (orig, 0, true);
}
}
@@ -1454,7 +1318,6 @@ csloww2 (double x, double dx, double orig, int n)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, e1, e2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1463,11 +1326,7 @@ csloww2 (double x, double dx, double orig, int n)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = y * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
@@ -1496,7 +1355,7 @@ csloww2 (double x, double dx, double orig, int n)
if (w[0] == w[0] + cor)
return (n) ? -w[0] : w[0];
else
- return __mpcos1 (orig);
+ return __mpcos (orig, 0, true);
}
}
diff --git a/libc/sysdeps/ieee754/dbl-64/s_sincos.c b/libc/sysdeps/ieee754/dbl-64/s_sincos.c
index b6d5432f4..01d1bdf97 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -32,7 +32,7 @@ __sincos (double x, double *sinx, double *cosx)
/* |x| ~< pi/4 */
ix &= 0x7fffffff;
- if (ix>=0x7ff00000)
+ if (ix >= 0x7ff00000)
{
/* sin(Inf or NaN) is NaN */
*sinx = *cosx = x - x;
diff --git a/libc/sysdeps/ieee754/dbl-64/s_tan.c b/libc/sysdeps/ieee754/dbl-64/s_tan.c
index 54f863e54..09db096d0 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_tan.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_tan.c
@@ -41,6 +41,7 @@
#include <math.h>
#include <math_private.h>
#include <fenv.h>
+#include <stap-probe.h>
#ifndef SECTION
# define SECTION
@@ -58,8 +59,8 @@ tan (double x)
int ux, i, n;
double a, da, a2, b, db, c, dc, c1, cc1, c2, cc2, c3, cc3, fi, ffi, gi, pz,
- s, sy, t, t1, t2, t3, t4, t7, t8, t9, t10, w, x2, xn, xx2, y, ya, yya, z0,
- z, zz, z2, zz2;
+ s, sy, t, t1, t2, t3, t4, t7, t8, t9, t10, w, x2, xn, xx2, y, ya,
+ yya, z0, z, zz, z2, zz2;
#ifndef DLA_FMS
double t5, t6;
#endif
@@ -97,7 +98,6 @@ tan (double x)
/* (II) The case 1.259e-8 < abs(x) <= 0.0608 */
if (w <= g2.d)
{
-
/* First stage */
x2 = x * x;
@@ -149,7 +149,6 @@ tan (double x)
/* (III) The case 0.0608 < abs(x) <= 0.787 */
if (w <= g3.d)
{
-
/* First stage */
i = ((int) (mfftnhf.d + TWO8 * w));
z = w - xfg[i][0].d;
@@ -376,7 +375,7 @@ tan (double x)
/* Second stage */
ffi = xfg[i][3].d;
EADD (z0, yya, z, zz)
- MUL2 (z, zz, z, zz, z2, zz2, t1, t2, t3, t4, t5, t6, t7, t8);
+ MUL2 (z, zz, z, zz, z2, zz2, t1, t2, t3, t4, t5, t6, t7, t8);
c1 = z2 * (a7.d + z2 * (a9.d + z2 * a11.d));
ADD2 (a5.d, aa5.d, c1, 0.0, c2, cc2, t1, t2);
MUL2 (z2, zz2, c2, cc2, c1, cc1, t1, t2, t3, t4, t5, t6, t7, t8);
@@ -838,6 +837,7 @@ tanMp (double x)
p = 32;
__mptan (x, &mpy, p);
__mp_dbl (&mpy, &y, p);
+ LIBC_PROBE (slowtan, 2, &x, &y);
return y;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/s_tanh.c b/libc/sysdeps/ieee754/dbl-64/s_tanh.c
index ded0d6025..23cfcdead 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_tanh.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_tanh.c
@@ -41,41 +41,51 @@ static char rcsid[] = "$NetBSD: s_tanh.c,v 1.7 1995/05/10 20:48:22 jtc Exp $";
#include <math.h>
#include <math_private.h>
-static const double one=1.0, two=2.0, tiny = 1.0e-300;
+static const double one = 1.0, two = 2.0, tiny = 1.0e-300;
-double __tanh(double x)
+double
+__tanh (double x)
{
- double t,z;
- int32_t jx,ix,lx;
+ double t, z;
+ int32_t jx, ix, lx;
- /* High word of |x|. */
- EXTRACT_WORDS(jx,lx,x);
- ix = jx&0x7fffffff;
+ /* High word of |x|. */
+ EXTRACT_WORDS (jx, lx, x);
+ ix = jx & 0x7fffffff;
- /* x is INF or NaN */
- if(ix>=0x7ff00000) {
- if (jx>=0) return one/x+one; /* tanh(+-inf)=+-1 */
- else return one/x-one; /* tanh(NaN) = NaN */
- }
+ /* x is INF or NaN */
+ if (ix >= 0x7ff00000)
+ {
+ if (jx >= 0)
+ return one / x + one; /* tanh(+-inf)=+-1 */
+ else
+ return one / x - one; /* tanh(NaN) = NaN */
+ }
- /* |x| < 22 */
- if (ix < 0x40360000) { /* |x|<22 */
- if ((ix | lx) == 0)
- return x; /* x == +-0 */
- if (ix<0x3c800000) /* |x|<2**-55 */
- return x*(one+x); /* tanh(small) = small */
- if (ix>=0x3ff00000) { /* |x|>=1 */
- t = __expm1(two*fabs(x));
- z = one - two/(t+two);
- } else {
- t = __expm1(-two*fabs(x));
- z= -t/(t+two);
- }
- /* |x| > 22, return +-1 */
- } else {
- z = one - tiny; /* raised inexact flag */
+ /* |x| < 22 */
+ if (ix < 0x40360000) /* |x|<22 */
+ {
+ if ((ix | lx) == 0)
+ return x; /* x == +-0 */
+ if (ix < 0x3c800000) /* |x|<2**-55 */
+ return x * (one + x); /* tanh(small) = small */
+ if (ix >= 0x3ff00000) /* |x|>=1 */
+ {
+ t = __expm1 (two * fabs (x));
+ z = one - two / (t + two);
+ }
+ else
+ {
+ t = __expm1 (-two * fabs (x));
+ z = -t / (t + two);
}
- return (jx>=0)? z: -z;
+ /* |x| > 22, return +-1 */
+ }
+ else
+ {
+ z = one - tiny; /* raised inexact flag */
+ }
+ return (jx >= 0) ? z : -z;
}
weak_alias (__tanh, tanh)
#ifdef NO_LONG_DOUBLE
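The two branches of the reformatted __tanh are the same function written two ways around expm1: with t = expm1(2|x|), tanh|x| = 1 - 2/(t + 2), and with t = expm1(-2|x|), tanh|x| = -t/(t + 2); the split only keeps the intermediates well conditioned on either side of |x| = 1. A short check of the two identities, illustrative only:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 0.3;
  double t1 = expm1 (-2.0 * fabs (x));
  double t2 = expm1 (2.0 * fabs (x));
  printf ("%.17g\n", -t1 / (t1 + 2.0));       /* branch used for |x| < 1  */
  printf ("%.17g\n", 1.0 - 2.0 / (t2 + 2.0)); /* branch used for |x| >= 1 */
  printf ("%.17g\n", tanh (x));
  return 0;
}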
diff --git a/libc/sysdeps/ieee754/dbl-64/sincos32.c b/libc/sysdeps/ieee754/dbl-64/sincos32.c
index 954db66d6..f253b8ce8 100644
--- a/libc/sysdeps/ieee754/dbl-64/sincos32.c
+++ b/libc/sysdeps/ieee754/dbl-64/sincos32.c
@@ -48,312 +48,318 @@
# define SECTION
#endif
-/****************************************************************/
-/* Compute Multi-Precision sin() function for given p. Receive */
-/* Multi Precision number x and result stored at y */
-/****************************************************************/
+/* Compute Multi-Precision sin() function for given p. Receive Multi Precision
+ number x and result stored at y. */
static void
SECTION
-ss32(mp_no *x, mp_no *y, int p) {
+ss32 (mp_no *x, mp_no *y, int p)
+{
int i;
double a;
- mp_no mpt1,x2,gor,sum ,mpk={1,{1.0}};
- for (i=1;i<=p;i++) mpk.d[i]=0;
+ mp_no mpt1, x2, gor, sum, mpk = {1, {1.0}};
+ for (i = 1; i <= p; i++)
+ mpk.d[i] = 0;
- __sqr(x,&x2,p);
- __cpy(&oofac27,&gor,p);
- __cpy(&gor,&sum,p);
- for (a=27.0;a>1.0;a-=2.0) {
- mpk.d[1]=a*(a-1.0);
- __mul(&gor,&mpk,&mpt1,p);
- __cpy(&mpt1,&gor,p);
- __mul(&x2,&sum,&mpt1,p);
- __sub(&gor,&mpt1,&sum,p);
- }
- __mul(x,&sum,y,p);
+ __sqr (x, &x2, p);
+ __cpy (&oofac27, &gor, p);
+ __cpy (&gor, &sum, p);
+ for (a = 27.0; a > 1.0; a -= 2.0)
+ {
+ mpk.d[1] = a * (a - 1.0);
+ __mul (&gor, &mpk, &mpt1, p);
+ __cpy (&mpt1, &gor, p);
+ __mul (&x2, &sum, &mpt1, p);
+ __sub (&gor, &mpt1, &sum, p);
+ }
+ __mul (x, &sum, y, p);
}
-/**********************************************************************/
-/* Compute Multi-Precision cos() function for given p. Receive Multi */
-/* Precision number x and result stored at y */
-/**********************************************************************/
+/* Compute Multi-Precision cos() function for given p. Receive Multi Precision
+ number x and result stored at y. */
static void
SECTION
-cc32(mp_no *x, mp_no *y, int p) {
+cc32 (mp_no *x, mp_no *y, int p)
+{
int i;
double a;
- mp_no mpt1,x2,gor,sum ,mpk={1,{1.0}};
- for (i=1;i<=p;i++) mpk.d[i]=0;
+ mp_no mpt1, x2, gor, sum, mpk = {1, {1.0}};
+ for (i = 1; i <= p; i++)
+ mpk.d[i] = 0;
- __sqr(x,&x2,p);
- mpk.d[1]=27.0;
- __mul(&oofac27,&mpk,&gor,p);
- __cpy(&gor,&sum,p);
- for (a=26.0;a>2.0;a-=2.0) {
- mpk.d[1]=a*(a-1.0);
- __mul(&gor,&mpk,&mpt1,p);
- __cpy(&mpt1,&gor,p);
- __mul(&x2,&sum,&mpt1,p);
- __sub(&gor,&mpt1,&sum,p);
- }
- __mul(&x2,&sum,y,p);
+ __sqr (x, &x2, p);
+ mpk.d[1] = 27.0;
+ __mul (&oofac27, &mpk, &gor, p);
+ __cpy (&gor, &sum, p);
+ for (a = 26.0; a > 2.0; a -= 2.0)
+ {
+ mpk.d[1] = a * (a - 1.0);
+ __mul (&gor, &mpk, &mpt1, p);
+ __cpy (&mpt1, &gor, p);
+ __mul (&x2, &sum, &mpt1, p);
+ __sub (&gor, &mpt1, &sum, p);
+ }
+ __mul (&x2, &sum, y, p);
}
-/***************************************************************************/
-/* c32() computes both sin(x), cos(x) as Multi precision numbers */
-/***************************************************************************/
+/* Compute both sin(x), cos(x) as Multi precision numbers. */
void
SECTION
-__c32(mp_no *x, mp_no *y, mp_no *z, int p) {
- mp_no u,t,t1,t2,c,s;
+__c32 (mp_no *x, mp_no *y, mp_no *z, int p)
+{
+ mp_no u, t, t1, t2, c, s;
int i;
- __cpy(x,&u,p);
- u.e=u.e-1;
- cc32(&u,&c,p);
- ss32(&u,&s,p);
- for (i=0;i<24;i++) {
- __mul(&c,&s,&t,p);
- __sub(&s,&t,&t1,p);
- __add(&t1,&t1,&s,p);
- __sub(&mptwo,&c,&t1,p);
- __mul(&t1,&c,&t2,p);
- __add(&t2,&t2,&c,p);
- }
- __sub(&mpone,&c,y,p);
- __cpy(&s,z,p);
+ __cpy (x, &u, p);
+ u.e = u.e - 1;
+ cc32 (&u, &c, p);
+ ss32 (&u, &s, p);
+ for (i = 0; i < 24; i++)
+ {
+ __mul (&c, &s, &t, p);
+ __sub (&s, &t, &t1, p);
+ __add (&t1, &t1, &s, p);
+ __sub (&mptwo, &c, &t1, p);
+ __mul (&t1, &c, &t2, p);
+ __add (&t2, &t2, &c, p);
+ }
+ __sub (&mpone, &c, y, p);
+ __cpy (&s, z, p);
}
-/************************************************************************/
-/*Routine receive double x and two double results of sin(x) and return */
-/*result which is more accurate */
-/*Computing sin(x) with multi precision routine c32 */
-/************************************************************************/
+/* Receive double x and two double results of sin(x) and return result which is
+ more accurate, computing sin(x) with multi precision routine c32. */
double
SECTION
-__sin32(double x, double res, double res1) {
+__sin32 (double x, double res, double res1)
+{
int p;
- mp_no a,b,c;
- p=32;
- __dbl_mp(res,&a,p);
- __dbl_mp(0.5*(res1-res),&b,p);
- __add(&a,&b,&c,p);
- if (x>0.8)
- { __sub(&hp,&c,&a,p);
- __c32(&a,&b,&c,p);
- }
- else __c32(&c,&a,&b,p); /* b=sin(0.5*(res+res1)) */
- __dbl_mp(x,&c,p); /* c = x */
- __sub(&b,&c,&a,p);
- /* if a>0 return min(res,res1), otherwise return max(res,res1) */
- if (a.d[0]>0) return (res<res1)?res:res1;
- else return (res>res1)?res:res1;
+ mp_no a, b, c;
+ p = 32;
+ __dbl_mp (res, &a, p);
+ __dbl_mp (0.5 * (res1 - res), &b, p);
+ __add (&a, &b, &c, p);
+ if (x > 0.8)
+ {
+ __sub (&hp, &c, &a, p);
+ __c32 (&a, &b, &c, p);
+ }
+ else
+ __c32 (&c, &a, &b, p); /* b=sin(0.5*(res+res1)) */
+ __dbl_mp (x, &c, p); /* c = x */
+ __sub (&b, &c, &a, p);
+ /* if a > 0 return min (res, res1), otherwise return max (res, res1). */
+ if (a.d[0] > 0)
+ return (res < res1) ? res : res1;
+ else
+ return (res > res1) ? res : res1;
}
-/************************************************************************/
-/*Routine receive double x and two double results of cos(x) and return */
-/*result which is more accurate */
-/*Computing cos(x) with multi precision routine c32 */
-/************************************************************************/
+/* Receive double x and two double results of cos(x) and return result which is
+ more accurate, computing cos(x) with multi precision routine c32. */
double
SECTION
-__cos32(double x, double res, double res1) {
+__cos32 (double x, double res, double res1)
+{
int p;
- mp_no a,b,c;
- p=32;
- __dbl_mp(res,&a,p);
- __dbl_mp(0.5*(res1-res),&b,p);
- __add(&a,&b,&c,p);
- if (x>2.4)
- { __sub(&pi,&c,&a,p);
- __c32(&a,&b,&c,p);
- b.d[0]=-b.d[0];
- }
- else if (x>0.8)
- { __sub(&hp,&c,&a,p);
- __c32(&a,&c,&b,p);
- }
- else __c32(&c,&b,&a,p); /* b=cos(0.5*(res+res1)) */
- __dbl_mp(x,&c,p); /* c = x */
- __sub(&b,&c,&a,p);
- /* if a>0 return max(res,res1), otherwise return min(res,res1) */
- if (a.d[0]>0) return (res>res1)?res:res1;
- else return (res<res1)?res:res1;
+ mp_no a, b, c;
+ p = 32;
+ __dbl_mp (res, &a, p);
+ __dbl_mp (0.5 * (res1 - res), &b, p);
+ __add (&a, &b, &c, p);
+ if (x > 2.4)
+ {
+ __sub (&pi, &c, &a, p);
+ __c32 (&a, &b, &c, p);
+ b.d[0] = -b.d[0];
+ }
+ else if (x > 0.8)
+ {
+ __sub (&hp, &c, &a, p);
+ __c32 (&a, &c, &b, p);
+ }
+ else
+ __c32 (&c, &b, &a, p); /* b=cos(0.5*(res+res1)) */
+ __dbl_mp (x, &c, p); /* c = x */
+ __sub (&b, &c, &a, p);
+ /* if a > 0 return max (res, res1), otherwise return min (res, res1). */
+ if (a.d[0] > 0)
+ return (res > res1) ? res : res1;
+ else
+ return (res < res1) ? res : res1;
}
-/*******************************************************************/
-/*Compute sin(x+dx) as Multi Precision number and return result as */
-/* double */
-/*******************************************************************/
+/* Compute sin() of double-length number (X + DX) as Multi Precision number and
+ return result as double. If REDUCE_RANGE is true, X is assumed to be the
+ original input and DX is ignored. */
double
SECTION
-__mpsin(double x, double dx) {
- int p;
+__mpsin (double x, double dx, bool reduce_range)
+{
double y;
- mp_no a,b,c;
- p=32;
- __dbl_mp(x,&a,p);
- __dbl_mp(dx,&b,p);
- __add(&a,&b,&c,p);
- if (x>0.8) { __sub(&hp,&c,&a,p); __c32(&a,&b,&c,p); }
- else __c32(&c,&a,&b,p); /* b = sin(x+dx) */
- __mp_dbl(&b,&y,p);
- return y;
-}
+ mp_no a, b, c, s;
+ int n;
+ int p = 32;
-/*******************************************************************/
-/* Compute cos()of double-length number (x+dx) as Multi Precision */
-/* number and return result as double */
-/*******************************************************************/
-double
-SECTION
-__mpcos(double x, double dx) {
- int p;
- double y;
- mp_no a,b,c;
- p=32;
- __dbl_mp(x,&a,p);
- __dbl_mp(dx,&b,p);
- __add(&a,&b,&c,p);
- if (x>0.8)
- { __sub(&hp,&c,&b,p);
- __c32(&b,&c,&a,p);
- }
- else __c32(&c,&a,&b,p); /* a = cos(x+dx) */
- __mp_dbl(&a,&y,p);
- return y;
-}
+ if (reduce_range)
+ {
+ n = __mpranred (x, &a, p); /* n is 0, 1, 2 or 3. */
+ __c32 (&a, &c, &s, p);
+ }
+ else
+ {
+ n = -1;
+ __dbl_mp (x, &b, p);
+ __dbl_mp (dx, &c, p);
+ __add (&b, &c, &a, p);
+ if (x > 0.8)
+ {
+ __sub (&hp, &a, &b, p);
+ __c32 (&b, &s, &c, p);
+ }
+ else
+ __c32 (&a, &c, &s, p); /* b = sin(x+dx) */
+ }
-/******************************************************************/
-/* mpranred() performs range reduction of a double number x into */
-/* multi precision number y, such that y=x-n*pi/2, abs(y)<pi/4, */
-/* n=0,+-1,+-2,.... */
-/* Return int which indicates in which quarter of circle x is */
-/******************************************************************/
-int
-SECTION
-__mpranred(double x, mp_no *y, int p)
-{
- number v;
- double t,xn;
- int i,k,n;
- mp_no a,b,c;
+ /* Convert result based on which quarter of unit circle y is in. */
+ switch (n)
+ {
+ case 1:
+ __mp_dbl (&c, &y, p);
+ break;
+
+ case 3:
+ __mp_dbl (&c, &y, p);
+ y = -y;
+ break;
+
+ case 2:
+ __mp_dbl (&s, &y, p);
+ y = -y;
+ break;
- if (ABS(x) < 2.8e14) {
- t = (x*hpinv.d + toint.d);
- xn = t - toint.d;
- v.d = t;
- n =v.i[LOW_HALF]&3;
- __dbl_mp(xn,&a,p);
- __mul(&a,&hp,&b,p);
- __dbl_mp(x,&c,p);
- __sub(&c,&b,y,p);
- return n;
- }
- else { /* if x is very big more precision required */
- __dbl_mp(x,&a,p);
- a.d[0]=1.0;
- k = a.e-5;
- if (k < 0) k=0;
- b.e = -k;
- b.d[0] = 1.0;
- for (i=0;i<p;i++) b.d[i+1] = toverp[i+k];
- __mul(&a,&b,&c,p);
- t = c.d[c.e];
- for (i=1;i<=p-c.e;i++) c.d[i]=c.d[i+c.e];
- for (i=p+1-c.e;i<=p;i++) c.d[i]=0;
- c.e=0;
- if (c.d[1] >= HALFRAD)
- { t +=1.0;
- __sub(&c,&mpone,&b,p);
- __mul(&b,&hp,y,p);
+ /* Quadrant not set, so the result must be sin (X + DX), which is also in
+ S. */
+ case 0:
+ default:
+ __mp_dbl (&s, &y, p);
}
- else __mul(&c,&hp,y,p);
- n = (int) t;
- if (x < 0) { y->d[0] = - y->d[0]; n = -n; }
- return (n&3);
- }
+ return y;
}
-/*******************************************************************/
-/* Multi-Precision sin() function subroutine, for p=32. It is */
-/* based on the routines mpranred() and c32(). */
-/*******************************************************************/
+/* Compute cos() of double-length number (X + DX) as Multi Precision number and
+ return result as double. If REDUCE_RANGE is true, X is assumed to be the
+ original input and DX is ignored. */
double
SECTION
-__mpsin1(double x)
+__mpcos (double x, double dx, bool reduce_range)
{
- int p;
- int n;
- mp_no u,s,c;
double y;
- p=32;
- n=__mpranred(x,&u,p); /* n is 0, 1, 2 or 3 */
- __c32(&u,&c,&s,p);
- switch (n) { /* in which quarter of unit circle y is*/
- case 0:
- __mp_dbl(&s,&y,p);
- return y;
- break;
+ mp_no a, b, c, s;
+ int n;
+ int p = 32;
- case 2:
- __mp_dbl(&s,&y,p);
- return -y;
- break;
+ if (reduce_range)
+ {
+ n = __mpranred (x, &a, p); /* n is 0, 1, 2 or 3. */
+ __c32 (&a, &c, &s, p);
+ }
+ else
+ {
+ n = -1;
+ __dbl_mp (x, &b, p);
+ __dbl_mp (dx, &c, p);
+ __add (&b, &c, &a, p);
+ if (x > 0.8)
+ {
+ __sub (&hp, &a, &b, p);
+ __c32 (&b, &s, &c, p);
+ }
+ else
+ __c32 (&a, &c, &s, p); /* a = cos(x+dx) */
+ }
- case 1:
- __mp_dbl(&c,&y,p);
- return y;
- break;
+ /* Convert result based on which quarter of unit circle y is in. */
+ switch (n)
+ {
+ case 1:
+ __mp_dbl (&s, &y, p);
+ y = -y;
+ break;
- case 3:
- __mp_dbl(&c,&y,p);
- return -y;
- break;
+ case 3:
+ __mp_dbl (&s, &y, p);
+ break;
- }
- return 0; /* unreachable, to make the compiler happy */
-}
+ case 2:
+ __mp_dbl (&c, &y, p);
+ y = -y;
+ break;
-/*****************************************************************/
-/* Multi-Precision cos() function subroutine, for p=32. It is */
-/* based on the routines mpranred() and c32(). */
-/*****************************************************************/
+ /* Quadrant not set, so the result must be cos (X + DX), which is also
+ stored in C. */
+ case 0:
+ default:
+ __mp_dbl (&c, &y, p);
+ }
+ return y;
+}
-double
+/* Perform range reduction of a double number x into multi precision number y,
+ such that y = x - n * pi / 2, abs (y) < pi / 4, n = 0, +-1, +-2, ...
+ Return int which indicates in which quarter of circle x is. */
+int
SECTION
-__mpcos1(double x)
+__mpranred (double x, mp_no *y, int p)
{
- int p;
- int n;
- mp_no u,s,c;
- double y;
-
- p=32;
- n=__mpranred(x,&u,p); /* n is 0, 1, 2 or 3 */
- __c32(&u,&c,&s,p);
- switch (n) { /* in what quarter of unit circle y is*/
-
- case 0:
- __mp_dbl(&c,&y,p);
- return y;
- break;
-
- case 2:
- __mp_dbl(&c,&y,p);
- return -y;
- break;
-
- case 1:
- __mp_dbl(&s,&y,p);
- return -y;
- break;
-
- case 3:
- __mp_dbl(&s,&y,p);
- return y;
- break;
+ number v;
+ double t, xn;
+ int i, k, n;
+ mp_no a, b, c;
- }
- return 0; /* unreachable, to make the compiler happy */
+ if (ABS (x) < 2.8e14)
+ {
+ t = (x * hpinv.d + toint.d);
+ xn = t - toint.d;
+ v.d = t;
+ n = v.i[LOW_HALF] & 3;
+ __dbl_mp (xn, &a, p);
+ __mul (&a, &hp, &b, p);
+ __dbl_mp (x, &c, p);
+ __sub (&c, &b, y, p);
+ return n;
+ }
+ else
+ {
+ /* If x is very big more precision required. */
+ __dbl_mp (x, &a, p);
+ a.d[0] = 1.0;
+ k = a.e - 5;
+ if (k < 0)
+ k = 0;
+ b.e = -k;
+ b.d[0] = 1.0;
+ for (i = 0; i < p; i++)
+ b.d[i + 1] = toverp[i + k];
+ __mul (&a, &b, &c, p);
+ t = c.d[c.e];
+ for (i = 1; i <= p - c.e; i++)
+ c.d[i] = c.d[i + c.e];
+ for (i = p + 1 - c.e; i <= p; i++)
+ c.d[i] = 0;
+ c.e = 0;
+ if (c.d[1] >= HALFRAD)
+ {
+ t += 1.0;
+ __sub (&c, &mpone, &b, p);
+ __mul (&b, &hp, y, p);
+ }
+ else
+ __mul (&c, &hp, y, p);
+ n = (int) t;
+ if (x < 0)
+ {
+ y->d[0] = -y->d[0];
+ n = -n;
+ }
+ return (n & 3);
+ }
}
-/******************************************************************/
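The quadrant convention that the rewritten __mpsin and __mpcos switch on comes from __mpranred: y = x - n * pi/2 with |y| < pi/4, and the return value is n mod 4. In the fast branch (|x| < 2.8e14) the integer n is recovered with the 1.5*2^52 "toint" trick. A plain double-precision analogue of that branch; the real code does the final subtraction in multi-precision via __mul/__sub, and every name below is made up for illustration.

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch of the fast branch of __mpranred: return n mod 4 and y ~ x - n*pi/2.
   Valid only for moderate |x|; no multi-precision arithmetic.  */
static int
quadrant_sketch (double x, double *y)
{
  static const double toint = 6755399441055744.0;      /* 1.5 * 2^52 */
  static const double hpinv = 0.63661977236758138243;  /* 2/pi       */
  static const double hp    = 1.5707963267948966;      /* pi/2       */
  double t = x * hpinv + toint;     /* low mantissa bits now hold n        */
  double xn = t - toint;            /* n as a double, rounded to nearest   */
  uint64_t bits;
  memcpy (&bits, &t, sizeof bits);
  *y = x - xn * hp;                 /* the real code does this step in mp  */
  return (int) (bits & 3);
}

int
main (void)
{
  double y;
  int n = quadrant_sketch (10.0, &y);
  double check = (n & 2 ? -1.0 : 1.0) * (n & 1 ? cos (y) : sin (y));
  printf ("n = %d, y = %.15g\n", n, y);
  printf ("sin(10) = %.15g, reconstructed = %.15g\n", sin (10.0), check);
  return 0;
}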
diff --git a/libc/sysdeps/ieee754/dbl-64/slowexp.c b/libc/sysdeps/ieee754/dbl-64/slowexp.c
index 8f353f634..525224f44 100644
--- a/libc/sysdeps/ieee754/dbl-64/slowexp.c
+++ b/libc/sysdeps/ieee754/dbl-64/slowexp.c
@@ -29,6 +29,8 @@
/**************************************************************************/
#include <math_private.h>
+#include <stap-probe.h>
+
#ifndef USE_LONG_DOUBLE_FOR_MP
# include "mpa.h"
void __mpexp (mp_no *x, mp_no *y, int p);
@@ -60,13 +62,22 @@ __slowexp (double x)
__mp_dbl (&mpw, &w, p);
__mp_dbl (&mpz, &z, p);
if (w == z)
- return w;
+ {
+ /* Track how often we get to the slow exp code plus
+ its input/output values. */
+ LIBC_PROBE (slowexp_p6, 2, &x, &w);
+ return w;
+ }
else
{
p = 32;
__dbl_mp (x, &mpx, p);
__mpexp (&mpx, &mpy, p);
__mp_dbl (&mpy, &res, p);
+
+ /* Track how often we get to the uber-slow exp code plus
+ its input/output values. */
+ LIBC_PROBE (slowexp_p32, 2, &x, &res);
return res;
}
#else
diff --git a/libc/sysdeps/ieee754/dbl-64/slowpow.c b/libc/sysdeps/ieee754/dbl-64/slowpow.c
index a379728b1..d200c39e5 100644
--- a/libc/sysdeps/ieee754/dbl-64/slowpow.c
+++ b/libc/sysdeps/ieee754/dbl-64/slowpow.c
@@ -34,6 +34,8 @@
#include "mpa.h"
#include <math_private.h>
+#include <stap-probe.h>
+
#ifndef SECTION
# define SECTION
#endif
@@ -97,7 +99,12 @@ __slowpow (double x, double y, double z)
__sub (&mpp, &eps, &mpr1, p);
__mp_dbl (&mpr1, &res1, p);
if (res == res1)
- return res;
+ {
+ /* Track how often we get to the slow pow code plus
+ its input/output values. */
+ LIBC_PROBE (slowpow_p10, 4, &x, &y, &z, &res);
+ return res;
+ }
/* If we don't, then we repeat using a higher precision. 768 bits of
precision ought to be enough for anybody. */
@@ -109,5 +116,10 @@ __slowpow (double x, double y, double z)
__mul (&mpy, &mpz, &mpw, p);
__mpexp (&mpw, &mpp, p);
__mp_dbl (&mpp, &res, p);
+
+ /* Track how often we get to the uber-slow pow code plus
+ its input/output values. */
+ LIBC_PROBE (slowpow_p32, 4, &x, &y, &z, &res);
+
return res;
}
diff --git a/libc/sysdeps/ieee754/ldbl-128/printf_fphex.c b/libc/sysdeps/ieee754/ldbl-128/printf_fphex.c
index c9e09a4b7..e82228a53 100644
--- a/libc/sysdeps/ieee754/ldbl-128/printf_fphex.c
+++ b/libc/sysdeps/ieee754/ldbl-128/printf_fphex.c
@@ -24,13 +24,15 @@ do { \
digits we use only the implicit digits for the number before \
the decimal point. */ \
unsigned long long int num0, num1; \
+ union ieee854_long_double u; \
+ u.d = fpnum.ldbl; \
\
assert (sizeof (long double) == 16); \
\
- num0 = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \
- | fpnum.ldbl.ieee.mantissa1); \
- num1 = (((unsigned long long int) fpnum.ldbl.ieee.mantissa2) << 32 \
- | fpnum.ldbl.ieee.mantissa3); \
+ num0 = (((unsigned long long int) u.ieee.mantissa0) << 32 \
+ | u.ieee.mantissa1); \
+ num1 = (((unsigned long long int) u.ieee.mantissa2) << 32 \
+ | u.ieee.mantissa3); \
\
zero_mantissa = (num0|num1) == 0; \
\
@@ -75,9 +77,9 @@ do { \
*--wnumstr = L'0'; \
} \
\
- leading = fpnum.ldbl.ieee.exponent == 0 ? '0' : '1'; \
+ leading = u.ieee.exponent == 0 ? '0' : '1'; \
\
- exponent = fpnum.ldbl.ieee.exponent; \
+ exponent = u.ieee.exponent; \
\
if (exponent == 0) \
{ \
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
index abc78a35b..8a4a5bb7b 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
@@ -36,8 +36,12 @@ __ieee754_acoshl(long double x)
{
long double t;
int64_t hx;
- u_int64_t lx;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ uint64_t lx;
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
if(hx<0x3ff0000000000000LL) { /* x < 1 */
return (x-x)/(x-x);
} else if(hx >=0x41b0000000000000LL) { /* x > 2**28 */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_acosl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_acosl.c
index 5d2af3034..2cb288238 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_acosl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_acosl.c
@@ -151,26 +151,27 @@ static const long double
long double
__ieee754_acosl (long double x)
{
- long double z, r, w, p, q, s, t, f2;
- ieee854_long_double_shape_type u;
+ long double a, z, r, w, p, q, s, t, f2;
- u.value = __builtin_fabsl (x);
- if (u.value == 1.0L)
+ if (__glibc_unlikely (__isnanl (x)))
+ return x + x;
+ a = __builtin_fabsl (x);
+ if (a == 1.0L)
{
if (x > 0.0L)
return 0.0; /* acos(1) = 0 */
else
return (2.0 * pio2_hi) + (2.0 * pio2_lo); /* acos(-1)= pi */
}
- else if (u.value > 1.0L)
+ else if (a > 1.0L)
{
return (x - x) / (x - x); /* acos(|x| > 1) is NaN */
}
- if (u.value < 0.5L)
+ if (a < 0.5L)
{
- if (u.value < 6.938893903907228e-18L) /* |x| < 2**-57 */
+ if (a < 6.938893903907228e-18L) /* |x| < 2**-57 */
return pio2_hi + pio2_lo;
- if (u.value < 0.4375L)
+ if (a < 0.4375L)
{
/* Arcsine of x. */
z = x * x;
@@ -199,7 +200,7 @@ __ieee754_acosl (long double x)
return z;
}
/* .4375 <= |x| < .5 */
- t = u.value - 0.4375L;
+ t = a - 0.4375L;
p = ((((((((((P10 * t
+ P9) * t
+ P8) * t
@@ -230,9 +231,9 @@ __ieee754_acosl (long double x)
r = acosr4375 + r;
return r;
}
- else if (u.value < 0.625L)
+ else if (a < 0.625L)
{
- t = u.value - 0.5625L;
+ t = a - 0.5625L;
p = ((((((((((rS10 * t
+ rS9) * t
+ rS8) * t
@@ -264,7 +265,9 @@ __ieee754_acosl (long double x)
}
else
{ /* |x| >= .625 */
- z = (one - u.value) * 0.5;
+ double shi, slo;
+
+ z = (one - a) * 0.5;
s = __ieee754_sqrtl (z);
/* Compute an extended precision square root from
the Newton iteration s -> 0.5 * (s + z / s).
@@ -273,12 +276,11 @@ __ieee754_acosl (long double x)
Express s = f1 + f2 where f1 * f1 is exactly representable.
w = (z - s^2)/2s = (z - f1^2 - 2 f1 f2 - f2^2)/2s .
s + w has extended precision. */
- u.value = s;
- u.parts32.w2 = 0;
- u.parts32.w3 = 0;
- f2 = s - u.value;
- w = z - u.value * u.value;
- w = w - 2.0 * u.value * f2;
+ ldbl_unpack (s, &shi, &slo);
+ a = shi;
+ f2 = slo;
+ w = z - a * a;
+ w = w - 2.0 * a * f2;
w = w - f2 * f2;
w = w / (2.0 * s);
/* Arcsine of s. */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_asinl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_asinl.c
index b39543949..dece11875 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_asinl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_asinl.c
@@ -131,19 +131,20 @@ static const long double
long double
__ieee754_asinl (long double x)
{
- long double t, w, p, q, c, r, s;
+ long double a, t, w, p, q, c, r, s;
int flag;
- ieee854_long_double_shape_type u;
+ if (__glibc_unlikely (__isnanl (x)))
+ return x + x;
flag = 0;
- u.value = __builtin_fabsl (x);
- if (u.value == 1.0L) /* |x|>= 1 */
+ a = __builtin_fabsl (x);
+ if (a == 1.0L) /* |x|>= 1 */
return x * pio2_hi + x * pio2_lo; /* asin(1)=+-pi/2 with inexact */
- else if (u.value >= 1.0L)
+ else if (a >= 1.0L)
return (x - x) / (x - x); /* asin(|x|>1) is NaN */
- else if (u.value < 0.5L)
+ else if (a < 0.5L)
{
- if (u.value < 6.938893903907228e-18L) /* |x| < 2**-57 */
+ if (a < 6.938893903907228e-18L) /* |x| < 2**-57 */
{
if (huge + x > one)
return x; /* return x with inexact if x!=0 */
@@ -155,9 +156,9 @@ __ieee754_asinl (long double x)
flag = 1;
}
}
- else if (u.value < 0.625L)
+ else if (a < 0.625L)
{
- t = u.value - 0.5625;
+ t = a - 0.5625;
p = ((((((((((rS10 * t
+ rS9) * t
+ rS8) * t
@@ -190,7 +191,7 @@ __ieee754_asinl (long double x)
else
{
/* 1 > |x| >= 0.625 */
- w = one - u.value;
+ w = one - a;
t = w * 0.5;
}
@@ -223,17 +224,14 @@ __ieee754_asinl (long double x)
}
s = __ieee754_sqrtl (t);
- if (u.value > 0.975L)
+ if (a > 0.975L)
{
w = p / q;
t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
}
else
{
- u.value = s;
- u.parts32.w3 = 0;
- u.parts32.w2 = 0;
- w = u.value;
+ w = ldbl_high (s);
c = (t - w * w) / (s + w);
r = p / q;
p = 2.0 * s * r - (pio2_lo - 2.0 * c);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c
index 3e0535561..b625323df 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c
@@ -56,11 +56,15 @@ __ieee754_atan2l(long double y, long double x)
{
long double z;
int64_t k,m,hx,hy,ix,iy;
- u_int64_t lx,ly;
+ uint64_t lx;
+ double xhi, xlo, yhi;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
ix = hx&0x7fffffffffffffffLL;
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64 (hy, yhi);
iy = hy&0x7fffffffffffffffLL;
if(((ix)>0x7ff0000000000000LL)||
((iy)>0x7ff0000000000000LL)) /* x or y is NaN */
@@ -70,7 +74,7 @@ __ieee754_atan2l(long double y, long double x)
m = ((hy>>63)&1)|((hx>>62)&2); /* 2*sign(x)+sign(y) */
/* when y = 0 */
- if((iy|(ly&0x7fffffffffffffffLL))==0) {
+ if(iy==0) {
switch(m) {
case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */
@@ -79,7 +83,7 @@ __ieee754_atan2l(long double y, long double x)
}
}
/* when x = 0 */
- if((ix|(lx&0x7fffffffffffffff))==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
+ if(ix==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
/* when x is INF */
if(ix==0x7ff0000000000000LL) {
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c
index f35182f03..29f2e9207 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c
@@ -40,8 +40,10 @@ __ieee754_atanhl(long double x)
{
long double t;
int64_t hx,ix;
- u_int64_t lx __attribute__ ((unused));
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx&0x7fffffffffffffffLL;
if (ix >= 0x3ff0000000000000LL) { /* |x|>=1 */
if (ix > 0x3ff0000000000000LL)
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_coshl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_coshl.c
index 3e8e1875c..05683bc02 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_coshl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_coshl.c
@@ -41,9 +41,11 @@ __ieee754_coshl (long double x)
{
long double t,w;
int64_t ix;
+ double xhi;
/* High word of |x|. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
ix &= 0x7fffffffffffffffLL;
/* x is INF or NaN */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c
index 1eaf2fe39..49121ca31 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c
@@ -36,9 +36,9 @@ __ieee754_exp10l (long double arg)
else if (arg > LDBL_MAX_10_EXP + 1)
return LDBL_MAX * LDBL_MAX;
- u.d = arg;
- arg_high = u.dd[0];
- arg_low = u.dd[1];
+ u.ld = arg;
+ arg_high = u.d[0].d;
+ arg_low = u.d[1].d;
exp_high = arg_high * log10_high;
exp_low = arg_high * log10_low + arg_low * M_LN10l;
return __ieee754_expl (exp_high) * __ieee754_expl (exp_low);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_expl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_expl.c
index 1b994cd7a..f7c50bfd3 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_expl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_expl.c
@@ -162,39 +162,39 @@ __ieee754_expl (long double x)
x = x + xl;
/* Compute ex2 = 2^n_0 e^(argtable[tval1]) e^(argtable[tval2]). */
- ex2_u.d = __expl_table[T_EXPL_RES1 + tval1]
- * __expl_table[T_EXPL_RES2 + tval2];
+ ex2_u.ld = (__expl_table[T_EXPL_RES1 + tval1]
+ * __expl_table[T_EXPL_RES2 + tval2]);
n_i = (int)n;
/* 'unsafe' is 1 iff n_1 != 0. */
unsafe = fabsl(n_i) >= -LDBL_MIN_EXP - 1;
- ex2_u.ieee.exponent += n_i >> unsafe;
+ ex2_u.d[0].ieee.exponent += n_i >> unsafe;
/* Fortunately, there are no subnormal lowpart doubles in
__expl_table, only normal values and zeros.
But after scaling it can be subnormal. */
- exponent2 = ex2_u.ieee.exponent2 + (n_i >> unsafe);
- if (ex2_u.ieee.exponent2 == 0)
- /* assert ((ex2_u.ieee.mantissa2|ex2_u.ieee.mantissa3) == 0) */;
+ exponent2 = ex2_u.d[1].ieee.exponent + (n_i >> unsafe);
+ if (ex2_u.d[1].ieee.exponent == 0)
+ /* assert ((ex2_u.d[1].ieee.mantissa0|ex2_u.d[1].ieee.mantissa1) == 0) */;
else if (exponent2 > 0)
- ex2_u.ieee.exponent2 = exponent2;
+ ex2_u.d[1].ieee.exponent = exponent2;
else if (exponent2 <= -54)
{
- ex2_u.ieee.exponent2 = 0;
- ex2_u.ieee.mantissa2 = 0;
- ex2_u.ieee.mantissa3 = 0;
+ ex2_u.d[1].ieee.exponent = 0;
+ ex2_u.d[1].ieee.mantissa0 = 0;
+ ex2_u.d[1].ieee.mantissa1 = 0;
}
else
{
static const double
two54 = 1.80143985094819840000e+16, /* 4350000000000000 */
twom54 = 5.55111512312578270212e-17; /* 3C90000000000000 */
- ex2_u.dd[1] *= two54;
- ex2_u.ieee.exponent2 += n_i >> unsafe;
- ex2_u.dd[1] *= twom54;
+ ex2_u.d[1].d *= two54;
+ ex2_u.d[1].ieee.exponent += n_i >> unsafe;
+ ex2_u.d[1].d *= twom54;
}
/* Compute scale = 2^n_1. */
- scale_u.d = 1.0L;
- scale_u.ieee.exponent += n_i - (n_i >> unsafe);
+ scale_u.ld = 1.0L;
+ scale_u.d[0].ieee.exponent += n_i - (n_i >> unsafe);
/* Approximate e^x2 - 1, using a seventh-degree polynomial,
with maximum error in [-2^-16-2^-53,2^-16+2^-53]
@@ -204,7 +204,7 @@ __ieee754_expl (long double x)
/* Return result. */
fesetenv (&oldenv);
- result = x22 * ex2_u.d + ex2_u.d;
+ result = x22 * ex2_u.ld + ex2_u.ld;
/* Now we can test whether the result is ultimate or if we are unsure.
In the later case we should probably call a mpn based routine to give
@@ -238,7 +238,7 @@ __ieee754_expl (long double x)
if (!unsafe)
return result;
else
- return result * scale_u.d;
+ return result * scale_u.ld;
}
/* Exceptional cases: */
else if (isless (x, himark))
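
The exponent manipulation above now goes through the ieee754_double members of the reworked union, but the underlying trick is unchanged: scale by 2^n by adding n to the biased exponent field of a normal double. A minimal sketch, assuming glibc's installed <ieee754.h> and a result that stays within the normal range (scale_by_exponent is an illustrative name):

#include <ieee754.h>

static double
scale_by_exponent (double d, int n)
{
  union ieee754_double u;
  u.d = d;
  u.ieee.exponent += n;   /* caller guarantees no overflow/underflow */
  return u.d;
}
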
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c
index a60963c84..a140fb322 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c
@@ -27,76 +27,83 @@ static const long double one = 1.0, Zero[] = {0.0, -0.0,};
long double
__ieee754_fmodl (long double x, long double y)
{
- int64_t n,hx,hy,hz,ix,iy,sx, i;
- u_int64_t lx,ly,lz;
- int temp;
+ int64_t hx, hy, hz, sx, sy;
+ uint64_t lx, ly, lz;
+ int n, ix, iy;
+ double xhi, xlo, yhi, ylo;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
+ ldbl_unpack (y, &yhi, &ylo);
+ EXTRACT_WORDS64 (hy, yhi);
+ EXTRACT_WORDS64 (ly, ylo);
sx = hx&0x8000000000000000ULL; /* sign of x */
- hx ^=sx; /* |x| */
- hy &= 0x7fffffffffffffffLL; /* |y| */
+ hx ^= sx; /* |x| */
+ sy = hy&0x8000000000000000ULL; /* sign of y */
+ hy ^= sy; /* |y| */
/* purge off exception values */
- if(__builtin_expect((hy|(ly&0x7fffffffffffffff))==0 ||
+ if(__builtin_expect(hy==0 ||
(hx>=0x7ff0000000000000LL)|| /* y=0,or x not finite */
(hy>0x7ff0000000000000LL),0)) /* or y is NaN */
return (x*y)/(x*y);
- if(__builtin_expect(hx<=hy,0)) {
- if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */
- if(lx==ly)
- return Zero[(u_int64_t)sx>>63]; /* |x|=|y| return x*0*/
+ if (__builtin_expect (hx <= hy, 0))
+ {
+ /* If |x| < |y| return x. */
+ if (hx < hy)
+ return x;
+      /* At this point the absolute values of the high doubles of
+	 x and y must be equal.  */
+ /* If the low double of y is the same sign as the high
+ double of y (ie. the low double increases |y|)... */
+ if (((ly ^ sy) & 0x8000000000000000LL) == 0
+ /* ... then a different sign low double to high double
+ for x or same sign but lower magnitude... */
+ && (int64_t) (lx ^ sx) < (int64_t) (ly ^ sy))
+ /* ... means |x| < |y|. */
+ return x;
+ /* If the low double of x differs in sign to the high
+ double of x (ie. the low double decreases |x|)... */
+ if (((lx ^ sx) & 0x8000000000000000LL) != 0
+ /* ... then a different sign low double to high double
+ for y with lower magnitude (we've already caught
+ the same sign for y case above)... */
+ && (int64_t) (lx ^ sx) > (int64_t) (ly ^ sy))
+ /* ... means |x| < |y|. */
+ return x;
+ /* If |x| == |y| return x*0. */
+ if ((lx ^ sx) == (ly ^ sy))
+ return Zero[(uint64_t) sx >> 63];
}
- /* determine ix = ilogb(x) */
- if(__builtin_expect(hx<0x0010000000000000LL,0)) { /* subnormal x */
- if(hx==0) {
- for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
- } else {
- for (ix = -1022, i=(hx<<11); i>0; i<<=1) ix -=1;
- }
- } else ix = (hx>>52)-0x3ff;
-
- /* determine iy = ilogb(y) */
- if(__builtin_expect(hy<0x0010000000000000LL,0)) { /* subnormal y */
- if(hy==0) {
- for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
- } else {
- for (iy = -1022, i=(hy<<11); i>0; i<<=1) iy -=1;
- }
- } else iy = (hy>>52)-0x3ff;
-
/* Make the IBM extended format 105 bit mantissa look like the ieee854 112
bit mantissa so the following operations will give the correct
result. */
- ldbl_extract_mantissa(&hx, &lx, &temp, x);
- ldbl_extract_mantissa(&hy, &ly, &temp, y);
+ ldbl_extract_mantissa(&hx, &lx, &ix, x);
+ ldbl_extract_mantissa(&hy, &ly, &iy, y);
- /* set up {hx,lx}, {hy,ly} and align y to x */
- if(__builtin_expect(ix >= -1022, 1))
- hx = 0x0001000000000000LL|(0x0000ffffffffffffLL&hx);
- else { /* subnormal x, shift x to normal */
- n = -1022-ix;
- if(n<=63) {
- hx = (hx<<n)|(lx>>(64-n));
- lx <<= n;
- } else {
- hx = lx<<(n-64);
- lx = 0;
- }
- }
- if(__builtin_expect(iy >= -1022, 1))
- hy = 0x0001000000000000LL|(0x0000ffffffffffffLL&hy);
- else { /* subnormal y, shift y to normal */
- n = -1022-iy;
- if(n<=63) {
- hy = (hy<<n)|(ly>>(64-n));
- ly <<= n;
- } else {
- hy = ly<<(n-64);
- ly = 0;
- }
- }
+ if (__builtin_expect (ix == -IEEE754_DOUBLE_BIAS, 0))
+ {
+ /* subnormal x, shift x to normal. */
+ while ((hx & (1LL << 48)) == 0)
+ {
+ hx = (hx << 1) | (lx >> 63);
+ lx = lx << 1;
+ ix -= 1;
+ }
+ }
+
+ if (__builtin_expect (iy == -IEEE754_DOUBLE_BIAS, 0))
+ {
+ /* subnormal y, shift y to normal. */
+ while ((hy & (1LL << 48)) == 0)
+ {
+ hy = (hy << 1) | (ly >> 63);
+ ly = ly << 1;
+ iy -= 1;
+ }
+ }
/* fix point fmod */
n = ix - iy;
@@ -104,7 +111,7 @@ __ieee754_fmodl (long double x, long double y)
hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
if(hz<0){hx = hx+hx+(lx>>63); lx = lx+lx;}
else {
- if((hz|(lz&0x7fffffffffffffff))==0) /* return sign(x)*0 */
+ if((hz|lz)==0) /* return sign(x)*0 */
return Zero[(u_int64_t)sx>>63];
hx = hz+hz+(lz>>63); lx = lz+lz;
}
@@ -113,7 +120,7 @@ __ieee754_fmodl (long double x, long double y)
if(hz>=0) {hx=hz;lx=lz;}
/* convert back to floating value and restore the sign */
- if((hx|(lx&0x7fffffffffffffff))==0) /* return sign(x)*0 */
+ if((hx|lx)==0) /* return sign(x)*0 */
return Zero[(u_int64_t)sx>>63];
while(hx<0x0001000000000000LL) { /* normalize x */
hx = hx+hx+(lx>>63); lx = lx+lx;
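
The new |x| <= |y| handling above compares the signed low doubles through their bit patterns (lx ^ sx versus ly ^ sy). The same test expressed in plain floating point, ignoring zeros and NaNs, looks like the sketch below; since |x| = |xhi| + sign(x)*xlo, equal high magnitudes reduce the comparison to the signed tails (ibm_abs_less is an illustrative name, not part of the patch):

/* |x| < |y| for two IBM long doubles, done on the component doubles
   rather than on the raw words as in the patch above.  */
static int
ibm_abs_less (long double x, long double y)
{
  double xhi = (double) x, yhi = (double) y;
  double xlo = (double) (x - (long double) xhi);
  double ylo = (double) (y - (long double) yhi);
  double sx = xhi < 0.0 ? -1.0 : 1.0;
  double sy = yhi < 0.0 ? -1.0 : 1.0;
  if (xhi * sx != yhi * sy)             /* |xhi| differs from |yhi| */
    return xhi * sx < yhi * sy;
  return xlo * sx < ylo * sy;           /* compare the signed tails */
}
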
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c
index 90d8e3f0d..84c13de9b 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c
@@ -122,11 +122,12 @@ long double
__ieee754_gammal_r (long double x, int *signgamp)
{
int64_t hx;
- u_int64_t lx;
+ double xhi;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
- if (((hx | lx) & 0x7fffffffffffffffLL) == 0)
+ if ((hx & 0x7fffffffffffffffLL) == 0)
{
/* Return value for x == 0 is Inf with divide by zero exception. */
*signgamp = 0;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c
index 768bd3b06..3b07a47b4 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c
@@ -45,76 +45,84 @@
#include <math.h>
#include <math_private.h>
-static const long double two600 = 0x1.0p+600L;
-static const long double two1022 = 0x1.0p+1022L;
-
long double
__ieee754_hypotl(long double x, long double y)
{
- long double a,b,t1,t2,y1,y2,w,kld;
+ long double a,b,a1,a2,b1,b2,w,kld;
int64_t j,k,ha,hb;
+ double xhi, yhi, hi, lo;
- GET_LDOUBLE_MSW64(ha,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ha, xhi);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64 (hb, yhi);
ha &= 0x7fffffffffffffffLL;
- GET_LDOUBLE_MSW64(hb,y);
hb &= 0x7fffffffffffffffLL;
if(hb > ha) {a=y;b=x;j=ha; ha=hb;hb=j;} else {a=x;b=y;}
a = fabsl(a); /* a <- |a| */
b = fabsl(b); /* b <- |b| */
- if((ha-hb)>0x780000000000000LL) {return a+b;} /* x/y > 2**120 */
+ if((ha-hb)>0x0780000000000000LL) {return a+b;} /* x/y > 2**120 */
k=0;
kld = 1.0L;
if(ha > 0x5f30000000000000LL) { /* a>2**500 */
if(ha >= 0x7ff0000000000000LL) { /* Inf or NaN */
- u_int64_t low;
w = a+b; /* for sNaN */
- GET_LDOUBLE_LSW64(low,a);
- if(((ha&0xfffffffffffffLL)|(low&0x7fffffffffffffffLL))==0)
+ if(ha == 0x7ff0000000000000LL)
w = a;
- GET_LDOUBLE_LSW64(low,b);
- if(((hb^0x7ff0000000000000LL)|(low&0x7fffffffffffffffLL))==0)
+ if(hb == 0x7ff0000000000000LL)
w = b;
return w;
}
/* scale a and b by 2**-600 */
- ha -= 0x2580000000000000LL; hb -= 0x2580000000000000LL; k += 600;
- a /= two600;
- b /= two600;
- k += 600;
- kld = two600;
+ a *= 0x1p-600L;
+ b *= 0x1p-600L;
+ k = 600;
+ kld = 0x1p+600L;
}
- if(hb < 0x23d0000000000000LL) { /* b < 2**-450 */
+ else if(hb < 0x23d0000000000000LL) { /* b < 2**-450 */
if(hb <= 0x000fffffffffffffLL) { /* subnormal b or 0 */
- u_int64_t low;
- GET_LDOUBLE_LSW64(low,b);
- if((hb|(low&0x7fffffffffffffffLL))==0) return a;
- t1=two1022; /* t1=2^1022 */
- b *= t1;
- a *= t1;
- k -= 1022;
- kld = kld / two1022;
+ if(hb==0) return a;
+ a *= 0x1p+1022L;
+ b *= 0x1p+1022L;
+ k = -1022;
+ kld = 0x1p-1022L;
} else { /* scale a and b by 2^600 */
- ha += 0x2580000000000000LL; /* a *= 2^600 */
- hb += 0x2580000000000000LL; /* b *= 2^600 */
- k -= 600;
- a *= two600;
- b *= two600;
- kld = kld / two600;
+ a *= 0x1p+600L;
+ b *= 0x1p+600L;
+ k = -600;
+ kld = 0x1p-600L;
}
}
/* medium size a and b */
w = a-b;
if (w>b) {
- SET_LDOUBLE_WORDS64(t1,ha,0);
- t2 = a-t1;
- w = __ieee754_sqrtl(t1*t1-(b*(-b)-t2*(a+t1)));
+ ldbl_unpack (a, &hi, &lo);
+ a1 = hi;
+ a2 = lo;
+ /* a*a + b*b
+ = (a1+a2)*a + b*b
+ = a1*a + a2*a + b*b
+ = a1*(a1+a2) + a2*a + b*b
+ = a1*a1 + a1*a2 + a2*a + b*b
+ = a1*a1 + a2*(a+a1) + b*b */
+ w = __ieee754_sqrtl(a1*a1-(b*(-b)-a2*(a+a1)));
} else {
a = a+a;
- SET_LDOUBLE_WORDS64(y1,hb,0);
- y2 = b - y1;
- SET_LDOUBLE_WORDS64(t1,ha+0x0010000000000000LL,0);
- t2 = a - t1;
- w = __ieee754_sqrtl(t1*y1-(w*(-w)-(t1*y2+t2*b)));
+ ldbl_unpack (b, &hi, &lo);
+ b1 = hi;
+ b2 = lo;
+ ldbl_unpack (a, &hi, &lo);
+ a1 = hi;
+ a2 = lo;
+ /* a*a + b*b
+ = a*a + (a-b)*(a-b) - (a-b)*(a-b) + b*b
+ = a*a + w*w - (a*a - 2*a*b + b*b) + b*b
+ = w*w + 2*a*b
+ = w*w + (a1+a2)*b
+ = w*w + a1*b + a2*b
+ = w*w + a1*(b1+b2) + a2*b
+ = w*w + a1*b1 + a1*b2 + a2*b */
+ w = __ieee754_sqrtl(a1*b1-(w*(-w)-(a1*b2+a2*b)));
}
if(k!=0)
return w*kld;
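
The comments above already derive the rearrangements a*a + b*b = a1*a1 + a2*(a+a1) + b*b and w*w + a1*b1 + a1*b2 + a2*b; both are exact algebra once a (or b) is split into head and tail. A small check program under that assumption (not part of the patch):

#include <stdio.h>

int
main (void)
{
  long double a = 3.0000000123L, b = 4.0L;
  double a1 = (double) a;                 /* head of a */
  long double a2 = a - a1;                /* tail of a */
  long double lhs = a * a + b * b;
  long double rhs = (long double) a1 * a1 + a2 * (a + a1) + b * b;
  /* The two values agree to long double precision.  */
  printf ("%.25Lg\n%.25Lg\n", lhs, rhs);
  return 0;
}
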
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c
index 55f87ed42..aeace7c97 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c
@@ -31,26 +31,24 @@ static char rcsid[] = "$NetBSD: $";
int __ieee754_ilogbl(long double x)
{
- int64_t hx,lx;
+ int64_t hx;
int ix;
+ double xhi;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
hx &= 0x7fffffffffffffffLL;
if(hx <= 0x0010000000000000LL) {
- if((hx|(lx&0x7fffffffffffffffLL))==0)
+ if(hx==0)
return FP_ILOGB0; /* ilogbl(0) = FP_ILOGB0 */
else /* subnormal x */
- if(hx==0) {
- for (ix = -1043; lx>0; lx<<=1) ix -=1;
- } else {
- for (ix = -1022, hx<<=11; hx>0; hx<<=1) ix -=1;
- }
+ for (ix = -1022, hx<<=11; hx>0; hx<<=1) ix -=1;
return ix;
}
else if (hx<0x7ff0000000000000LL) return (hx>>52)-0x3ff;
else if (FP_ILOGBNAN != INT_MAX) {
/* ISO C99 requires ilogbl(+-Inf) == INT_MAX. */
- if (((hx^0x7ff0000000000000LL)|lx) == 0)
+ if (hx==0x7ff0000000000000LL)
return INT_MAX;
}
return FP_ILOGBNAN;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_jnl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_jnl.c
index 40012e41e..817977da5 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_jnl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_jnl.c
@@ -70,26 +70,25 @@ static const long double
long double
__ieee754_jnl (int n, long double x)
{
- u_int32_t se;
+ uint32_t se, lx;
int32_t i, ix, sgn;
long double a, b, temp, di;
long double z, w;
- ieee854_long_double_shape_type u;
+ double xhi;
/* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
* Thus, J(-n,x) = J(n,-x)
*/
- u.value = x;
- se = u.parts32.w0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (se, lx, xhi);
ix = se & 0x7fffffff;
/* if J(n,NaN) is NaN */
if (ix >= 0x7ff00000)
{
- if ((u.parts32.w0 & 0xfffff) | u.parts32.w1
- | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3)
+ if (((ix - 0x7ff00000) | lx) != 0)
return x + x;
}
@@ -298,21 +297,20 @@ strong_alias (__ieee754_jnl, __jnl_finite)
long double
__ieee754_ynl (int n, long double x)
{
- u_int32_t se;
+ uint32_t se, lx;
int32_t i, ix;
int32_t sign;
long double a, b, temp;
- ieee854_long_double_shape_type u;
+ double xhi;
- u.value = x;
- se = u.parts32.w0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (se, lx, xhi);
ix = se & 0x7fffffff;
/* if Y(n,NaN) is NaN */
if (ix >= 0x7ff00000)
{
- if ((u.parts32.w0 & 0xfffff) | u.parts32.w1
- | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3)
+ if (((ix - 0x7ff00000) | lx) != 0)
return x + x;
}
if (x <= 0.0L)
@@ -377,14 +375,16 @@ __ieee754_ynl (int n, long double x)
a = __ieee754_y0l (x);
b = __ieee754_y1l (x);
/* quit if b is -inf */
- u.value = b;
- se = u.parts32.w0 & 0xfff00000;
+ xhi = ldbl_high (b);
+ GET_HIGH_WORD (se, xhi);
+ se &= 0xfff00000;
for (i = 1; i < n && se != 0xfff00000; i++)
{
temp = b;
b = ((long double) (i + i) / x) * b - a;
- u.value = b;
- se = u.parts32.w0 & 0xfff00000;
+ xhi = ldbl_high (b);
+ GET_HIGH_WORD (se, xhi);
+ se &= 0xfff00000;
a = temp;
}
}
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_log10l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_log10l.c
index fae774cea..1a6a4a0fa 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_log10l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_log10l.c
@@ -182,11 +182,13 @@ __ieee754_log10l (long double x)
long double z;
long double y;
int e;
- int64_t hx, lx;
+ int64_t hx;
+ double xhi;
/* Test for domain */
- GET_LDOUBLE_WORDS64 (hx, lx, x);
- if (((hx & 0x7fffffffffffffffLL) | (lx & 0x7fffffffffffffffLL)) == 0)
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ if ((hx & 0x7fffffffffffffffLL) == 0)
return (-1.0L / (x - x));
if (hx < 0)
return (x - x) / (x - x);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_log2l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_log2l.c
index f0098f6c7..323ded0c0 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_log2l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_log2l.c
@@ -177,11 +177,13 @@ __ieee754_log2l (x)
long double z;
long double y;
int e;
- int64_t hx, lx;
+ int64_t hx;
+ double xhi;
/* Test for domain */
- GET_LDOUBLE_WORDS64 (hx, lx, x);
- if (((hx & 0x7fffffffffffffffLL) | (lx & 0x7fffffffffffffffLL)) == 0)
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ if ((hx & 0x7fffffffffffffffLL) == 0)
return (-1.0L / (x - x));
if (hx < 0)
return (x - x) / (x - x);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_logl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_logl.c
index 15b5edfab..b7db2b978 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_logl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_logl.c
@@ -188,18 +188,20 @@ static const long double
long double
__ieee754_logl(long double x)
{
- long double z, y, w;
- ieee854_long_double_shape_type u, t;
+ long double z, y, w, t;
unsigned int m;
int k, e;
+ double xhi;
+ uint32_t hx, lx;
- u.value = x;
- m = u.parts32.w0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (hx, lx, xhi);
+ m = hx;
/* Check for IEEE special cases. */
k = m & 0x7fffffff;
/* log(0) = -infinity. */
- if ((k | u.parts32.w1 | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3) == 0)
+ if ((k | lx) == 0)
{
return -0.5L / ZERO;
}
@@ -219,7 +221,7 @@ __ieee754_logl(long double x)
{
z = x - 1.0L;
k = 64;
- t.value = 1.0L;
+ t = 1.0L;
e = 0;
}
else
@@ -236,10 +238,8 @@ __ieee754_logl(long double x)
k = (m - 0xff000) >> 13;
/* t is the argument 0.5 + (k+26)/128
of the nearest item to u in the lookup table. */
- t.parts32.w0 = 0x3ff00000 + (k << 13);
- t.parts32.w1 = 0;
- t.parts32.w2 = 0;
- t.parts32.w3 = 0;
+ INSERT_WORDS (xhi, 0x3ff00000 + (k << 13), 0);
+ t = xhi;
w0 += 0x100000;
e -= 1;
k += 64;
@@ -247,17 +247,15 @@ __ieee754_logl(long double x)
else
{
k = (m - 0xfe000) >> 14;
- t.parts32.w0 = 0x3fe00000 + (k << 14);
- t.parts32.w1 = 0;
- t.parts32.w2 = 0;
- t.parts32.w3 = 0;
+ INSERT_WORDS (xhi, 0x3fe00000 + (k << 14), 0);
+ t = xhi;
}
- u.value = __scalbnl (u.value, ((int) ((w0 - u.parts32.w0) * 2)) >> 21);
+ x = __scalbnl (x, ((int) ((w0 - hx) * 2)) >> 21);
/* log(u) = log( t u/t ) = log(t) + log(u/t)
log(t) is tabulated in the lookup table.
Express log(u/t) = log(1+z), where z = u/t - 1 = (u-t)/t.
cf. Cody & Waite. */
- z = (u.value - t.value) / t.value;
+ z = (x - t) / t;
}
/* Series expansion of log(1+z). */
w = z * z;
@@ -284,7 +282,7 @@ __ieee754_logl(long double x)
y += e * ln2b; /* Base 2 exponent offset times ln(2). */
y += z;
y += logtbl[k-26]; /* log(t) - (t-1) */
- y += (t.value - 1.0L);
+ y += (t - 1.0L);
y += e * ln2a;
return y;
}
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_powl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_powl.c
index 8bd35d0c8..c942f2f24 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_powl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_powl.c
@@ -151,37 +151,32 @@ __ieee754_powl (long double x, long double y)
long double y1, t1, t2, r, s, t, u, v, w;
long double s2, s_h, s_l, t_h, t_l, ay;
int32_t i, j, k, yisint, n;
- u_int32_t ix, iy;
- int32_t hx, hy;
- ieee854_long_double_shape_type o, p, q;
+ uint32_t ix, iy;
+ int32_t hx, hy, hax;
+ double ohi, xhi, xlo, yhi, ylo;
+ uint32_t lx, ly, lj;
- p.value = x;
- hx = p.parts32.w0;
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS (hx, lx, xhi);
ix = hx & 0x7fffffff;
- q.value = y;
- hy = q.parts32.w0;
+ ldbl_unpack (y, &yhi, &ylo);
+ EXTRACT_WORDS (hy, ly, yhi);
iy = hy & 0x7fffffff;
-
/* y==zero: x**0 = 1 */
- if ((iy | q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
+ if ((iy | ly) == 0)
return one;
/* 1.0**y = 1; -1.0**+-Inf = 1 */
if (x == one)
return one;
- if (x == -1.0L && iy == 0x7ff00000
- && (q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
+ if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0)
return one;
/* +-NaN return x+y */
- if ((ix > 0x7ff00000)
- || ((ix == 0x7ff00000)
- && ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) != 0))
- || (iy > 0x7ff00000)
- || ((iy == 0x7ff00000)
- && ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) != 0)))
+ if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0)
+ || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0))
return x + y;
/* determine if y is an odd int when x < 0
@@ -192,7 +187,10 @@ __ieee754_powl (long double x, long double y)
yisint = 0;
if (hx < 0)
{
- if ((q.parts32.w2 & 0x7fffffff) >= 0x43400000) /* Low part >= 2^53 */
+ uint32_t low_ye;
+
+ GET_HIGH_WORD (low_ye, ylo);
+ if ((low_ye & 0x7fffffff) >= 0x43400000) /* Low part >= 2^53 */
yisint = 2; /* even integer y */
else if (iy >= 0x3ff00000) /* 1.0 */
{
@@ -207,42 +205,43 @@ __ieee754_powl (long double x, long double y)
}
}
+ ax = fabsl (x);
+
/* special value of y */
- if ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
+ if (ly == 0)
{
- if (iy == 0x7ff00000 && q.parts32.w1 == 0) /* y is +-inf */
+ if (iy == 0x7ff00000) /* y is +-inf */
{
- if (((ix - 0x3ff00000) | p.parts32.w1
- | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
- return y - y; /* inf**+-1 is NaN */
- else if (ix > 0x3ff00000 || fabsl (x) > 1.0L)
+ if (ax > one)
/* (|x|>1)**+-inf = inf,0 */
return (hy >= 0) ? y : zero;
else
/* (|x|<1)**-,+inf = inf,0 */
return (hy < 0) ? -y : zero;
}
- if (iy == 0x3ff00000)
- { /* y is +-1 */
- if (hy < 0)
- return one / x;
- else
- return x;
- }
- if (hy == 0x40000000)
- return x * x; /* y is 2 */
- if (hy == 0x3fe00000)
- { /* y is 0.5 */
- if (hx >= 0) /* x >= +0 */
- return __ieee754_sqrtl (x);
+ if (ylo == 0.0)
+ {
+ if (iy == 0x3ff00000)
+ { /* y is +-1 */
+ if (hy < 0)
+ return one / x;
+ else
+ return x;
+ }
+ if (hy == 0x40000000)
+ return x * x; /* y is 2 */
+ if (hy == 0x3fe00000)
+ { /* y is 0.5 */
+ if (hx >= 0) /* x >= +0 */
+ return __ieee754_sqrtl (x);
+ }
}
}
- ax = fabsl (x);
/* special value of x */
- if ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
+ if (lx == 0)
{
- if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000)
+ if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0))
{
z = ax; /*x is +-0,+-inf,+-1 */
if (hy < 0)
@@ -294,8 +293,8 @@ __ieee754_powl (long double x, long double y)
{
ax *= two113;
n -= 113;
- o.value = ax;
- ix = o.parts32.w0;
+ ohi = ldbl_high (ax);
+ GET_HIGH_WORD (ix, ohi);
}
n += ((ix) >> 20) - 0x3ff;
j = ix & 0x000fffff;
@@ -312,26 +311,19 @@ __ieee754_powl (long double x, long double y)
ix -= 0x00100000;
}
- o.value = ax;
- o.value = __scalbnl (o.value, ((int) ((ix - o.parts32.w0) * 2)) >> 21);
- ax = o.value;
+ ohi = ldbl_high (ax);
+ GET_HIGH_WORD (hax, ohi);
+ ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21);
/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
v = one / (ax + bp[k]);
s = u * v;
- s_h = s;
+ s_h = ldbl_high (s);
- o.value = s_h;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- s_h = o.value;
/* t_h=ax+bp[k] High */
t_h = ax + bp[k];
- o.value = t_h;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- t_h = o.value;
+ t_h = ldbl_high (t_h);
t_l = ax - (t_h - bp[k]);
s_l = v * ((u - s_h * t_h) - s_h * t_l);
/* compute log(ax) */
@@ -342,30 +334,21 @@ __ieee754_powl (long double x, long double y)
r += s_l * (s_h + s);
s2 = s_h * s_h;
t_h = 3.0 + s2 + r;
- o.value = t_h;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- t_h = o.value;
+ t_h = ldbl_high (t_h);
t_l = r - ((t_h - 3.0) - s2);
/* u+v = s*(1+...) */
u = s_h * t_h;
v = s_l * t_h + t_l * s;
/* 2/(3log2)*(s+...) */
p_h = u + v;
- o.value = p_h;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- p_h = o.value;
+ p_h = ldbl_high (p_h);
p_l = v - (p_h - u);
z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
z_l = cp_l * p_h + p_l * cp + dp_l[k];
/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
t = (long double) n;
t1 = (((z_h + z_l) + dp_h[k]) + t);
- o.value = t1;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- t1 = o.value;
+ t1 = ldbl_high (t1);
t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
/* s (sign of result -ve**odd) = -1 else = 1 */
@@ -374,21 +357,16 @@ __ieee754_powl (long double x, long double y)
s = -one; /* (-ve)**(odd int) */
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
- y1 = y;
- o.value = y1;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- y1 = o.value;
+ y1 = ldbl_high (y);
p_l = (y - y1) * t1 + y * t2;
p_h = y1 * t1;
z = p_l + p_h;
- o.value = z;
- j = o.parts32.w0;
+ ohi = ldbl_high (z);
+ EXTRACT_WORDS (j, lj, ohi);
if (j >= 0x40d00000) /* z >= 16384 */
{
/* if z > 16384 */
- if (((j - 0x40d00000) | o.parts32.w1
- | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
+ if (((j - 0x40d00000) | lj) != 0)
return s * huge * huge; /* overflow */
else
{
@@ -399,8 +377,7 @@ __ieee754_powl (long double x, long double y)
else if ((j & 0x7fffffff) >= 0x40d01b90) /* z <= -16495 */
{
/* z < -16495 */
- if (((j - 0xc0d01bc0) | o.parts32.w1
- | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
+ if (((j - 0xc0d01bc0) | lj) != 0)
return s * tiny * tiny; /* underflow */
else
{
@@ -419,10 +396,7 @@ __ieee754_powl (long double x, long double y)
p_h -= t;
}
t = p_l + p_h;
- o.value = t;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- t = o.value;
+ t = ldbl_high (t);
u = t * lg2_h;
v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
z = u + v;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c
index 6a72d6a85..36bc03226 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c
@@ -200,10 +200,11 @@ int32_t __ieee754_rem_pio2l(long double x, long double *y)
double tx[8];
int exp;
int64_t n, ix, hx, ixd;
- u_int64_t lx __attribute__ ((unused));
u_int64_t lxd;
+ double xhi;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx & 0x7fffffffffffffffLL;
if (ix <= 0x3fe921fb54442d10LL) /* x in <-pi/4, pi/4> */
{
@@ -243,7 +244,7 @@ int32_t __ieee754_rem_pio2l(long double x, long double *y)
We split the 113 bits of the mantissa into 5 24bit integers
stored in a double array. */
/* Make the IBM extended format 105 bit mantissa look like the ieee854 112
- bit mantissa so the next operatation will give the correct result. */
+ bit mantissa so the next operation will give the correct result. */
ldbl_extract_mantissa (&ixd, &lxd, &exp, x);
exp = exp - 23;
/* This is faster than doing this in floating point, because we
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c
index 67d7db7fb..800416f29 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c
@@ -33,18 +33,22 @@ __ieee754_remainderl(long double x, long double p)
int64_t hx,hp;
u_int64_t sx,lx,lp;
long double p_half;
+ double xhi, xlo, phi, plo;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- GET_LDOUBLE_WORDS64(hp,lp,p);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
+ ldbl_unpack (p, &phi, &plo);
+ EXTRACT_WORDS64 (hp, phi);
+ EXTRACT_WORDS64 (lp, plo);
sx = hx&0x8000000000000000ULL;
hp &= 0x7fffffffffffffffLL;
hx &= 0x7fffffffffffffffLL;
/* purge off exception values */
- if((hp|(lp&0x7fffffffffffffff))==0) return (x*p)/(x*p); /* p = 0 */
+ if(hp==0) return (x*p)/(x*p); /* p = 0 */
if((hx>=0x7ff0000000000000LL)|| /* x not finite */
- ((hp>=0x7ff0000000000000LL)&& /* p is NaN */
- (((hp-0x7ff0000000000000LL)|lp)!=0)))
+ (hp>0x7ff0000000000000LL)) /* p is NaN */
return (x*p)/(x*p);
@@ -64,8 +68,8 @@ __ieee754_remainderl(long double x, long double p)
if(x>=p_half) x -= p;
}
}
- GET_LDOUBLE_MSW64(hx,x);
- SET_LDOUBLE_MSW64(x,hx^sx);
+ if (sx)
+ x = -x;
return x;
}
strong_alias (__ieee754_remainderl, __remainderl_finite)
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c
index 4e8481c41..1790bef87 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c
@@ -38,9 +38,11 @@ __ieee754_sinhl(long double x)
{
long double t,w,h;
int64_t ix,jx;
+ double xhi;
/* High word of |x|. */
- GET_LDOUBLE_MSW64(jx,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (jx, xhi);
ix = jx&0x7fffffffffffffffLL;
/* x is INF or NaN */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c
index 2b0f7c62e..61feb367f 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c
@@ -34,15 +34,13 @@
#include <math_private.h>
-typedef unsigned int int4;
-typedef union {int4 i[4]; long double x; double d[2]; } mynumber;
+typedef union {int64_t i[2]; long double x; double d[2]; } mynumber;
-static const mynumber
- t512 = {{0x5ff00000, 0x00000000, 0x00000000, 0x00000000 }}, /* 2^512 */
- tm256 = {{0x2ff00000, 0x00000000, 0x00000000, 0x00000000 }}; /* 2^-256 */
static const double
-two54 = 1.80143985094819840000e+16, /* 0x4350000000000000 */
-twom54 = 5.55111512312578270212e-17; /* 0x3C90000000000000 */
+ t512 = 0x1p512,
+ tm256 = 0x1p-256,
+ two54 = 0x1p54, /* 0x4350000000000000 */
+ twom54 = 0x1p-54; /* 0x3C90000000000000 */
/*********************************************************************/
/* An ultimate sqrt routine. Given an IEEE double machine number x */
@@ -54,56 +52,53 @@ long double __ieee754_sqrtl(long double x)
static const long double big = 134217728.0, big1 = 134217729.0;
long double t,s,i;
mynumber a,c;
- int4 k, l, m;
- int n;
+ uint64_t k, l;
+ int64_t m, n;
double d;
a.x=x;
- k=a.i[0] & 0x7fffffff;
+ k=a.i[0] & INT64_C(0x7fffffffffffffff);
/*----------------- 2^-1022 <= | x |< 2^1024 -----------------*/
- if (k>0x000fffff && k<0x7ff00000) {
+ if (k>INT64_C(0x000fffff00000000) && k<INT64_C(0x7ff0000000000000)) {
if (x < 0) return (big1-big1)/(big-big);
- l = (k&0x001fffff)|0x3fe00000;
- if (((a.i[2] & 0x7fffffff) | a.i[3]) != 0) {
- n = (int) ((l - k) * 2) >> 21;
- m = (a.i[2] >> 20) & 0x7ff;
+ l = (k&INT64_C(0x001fffffffffffff))|INT64_C(0x3fe0000000000000);
+ if ((a.i[1] & INT64_C(0x7fffffffffffffff)) != 0) {
+ n = (int64_t) ((l - k) * 2) >> 53;
+ m = (a.i[1] >> 52) & 0x7ff;
if (m == 0) {
a.d[1] *= two54;
- m = ((a.i[2] >> 20) & 0x7ff) - 54;
+ m = ((a.i[1] >> 52) & 0x7ff) - 54;
}
m += n;
- if ((int) m > 0)
- a.i[2] = (a.i[2] & 0x800fffff) | (m << 20);
- else if ((int) m <= -54) {
- a.i[2] &= 0x80000000;
- a.i[3] = 0;
+ if (m > 0)
+ a.i[1] = (a.i[1] & INT64_C(0x800fffffffffffff)) | (m << 52);
+ else if (m <= -54) {
+ a.i[1] &= INT64_C(0x8000000000000000);
} else {
m += 54;
- a.i[2] = (a.i[2] & 0x800fffff) | (m << 20);
+ a.i[1] = (a.i[1] & INT64_C(0x800fffffffffffff)) | (m << 52);
a.d[1] *= twom54;
}
}
a.i[0] = l;
s = a.x;
d = __ieee754_sqrt (a.d[0]);
- c.i[0] = 0x20000000+((k&0x7fe00000)>>1);
+ c.i[0] = INT64_C(0x2000000000000000)+((k&INT64_C(0x7fe0000000000000))>>1);
c.i[1] = 0;
- c.i[2] = 0;
- c.i[3] = 0;
i = d;
t = 0.5L * (i + s / i);
i = 0.5L * (t + s / t);
return c.x * i;
}
else {
- if (k>=0x7ff00000) {
- if (a.i[0] == 0xfff00000 && a.i[1] == 0)
+ if (k>=INT64_C(0x7ff0000000000000)) {
+ if (a.i[0] == INT64_C(0xfff0000000000000))
return (big1-big1)/(big-big); /* sqrt (-Inf) = NaN. */
return x; /* sqrt (NaN) = NaN, sqrt (+Inf) = +Inf. */
}
if (x == 0) return x;
if (x < 0) return (big1-big1)/(big-big);
- return tm256.x*__ieee754_sqrtl(x*t512.x);
+ return tm256*__ieee754_sqrtl(x*t512);
}
}
strong_alias (__ieee754_sqrtl, __sqrtl_finite)
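
Stripped of the exponent scaling, the core of the routine above seeds with the double-precision sqrt of the high part and recovers the remaining bits with two Newton steps in long double. A minimal sketch for arguments already near 1 (sqrtl_newton_sketch is an illustrative name):

#include <math.h>

static long double
sqrtl_newton_sketch (long double s)
{
  long double i = sqrt ((double) s);   /* ~53-bit approximation */
  long double t = 0.5L * (i + s / i);  /* first Newton step */
  return 0.5L * (t + s / t);           /* second Newton step */
}
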
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/ieee754.h b/libc/sysdeps/ieee754/ldbl-128ibm/ieee754.h
index 9e94f53b0..0c97a9920 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/ieee754.h
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/ieee754.h
@@ -111,61 +111,6 @@ union ieee754_double
#define IEEE754_DOUBLE_BIAS 0x3ff /* Added to exponent. */
-union ieee854_long_double
- {
- long double d;
-
- /* This is the IEEE 854 quad-precision format. */
- struct
- {
-#if __BYTE_ORDER == __BIG_ENDIAN
- unsigned int negative:1;
- unsigned int exponent:15;
- /* Together these comprise the mantissa. */
- unsigned int mantissa0:16;
- unsigned int mantissa1:32;
- unsigned int mantissa2:32;
- unsigned int mantissa3:32;
-#endif /* Big endian. */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- /* Together these comprise the mantissa. */
- unsigned int mantissa3:32;
- unsigned int mantissa2:32;
- unsigned int mantissa1:32;
- unsigned int mantissa0:16;
- unsigned int exponent:15;
- unsigned int negative:1;
-#endif /* Little endian. */
- } ieee;
-
- /* This format makes it easier to see if a NaN is a signalling NaN. */
- struct
- {
-#if __BYTE_ORDER == __BIG_ENDIAN
- unsigned int negative:1;
- unsigned int exponent:15;
- unsigned int quiet_nan:1;
- /* Together these comprise the mantissa. */
- unsigned int mantissa0:15;
- unsigned int mantissa1:32;
- unsigned int mantissa2:32;
- unsigned int mantissa3:32;
-#else
- /* Together these comprise the mantissa. */
- unsigned int mantissa3:32;
- unsigned int mantissa2:32;
- unsigned int mantissa1:32;
- unsigned int mantissa0:15;
- unsigned int quiet_nan:1;
- unsigned int exponent:15;
- unsigned int negative:1;
-#endif
- } ieee_nan;
- };
-
-#define IEEE854_LONG_DOUBLE_BIAS 0x3fff /* Added to exponent. */
-
-
/* IBM extended format for long double.
Each long double is made up of two IEEE doubles. The value of the
@@ -179,49 +124,10 @@ union ieee854_long_double
union ibm_extended_long_double
{
- long double d;
- double dd[2];
-
- /* This is the IBM extended format long double. */
- struct
- { /* Big endian. There is no other. */
-
- unsigned int negative:1;
- unsigned int exponent:11;
- /* Together Mantissa0-3 comprise the mantissa. */
- unsigned int mantissa0:20;
- unsigned int mantissa1:32;
-
- unsigned int negative2:1;
- unsigned int exponent2:11;
- /* There is an implied 1 here? */
- /* Together these comprise the mantissa. */
- unsigned int mantissa2:20;
- unsigned int mantissa3:32;
- } ieee;
-
- /* This format makes it easier to see if a NaN is a signalling NaN. */
- struct
- { /* Big endian. There is no other. */
-
- unsigned int negative:1;
- unsigned int exponent:11;
- unsigned int quiet_nan:1;
- /* Together Mantissa0-3 comprise the mantissa. */
- unsigned int mantissa0:19;
- unsigned int mantissa1:32;
-
- unsigned int negative2:1;
- unsigned int exponent2:11;
- /* There is an implied 1 here? */
- /* Together these comprise the mantissa. */
- unsigned int mantissa2:20;
- unsigned int mantissa3:32;
- } ieee_nan;
+ long double ld;
+ union ieee754_double d[2];
};
-#define IBM_EXTENDED_LONG_DOUBLE_BIAS 0x3ff /* Added to exponent. */
-
__END_DECLS
#endif /* ieee754.h */
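
With the union reduced to a long double overlaid on two ieee754_double members, the component doubles and their bit fields are reached as u.d[0] and u.d[1]. A short sketch of the access pattern the rest of the patch relies on, assuming the ldbl-128ibm <ieee754.h> defined here (split_ibm_ldbl is an illustrative name; the real code uses ldbl_unpack):

#include <ieee754.h>

static void
split_ibm_ldbl (long double x, double *hi, double *lo)
{
  union ibm_extended_long_double u;
  u.ld = x;
  *hi = u.d[0].d;   /* high double: value rounded to double */
  *lo = u.d[1].d;   /* low double: the remainder */
}
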
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/k_cosl.c b/libc/sysdeps/ieee754/ldbl-128ibm/k_cosl.c
index 0b81782fd..046f3b573 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/k_cosl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/k_cosl.c
@@ -81,8 +81,11 @@ __kernel_cosl(long double x, long double y)
{
long double h, l, z, sin_l, cos_l_m1;
int64_t ix;
- u_int32_t tix, hix, index;
- GET_LDOUBLE_MSW64 (ix, x);
+ uint32_t tix, hix, index;
+ double xhi, hhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
tix = ((u_int64_t)ix) >> 32;
tix &= ~0x80000000; /* tix = |x|'s high 32 bits */
if (tix < 0x3fc30000) /* |x| < 0.1484375 */
@@ -136,7 +139,8 @@ __kernel_cosl(long double x, long double y)
case 2: index = (hix - 0x3fc30000) >> 14; break;
}
*/
- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0);
+ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32);
+ h = hhi;
l = y - (h - x);
z = l * l;
sin_l = l*(ONE+z*(SSIN1+z*(SSIN2+z*(SSIN3+z*(SSIN4+z*SSIN5)))));
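
INSERT_WORDS64 here builds a double h whose bit pattern is hix placed in the high 32 bits, i.e. the table argument nearest to x. A stand-alone equivalent using memcpy instead of the glibc-internal macro (double_from_high_word is an illustrative name):

#include <stdint.h>
#include <string.h>

static double
double_from_high_word (uint32_t hix)
{
  uint64_t bits = (uint64_t) hix << 32;   /* low 32 bits are zero */
  double h;
  memcpy (&h, &bits, sizeof h);           /* what INSERT_WORDS64 does */
  return h;
}
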
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c b/libc/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c
index fc1ead659..3ba9d7e90 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c
@@ -100,9 +100,12 @@ __kernel_sincosl(long double x, long double y, long double *sinx, long double *c
{
long double h, l, z, sin_l, cos_l_m1;
int64_t ix;
- u_int32_t tix, hix, index;
- GET_LDOUBLE_MSW64 (ix, x);
- tix = ((u_int64_t)ix) >> 32;
+ uint32_t tix, hix, index;
+ double xhi, hhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
+ tix = ((uint64_t)ix) >> 32;
tix &= ~0x80000000; /* tix = |x|'s high 32 bits */
if (tix < 0x3fc30000) /* |x| < 0.1484375 */
{
@@ -164,7 +167,8 @@ __kernel_sincosl(long double x, long double y, long double *sinx, long double *c
case 2: index = (hix - 0x3fc30000) >> 14; break;
}
*/
- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0);
+ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32);
+ h = hhi;
if (iy)
l = y - (h - x);
else
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/k_sinl.c b/libc/sysdeps/ieee754/ldbl-128ibm/k_sinl.c
index f17c0ae5d..b12ea134d 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/k_sinl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/k_sinl.c
@@ -82,7 +82,10 @@ __kernel_sinl(long double x, long double y, int iy)
long double h, l, z, sin_l, cos_l_m1;
int64_t ix;
u_int32_t tix, hix, index;
- GET_LDOUBLE_MSW64 (ix, x);
+ double xhi, hhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
tix = ((u_int64_t)ix) >> 32;
tix &= ~0x80000000; /* tix = |x|'s high 32 bits */
if (tix < 0x3fc30000) /* |x| < 0.1484375 */
@@ -132,7 +135,8 @@ __kernel_sinl(long double x, long double y, int iy)
case 2: index = (hix - 0x3fc30000) >> 14; break;
}
*/
- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0);
+ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32);
+ h = hhi;
if (iy)
l = (ix < 0 ? -y : y) - (h - x);
else
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/k_tanl.c b/libc/sysdeps/ieee754/ldbl-128ibm/k_tanl.c
index 1f6bad241..bcf8b5e7d 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/k_tanl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/k_tanl.c
@@ -85,17 +85,17 @@ long double
__kernel_tanl (long double x, long double y, int iy)
{
long double z, r, v, w, s;
- int32_t ix, sign;
- ieee854_long_double_shape_type u, u1;
+ int32_t ix, sign, hx, lx;
+ double xhi;
- u.value = x;
- ix = u.parts32.w0 & 0x7fffffff;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (hx, lx, xhi);
+ ix = hx & 0x7fffffff;
if (ix < 0x3c600000) /* x < 2**-57 */
{
- if ((int) x == 0)
- { /* generate inexact */
- if ((ix | u.parts32.w1 | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3
- | (iy + 1)) == 0)
+ if ((int) x == 0) /* generate inexact */
+ {
+ if ((ix | lx | (iy + 1)) == 0)
return one / fabs (x);
else
return (iy == 1) ? x : -one / x;
@@ -103,7 +103,7 @@ __kernel_tanl (long double x, long double y, int iy)
}
if (ix >= 0x3fe59420) /* |x| >= 0.6743316650390625 */
{
- if ((u.parts32.w0 & 0x80000000) != 0)
+ if ((hx & 0x80000000) != 0)
{
x = -x;
y = -y;
@@ -139,15 +139,13 @@ __kernel_tanl (long double x, long double y, int iy)
{ /* if allow error up to 2 ulp,
simply return -1.0/(x+r) here */
/* compute -1.0/(x+r) accurately */
- u1.value = w;
- u1.parts32.w2 = 0;
- u1.parts32.w3 = 0;
- v = r - (u1.value - x); /* u1+v = r+x */
+ long double u1, z1;
+
+ u1 = ldbl_high (w);
+ v = r - (u1 - x); /* u1+v = r+x */
z = -1.0 / w;
- u.value = z;
- u.parts32.w2 = 0;
- u.parts32.w3 = 0;
- s = 1.0 + u.value * u1.value;
- return u.value + z * (s + u.value * v);
+ z1 = ldbl_high (z);
+ s = 1.0 + z1 * u1;
+ return z1 + z * (s + z1 * v);
}
}
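
The rewritten tail of __kernel_tanl computes -1/(x+r) accurately by splitting both w = x + r and its reciprocal into head and tail. The same steps as a stand-alone sketch, assuming w = x + r as in the surrounding code (neg_recip_accurate is an illustrative name):

static long double
neg_recip_accurate (long double x, long double r)
{
  long double w = x + r;
  double u1 = (double) w;            /* head of w */
  long double v = r - (u1 - x);      /* tail: u1 + v == x + r */
  long double z = -1.0L / w;
  double z1 = (double) z;            /* head of z */
  long double s = 1.0L + (long double) z1 * u1;
  return z1 + z * (s + z1 * v);
}
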
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c b/libc/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c
index 00e44b8b9..e46fde74f 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c
@@ -36,34 +36,44 @@ __mpn_extract_long_double (mp_ptr res_ptr, mp_size_t size,
union ibm_extended_long_double u;
unsigned long long hi, lo;
int ediff;
- u.d = value;
- *is_neg = u.ieee.negative;
- *expt = (int) u.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS;
+ u.ld = value;
- lo = ((long long) u.ieee.mantissa2 << 32) | u.ieee.mantissa3;
- hi = ((long long) u.ieee.mantissa0 << 32) | u.ieee.mantissa1;
- /* If the lower double is not a denomal or zero then set the hidden
+ *is_neg = u.d[0].ieee.negative;
+ *expt = (int) u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS;
+
+ lo = ((long long) u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1;
+ hi = ((long long) u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1;
+
+ /* If the lower double is not a denormal or zero then set the hidden
53rd bit. */
- if (u.ieee.exponent2 > 0)
- {
- lo |= 1LL << 52;
+ if (u.d[1].ieee.exponent != 0)
+ lo |= 1ULL << 52;
+ else
+ lo = lo << 1;
- /* The lower double is normalized separately from the upper. We may
- need to adjust the lower manitissa to reflect this. */
- ediff = u.ieee.exponent - u.ieee.exponent2;
- if (ediff > 53)
- lo = lo >> (ediff-53);
+ /* The lower double is normalized separately from the upper. We may
+     need to adjust the lower mantissa to reflect this. */
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53;
+ if (ediff > 0)
+ {
+ if (ediff < 64)
+ lo = lo >> ediff;
+ else
+ lo = 0;
}
+ else if (ediff < 0)
+ lo = lo << -ediff;
+
/* The high double may be rounded and the low double reflects the
difference between the long double and the rounded high double
     value.  This is indicated by a difference between the signs of the
high and low doubles. */
- if ((u.ieee.negative != u.ieee.negative2)
- && ((u.ieee.exponent2 != 0) && (lo != 0L)))
+ if (u.d[0].ieee.negative != u.d[1].ieee.negative
+ && lo != 0)
{
lo = (1ULL << 53) - lo;
- if (hi == 0LL)
+ if (hi == 0)
{
/* we have a borrow from the hidden bit, so shift left 1. */
hi = 0x0ffffffffffffeLL | (lo >> 51);
@@ -92,7 +102,7 @@ __mpn_extract_long_double (mp_ptr res_ptr, mp_size_t size,
#define NUM_LEADING_ZEROS (BITS_PER_MP_LIMB \
- (LDBL_MANT_DIG - ((N - 1) * BITS_PER_MP_LIMB)))
- if (u.ieee.exponent == 0)
+ if (u.d[0].ieee.exponent == 0)
{
/* A biased exponent of zero is a special case.
Either it is a zero or it is a denormal number. */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h b/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
index 046293e59..1b6e27a9f 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
@@ -2,10 +2,13 @@
#error "Never use <math_ldbl.h> directly; include <math_private.h> instead."
#endif
-#include <sysdeps/ieee754/ldbl-128/math_ldbl.h>
#include <ieee754.h>
#include <stdint.h>
+/* To suit our callers we return *hi64 and *lo64 as if they came from
+ an ieee854 112 bit mantissa, that is, 48 bits in *hi64 (plus one
+ implicit bit) and 64 bits in *lo64. */
+
static inline void
ldbl_extract_mantissa (int64_t *hi64, uint64_t *lo64, int *exp, long double x)
{
@@ -14,77 +17,119 @@ ldbl_extract_mantissa (int64_t *hi64, uint64_t *lo64, int *exp, long double x)
the number before the decimal point and the second implicit bit
as bit 53 of the mantissa. */
uint64_t hi, lo;
- int ediff;
- union ibm_extended_long_double eldbl;
- eldbl.d = x;
- *exp = eldbl.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS;
-
- lo = ((int64_t)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3;
- hi = ((int64_t)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1;
- /* If the lower double is not a denomal or zero then set the hidden
- 53rd bit. */
- if (eldbl.ieee.exponent2 > 0x001)
- {
- lo |= (1ULL << 52);
- lo = lo << 7; /* pre-shift lo to match ieee854. */
- /* The lower double is normalized separately from the upper. We
- may need to adjust the lower manitissa to reflect this. */
- ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2;
- if (ediff > 53)
- lo = lo >> (ediff-53);
- hi |= (1ULL << 52);
- }
+ union ibm_extended_long_double u;
- if ((eldbl.ieee.negative != eldbl.ieee.negative2)
- && ((eldbl.ieee.exponent2 != 0) && (lo != 0LL)))
+ u.ld = x;
+ *exp = u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS;
+
+ lo = ((uint64_t) u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1;
+ hi = ((uint64_t) u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1;
+
+ if (u.d[0].ieee.exponent != 0)
{
- hi--;
- lo = (1ULL << 60) - lo;
- if (hi < (1ULL << 52))
+ int ediff;
+
+ /* If not a denormal or zero then we have an implicit 53rd bit. */
+ hi |= (uint64_t) 1 << 52;
+
+ if (u.d[1].ieee.exponent != 0)
+ lo |= (uint64_t) 1 << 52;
+ else
+ /* A denormal is to be interpreted as having a biased exponent
+ of 1. */
+ lo = lo << 1;
+
+ /* We are going to shift 4 bits out of hi later, because we only
+ want 48 bits in *hi64. That means we want 60 bits in lo, but
+ we currently only have 53. Shift the value up. */
+ lo = lo << 7;
+
+ /* The lower double is normalized separately from the upper.
+ We may need to adjust the lower mantissa to reflect this.
+ The difference between the exponents can be larger than 53
+ when the low double is much less than 1ULP of the upper
+ (in which case there are significant bits, all 0's or all
+ 1's, between the two significands). The difference between
+ the exponents can be less than 53 when the upper double
+ exponent is nearing its minimum value (in which case the low
+ double is denormal ie. has an exponent of zero). */
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53;
+ if (ediff > 0)
+ {
+ if (ediff < 64)
+ lo = lo >> ediff;
+ else
+ lo = 0;
+ }
+ else if (ediff < 0)
+ lo = lo << -ediff;
+
+ if (u.d[0].ieee.negative != u.d[1].ieee.negative
+ && lo != 0)
{
- /* we have a borrow from the hidden bit, so shift left 1. */
- hi = (hi << 1) | (lo >> 59);
- lo = 0xfffffffffffffffLL & (lo << 1);
- *exp = *exp - 1;
+ hi--;
+ lo = ((uint64_t) 1 << 60) - lo;
+ if (hi < (uint64_t) 1 << 52)
+ {
+ /* We have a borrow from the hidden bit, so shift left 1. */
+ hi = (hi << 1) | (lo >> 59);
+ lo = (((uint64_t) 1 << 60) - 1) & (lo << 1);
+ *exp = *exp - 1;
+ }
}
}
+ else
+ /* If the larger magnitude double is denormal then the smaller
+ one must be zero. */
+ hi = hi << 1;
+
*lo64 = (hi << 60) | lo;
*hi64 = hi >> 4;
}
static inline long double
-ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
+ldbl_insert_mantissa (int sign, int exp, int64_t hi64, uint64_t lo64)
{
union ibm_extended_long_double u;
- unsigned long hidden2, lzcount;
- unsigned long long hi, lo;
+ int expnt2;
+ uint64_t hi, lo;
+
+ u.d[0].ieee.negative = sign;
+ u.d[1].ieee.negative = sign;
+ u.d[0].ieee.exponent = exp + IEEE754_DOUBLE_BIAS;
+ u.d[1].ieee.exponent = 0;
+ expnt2 = exp - 53 + IEEE754_DOUBLE_BIAS;
- u.ieee.negative = sign;
- u.ieee.negative2 = sign;
- u.ieee.exponent = exp + IBM_EXTENDED_LONG_DOUBLE_BIAS;
- u.ieee.exponent2 = exp-53 + IBM_EXTENDED_LONG_DOUBLE_BIAS;
/* Expect 113 bits (112 bits + hidden) right justified in two longs.
The low order 53 bits (52 + hidden) go into the lower double */
- lo = (lo64 >> 7)& ((1ULL << 53) - 1);
- hidden2 = (lo64 >> 59) & 1ULL;
+ lo = (lo64 >> 7) & (((uint64_t) 1 << 53) - 1);
/* The high order 53 bits (52 + hidden) go into the upper double */
- hi = (lo64 >> 60) & ((1ULL << 11) - 1);
- hi |= (hi64 << 4);
+ hi = lo64 >> 60;
+ hi |= hi64 << 4;
- if (lo != 0LL)
+ if (lo != 0)
{
- /* hidden2 bit of low double controls rounding of the high double.
- If hidden2 is '1' then round up hi and adjust lo (2nd mantissa)
+ int lzcount;
+
+ /* hidden bit of low double controls rounding of the high double.
+ If hidden is '1' and either the explicit mantissa is non-zero
+ or hi is odd, then round up hi and adjust lo (2nd mantissa)
plus change the sign of the low double to compensate. */
- if (hidden2)
+ if ((lo & ((uint64_t) 1 << 52)) != 0
+ && ((hi & 1) != 0 || (lo & (((uint64_t) 1 << 52) - 1)) != 0))
{
hi++;
- u.ieee.negative2 = !sign;
- lo = (1ULL << 53) - lo;
+ if ((hi & ((uint64_t) 1 << 53)) != 0)
+ {
+ hi = hi >> 1;
+ u.d[0].ieee.exponent++;
+ }
+ u.d[1].ieee.negative = !sign;
+ lo = ((uint64_t) 1 << 53) - lo;
}
- /* The hidden bit of the lo mantissa is zero so we need to
- normalize the it for the low double. Shift it left until the
- hidden bit is '1' then adjust the 2nd exponent accordingly. */
+
+ /* Normalize the low double. Shift the mantissa left until
+ the hidden bit is '1' and adjust the exponent accordingly. */
if (sizeof (lo) == sizeof (long))
lzcount = __builtin_clzl (lo);
@@ -92,35 +137,31 @@ ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
lzcount = __builtin_clzl ((long) (lo >> 32));
else
lzcount = __builtin_clzl ((long) lo) + 32;
- lzcount = lzcount - 11;
- if (lzcount > 0)
+ lzcount = lzcount - (64 - 53);
+ lo <<= lzcount;
+ expnt2 -= lzcount;
+
+ if (expnt2 >= 1)
+ /* Not denormal. */
+ u.d[1].ieee.exponent = expnt2;
+ else
{
- int expnt2 = u.ieee.exponent2 - lzcount;
- if (expnt2 >= 1)
- {
- /* Not denormal. Normalize and set low exponent. */
- lo = lo << lzcount;
- u.ieee.exponent2 = expnt2;
- }
+ /* Is denormal. Note that biased exponent of 0 is treated
+ as if it was 1, hence the extra shift. */
+ if (expnt2 > -53)
+ lo >>= 1 - expnt2;
else
- {
- /* Is denormal. */
- lo = lo << (lzcount + expnt2);
- u.ieee.exponent2 = 0;
- }
+ lo = 0;
}
}
else
- {
- u.ieee.negative2 = 0;
- u.ieee.exponent2 = 0;
- }
+ u.d[1].ieee.negative = 0;
- u.ieee.mantissa3 = lo & ((1ULL << 32) - 1);
- u.ieee.mantissa2 = (lo >> 32) & ((1ULL << 20) - 1);
- u.ieee.mantissa1 = hi & ((1ULL << 32) - 1);
- u.ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1);
- return u.d;
+ u.d[1].ieee.mantissa1 = lo;
+ u.d[1].ieee.mantissa0 = lo >> 32;
+ u.d[0].ieee.mantissa1 = hi;
+ u.d[0].ieee.mantissa0 = hi >> 32;
+ return u.ld;
}
/* Handy utility functions to pack/unpack/canonicalize and find the nearbyint
@@ -129,18 +170,18 @@ static inline long double
default_ldbl_pack (double a, double aa)
{
union ibm_extended_long_double u;
- u.dd[0] = a;
- u.dd[1] = aa;
- return u.d;
+ u.d[0].d = a;
+ u.d[1].d = aa;
+ return u.ld;
}
static inline void
default_ldbl_unpack (long double l, double *a, double *aa)
{
union ibm_extended_long_double u;
- u.d = l;
- *a = u.dd[0];
- *aa = u.dd[1];
+ u.ld = l;
+ *a = u.d[0].d;
+ *aa = u.d[1].d;
}
#ifndef ldbl_pack
@@ -150,6 +191,9 @@ default_ldbl_unpack (long double l, double *a, double *aa)
# define ldbl_unpack default_ldbl_unpack
#endif
+/* Extract high double. */
+#define ldbl_high(x) ((double) x)
+
/* Convert a finite long double to canonical form.
Does not handle +/-Inf properly. */
static inline void
@@ -163,13 +207,13 @@ ldbl_canonicalize (double *a, double *aa)
*aa = xl;
}
-/* Simple inline nearbyint (double) function .
+/* Simple inline nearbyint (double) function.
Only works in the default rounding mode
but is useful in long double rounding functions. */
static inline double
ldbl_nearbyint (double a)
{
- double two52 = 0x10000000000000LL;
+ double two52 = 0x1p52;
if (__builtin_expect ((__builtin_fabs (a) < two52), 1))
{
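
The constant change above (0x1p52 for two52) feeds the usual round-to-nearest trick in ldbl_nearbyint: for |a| < 2^52, adding and then subtracting 2^52 leaves a rounded to an integer in the current rounding mode. A minimal sketch of that trick, assuming the default rounding mode and ignoring the sign of zero (nearbyint_sketch is an illustrative name):

static double
nearbyint_sketch (double a)
{
  const double two52 = 0x1p52;
  if (__builtin_fabs (a) < two52)
    {
      if (a > 0.0)
	{
	  a += two52;   /* forces rounding to an integer */
	  a -= two52;
	}
      else if (a < 0.0)
	{
	  a -= two52;
	  a += two52;
	}
    }
  return a;
}
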
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c b/libc/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c
index 3df42c566..c96852dfd 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c
@@ -33,11 +33,11 @@ __mpn_construct_long_double (mp_srcptr frac_ptr, int expt, int sign)
unsigned long long hi, lo;
int exponent2;
- u.ieee.negative = sign;
- u.ieee.negative2 = sign;
- u.ieee.exponent = expt + IBM_EXTENDED_LONG_DOUBLE_BIAS;
- u.ieee.exponent2 = 0;
- exponent2 = expt - 53 + IBM_EXTENDED_LONG_DOUBLE_BIAS;
+ u.d[0].ieee.negative = sign;
+ u.d[1].ieee.negative = sign;
+ u.d[0].ieee.exponent = expt + IEEE754_DOUBLE_BIAS;
+ u.d[1].ieee.exponent = 0;
+ exponent2 = expt - 53 + IEEE754_DOUBLE_BIAS;
#if BITS_PER_MP_LIMB == 32
/* The low order 53 bits (52 + hidden) go into the lower double */
@@ -69,19 +69,19 @@ __mpn_construct_long_double (mp_srcptr frac_ptr, int expt, int sign)
else
lzcount = __builtin_clzl ((long) val) + 32;
if (hi)
- lzcount = lzcount - 11;
+ lzcount = lzcount - (64 - 53);
else
- lzcount = lzcount + 42;
+ lzcount = lzcount + 53 - (64 - 53);
- if (lzcount > u.ieee.exponent)
+ if (lzcount > u.d[0].ieee.exponent)
{
- lzcount = u.ieee.exponent;
- u.ieee.exponent = 0;
+ lzcount = u.d[0].ieee.exponent;
+ u.d[0].ieee.exponent = 0;
exponent2 -= lzcount;
}
else
{
- u.ieee.exponent -= (lzcount - 1);
+ u.d[0].ieee.exponent -= (lzcount - 1);
exponent2 -= (lzcount - 1);
}
@@ -97,29 +97,27 @@ __mpn_construct_long_double (mp_srcptr frac_ptr, int expt, int sign)
}
}
- if (lo != 0L)
+ if (lo != 0)
{
- /* hidden2 bit of low double controls rounding of the high double.
- If hidden2 is '1' and either the explicit mantissa is non-zero
+ /* hidden bit of low double controls rounding of the high double.
+ If hidden is '1' and either the explicit mantissa is non-zero
or hi is odd, then round up hi and adjust lo (2nd mantissa)
plus change the sign of the low double to compensate. */
if ((lo & (1LL << 52)) != 0
- && ((hi & 1) != 0 || (lo & ((1LL << 52) - 1))))
+ && ((hi & 1) != 0 || (lo & ((1LL << 52) - 1)) != 0))
{
hi++;
- if ((hi & ((1LL << 52) - 1)) == 0)
+ if ((hi & (1LL << 53)) != 0)
{
- if ((hi & (1LL << 53)) != 0)
- hi -= 1LL << 52;
- u.ieee.exponent++;
+ hi >>= 1;
+ u.d[0].ieee.exponent++;
}
- u.ieee.negative2 = !sign;
+ u.d[1].ieee.negative = !sign;
lo = (1LL << 53) - lo;
}
- /* The hidden bit of the lo mantissa is zero so we need to normalize
- it for the low double. Shift it left until the hidden bit is '1'
- then adjust the 2nd exponent accordingly. */
+ /* Normalize the low double. Shift the mantissa left until
+ the hidden bit is '1' and adjust the exponent accordingly. */
if (sizeof (lo) == sizeof (long))
lzcount = __builtin_clzl (lo);
@@ -127,24 +125,24 @@ __mpn_construct_long_double (mp_srcptr frac_ptr, int expt, int sign)
lzcount = __builtin_clzl ((long) (lo >> 32));
else
lzcount = __builtin_clzl ((long) lo) + 32;
- lzcount = lzcount - 11;
- if (lzcount > 0)
- {
- lo = lo << lzcount;
- exponent2 = exponent2 - lzcount;
- }
+ lzcount = lzcount - (64 - 53);
+ lo <<= lzcount;
+ exponent2 -= lzcount;
+
if (exponent2 > 0)
- u.ieee.exponent2 = exponent2;
- else
+ u.d[1].ieee.exponent = exponent2;
+ else if (exponent2 > -53)
lo >>= 1 - exponent2;
+ else
+ lo = 0;
}
else
- u.ieee.negative2 = 0;
+ u.d[1].ieee.negative = 0;
- u.ieee.mantissa3 = lo & 0xffffffffLL;
- u.ieee.mantissa2 = (lo >> 32) & 0xfffff;
- u.ieee.mantissa1 = hi & 0xffffffffLL;
- u.ieee.mantissa0 = (hi >> 32) & ((1LL << (LDBL_MANT_DIG - 86)) - 1);
+ u.d[1].ieee.mantissa1 = lo;
+ u.d[1].ieee.mantissa0 = lo >> 32;
+ u.d[0].ieee.mantissa1 = hi;
+ u.d[0].ieee.mantissa0 = hi >> 32;
- return u.d;
+ return u.ld;
}
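
The mpn2ldbl.c change above drops the old single ieee854-style bit-field view of the IBM long double in favour of a pair of ieee754_double views (u.d[0] for the high half, u.d[1] for the low half), so both halves are handled with the ordinary IEEE754_DOUBLE_BIAS. A minimal sketch of the underlying representation, with made-up names (pair_t, split_pair) rather than the glibc union, and assuming a double-double style long double:

/* Sketch, not the glibc union: an IBM "double-double" long double is
   a pair of IEEE doubles with ld ~= hi + lo and |lo| <= 0.5*ulp(hi).
   pair_t and split_pair are illustrative names only. */
#include <stdio.h>

typedef struct { double hi, lo; } pair_t;

static pair_t
split_pair (long double x)
{
  pair_t p;
  p.hi = (double) x;                          /* high part: x rounded to double */
  p.lo = (double) (x - (long double) p.hi);   /* low part: the remainder */
  return p;
}

int
main (void)
{
  pair_t p = split_pair (1.0L / 3.0L);
  printf ("hi=%a lo=%a\n", p.hi, p.lo);
  return 0;
}
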
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c b/libc/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
index 247dc20e5..e0ec422b0 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
@@ -26,31 +26,31 @@ do { \
unsigned long long int num0, num1; \
unsigned long long hi, lo; \
int ediff; \
- union ibm_extended_long_double eldbl; \
- eldbl.d = fpnum.ldbl.d; \
+ union ibm_extended_long_double u; \
+ u.ld = fpnum.ldbl; \
\
assert (sizeof (long double) == 16); \
\
- lo = ((long long)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3; \
- hi = ((long long)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1; \
+ lo = ((long long)u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1; \
+ hi = ((long long)u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1; \
lo <<= 7; /* pre-shift lo to match ieee854. */ \
- /* If the lower double is not a denomal or zero then set the hidden \
+ /* If the lower double is not a denormal or zero then set the hidden \
53rd bit. */ \
- if (eldbl.ieee.exponent2 != 0) \
+ if (u.d[1].ieee.exponent != 0) \
lo |= (1ULL << (52 + 7)); \
else \
lo <<= 1; \
/* The lower double is normalized separately from the upper. We \
may need to adjust the lower manitissa to reflect this. */ \
may need to adjust the lower mantissa to reflect this. */ \
- ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2; \
- if (ediff > 53 + 63) \
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53; \
+ if (ediff > 63) \
lo = 0; \
- else if (ediff > 53) \
- lo = lo >> (ediff - 53); \
- else if (eldbl.ieee.exponent2 == 0 && ediff < 53) \
- lo = lo << (53 - ediff); \
- if (eldbl.ieee.negative != eldbl.ieee.negative2 \
- && (eldbl.ieee.exponent2 != 0 || lo != 0L)) \
+ else if (ediff > 0) \
+ lo = lo >> ediff; \
+ else if (ediff < 0) \
+ lo = lo << -ediff; \
+ if (u.d[0].ieee.negative != u.d[1].ieee.negative \
+ && lo != 0) \
{ \
lo = (1ULL << 60) - lo; \
if (hi == 0L) \
@@ -58,7 +58,7 @@ do { \
/* we have a borrow from the hidden bit, so shift left 1. */ \
hi = 0xffffffffffffeLL | (lo >> 59); \
lo = 0xfffffffffffffffLL & (lo << 1); \
- eldbl.ieee.exponent--; \
+ u.d[0].ieee.exponent--; \
} \
else \
hi--; \
@@ -109,9 +109,9 @@ do { \
*--wnumstr = L'0'; \
} \
\
- leading = eldbl.ieee.exponent == 0 ? '0' : '1'; \
+ leading = u.d[0].ieee.exponent == 0 ? '0' : '1'; \
\
- exponent = eldbl.ieee.exponent; \
+ exponent = u.d[0].ieee.exponent; \
\
if (exponent == 0) \
{ \
@@ -121,18 +121,18 @@ do { \
{ \
/* This is a denormalized number. */ \
expnegative = 1; \
- exponent = IBM_EXTENDED_LONG_DOUBLE_BIAS - 1; \
+ exponent = IEEE754_DOUBLE_BIAS - 1; \
} \
} \
- else if (exponent >= IBM_EXTENDED_LONG_DOUBLE_BIAS) \
+ else if (exponent >= IEEE754_DOUBLE_BIAS) \
{ \
expnegative = 0; \
- exponent -= IBM_EXTENDED_LONG_DOUBLE_BIAS; \
+ exponent -= IEEE754_DOUBLE_BIAS; \
} \
else \
{ \
expnegative = 1; \
- exponent = -(exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS); \
+ exponent = -(exponent - IEEE754_DOUBLE_BIAS); \
} \
} while (0)
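
The rewritten printf_fphex.c hunk aligns the low mantissa against the high one through ediff = exponent(high) - exponent(low) - 53 and handles a borrow when the two halves carry opposite signs. A simplified sketch of the exponent-difference step on an ordinary pair of doubles, ignoring the macro's extra 7-bit pre-shift; the example values are arbitrary:

/* How far right of the high mantissa does the low one sit once both
   are written with a common exponent?  53 accounts for the implicit
   width of the high mantissa, as in the patched macro. */
#include <math.h>
#include <stdio.h>

int
main (void)
{
  double hi = 0x1.8p+0, lo = 0x1.0p-60;   /* arbitrary example pair */
  int ehi, elo;
  frexp (hi, &ehi);
  frexp (lo, &elo);
  int ediff = (ehi - elo) - 53;
  printf ("shift low mantissa right by %d bits\n", ediff > 0 ? ediff : 0);
  return 0;
}
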
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c
index a833457ea..63c6edbb1 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c
@@ -38,7 +38,10 @@ long double __asinhl(long double x)
{
long double t,w;
int64_t hx,ix;
- GET_LDOUBLE_MSW64(hx,x);
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx&0x7fffffffffffffffLL;
if(ix>=0x7ff0000000000000LL) return x+x; /* x is inf or NaN */
if(ix< 0x3e20000000000000LL) { /* |x|<2**-29 */
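
This and most of the following conversions replace GET_LDOUBLE_MSW64, which read the raw 128-bit object, with ldbl_high plus EXTRACT_WORDS64: take the high double of the pair and reinterpret its bits as a 64-bit integer. A sketch of that idiom, with a made-up name get_high_word64 and assuming the IBM pair-of-doubles format (on which the high part is simply the value rounded to double):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint64_t
get_high_word64 (long double x)
{
  double hi = (double) x;              /* ldbl_high: the high double of the pair */
  uint64_t bits;
  memcpy (&bits, &hi, sizeof bits);    /* EXTRACT_WORDS64: reinterpret the bits */
  return bits;
}

int
main (void)
{
  uint64_t hx = get_high_word64 (-2.0L);
  /* Top bit is the sign, next 11 bits the biased exponent. */
  printf ("sign=%d exponent=%d\n",
          (int) (hx >> 63), (int) ((hx >> 52) & 0x7ff));
  return 0;
}
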
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_atanl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_atanl.c
index 2a36d16bb..41dde2399 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_atanl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_atanl.c
@@ -173,23 +173,20 @@ static const long double
long double
__atanl (long double x)
{
- int k, sign;
+ int32_t k, sign, lx;
long double t, u, p, q;
- ieee854_long_double_shape_type s;
+ double xhi;
- s.value = x;
- k = s.parts32.w0;
- if (k & 0x80000000)
- sign = 1;
- else
- sign = 0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (k, lx, xhi);
+ sign = k & 0x80000000;
/* Check for IEEE special cases. */
k &= 0x7fffffff;
if (k >= 0x7ff00000)
{
/* NaN. */
- if ((k & 0xfffff) | s.parts32.w1 )
+ if (((k - 0x7ff00000) | lx) != 0)
return (x + x);
/* Infinity. */
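
In the atanl hunk the NaN test becomes ((k - 0x7ff00000) | lx) != 0: once k is known to be at least 0x7ff00000, the subtraction is zero only for an exact infinity high word, and OR-ing in the low word catches any NaN payload stored there. A sketch of the same test on a plain double split into 32-bit words:

#include <math.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static int
is_nan_words (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  uint32_t hi = (uint32_t) (bits >> 32) & 0x7fffffff;
  uint32_t lo = (uint32_t) bits;
  if (hi < 0x7ff00000)
    return 0;                             /* finite */
  return ((hi - 0x7ff00000) | lo) != 0;   /* inf -> 0, NaN -> 1 */
}

int
main (void)
{
  printf ("%d %d %d\n", is_nan_words (NAN), is_nan_words (INFINITY),
          is_nan_words (1.5));
  return 0;
}
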
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_cosl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_cosl.c
index 23148392f..54c6cc77d 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_cosl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_cosl.c
@@ -53,9 +53,11 @@ long double __cosl(long double x)
{
long double y[2],z=0.0L;
int64_t n, ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_erfl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_erfl.c
index 6a4475ed6..c861c65cc 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_erfl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_erfl.c
@@ -760,16 +760,16 @@ long double
__erfl (long double x)
{
long double a, y, z;
- int32_t i, ix, sign;
- ieee854_long_double_shape_type u;
+ int32_t i, ix, hx;
+ double xhi;
- u.value = x;
- sign = u.parts32.w0;
- ix = sign & 0x7fffffff;
+ xhi = ldbl_high (x);
+ GET_HIGH_WORD (hx, xhi);
+ ix = hx & 0x7fffffff;
if (ix >= 0x7ff00000)
{ /* erf(nan)=nan */
- i = ((sign & 0xfff00000) >> 31) << 1;
+ i = ((uint32_t) hx >> 31) << 1;
return (long double) (1 - i) + one / x; /* erf(+-inf)=+-1 */
}
@@ -778,7 +778,7 @@ __erfl (long double x)
if (ix >= 0x4039A0DE)
{
/* __erfcl (x) underflows if x > 25.6283 */
- if (sign)
+ if ((hx & 0x80000000) == 0)
return one-tiny;
else
return tiny-one;
@@ -789,8 +789,9 @@ __erfl (long double x)
return (one - y);
}
}
- u.parts32.w0 = ix;
- a = u.value;
+ a = x;
+ if ((hx & 0x80000000) != 0)
+ a = -a;
z = x * x;
if (ix < 0x3fec0000) /* a < 0.875 */
{
@@ -814,7 +815,7 @@ __erfl (long double x)
y = erf_const + neval (a, TN2, NTN2) / deval (a, TD2, NTD2);
}
- if (sign & 0x80000000) /* x < 0 */
+ if (hx & 0x80000000) /* x < 0 */
y = -y;
return( y );
}
@@ -824,18 +825,18 @@ long double
__erfcl (long double x)
{
long double y, z, p, r;
- int32_t i, ix, sign;
- ieee854_long_double_shape_type u;
+ int32_t i, ix;
+ uint32_t hx;
+ double xhi;
- u.value = x;
- sign = u.parts32.w0;
- ix = sign & 0x7fffffff;
- u.parts32.w0 = ix;
+ xhi = ldbl_high (x);
+ GET_HIGH_WORD (hx, xhi);
+ ix = hx & 0x7fffffff;
if (ix >= 0x7ff00000)
{ /* erfc(nan)=nan */
/* erfc(+-inf)=0,2 */
- return (long double) (((u_int32_t) sign >> 31) << 1) + one / x;
+ return (long double) ((hx >> 31) << 1) + one / x;
}
if (ix < 0x3fd00000) /* |x| <1/4 */
@@ -846,7 +847,8 @@ __erfcl (long double x)
}
if (ix < 0x3ff40000) /* 1.25 */
{
- x = u.value;
+ if ((hx & 0x80000000) != 0)
+ x = -x;
i = 8.0 * x;
switch (i)
{
@@ -891,7 +893,7 @@ __erfcl (long double x)
y += C20a;
break;
}
- if (sign & 0x80000000)
+ if (hx & 0x80000000)
y = 2.0L - y;
return y;
}
@@ -899,10 +901,11 @@ __erfcl (long double x)
if (ix < 0x405ac000)
{
/* x < -9 */
- if ((ix >= 0x40220000) && (sign & 0x80000000))
+ if (hx >= 0xc0220000)
return two - tiny;
- x = fabsl (x);
+ if ((hx & 0x80000000) != 0)
+ x = -x;
z = one / (x * x);
i = 8.0 / x;
switch (i)
@@ -933,21 +936,17 @@ __erfcl (long double x)
p = neval (z, RNr8, NRNr8) / deval (z, RDr8, NRDr8);
break;
}
- u.value = x;
- u.parts32.w3 = 0;
- u.parts32.w2 = 0;
- u.parts32.w1 &= 0xf8000000;
- z = u.value;
+ z = (float) x;
r = __ieee754_expl (-z * z - 0.5625) *
__ieee754_expl ((z - x) * (z + x) + p);
- if ((sign & 0x80000000) == 0)
+ if ((hx & 0x80000000) == 0)
return r / x;
else
return two - r / x;
}
else
{
- if ((sign & 0x80000000) == 0)
+ if ((hx & 0x80000000) == 0)
return tiny * tiny;
else
return two - tiny;
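
In __erfcl the manual masking of u.parts32.w1 is replaced by z = (float) x: truncating x to float leaves so few significant bits that z * z is exact, and the product can then be corrected with (z - x) * (z + x), since -x*x == -z*z + (z - x) * (z + x). A sketch of that split on exp(-x*x); the test point is arbitrary:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 9.25e0 + 3e-9;       /* arbitrary test point */
  double z = (float) x;           /* few significant bits -> z*z is exact */
  double a = exp (-x * x);                             /* single big argument */
  double b = exp (-z * z) * exp ((z - x) * (z + x));   /* split form */
  printf ("%.17e\n%.17e\n", a, b);
  return 0;
}
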
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c
index 8808dcd89..007e78534 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c
@@ -92,19 +92,19 @@ long double
__expm1l (long double x)
{
long double px, qx, xx;
- int32_t ix, sign;
- ieee854_long_double_shape_type u;
+ int32_t ix, lx, sign;
int k;
+ double xhi;
/* Detect infinity and NaN. */
- u.value = x;
- ix = u.parts32.w0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (ix, lx, xhi);
sign = ix & 0x80000000;
ix &= 0x7fffffff;
if (ix >= 0x7ff00000)
{
/* Infinity. */
- if (((ix & 0xfffff) | u.parts32.w1 | (u.parts32.w2&0x7fffffff) | u.parts32.w3) == 0)
+ if (((ix - 0x7ff00000) | lx) == 0)
{
if (sign)
return -1.0L;
@@ -116,7 +116,7 @@ __expm1l (long double x)
}
/* expm1(+- 0) = +- 0. */
- if ((ix == 0) && (u.parts32.w1 | (u.parts32.w2&0x7fffffff) | u.parts32.w3) == 0)
+ if ((ix | lx) == 0)
return x;
/* Overflow. */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c
index 99146d802..c801c9706 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c
@@ -29,10 +29,16 @@ static char rcsid[] = "$NetBSD: $";
long double __fabsl(long double x)
{
u_int64_t hx, lx;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
lx = lx ^ ( hx & 0x8000000000000000LL );
hx = hx & 0x7fffffffffffffffLL;
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
long_double_symbol (libm, __fabsl, fabsl);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_finitel.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_finitel.c
index 8edb34154..7b4655fad 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_finitel.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_finitel.c
@@ -29,10 +29,14 @@ static char rcsid[] = "$NetBSD: $";
int
___finitel (long double x)
{
- int64_t hx;
- GET_LDOUBLE_MSW64(hx,x);
- return (int)((u_int64_t)((hx&0x7fffffffffffffffLL)
- -0x7ff0000000000000LL)>>63);
+ uint64_t hx;
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ hx &= 0x7fffffffffffffffLL;
+ hx -= 0x7ff0000000000000LL;
+ return hx >> 63;
}
hidden_ver (___finitel, __finitel)
weak_alias (___finitel, ____finitel)
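
The new __finitel (and the __isnanl below it) classify through an unsigned subtraction against the infinity bit pattern: the subtraction wraps exactly when the operand is on the wanted side of 0x7ff0000000000000, so the top bit of the result is the answer. A sketch of both directions on the bits of a plain double:

#include <math.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static int
finite_bits (double x)
{
  uint64_t hx;
  memcpy (&hx, &x, sizeof hx);
  hx &= 0x7fffffffffffffffULL;     /* |x| */
  hx -= 0x7ff0000000000000ULL;     /* wraps iff |x| < inf */
  return (int) (hx >> 63);         /* 1 = finite, 0 = inf/NaN */
}

static int
nan_bits (double x)
{
  uint64_t hx;
  memcpy (&hx, &x, sizeof hx);
  hx &= 0x7fffffffffffffffULL;
  hx = 0x7ff0000000000000ULL - hx; /* wraps iff |x| > inf, i.e. NaN */
  return (int) (hx >> 63);
}

int
main (void)
{
  printf ("%d %d %d\n", finite_bits (2.0), finite_bits (INFINITY),
          nan_bits (NAN));
  return 0;
}
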
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c
index f4a90b08c..90586e822 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c
@@ -46,8 +46,10 @@ ___fpclassifyl (long double x)
{
u_int64_t hx, lx;
int retval = FP_NORMAL;
+ double xhi, xlo;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
if ((hx & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL) {
/* +/-NaN or +/-Inf */
if (hx & 0x000fffffffffffffULL) {
@@ -65,6 +67,7 @@ ___fpclassifyl (long double x)
retval = FP_NORMAL;
} else {
if ((hx & 0x7ff0000000000000ULL) == 0x0360000000000000ULL) {
+ EXTRACT_WORDS64 (lx, xlo);
if ((lx & 0x7fffffffffffffff) /* lower is non-zero */
&& ((lx^hx) & 0x8000000000000000ULL)) { /* and sign differs */
/* +/- denormal */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c
index 3ac537411..7e40663fd 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c
@@ -36,16 +36,21 @@ two107 = 162259276829213363391578010288128.0; /* 0x4670000000000000, 0 */
long double __frexpl(long double x, int *eptr)
{
- u_int64_t hx, lx, ix, ixl;
+ uint64_t hx, lx, ix, ixl;
int64_t explo;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
ixl = 0x7fffffffffffffffULL&lx;
ix = 0x7fffffffffffffffULL&hx;
*eptr = 0;
- if(ix>=0x7ff0000000000000ULL||((ix|ixl)==0)) return x; /* 0,inf,nan */
+ if(ix>=0x7ff0000000000000ULL||ix==0) return x; /* 0,inf,nan */
if (ix<0x0010000000000000ULL) { /* subnormal */
x *= two107;
- GET_LDOUBLE_MSW64(hx,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx&0x7fffffffffffffffULL;
*eptr = -107;
}
@@ -54,7 +59,7 @@ long double __frexpl(long double x, int *eptr)
if (ixl != 0ULL) {
explo = (ixl>>52) - (ix>>52) + 0x3fe;
if ((ixl&0x7ff0000000000000ULL) == 0LL) {
- /* the lower double is a denomal so we need to correct its
+ /* the lower double is a denormal so we need to correct its
mantissa and perhaps its exponent. */
int cnt;
@@ -73,7 +78,9 @@ long double __frexpl(long double x, int *eptr)
lx = 0ULL;
hx = (hx&0x800fffffffffffffULL) | 0x3fe0000000000000ULL;
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
#ifdef IS_IN_libm
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c
index c8dd9ff98..54e72c916 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c
@@ -1,6 +1,7 @@
/*
* __isinf_nsl(x) returns != 0 if x is ±inf, else 0;
* no branching!
+ * slightly dodgy in relying on signed shift right copying sign bit
*/
#include <math.h>
@@ -9,8 +10,14 @@
int
__isinf_nsl (long double x)
{
- int64_t hx,lx;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- return !((lx & 0x7fffffffffffffffLL)
- | ((hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL));
+ double xhi;
+ int64_t hx, mask;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+
+ mask = (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL;
+ mask |= -mask;
+ mask >>= 63;
+ return ~mask;
}
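
The rewritten __isinf_nsl keeps the branch-free style of the old code: XOR the absolute-value bits with the infinity pattern, fold any non-zero result into the sign bit with mask |= -mask, and arithmetic-shift it across the word. A sketch on a plain double; as the added comment says, it leans on >> of a negative value being an arithmetic shift:

#include <math.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static int
isinf_bits (double x)
{
  int64_t hx, mask;
  memcpy (&hx, &x, sizeof hx);
  mask = (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL;
  mask |= -mask;        /* non-zero -> sign bit set */
  mask >>= 63;          /* non-zero -> -1, zero (i.e. inf) -> 0 */
  return (int) ~mask;   /* inf -> non-zero, otherwise 0 */
}

int
main (void)
{
  printf ("%d %d %d\n", isinf_bits (INFINITY) != 0,
          isinf_bits (3.5) != 0, isinf_bits (NAN) != 0);
  return 0;
}
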
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c
index 5f5b0144b..6a728221f 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c
@@ -11,6 +11,7 @@ static char rcsid[] = "$NetBSD: $";
/*
* isinfl(x) returns 1 if x is inf, -1 if x is -inf, else 0;
* no branching!
+ * slightly dodgy in relying on signed shift right copying sign bit
*/
#include <math.h>
@@ -20,12 +21,16 @@ static char rcsid[] = "$NetBSD: $";
int
___isinfl (long double x)
{
- int64_t hx,lx;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- lx = (lx & 0x7fffffffffffffffLL);
- lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL;
- lx |= -lx;
- return ~(lx >> 63) & (hx >> 62);
+ double xhi;
+ int64_t hx, mask;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+
+ mask = (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL;
+ mask |= -mask;
+ mask >>= 63;
+ return ~mask & (hx >> 62);
}
hidden_ver (___isinfl, __isinfl)
#ifndef IS_IN_libm
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c
index 264dec745..d12f1d3bf 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c
@@ -29,12 +29,14 @@ static char rcsid[] = "$NetBSD: $";
int
___isnanl (long double x)
{
- int64_t hx;
- int64_t lx __attribute__ ((unused));
- GET_LDOUBLE_WORDS64(hx,lx,x);
- hx &= 0x7fffffffffffffffLL;
- hx = 0x7ff0000000000000LL - hx;
- return (int)((u_int64_t)hx>>63);
+ uint64_t hx;
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ hx &= 0x7fffffffffffffffLL;
+ hx = 0x7ff0000000000000LL - hx;
+ return (int) (hx >> 63);
}
hidden_ver (___isnanl, __isnanl)
#ifndef IS_IN_libm
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c
index 96fab1aff..bdd58f8f2 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c
@@ -22,10 +22,13 @@
int
__issignalingl (long double x)
{
- u_int64_t xi;
+ uint64_t xi;
/* For inspecting NaN status, we only have to look at the first of the pair
of IEEE 754 64-bit precision numbers. */
- GET_LDOUBLE_MSW64 (xi, x);
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (xi, xhi);
#ifdef HIGH_ORDER_BIT_IS_SET_FOR_SNAN
# error untested
/* We only have to care about the high-order bit of x's significand, because
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c
index 77c4fdea8..a34638305 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c
@@ -126,19 +126,18 @@ long double
__log1pl (long double xm1)
{
long double x, y, z, r, s;
- ieee854_long_double_shape_type u;
- int32_t hx;
+ double xhi;
+ int32_t hx, lx;
int e;
/* Test for NaN or infinity input. */
- u.value = xm1;
- hx = u.parts32.w0;
+ xhi = ldbl_high (xm1);
+ EXTRACT_WORDS (hx, lx, xhi);
if (hx >= 0x7ff00000)
return xm1;
/* log1p(+- 0) = +- 0. */
- if (((hx & 0x7fffffff) == 0)
- && (u.parts32.w1 | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3) == 0)
+ if (((hx & 0x7fffffff) | lx) == 0)
return xm1;
x = xm1 + 1.0L;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_logbl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_logbl.c
index 6cbfcfa1c..e14028861 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_logbl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_logbl.c
@@ -27,9 +27,10 @@ long double
__logbl (long double x)
{
int64_t hx, rhx;
- int64_t lx __attribute__ ((unused));
+ double xhi;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
hx &= 0x7fffffffffffffffLL; /* high |x| */
if (hx == 0)
return -1.0 / fabs (x);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_modfl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_modfl.c
index 39de9d4bf..ed03ce236 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_modfl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_modfl.c
@@ -37,43 +37,54 @@ long double __modfl(long double x, long double *iptr)
{
int64_t i0,i1,j0;
u_int64_t i;
- GET_LDOUBLE_WORDS64(i0,i1,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (i0, xhi);
+ EXTRACT_WORDS64 (i1, xlo);
i1 &= 0x000fffffffffffffLL;
j0 = ((i0>>52)&0x7ff)-0x3ff; /* exponent of x */
if(j0<52) { /* integer part in high x */
if(j0<0) { /* |x|<1 */
/* *iptr = +-0 */
- SET_LDOUBLE_WORDS64(*iptr,i0&0x8000000000000000ULL,0);
+ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL);
+ *iptr = xhi;
return x;
} else {
i = (0x000fffffffffffffLL)>>j0;
if(((i0&i)|(i1&0x7fffffffffffffffLL))==0) { /* x is integral */
*iptr = x;
/* return +-0 */
- SET_LDOUBLE_WORDS64(x,i0&0x8000000000000000ULL,0);
+ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL);
+ x = xhi;
return x;
} else {
- SET_LDOUBLE_WORDS64(*iptr,i0&(~i),0);
+ INSERT_WORDS64 (xhi, i0&(~i));
+ *iptr = xhi;
return x - *iptr;
}
}
} else if (j0>103) { /* no fraction part */
*iptr = x*one;
/* We must handle NaNs separately. */
- if (j0 == 0x400 && ((i0 & 0x000fffffffffffffLL) | i1))
+ if ((i0 & 0x7fffffffffffffffLL) > 0x7ff0000000000000LL)
return x*one;
/* return +-0 */
- SET_LDOUBLE_WORDS64(x,i0&0x8000000000000000ULL,0);
+ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL);
+ x = xhi;
return x;
} else { /* fraction part in low x */
i = -1ULL>>(j0-52);
if((i1&i)==0) { /* x is integral */
*iptr = x;
/* return +-0 */
- SET_LDOUBLE_WORDS64(x,i0&0x8000000000000000ULL,0);
+ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL);
+ x = xhi;
return x;
} else {
- SET_LDOUBLE_WORDS64(*iptr,i0,i1&(~i));
+ INSERT_WORDS64 (xhi, i0);
+ INSERT_WORDS64 (xlo, i1&(~i));
+ *iptr = ldbl_pack (xhi, xlo);
return x - *iptr;
}
}
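
__modfl isolates the integer part by masking: with unbiased exponent j0, the bits below the binary point are 0x000fffffffffffff >> j0 in the high mantissa (or sit entirely in the low double once j0 >= 52). A sketch of that mask on a single double, ignoring inf/NaN:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static double
trunc_bits (double x)
{
  uint64_t i0;
  memcpy (&i0, &x, sizeof i0);
  int j0 = (int) ((i0 >> 52) & 0x7ff) - 0x3ff;   /* unbiased exponent */
  if (j0 < 0)
    i0 &= 0x8000000000000000ULL;                 /* |x| < 1: keep only the sign */
  else if (j0 < 52)
    i0 &= ~(0x000fffffffffffffULL >> j0);        /* clear the fraction bits */
  /* j0 >= 52: already integral (inf/NaN not handled in this sketch) */
  memcpy (&x, &i0, sizeof x);
  return x;
}

int
main (void)
{
  printf ("%g %g %g\n", trunc_bits (3.75), trunc_bits (-0.5),
          trunc_bits (123456.9));
  return 0;
}
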
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c
index bfcd11044..92ced5218 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c
@@ -34,11 +34,11 @@ __nearbyintl (long double x)
fenv_t env;
static const long double TWO52 = 4503599627370496.0L;
union ibm_extended_long_double u;
- u.d = x;
+ u.ld = x;
- if (fabs (u.dd[0]) < TWO52)
+ if (fabs (u.d[0].d) < TWO52)
{
- double high = u.dd[0];
+ double high = u.d[0].d;
feholdexcept (&env);
if (high > 0.0)
{
@@ -52,13 +52,13 @@ __nearbyintl (long double x)
high += TWO52;
if (high == 0.0) high = -0.0;
}
- u.dd[0] = high;
- u.dd[1] = 0.0;
- math_force_eval (u.dd[0]);
- math_force_eval (u.dd[1]);
+ u.d[0].d = high;
+ u.d[1].d = 0.0;
+ math_force_eval (u.d[0]);
+ math_force_eval (u.d[1]);
fesetenv (&env);
}
- else if (fabs (u.dd[1]) < TWO52 && u.dd[1] != 0.0)
+ else if (fabs (u.d[1].d) < TWO52 && u.d[1].d != 0.0)
{
double high, low, tau;
/* In this case we have to round the low double and handle any
@@ -67,57 +67,57 @@ __nearbyintl (long double x)
may already be rounded and the low double may have the
opposite sign to compensate. */
feholdexcept (&env);
- if (u.dd[0] > 0.0)
+ if (u.d[0].d > 0.0)
{
- if (u.dd[1] > 0.0)
+ if (u.d[1].d > 0.0)
{
/* If the high/low doubles are the same sign then simply
round the low double. */
- high = u.dd[0];
- low = u.dd[1];
+ high = u.d[0].d;
+ low = u.d[1].d;
}
- else if (u.dd[1] < 0.0)
+ else if (u.d[1].d < 0.0)
{
/* Else the high double is pre rounded and we need to
adjust for that. */
- tau = __nextafter (u.dd[0], 0.0);
- tau = (u.dd[0] - tau) * 2.0;
- high = u.dd[0] - tau;
- low = u.dd[1] + tau;
+ tau = __nextafter (u.d[0].d, 0.0);
+ tau = (u.d[0].d - tau) * 2.0;
+ high = u.d[0].d - tau;
+ low = u.d[1].d + tau;
}
low += TWO52;
low -= TWO52;
}
- else if (u.dd[0] < 0.0)
+ else if (u.d[0].d < 0.0)
{
- if (u.dd[1] < 0.0)
+ if (u.d[1].d < 0.0)
{
/* If the high/low doubles are the same sign then simply
round the low double. */
- high = u.dd[0];
- low = u.dd[1];
+ high = u.d[0].d;
+ low = u.d[1].d;
}
- else if (u.dd[1] > 0.0)
+ else if (u.d[1].d > 0.0)
{
/* Else the high double is pre rounded and we need to
adjust for that. */
- tau = __nextafter (u.dd[0], 0.0);
- tau = (u.dd[0] - tau) * 2.0;
- high = u.dd[0] - tau;
- low = u.dd[1] + tau;
+ tau = __nextafter (u.d[0].d, 0.0);
+ tau = (u.d[0].d - tau) * 2.0;
+ high = u.d[0].d - tau;
+ low = u.d[1].d + tau;
}
low = TWO52 - low;
low = -(low - TWO52);
}
- u.dd[0] = high + low;
- u.dd[1] = high - u.dd[0] + low;
- math_force_eval (u.dd[0]);
- math_force_eval (u.dd[1]);
+ u.d[0].d = high + low;
+ u.d[1].d = high - u.d[0].d + low;
+ math_force_eval (u.d[0]);
+ math_force_eval (u.d[1]);
fesetenv (&env);
}
- return u.d;
+ return u.ld;
}
long_double_symbol (libm, __nearbyintl, nearbyintl);
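
__nearbyintl rounds each half with the classic 2^52 trick: once a double's magnitude reaches 2^52 every representable value is an integer, so adding and then subtracting TWO52 rounds to nearest (ties to even) without inspecting the fraction. A sketch on a plain double; the feholdexcept/fesetenv bracketing that keeps the inexact flag clean is omitted:

#include <stdio.h>

/* Assumes |x| < 2^52 and the default round-to-nearest mode. */
static double
round_to_nearest (double x)
{
  static const double TWO52 = 4503599627370496.0;
  if (x >= 0.0)
    {
      x += TWO52;   /* result >= 2^52, so it is forced to an integer */
      x -= TWO52;   /* exact: brings back the rounded value */
    }
  else
    {
      x -= TWO52;
      x += TWO52;
    }
  return x;
}

int
main (void)
{
  printf ("%g %g %g\n", round_to_nearest (2.5), round_to_nearest (-1.2),
          round_to_nearest (7.0));
  return 0;
}
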
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c
index 7e581274a..c050944c0 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c
@@ -30,27 +30,28 @@ static char rcsid[] = "$NetBSD: $";
long double __nextafterl(long double x, long double y)
{
- int64_t hx,hy,ihx,ihy,ilx;
- u_int64_t lx;
- u_int64_t ly __attribute__ ((unused));
+ int64_t hx,hy,ihx,ihy;
+ uint64_t lx;
+ double xhi, xlo, yhi;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64 (hy, yhi);
ihx = hx&0x7fffffffffffffffLL; /* |hx| */
- ilx = lx&0x7fffffffffffffffLL; /* |lx| */
ihy = hy&0x7fffffffffffffffLL; /* |hy| */
- if((((ihx&0x7ff0000000000000LL)==0x7ff0000000000000LL)&&
- ((ihx&0x000fffffffffffffLL)!=0)) || /* x is nan */
- (((ihy&0x7ff0000000000000LL)==0x7ff0000000000000LL)&&
- ((ihy&0x000fffffffffffffLL)!=0))) /* y is nan */
+ if((ihx>0x7ff0000000000000LL) || /* x is nan */
+ (ihy>0x7ff0000000000000LL)) /* y is nan */
return x+y; /* signal the nan */
if(x==y)
return y; /* x=y, return y */
- if(ihx == 0 && ilx == 0) { /* x == 0 */
- long double u;
+ if(ihx == 0) { /* x == 0 */
+ long double u; /* return +-minsubnormal */
hy = (hy & 0x8000000000000000ULL) | 1;
- SET_LDOUBLE_WORDS64(x,hy,0ULL);/* return +-minsubnormal */
+ INSERT_WORDS64 (yhi, hy);
+ x = yhi;
u = math_opt_barrier (x);
u = u * u;
math_force_eval (u); /* raise underflow flag */
@@ -59,10 +60,16 @@ long double __nextafterl(long double x, long double y)
long double u;
if(x > y) { /* x > y, x -= ulp */
+ /* This isn't the largest magnitude correctly rounded
+ long double as you can see from the lowest mantissa
+ bit being zero. It is however the largest magnitude
+ long double with a 106 bit mantissa, and nextafterl
+ is insane with variable precision. So to make
+ nextafterl sane we assume 106 bit precision. */
if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL))
return x+x; /* overflow, return -inf */
if (hx >= 0x7ff0000000000000LL) {
- SET_LDOUBLE_WORDS64(u,0x7fefffffffffffffLL,0x7c8ffffffffffffeLL);
+ u = 0x1.fffffffffffff7ffffffffffff8p+1023L;
return u;
}
if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */
@@ -77,16 +84,19 @@ long double __nextafterl(long double x, long double y)
return x;
}
if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */
- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL);
+ INSERT_WORDS64 (yhi, hx & (0x7ffLL<<52));
+ u = yhi;
u *= 0x1.0000000000000p-105L;
- } else
- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL);
+ } else {
+ INSERT_WORDS64 (yhi, (hx & (0x7ffLL<<52))-(0x069LL<<52));
+ u = yhi;
+ }
return x - u;
} else { /* x < y, x += ulp */
if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL))
return x+x; /* overflow, return +inf */
- if ((u_int64_t) hx >= 0xfff0000000000000ULL) {
- SET_LDOUBLE_WORDS64(u,0xffefffffffffffffLL,0xfc8ffffffffffffeLL);
+ if ((uint64_t) hx >= 0xfff0000000000000ULL) {
+ u = -0x1.fffffffffffff7ffffffffffff8p+1023L;
return u;
}
if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */
@@ -103,10 +113,13 @@ long double __nextafterl(long double x, long double y)
return x;
}
if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */
- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL);
+ INSERT_WORDS64 (yhi, hx & (0x7ffLL<<52));
+ u = yhi;
u *= 0x1.0000000000000p-105L;
- } else
- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL);
+ } else {
+ INSERT_WORDS64 (yhi, (hx & (0x7ffLL<<52))-(0x069LL<<52));
+ u = yhi;
+ }
return x + u;
}
}
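
When stepping by one ulp, the patched __nextafterl builds 2^(exponent(x) - 105) directly from x's exponent field, i.e. one ulp at the assumed 106-bit precision. The same quantity expressed with frexp/ldexp instead of bit fiddling:

#include <math.h>
#include <stdio.h>

static double
ulp106 (double x)
{
  int e;
  frexp (x, &e);                 /* x = m * 2^e with 0.5 <= |m| < 1 */
  return ldexp (1.0, e - 106);   /* 2^(e-106) == 2^(exponent(x) - 105) */
}

int
main (void)
{
  printf ("%a %a\n", ulp106 (1.0), ulp106 (1048576.0));
  return 0;
}
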
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c
index 7e288a436..b40cf167f 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c
@@ -34,23 +34,22 @@ double __nexttoward(double x, long double y)
{
int32_t hx,ix;
int64_t hy,iy;
- u_int32_t lx;
- u_int64_t ly,uly;
+ uint32_t lx;
+ double yhi;
EXTRACT_WORDS(hx,lx,x);
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64(hy,yhi);
ix = hx&0x7fffffff; /* |x| */
iy = hy&0x7fffffffffffffffLL; /* |y| */
- uly = ly&0x7fffffffffffffffLL; /* |y| */
if(((ix>=0x7ff00000)&&((ix-0x7ff00000)|lx)!=0) || /* x is nan */
- ((iy>=0x7ff0000000000000LL)&&((iy-0x7ff0000000000000LL)|uly)!=0))
- /* y is nan */
+ iy>0x7ff0000000000000LL) /* y is nan */
return x+y;
if((long double) x==y) return y; /* x=y, return y */
if((ix|lx)==0) { /* x == 0 */
double u;
- INSERT_WORDS(x,(u_int32_t)((hy>>32)&0x80000000),1);/* return +-minsub */
+ INSERT_WORDS(x,(uint32_t)((hy>>32)&0x80000000),1);/* return +-minsub */
u = math_opt_barrier (x);
u = u * u;
math_force_eval (u); /* raise underflow flag */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c
index b387a9119..19522f476 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c
@@ -27,16 +27,16 @@ float __nexttowardf(float x, long double y)
{
int32_t hx,ix;
int64_t hy,iy;
- u_int64_t ly, uly;
+ double yhi;
GET_FLOAT_WORD(hx,x);
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64 (hy, yhi);
ix = hx&0x7fffffff; /* |x| */
iy = hy&0x7fffffffffffffffLL; /* |y| */
- uly = ly&0x7fffffffffffffffLL; /* |y| */
if((ix>0x7f800000) || /* x is nan */
- ((iy>=0x7ff0000000000000LL)&&((iy-0x7ff0000000000000LL)|uly)!=0))
+ (iy>0x7ff0000000000000LL))
/* y is nan */
return x+y;
if((long double) x==y) return y; /* x=y, return y */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_remquol.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_remquol.c
index f4777a0e1..195e108ca 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_remquol.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_remquol.c
@@ -33,20 +33,24 @@ __remquol (long double x, long double y, int *quo)
int64_t hx,hy;
u_int64_t sx,lx,ly,qs;
int cquo;
-
- GET_LDOUBLE_WORDS64 (hx, lx, x);
- GET_LDOUBLE_WORDS64 (hy, ly, y);
+ double xhi, xlo, yhi, ylo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
+ ldbl_unpack (y, &yhi, &ylo);
+ EXTRACT_WORDS64 (hy, yhi);
+ EXTRACT_WORDS64 (ly, ylo);
sx = hx & 0x8000000000000000ULL;
qs = sx ^ (hy & 0x8000000000000000ULL);
hy &= 0x7fffffffffffffffLL;
hx &= 0x7fffffffffffffffLL;
/* Purge off exception values. */
- if ((hy | (ly & 0x7fffffffffffffff)) == 0)
+ if (hy == 0)
return (x * y) / (x * y); /* y = 0 */
if ((hx >= 0x7ff0000000000000LL) /* x not finite */
- || ((hy >= 0x7ff0000000000000LL) /* y is NaN */
- && (((hy - 0x7ff0000000000000LL) | ly) != 0)))
+ || (hy > 0x7ff0000000000000LL)) /* y is NaN */
return (x * y) / (x * y);
if (hy <= 0x7fbfffffffffffffLL)
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c
index d75256888..03d459727 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c
@@ -41,11 +41,15 @@ long double __scalblnl (long double x, long int n)
{
int64_t k,l,hx,lx;
union { int64_t i; double d; } u;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
k = (hx>>52)&0x7ff; /* extract exponent */
l = (lx>>52)&0x7ff;
if (k==0) { /* 0 or subnormal x */
- if (((hx|lx)&0x7fffffffffffffffULL)==0) return x; /* +-0 */
+ if ((hx&0x7fffffffffffffffULL)==0) return x; /* +-0 */
u.i = hx;
u.d *= two54;
hx = u.i;
@@ -61,7 +65,9 @@ long double __scalblnl (long double x, long int n)
if (k > 0) { /* normal result */
hx = (hx&0x800fffffffffffffULL)|(k<<52);
if ((lx & 0x7fffffffffffffffULL) == 0) { /* low part +-0 */
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
if (l == 0) { /* low part subnormal */
@@ -81,14 +87,19 @@ long double __scalblnl (long double x, long int n)
u.d *= twom54;
lx = u.i;
}
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
if (k <= -54)
return tiny*__copysignl(tiny,x); /*underflow*/
k += 54; /* subnormal result */
lx &= 0x8000000000000000ULL;
- SET_LDOUBLE_WORDS64(x,(hx&0x800fffffffffffffULL)|(k<<52),lx);
+ hx &= 0x800fffffffffffffULL;
+ INSERT_WORDS64 (xhi, hx|(k<<52));
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x*twolm54;
}
long_double_symbol (libm, __scalblnl, scalblnl);
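
__scalblnl (and __scalbnl below) scale by adding n to the biased exponent field of each half, falling back to multiplications by 2^54 / 2^-54 around the subnormal range. A sketch of the normal-result path on a single double:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static double
scalbn_bits (double x, int n)   /* normal inputs and results only */
{
  int64_t hx;
  memcpy (&hx, &x, sizeof hx);
  int64_t k = (hx >> 52) & 0x7ff;   /* biased exponent */
  if (k == 0 || k == 0x7ff)
    return x;                       /* 0/subnormal/inf/NaN: not handled here */
  k += n;
  if (k <= 0 || k >= 0x7ff)
    return x;                       /* would leave the normal range */
  hx = (hx & 0x800fffffffffffffLL) | (k << 52);
  memcpy (&x, &hx, sizeof x);
  return x;
}

int
main (void)
{
  printf ("%g %g\n", scalbn_bits (3.0, 4), scalbn_bits (1.0, -3));
  return 0;
}
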
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c
index bcdb23bda..161172db6 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c
@@ -41,11 +41,15 @@ long double __scalbnl (long double x, int n)
{
int64_t k,l,hx,lx;
union { int64_t i; double d; } u;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
k = (hx>>52)&0x7ff; /* extract exponent */
l = (lx>>52)&0x7ff;
if (k==0) { /* 0 or subnormal x */
- if (((hx|lx)&0x7fffffffffffffffULL)==0) return x; /* +-0 */
+ if ((hx&0x7fffffffffffffffULL)==0) return x; /* +-0 */
u.i = hx;
u.d *= two54;
hx = u.i;
@@ -61,7 +65,9 @@ long double __scalbnl (long double x, int n)
if (k > 0) { /* normal result */
hx = (hx&0x800fffffffffffffULL)|(k<<52);
if ((lx & 0x7fffffffffffffffULL) == 0) { /* low part +-0 */
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
if (l == 0) { /* low part subnormal */
@@ -81,14 +87,19 @@ long double __scalbnl (long double x, int n)
u.d *= twom54;
lx = u.i;
}
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
if (k <= -54)
return tiny*__copysignl(tiny,x); /*underflow*/
k += 54; /* subnormal result */
lx &= 0x8000000000000000ULL;
- SET_LDOUBLE_WORDS64(x,(hx&0x800fffffffffffffULL)|(k<<52),lx);
+ hx &= 0x800fffffffffffffULL;
+ INSERT_WORDS64 (xhi, hx|(k<<52));
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x*twolm54;
}
#ifdef IS_IN_libm
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c
index ee4aea6cf..aecb1fd79 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c
@@ -25,8 +25,10 @@ int
___signbitl (long double x)
{
int64_t e;
+ double xhi;
- GET_LDOUBLE_MSW64 (e, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (e, xhi);
return e < 0;
}
#ifdef IS_IN_libm
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c
index 3b1e547bd..a9e2f3d19 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c
@@ -27,9 +27,11 @@ void
__sincosl (long double x, long double *sinx, long double *cosx)
{
int64_t ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64 (ix, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_sinl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_sinl.c
index 6fec16f85..087921a91 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_sinl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_sinl.c
@@ -53,9 +53,11 @@ long double __sinl(long double x)
{
long double y[2],z=0.0L;
int64_t n, ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c
index 138b63cd1..c63e25345 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c
@@ -47,10 +47,12 @@ static const long double one=1.0L, two=2.0L, tiny = 1.0e-300L;
long double __tanhl(long double x)
{
long double t,z;
- int64_t jx,ix,lx;
+ int64_t jx,ix;
+ double xhi;
/* High word of |x|. */
- GET_LDOUBLE_WORDS64(jx,lx,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (jx, xhi);
ix = jx&0x7fffffffffffffffLL;
/* x is INF or NaN */
@@ -61,7 +63,7 @@ long double __tanhl(long double x)
/* |x| < 22 */
if (ix < 0x4036000000000000LL) { /* |x|<22 */
- if ((ix | (lx&0x7fffffffffffffffLL)) == 0)
+ if (ix == 0)
return x; /* x == +-0 */
if (ix<0x3c60000000000000LL) /* |x|<2**-57 */
return x*(one+x); /* tanh(small) = small */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_tanl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_tanl.c
index 9967d0c20..66b8a0621 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_tanl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_tanl.c
@@ -53,9 +53,11 @@ long double __tanl(long double x)
{
long double y[2],z=0.0L;
int64_t n, ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/strtold_l.c b/libc/sysdeps/ieee754/ldbl-128ibm/strtold_l.c
index 04e328857..93a80c5ee 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/strtold_l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/strtold_l.c
@@ -43,11 +43,11 @@ libc_hidden_proto (STRTOF)
#define FLOAT_HUGE_VAL HUGE_VALL
# define SET_MANTISSA(flt, mant) \
do { union ibm_extended_long_double u; \
- u.d = (flt); \
- u.ieee_nan.mantissa0 = (mant) >> 32; \
- u.ieee_nan.mantissa1 = (mant); \
- if ((u.ieee.mantissa0 | u.ieee.mantissa1) != 0) \
- (flt) = u.d; \
+ u.ld = (flt); \
+ u.d[0].ieee_nan.mantissa0 = (mant) >> 32; \
+ u.d[0].ieee_nan.mantissa1 = (mant); \
+ if ((u.d[0].ieee.mantissa0 | u.d[0].ieee.mantissa1) != 0) \
+ (flt) = u.ld; \
} while (0)
#include <strtod_l.c>
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c b/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c
index ed0d4a506..06dcf02ff 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c
@@ -89,23 +89,23 @@ __x2y2m1l (long double x, long double y)
double vals[12];
SET_RESTORE_ROUND (FE_TONEAREST);
union ibm_extended_long_double xu, yu;
- xu.d = x;
- yu.d = y;
- if (fabs (xu.dd[1]) < 0x1p-500)
- xu.dd[1] = 0.0;
- if (fabs (yu.dd[1]) < 0x1p-500)
- yu.dd[1] = 0.0;
- mul_split (&vals[1], &vals[0], xu.dd[0], xu.dd[0]);
- mul_split (&vals[3], &vals[2], xu.dd[0], xu.dd[1]);
+ xu.ld = x;
+ yu.ld = y;
+ if (fabs (xu.d[1].d) < 0x1p-500)
+ xu.d[1].d = 0.0;
+ if (fabs (yu.d[1].d) < 0x1p-500)
+ yu.d[1].d = 0.0;
+ mul_split (&vals[1], &vals[0], xu.d[0].d, xu.d[0].d);
+ mul_split (&vals[3], &vals[2], xu.d[0].d, xu.d[1].d);
vals[2] *= 2.0;
vals[3] *= 2.0;
- mul_split (&vals[5], &vals[4], xu.dd[1], xu.dd[1]);
- mul_split (&vals[7], &vals[6], yu.dd[0], yu.dd[0]);
- mul_split (&vals[9], &vals[8], yu.dd[0], yu.dd[1]);
+ mul_split (&vals[5], &vals[4], xu.d[1].d, xu.d[1].d);
+ mul_split (&vals[7], &vals[6], yu.d[0].d, yu.d[0].d);
+ mul_split (&vals[9], &vals[8], yu.d[0].d, yu.d[1].d);
vals[8] *= 2.0;
vals[9] *= 2.0;
- mul_split (&vals[11], &vals[10], yu.dd[1], yu.dd[1]);
- if (xu.dd[0] >= 0.75)
+ mul_split (&vals[11], &vals[10], yu.d[1].d, yu.d[1].d);
+ if (xu.d[0].d >= 0.75)
vals[1] -= 1.0;
else
{
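
__x2y2m1l carries every product as an exact (hi, lo) pair produced by mul_split so the large cancellations in x*x + y*y - 1 are not lost. The helper itself is not shown in this hunk; one common way to write such a split, shown only as an illustration, is with fma:

#include <math.h>
#include <stdio.h>

/* hi + lo equals x*y exactly: hi is the rounded product, lo the
   rounding error recovered by fma. */
static void
mul_split_fma (double *hi, double *lo, double x, double y)
{
  *hi = x * y;
  *lo = fma (x, y, -*hi);
}

int
main (void)
{
  double hi, lo;
  mul_split_fma (&hi, &lo, 1.0 / 3.0, 3.0);
  printf ("hi=%.17g lo=%a\n", hi, lo);   /* hi + lo == (1/3)*3 exactly */
  return 0;
}
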
diff --git a/libc/sysdeps/ieee754/ldbl-96/printf_fphex.c b/libc/sysdeps/ieee754/ldbl-96/printf_fphex.c
index f356a4843..715c93b50 100644
--- a/libc/sysdeps/ieee754/ldbl-96/printf_fphex.c
+++ b/libc/sysdeps/ieee754/ldbl-96/printf_fphex.c
@@ -25,11 +25,13 @@ do { \
/* The "strange" 80 bit format on ix86 and m68k has an explicit \
leading digit in the 64 bit mantissa. */ \
unsigned long long int num; \
+ union ieee854_long_double u; \
+ u.d = fpnum.ldbl; \
\
assert (sizeof (long double) == 12); \
\
- num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \
- | fpnum.ldbl.ieee.mantissa1); \
+ num = (((unsigned long long int) u.ieee.mantissa0) << 32 \
+ | u.ieee.mantissa1); \
\
zero_mantissa = num == 0; \
\
@@ -62,7 +64,7 @@ do { \
\
/* We have 3 bits from the mantissa in the leading nibble. \
Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. */ \
- exponent = fpnum.ldbl.ieee.exponent; \
+ exponent = u.ieee.exponent; \
\
if (exponent == 0) \
{ \
diff --git a/libc/sysdeps/mach/hurd/fork.c b/libc/sysdeps/mach/hurd/fork.c
index ab11babff..321421fbb 100644
--- a/libc/sysdeps/mach/hurd/fork.c
+++ b/libc/sysdeps/mach/hurd/fork.c
@@ -34,6 +34,11 @@
symbol_set_declare (_hurd_fork_locks)
+/* Application callbacks registered through pthread_atfork. */
+DEFINE_HOOK (_hurd_atfork_prepare_hook, (void));
+DEFINE_HOOK (_hurd_atfork_child_hook, (void));
+DEFINE_HOOK (_hurd_atfork_parent_hook, (void));
+
/* Things that want to be called before we fork, to prepare the parent for
task_create, when the new child task will inherit our address space. */
DEFINE_HOOK (_hurd_fork_prepare_hook, (void));
@@ -62,6 +67,8 @@ __fork (void)
error_t err;
struct hurd_sigstate *volatile ss;
+ RUN_HOOK (_hurd_atfork_prepare_hook, ());
+
ss = _hurd_self_sigstate ();
__spin_lock (&ss->critical_section_lock);
@@ -695,6 +702,14 @@ __fork (void)
_hurd_critical_section_unlock (ss);
+ if (!err)
+ {
+ if (pid != 0)
+ RUN_HOOK (_hurd_atfork_parent_hook, ());
+ else
+ RUN_HOOK (_hurd_atfork_child_hook, ());
+ }
+
return err ? __hurd_fail (err) : pid;
}
libc_hidden_def (__fork)
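
The new _hurd_atfork_*_hook hooks give pthread_atfork callbacks the usual ordering: prepare runs in the parent before the task is created, then parent or child runs afterwards, selected by the returned pid. A sketch of that ordering using pthread_atfork itself:

#include <pthread.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

static void prepare (void) { puts ("prepare (parent, before fork)"); }
static void parent (void)  { puts ("parent  (parent, after fork)"); }
static void child (void)   { puts ("child   (child, after fork)"); }

int
main (void)
{
  pthread_atfork (prepare, parent, child);
  pid_t pid = fork ();
  if (pid == 0)
    _exit (0);
  waitpid (pid, NULL, 0);
  return 0;
}
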
diff --git a/libc/sysdeps/mach/hurd/i386/tls.h b/libc/sysdeps/mach/hurd/i386/tls.h
index 4f4c7c5df..da8c16aa0 100644
--- a/libc/sysdeps/mach/hurd/i386/tls.h
+++ b/libc/sysdeps/mach/hurd/i386/tls.h
@@ -118,7 +118,6 @@ _hurd_tls_init (tcbhead_t *tcb, int secondcall)
operation can cause a failure 'errno' must not be touched. */
# define TLS_INIT_TP(descr, secondcall) \
_hurd_tls_init ((tcbhead_t *) (descr), (secondcall))
-# define TLS_INIT_TP_EXPENSIVE 1
/* Return the TCB address of the current thread. */
# define THREAD_SELF \
diff --git a/libc/sysdeps/posix/dirstream.h b/libc/sysdeps/posix/dirstream.h
index 8e8570dd4..be2089505 100644
--- a/libc/sysdeps/posix/dirstream.h
+++ b/libc/sysdeps/posix/dirstream.h
@@ -41,8 +41,13 @@ struct __dirstream
int errcode; /* Delayed error code. */
- /* Directory block. */
- char data[0] __attribute__ ((aligned (__alignof__ (void*))));
+ /* Directory block. We must make sure that this block starts
+ at an address that is aligned adequately enough to store
+ dirent entries. Using the alignment of "void *" is not
+ sufficient because dirents on 32-bit platforms can require
+ 64-bit alignment. We use "long double" here to be consistent
+ with what malloc uses. */
+ char data[0] __attribute__ ((aligned (__alignof__ (long double))));
};
#define _DIR_dirfd(dirp) ((dirp)->fd)
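
The comment added to dirstream.h is the whole story: pointer alignment (4 bytes on 32-bit targets) is not enough for the 64-bit off_t/ino_t members a dirent64 can contain, so the block is aligned like long double, matching malloc. A sketch that just prints the relevant alignments (the numbers depend on the ABI, which is the point):

#include <stdio.h>

int
main (void)
{
  printf ("void*: %zu, long long: %zu, long double: %zu\n",
          __alignof__ (void *), __alignof__ (long long),
          __alignof__ (long double));
  return 0;
}
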
diff --git a/libc/sysdeps/posix/getaddrinfo.c b/libc/sysdeps/posix/getaddrinfo.c
index 52177e454..0f4b88514 100644
--- a/libc/sysdeps/posix/getaddrinfo.c
+++ b/libc/sysdeps/posix/getaddrinfo.c
@@ -558,16 +558,17 @@ gaih_inet (const char *name, const struct gaih_service *service,
struct gaih_addrtuple **pat = &at;
int no_data = 0;
int no_inet6_data = 0;
- service_user *nip = NULL;
+ service_user *nip;
enum nss_status inet6_status = NSS_STATUS_UNAVAIL;
enum nss_status status = NSS_STATUS_UNAVAIL;
int no_more;
int old_res_options;
- /* If we do not have to look for IPv6 addresses, use
- the simple, old functions, which do not support
- IPv6 scope ids. */
- if (req->ai_family == AF_INET)
+ /* If we do not have to look for IPv6 addresses or the canonical
+ name, use the simple, old functions, which do not support
+ IPv6 scope ids, nor retrieving the canonical name. */
+ if (req->ai_family == AF_INET
+ && (req->ai_flags & AI_CANONNAME) == 0)
{
/* Allocate additional room for struct host_data. */
size_t tmpbuflen = (512 + MAX_NR_ALIASES * sizeof(char*)
@@ -791,15 +792,13 @@ gaih_inet (const char *name, const struct gaih_service *service,
}
#endif
- if (__nss_hosts_database != NULL)
- {
- no_more = 0;
- nip = __nss_hosts_database;
- }
- else
+ if (__nss_hosts_database == NULL)
no_more = __nss_database_lookup ("hosts", NULL,
"dns [!UNAVAIL=return] files",
- &nip);
+ &__nss_hosts_database);
+ else
+ no_more = 0;
+ nip = __nss_hosts_database;
/* Initialize configurations. */
if (__glibc_unlikely (!_res_hconf.initialized))
@@ -1011,8 +1010,9 @@ gaih_inet (const char *name, const struct gaih_service *service,
canon = s;
else
{
- /* Set to name now to avoid using
- gethostbyaddr. */
+ /* If the canonical name cannot be
+ determined, use the passed in
+ string. */
if (malloc_canonbuf)
{
free (canonbuf);
@@ -1127,70 +1127,10 @@ gaih_inet (const char *name, const struct gaih_service *service,
/* Only the first entry gets the canonical name. */
if (at2 == at && (req->ai_flags & AI_CANONNAME) != 0)
{
- char *tmpbuf2 = NULL;
- bool malloc_tmpbuf2 = false;
-
if (canon == NULL)
- {
- struct hostent *h = NULL;
- int herrno;
- struct hostent th;
- /* Add room for struct host_data. */
- size_t tmpbuf2len = (512 + (MAX_NR_ALIASES+MAX_NR_ADDRS+1)
- * sizeof(char*) + 16 * sizeof(char));
-
- do
- {
- if (__libc_use_alloca (alloca_used + 2 * tmpbuf2len))
- tmpbuf2 = extend_alloca_account (tmpbuf2, tmpbuf2len,
- tmpbuf2len * 2,
- alloca_used);
- else
- {
- char *newp = realloc (malloc_tmpbuf2 ? tmpbuf2 : NULL,
- 2 * tmpbuf2len);
- if (newp == NULL)
- {
- if (malloc_tmpbuf2)
- free (tmpbuf2);
- result = -EAI_MEMORY;
- goto free_and_return;
- }
-
- tmpbuf2 = newp;
- tmpbuf2len = 2 * tmpbuf2len;
- malloc_tmpbuf2 = true;
- }
-
- rc = __gethostbyaddr_r (at2->addr,
- ((at2->family == AF_INET6)
- ? sizeof (struct in6_addr)
- : sizeof (struct in_addr)),
- at2->family, &th, tmpbuf2,
- tmpbuf2len, &h, &herrno);
- }
- while (rc == ERANGE && herrno == NETDB_INTERNAL);
-
- if (rc != 0 && herrno == NETDB_INTERNAL)
- {
- if (malloc_tmpbuf2)
- free (tmpbuf2);
-
- __set_h_errno (herrno);
- result = -EAI_SYSTEM;
- goto free_and_return;
- }
-
- if (h != NULL)
- canon = h->h_name;
- else
- {
- assert (orig_name != NULL);
- /* If the canonical name cannot be determined, use
- the passed in string. */
- canon = orig_name;
- }
- }
+ /* If the canonical name cannot be determined, use
+ the passed in string. */
+ canon = orig_name;
#ifdef HAVE_LIBIDN
if (req->ai_flags & AI_CANONIDN)
@@ -1205,9 +1145,6 @@ gaih_inet (const char *name, const struct gaih_service *service,
int rc = __idna_to_unicode_lzlz (canon, &out, idn_flags);
if (rc != IDNA_SUCCESS)
{
- if (malloc_tmpbuf2)
- free (tmpbuf2);
-
if (rc == IDNA_MALLOC_ERROR)
result = -EAI_MEMORY;
else if (rc == IDNA_DLOPEN_ERROR)
@@ -1237,17 +1174,11 @@ gaih_inet (const char *name, const struct gaih_service *service,
canon = strdup (canon);
if (canon == NULL)
{
- if (malloc_tmpbuf2)
- free (tmpbuf2);
-
result = -EAI_MEMORY;
goto free_and_return;
}
}
}
-
- if (malloc_tmpbuf2)
- free (tmpbuf2);
}
family = at2->family;
diff --git a/libc/sysdeps/powerpc/bits/fenv.h b/libc/sysdeps/powerpc/bits/fenv.h
index 07cd3c8e5..122edd3dc 100644
--- a/libc/sysdeps/powerpc/bits/fenv.h
+++ b/libc/sysdeps/powerpc/bits/fenv.h
@@ -19,75 +19,6 @@
# error "Never use <bits/fenv.h> directly; include <fenv.h> instead."
#endif
-#if defined __NO_FPRS__ && !defined _SOFT_FLOAT /* E500 */
-
-/* Define bits representing the exception. We use the bit positions of
- the appropriate bits in the SPEFSCR... */
-enum
- {
- FE_INEXACT =
-#define FE_INEXACT (1 << (63 - 42))
- FE_INEXACT,
- FE_INVALID =
-#define FE_INVALID (1 << (63 - 43))
- FE_INVALID,
- FE_DIVBYZERO =
-#define FE_DIVBYZERO (1 << (63 - 44))
- FE_DIVBYZERO,
- FE_UNDERFLOW =
-#define FE_UNDERFLOW (1 << (63 - 45))
- FE_UNDERFLOW,
- FE_OVERFLOW =
-#define FE_OVERFLOW (1 << (63 - 46))
- FE_OVERFLOW
- };
-
-#define FE_ALL_EXCEPT \
- (FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW | FE_INVALID)
-
-/* The E500 support all of the four defined rounding modes. We use
- the bit pattern in the SPEFSCR as the values for the appropriate
- macros. */
-enum
- {
- FE_TONEAREST =
-#define FE_TONEAREST 0
- FE_TONEAREST,
- FE_TOWARDZERO =
-#define FE_TOWARDZERO 1
- FE_TOWARDZERO,
- FE_UPWARD =
-#define FE_UPWARD 2
- FE_UPWARD,
- FE_DOWNWARD =
-#define FE_DOWNWARD 3
- FE_DOWNWARD
- };
-
-/* Type representing exception flags. */
-typedef unsigned int fexcept_t;
-
-typedef double fenv_t;
-
-/* If the default argument is used we use this value. */
-extern const fenv_t __fe_dfl_env;
-#define FE_DFL_ENV (&__fe_dfl_env)
-
-#ifdef __USE_GNU
-/* Floating-point environment where all exceptions are enabled. Note that
- this is not sufficient to give you SIGFPE. */
-extern const fenv_t __fe_enabled_env;
-# define FE_ENABLED_ENV (&__fe_enabled_env)
-
-/* Floating-point environment with all exceptions enabled. Note that
- just evaluating this value will set the processor into 'FPU
- exceptions imprecise recoverable' mode, which may cause a significant
- performance penalty (but have no other visible effect). */
-extern const fenv_t *__fe_nomask_env (void);
-# define FE_NOMASK_ENV (__fe_nomask_env ())
-#endif
-
-#else /* PowerPC 6xx floating-point. */
/* Define bits representing the exception. We use the bit positions of
the appropriate bits in the FPSCR... */
@@ -244,5 +175,3 @@ extern const fenv_t *__fe_mask_env (void);
__END_DECLS
#endif
-
-#endif
diff --git a/libc/sysdeps/powerpc/bits/mathinline.h b/libc/sysdeps/powerpc/bits/mathinline.h
index 140fff08e..cef5b29b1 100644
--- a/libc/sysdeps/powerpc/bits/mathinline.h
+++ b/libc/sysdeps/powerpc/bits/mathinline.h
@@ -61,21 +61,28 @@
__MATH_INLINE int
__NTH (__signbitf (float __x))
{
+#if __GNUC_PREREQ (4, 0)
+ return __builtin_signbitf (__x);
+#else
__extension__ union { float __f; int __i; } __u = { __f: __x };
return __u.__i < 0;
+#endif
}
__MATH_INLINE int
__NTH (__signbit (double __x))
{
- __extension__ union { double __d; int __i[2]; } __u = { __d: __x };
- return __u.__i[0] < 0;
+#if __GNUC_PREREQ (4, 0)
+ return __builtin_signbit (__x);
+#else
+ __extension__ union { double __d; long long __i; } __u = { __d: __x };
+ return __u.__i < 0;
+#endif
}
# ifdef __LONG_DOUBLE_128__
__MATH_INLINE int
__NTH (__signbitl (long double __x))
{
- __extension__ union { long double __d; int __i[4]; } __u = { __d: __x };
- return __u.__i[0] < 0;
+ return __signbit ((double) __x);
}
# endif
# endif
@@ -92,22 +99,17 @@ __NTH (lrint (double __x))
{
union {
double __d;
- int __ll[2];
+ long long __ll;
} __u;
__asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x));
- return __u.__ll[1];
+ return __u.__ll;
}
__MATH_INLINE long int lrintf (float __x) __THROW;
__MATH_INLINE long int
__NTH (lrintf (float __x))
{
- union {
- double __d;
- int __ll[2];
- } __u;
- __asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x));
- return __u.__ll[1];
+ return lrint ((double) __x);
}
# endif
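
The mathinline.h hunk offers two __signbit strategies: the GCC 4+ builtin, and the old union pun now done through a single 64-bit integer so the test is simply "is the integer negative". A sketch of both on double:

#include <stdio.h>

static int
signbit_builtin (double x)
{
  return __builtin_signbit (x) != 0;
}

static int
signbit_pun (double x)
{
  __extension__ union { double d; long long i; } u = { .d = x };
  return u.i < 0;
}

int
main (void)
{
  printf ("%d %d %d %d\n", signbit_builtin (-0.0), signbit_pun (-0.0),
          signbit_builtin (3.0), signbit_pun (3.0));
  return 0;
}
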
diff --git a/libc/sysdeps/powerpc/ffs.c b/libc/sysdeps/powerpc/ffs.c
index e0fee46b3..deba0cdd0 100644
--- a/libc/sysdeps/powerpc/ffs.c
+++ b/libc/sysdeps/powerpc/ffs.c
@@ -35,6 +35,7 @@ __ffs (int x)
return 32 - cnt;
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
#if ULONG_MAX == UINT_MAX
#undef ffsl
diff --git a/libc/sysdeps/powerpc/fpu/e_sqrt.c b/libc/sysdeps/powerpc/fpu/e_sqrt.c
index 3efe277f3..2d50fb525 100644
--- a/libc/sysdeps/powerpc/fpu/e_sqrt.c
+++ b/libc/sysdeps/powerpc/fpu/e_sqrt.c
@@ -145,7 +145,7 @@ __slow_ieee754_sqrt (double x)
feraiseexcept (FE_INVALID_SQRT);
fenv_union_t u = { .fenv = fegetenv_register () };
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
#endif
feraiseexcept (FE_INVALID);
x = a_nan.value;
diff --git a/libc/sysdeps/powerpc/fpu/e_sqrtf.c b/libc/sysdeps/powerpc/fpu/e_sqrtf.c
index 6e50a3cd7..91d2d37d7 100644
--- a/libc/sysdeps/powerpc/fpu/e_sqrtf.c
+++ b/libc/sysdeps/powerpc/fpu/e_sqrtf.c
@@ -121,7 +121,7 @@ __slow_ieee754_sqrtf (float x)
feraiseexcept (FE_INVALID_SQRT);
fenv_union_t u = { .fenv = fegetenv_register () };
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
#endif
feraiseexcept (FE_INVALID);
x = a_nan.value;
diff --git a/libc/sysdeps/powerpc/fpu/fclrexcpt.c b/libc/sysdeps/powerpc/fpu/fclrexcpt.c
index 86575dba6..7f66e21ce 100644
--- a/libc/sysdeps/powerpc/fpu/fclrexcpt.c
+++ b/libc/sysdeps/powerpc/fpu/fclrexcpt.c
@@ -28,8 +28,8 @@ __feclearexcept (int excepts)
u.fenv = fegetenv_register ();
/* Clear the relevant bits. */
- u.l[1] = u.l[1] & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
- | (excepts & FPSCR_STICKY_BITS));
+ u.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
+ | (excepts & FPSCR_STICKY_BITS));
/* Put the new state in effect. */
fesetenv_register (u.fenv);
diff --git a/libc/sysdeps/powerpc/fpu/fedisblxcpt.c b/libc/sysdeps/powerpc/fpu/fedisblxcpt.c
index 659566b67..f2c45a60c 100644
--- a/libc/sysdeps/powerpc/fpu/fedisblxcpt.c
+++ b/libc/sysdeps/powerpc/fpu/fedisblxcpt.c
@@ -32,15 +32,15 @@ fedisableexcept (int excepts)
fe.fenv = fegetenv_register ();
if (excepts & FE_INEXACT)
- fe.l[1] &= ~(1 << (31 - FPSCR_XE));
+ fe.l &= ~(1 << (31 - FPSCR_XE));
if (excepts & FE_DIVBYZERO)
- fe.l[1] &= ~(1 << (31 - FPSCR_ZE));
+ fe.l &= ~(1 << (31 - FPSCR_ZE));
if (excepts & FE_UNDERFLOW)
- fe.l[1] &= ~(1 << (31 - FPSCR_UE));
+ fe.l &= ~(1 << (31 - FPSCR_UE));
if (excepts & FE_OVERFLOW)
- fe.l[1] &= ~(1 << (31 - FPSCR_OE));
+ fe.l &= ~(1 << (31 - FPSCR_OE));
if (excepts & FE_INVALID)
- fe.l[1] &= ~(1 << (31 - FPSCR_VE));
+ fe.l &= ~(1 << (31 - FPSCR_VE));
fesetenv_register (fe.fenv);
new = __fegetexcept ();
diff --git a/libc/sysdeps/powerpc/fpu/feenablxcpt.c b/libc/sysdeps/powerpc/fpu/feenablxcpt.c
index fc4bfffad..472796d15 100644
--- a/libc/sysdeps/powerpc/fpu/feenablxcpt.c
+++ b/libc/sysdeps/powerpc/fpu/feenablxcpt.c
@@ -32,15 +32,15 @@ feenableexcept (int excepts)
fe.fenv = fegetenv_register ();
if (excepts & FE_INEXACT)
- fe.l[1] |= (1 << (31 - FPSCR_XE));
+ fe.l |= (1 << (31 - FPSCR_XE));
if (excepts & FE_DIVBYZERO)
- fe.l[1] |= (1 << (31 - FPSCR_ZE));
+ fe.l |= (1 << (31 - FPSCR_ZE));
if (excepts & FE_UNDERFLOW)
- fe.l[1] |= (1 << (31 - FPSCR_UE));
+ fe.l |= (1 << (31 - FPSCR_UE));
if (excepts & FE_OVERFLOW)
- fe.l[1] |= (1 << (31 - FPSCR_OE));
+ fe.l |= (1 << (31 - FPSCR_OE));
if (excepts & FE_INVALID)
- fe.l[1] |= (1 << (31 - FPSCR_VE));
+ fe.l |= (1 << (31 - FPSCR_VE));
fesetenv_register (fe.fenv);
new = __fegetexcept ();
diff --git a/libc/sysdeps/powerpc/fpu/fegetexcept.c b/libc/sysdeps/powerpc/fpu/fegetexcept.c
index f3d5724e9..23d47a27e 100644
--- a/libc/sysdeps/powerpc/fpu/fegetexcept.c
+++ b/libc/sysdeps/powerpc/fpu/fegetexcept.c
@@ -27,15 +27,15 @@ __fegetexcept (void)
fe.fenv = fegetenv_register ();
- if (fe.l[1] & (1 << (31 - FPSCR_XE)))
+ if (fe.l & (1 << (31 - FPSCR_XE)))
result |= FE_INEXACT;
- if (fe.l[1] & (1 << (31 - FPSCR_ZE)))
+ if (fe.l & (1 << (31 - FPSCR_ZE)))
result |= FE_DIVBYZERO;
- if (fe.l[1] & (1 << (31 - FPSCR_UE)))
+ if (fe.l & (1 << (31 - FPSCR_UE)))
result |= FE_UNDERFLOW;
- if (fe.l[1] & (1 << (31 - FPSCR_OE)))
+ if (fe.l & (1 << (31 - FPSCR_OE)))
result |= FE_OVERFLOW;
- if (fe.l[1] & (1 << (31 - FPSCR_VE)))
+ if (fe.l & (1 << (31 - FPSCR_VE)))
result |= FE_INVALID;
return result;
diff --git a/libc/sysdeps/powerpc/fpu/feholdexcpt.c b/libc/sysdeps/powerpc/fpu/feholdexcpt.c
index 013d2bfbb..0ecf0f7bc 100644
--- a/libc/sysdeps/powerpc/fpu/feholdexcpt.c
+++ b/libc/sysdeps/powerpc/fpu/feholdexcpt.c
@@ -30,13 +30,12 @@ feholdexcept (fenv_t *envp)
/* Clear everything except for the rounding modes and non-IEEE arithmetic
flag. */
- new.l[1] = old.l[1] & 7;
- new.l[0] = old.l[0];
+ new.l = old.l & 0xffffffff00000007LL;
/* If the old env had any enabled exceptions, then mask SIGFPE in the
MSR FE0/FE1 bits. This may allow the FPU to run faster because it
always takes the default action and can not generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) != 0)
(void)__fe_mask_env ();
/* Put the new state in effect. */
diff --git a/libc/sysdeps/powerpc/fpu/fenv_libc.h b/libc/sysdeps/powerpc/fpu/fenv_libc.h
index 191095156..baa2a7d39 100644
--- a/libc/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/libc/sysdeps/powerpc/fpu/fenv_libc.h
@@ -69,7 +69,7 @@ libm_hidden_proto (__fe_nomask_env)
typedef union
{
fenv_t fenv;
- unsigned int l[2];
+ unsigned long long l;
} fenv_union_t;
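
fenv_union_t now views the environment as one 64-bit integer instead of two 32-bit words; since the powerpc fenv_t is a double carrying the FPSCR image, the status and control bits the old code read from l[1] (the big-endian low word) are simply the low 32 bits of l. A trivial sketch of the new view with a stand-in value:

#include <stdio.h>

typedef union
{
  double fenv;            /* stands in for the powerpc fenv_t */
  unsigned long long l;   /* the single 64-bit view used throughout the patch */
} fenv_union_sketch;

int
main (void)
{
  fenv_union_sketch u;
  u.l = 0x00000000deadbeefULL;          /* pretend FPSCR image */
  printf ("status bits: %#llx\n", u.l & 0xffffffffULL);
  return 0;
}
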
diff --git a/libc/sysdeps/powerpc/fpu/fesetenv.c b/libc/sysdeps/powerpc/fpu/fesetenv.c
index e92adb4c5..6c00b267a 100644
--- a/libc/sysdeps/powerpc/fpu/fesetenv.c
+++ b/libc/sysdeps/powerpc/fpu/fesetenv.c
@@ -34,14 +34,14 @@ __fesetenv (const fenv_t *envp)
exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
hardware into "precise mode" and may cause the FPU to run slower on some
hardware. */
- if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
(void)__fe_nomask_env ();
/* If the old env had any enabled exceptions and the new env has no enabled
exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
FPU to run faster because it always takes the default action and can not
generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0)
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
(void)__fe_mask_env ();
fesetenv_register (*envp);
diff --git a/libc/sysdeps/powerpc/fpu/feupdateenv.c b/libc/sysdeps/powerpc/fpu/feupdateenv.c
index 6500ea173..677504416 100644
--- a/libc/sysdeps/powerpc/fpu/feupdateenv.c
+++ b/libc/sysdeps/powerpc/fpu/feupdateenv.c
@@ -34,20 +34,20 @@ __feupdateenv (const fenv_t *envp)
/* Restore rounding mode and exception enable from *envp and merge
exceptions. Leave fraction rounded/inexact and FP result/CC bits
unchanged. */
- new.l[1] = (old.l[1] & 0x1FFFFF00) | (new.l[1] & 0x1FF80FFF);
+ new.l = (old.l & 0xffffffff1fffff00LL) | (new.l & 0x1ff80fff);
/* If the old env has no enabled exceptions and the new env has any enabled
exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put
the hardware into "precise mode" and may cause the FPU to run slower on
some hardware. */
- if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
(void)__fe_nomask_env ();
/* If the old env had any enabled exceptions and the new env has no enabled
exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
FPU to run faster because it always takes the default action and can not
generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0)
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
(void)__fe_mask_env ();
/* Atomically enable and raise (if appropriate) exceptions set in `new'. */
diff --git a/libc/sysdeps/powerpc/fpu/fgetexcptflg.c b/libc/sysdeps/powerpc/fpu/fgetexcptflg.c
index f6327ce17..1395bede0 100644
--- a/libc/sysdeps/powerpc/fpu/fgetexcptflg.c
+++ b/libc/sysdeps/powerpc/fpu/fgetexcptflg.c
@@ -27,7 +27,7 @@ __fegetexceptflag (fexcept_t *flagp, int excepts)
u.fenv = fegetenv_register ();
/* Return (all of) it. */
- *flagp = u.l[1] & excepts & FE_ALL_EXCEPT;
+ *flagp = u.l & excepts & FE_ALL_EXCEPT;
/* Success. */
return 0;
diff --git a/libc/sysdeps/powerpc/fpu/fraiseexcpt.c b/libc/sysdeps/powerpc/fpu/fraiseexcpt.c
index 9118c1954..6193071bd 100644
--- a/libc/sysdeps/powerpc/fpu/fraiseexcpt.c
+++ b/libc/sysdeps/powerpc/fpu/fraiseexcpt.c
@@ -33,11 +33,11 @@ __feraiseexcept (int excepts)
u.fenv = fegetenv_register ();
/* Add the exceptions */
- u.l[1] = (u.l[1]
- | (excepts & FPSCR_STICKY_BITS)
- /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */
- | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
- & FE_INVALID_SOFTWARE));
+ u.l = (u.l
+ | (excepts & FPSCR_STICKY_BITS)
+ /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */
+ | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
+ & FE_INVALID_SOFTWARE));
/* Store the new status word (along with the rest of the environment),
triggering any appropriate exceptions. */
@@ -49,7 +49,7 @@ __feraiseexcept (int excepts)
don't have FE_INVALID_SOFTWARE implemented. Detect this
case and raise FE_INVALID_SNAN instead. */
u.fenv = fegetenv_register ();
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
set_fpscr_bit (FPSCR_VXSNAN);
}
diff --git a/libc/sysdeps/powerpc/fpu/fsetexcptflg.c b/libc/sysdeps/powerpc/fpu/fsetexcptflg.c
index c050d4022..0d309c8d5 100644
--- a/libc/sysdeps/powerpc/fpu/fsetexcptflg.c
+++ b/libc/sysdeps/powerpc/fpu/fsetexcptflg.c
@@ -31,10 +31,10 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
flag = *flagp & excepts;
/* Replace the exception status */
- u.l[1] = ((u.l[1] & ~(FPSCR_STICKY_BITS & excepts))
- | (flag & FPSCR_STICKY_BITS)
- | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
- & FE_INVALID_SOFTWARE));
+ u.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts))
+ | (flag & FPSCR_STICKY_BITS)
+ | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
+ & FE_INVALID_SOFTWARE));
/* Store the new status word (along with the rest of the environment).
This may cause floating-point exceptions if the restored state
diff --git a/libc/sysdeps/powerpc/fpu/ftestexcept.c b/libc/sysdeps/powerpc/fpu/ftestexcept.c
index 0dbc3befb..86eea0fb0 100644
--- a/libc/sysdeps/powerpc/fpu/ftestexcept.c
+++ b/libc/sysdeps/powerpc/fpu/ftestexcept.c
@@ -28,6 +28,6 @@ fetestexcept (int excepts)
/* The FE_INVALID bit is dealt with correctly by the hardware, so we can
just: */
- return u.l[1] & excepts;
+ return u.l & excepts;
}
libm_hidden_def (fetestexcept)
diff --git a/libc/sysdeps/powerpc/fpu/libm-test-ulps b/libc/sysdeps/powerpc/fpu/libm-test-ulps
index 6fdace9ee..37b2ca192 100644
--- a/libc/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/libc/sysdeps/powerpc/fpu/libm-test-ulps
@@ -5927,11 +5927,31 @@ double: 1
idouble: 1
# gamma
+Test "gamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "gamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-5)":
+double: 1
+idouble: 1
Test "gamma (0.7)":
double: 1
float: 1
idouble: 1
ifloat: 1
+Test "gamma (0x1p-10)":
+float: 1
+ifloat: 1
+Test "gamma (0x1p-30)":
+double: 1
+idouble: 1
Test "gamma (1.2)":
double: 1
float: 2
@@ -6131,9 +6151,9 @@ ildouble: 1
ldouble: 1
Test "jn (10, 10.0)":
double: 2
-float: 1
+float: 2
idouble: 2
-ifloat: 1
+ifloat: 2
ildouble: 4
ldouble: 4
Test "jn (10, 2.0)":
@@ -6146,6 +6166,14 @@ double: 2
float: 2
idouble: 2
ifloat: 2
+Test "jn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "jn (2, 0x1p127)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
Test "jn (2, 2.4048255576957729)":
double: 2
float: 1
@@ -6226,11 +6254,31 @@ ildouble: 7
ldouble: 7
# lgamma
+Test "lgamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "lgamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-5)":
+double: 1
+idouble: 1
Test "lgamma (0.7)":
double: 1
float: 1
idouble: 1
ifloat: 1
+Test "lgamma (0x1p-10)":
+float: 1
+ifloat: 1
+Test "lgamma (0x1p-30)":
+double: 1
+idouble: 1
Test "lgamma (1.2)":
double: 1
float: 2
@@ -6592,6 +6640,9 @@ float: 1
ifloat: 1
ildouble: 2
ldouble: 2
+Test "tan_towardzero (2)":
+ildouble: 1
+ldouble: 1
Test "tan_towardzero (3)":
float: 1
ifloat: 1
@@ -7334,6 +7385,19 @@ idouble: 3
ifloat: 1
ildouble: 1
ldouble: 1
+Test "yn (2, 0x1.ffff62p+99)":
+double: 1
+idouble: 1
+Test "yn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "yn (2, 0x1p127)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
Test "yn (3, 0.125)":
double: 1
idouble: 1
diff --git a/libc/sysdeps/powerpc/fpu/s_float_bitwise.h b/libc/sysdeps/powerpc/fpu/s_float_bitwise.h
index 8e4adca86..c0a4e56be 100644
--- a/libc/sysdeps/powerpc/fpu/s_float_bitwise.h
+++ b/libc/sysdeps/powerpc/fpu/s_float_bitwise.h
@@ -23,18 +23,19 @@
#include <math_private.h>
/* Returns (int)(num & 0x7FFFFFF0 == value) */
-static inline
-int __float_and_test28 (float num, float value)
+static inline int
+__float_and_test28 (float num, float value)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7ffffffe, 0x00000000, 0x00000000, 0x0000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7ffffff0 };
__asm__ (
- /* the 'f' constrain is use on mask because we just need
+ /* the 'f' constraint is used on mask because we just need
* to compare floats, not full vector */
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -46,16 +47,17 @@ int __float_and_test28 (float num, float value)
}
/* Returns (int)(num & 0x7FFFFF00 == value) */
-static inline
-int __float_and_test24 (float num, float value)
+static inline int
+__float_and_test24 (float num, float value)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7fffffe0, 0x00000000, 0x00000000, 0x0000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7fffff00 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -67,16 +69,17 @@ int __float_and_test24 (float num, float value)
}
/* Returns (float)(num & 0x7F800000) */
-static inline
-float __float_and8 (float num)
+static inline float
+__float_and8 (float num)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7ff00000, 0x00000000, 0x00000000, 0x00000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7f800000 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -88,17 +91,18 @@ float __float_and8 (float num)
}
/* Returns ((int32_t)(num & 0x7F800000) >> 23) */
-static inline
-int32_t __float_get_exp (float num)
+static inline int32_t
+__float_get_exp (float num)
{
int32_t inum;
#ifdef _ARCH_PWR7
float ret;
- vector int mask = (vector int) {
- 0x7ff00000, 0x00000000, 0x00000000, 0x00000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7f800000 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
GET_FLOAT_WORD(inum, ret);
#else
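
Editor's note: a minimal sketch, not part of this patch, of the mask technique the _ARCH_PWR7 paths now use: build the 32-bit mask as an integer, view it as a float through a union, and AND the raw bit images. The patched code keeps both operands in floating-point registers and uses the VSX xxland instruction; the sketch does the AND in plain integer code.

#include <stdint.h>

static inline float
float_and_sketch (float num, uint32_t maskbits)
{
  union { uint32_t i; float f; } in, out, mask;
  mask.i = maskbits;            /* e.g. 0x7f800000 keeps only the exponent */
  in.f = num;
  out.i = in.i & mask.i;        /* mask the raw single-precision image */
  return out.f;
}
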
diff --git a/libc/sysdeps/powerpc/fpu/s_llround.c b/libc/sysdeps/powerpc/fpu/s_llround.c
index 9a0182653..995d0a724 100644
--- a/libc/sysdeps/powerpc/fpu/s_llround.c
+++ b/libc/sysdeps/powerpc/fpu/s_llround.c
@@ -19,29 +19,28 @@
#include <math.h>
#include <math_ldbl_opt.h>
-/* I think that what this routine is supposed to do is round a value
- to the nearest integer, with values exactly on the boundary rounded
- away from zero. */
-/* This routine relies on (long long)x, when x is out of range of a long long,
- clipping to MAX_LLONG or MIN_LLONG. */
+/* Round to the nearest integer, with values exactly on a 0.5 boundary
+ rounded away from zero, regardless of the current rounding mode.
+ If (long long)x, when x is out of range of a long long, clips at
+ LLONG_MAX or LLONG_MIN, then this implementation also clips. */
long long int
__llround (double x)
{
- double xrf;
- long long int xr;
- xr = (long long int) x;
- xrf = (double) xr;
+ long long xr = (long long) x;
+ double xrf = (double) xr;
+
if (x >= 0.0)
- if (x - xrf >= 0.5 && x - xrf < 1.0 && x+1 > 0)
- return x+1;
- else
- return x;
+ {
+ if (x - xrf >= 0.5)
+ xr += (long long) ((unsigned long long) xr + 1) > 0;
+ }
else
- if (xrf - x >= 0.5 && xrf - x < 1.0 && x-1 < 0)
- return x-1;
- else
- return x;
+ {
+ if (xrf - x >= 0.5)
+ xr -= (long long) ((unsigned long long) xr - 1) < 0;
+ }
+ return xr;
}
weak_alias (__llround, llround)
#ifdef NO_LONG_DOUBLE
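
Editor's note: a usage sketch, not part of this patch, showing the behaviour the rewritten __llround documents: halfway cases are rounded away from zero regardless of the current rounding mode, unlike llrint, which honours the mode. The values in the comments assume default glibc behaviour.

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int main (void)
{
  fesetround (FE_DOWNWARD);                                /* ignored by llround */
  printf ("%lld %lld\n", llround (2.5), llround (-2.5));   /* 3 -3 */
  printf ("%lld %lld\n", llrint (2.5), llrint (-2.5));     /* 2 -3 */
  fesetround (FE_TONEAREST);
  return 0;
}
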
diff --git a/libc/sysdeps/powerpc/fpu/s_llroundf.c b/libc/sysdeps/powerpc/fpu/s_llroundf.c
index 07d12adbf..0935de662 100644
--- a/libc/sysdeps/powerpc/fpu/s_llroundf.c
+++ b/libc/sysdeps/powerpc/fpu/s_llroundf.c
@@ -18,28 +18,27 @@
#include <math.h>
-/* I think that what this routine is supposed to do is round a value
- to the nearest integer, with values exactly on the boundary rounded
- away from zero. */
-/* This routine relies on (long long)x, when x is out of range of a long long,
- clipping to MAX_LLONG or MIN_LLONG. */
+/* Round to the nearest integer, with values exactly on a 0.5 boundary
+ rounded away from zero, regardless of the current rounding mode.
+ If (long long)x, when x is out of range of a long long, clips at
+ LLONG_MAX or LLONG_MIN, then this implementation also clips. */
long long int
__llroundf (float x)
{
- float xrf;
- long long int xr;
- xr = (long long int) x;
- xrf = (float) xr;
+ long long xr = (long long) x;
+ float xrf = (float) xr;
+
if (x >= 0.0)
- if (x - xrf >= 0.5 && x - xrf < 1.0 && x+1 > 0)
- return x+1;
- else
- return x;
+ {
+ if (x - xrf >= 0.5)
+ xr += (long long) ((unsigned long long) xr + 1) > 0;
+ }
else
- if (xrf - x >= 0.5 && xrf - x < 1.0 && x-1 < 0)
- return x-1;
- else
- return x;
+ {
+ if (xrf - x >= 0.5)
+ xr -= (long long) ((unsigned long long) xr - 1) < 0;
+ }
+ return xr;
}
weak_alias (__llroundf, llroundf)
diff --git a/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c b/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c
index feffa6b4f..cc9b320bf 100644
--- a/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c
+++ b/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c
@@ -83,7 +83,7 @@ ElfW(Addr) query_auxv(int type)
return 0;
}
-typedef unsigned long long di_fpscr_t __attribute__ ((__mode__ (__DI__)));
+typedef unsigned int di_fpscr_t __attribute__ ((__mode__ (__DI__)));
typedef unsigned int si_fpscr_t __attribute__ ((__mode__ (__SI__)));
#define _FPSCR_RESERVED 0xfffffff8ffffff04ULL
@@ -95,50 +95,51 @@ typedef unsigned int si_fpscr_t __attribute__ ((__mode__ (__SI__)));
#define _FPSCR_TEST1_RN 0x0000000000000002ULL
/* Macros for accessing the hardware control word on Power6[x]. */
-# define _GET_DI_FPSCR(__fpscr) ({ \
- union { double d; \
- di_fpscr_t fpscr; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__fpscr)=tmp.fpscr; \
- tmp.fpscr; })
-
-/* We make sure to zero fp0 after we use it in order to prevent stale data
+#define _GET_DI_FPSCR(__fpscr) \
+ ({union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ __asm__ ("mffs %0" : "=f" (fr)); \
+ u.d = fr; \
+ (__fpscr) = u.fpscr; \
+ u.fpscr; \
+ })
+
+/* We make sure to zero fp after we use it in order to prevent stale data
in an fp register from making a test-case pass erroneously. */
-# define _SET_DI_FPSCR(__fpscr) { \
- union { double d; di_fpscr_t fpscr; } \
- tmp __attribute__ ((__aligned__(8))); \
- tmp.fpscr = __fpscr; \
- /* Set the entire 64-bit FPSCR. */ \
- __asm__ ("lfd%U0 0,%0; " \
- ".machine push; " \
- ".machine \"power6\"; " \
- "mtfsf 255,0,1,0; " \
- ".machine pop" : : "m" (tmp.d) : "fr0"); \
- tmp.d = 0; \
- __asm__("lfd%U0 0,%0" : : "m" (tmp.d) : "fr0"); \
-}
-
-# define _GET_SI_FPSCR(__fpscr) ({ \
- union { double d; \
- si_fpscr_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__fpscr)=tmp.cw[1]; \
- tmp.cw[0]; })
-
-/* We make sure to zero fp0 after we use it in order to prevent stale data
+# define _SET_DI_FPSCR(__fpscr) \
+ { union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ u.fpscr = __fpscr; \
+ fr = u.d; \
+ /* Set the entire 64-bit FPSCR. */ \
+ __asm__ (".machine push; " \
+ ".machine \"power6\"; " \
+ "mtfsf 255,%0,1,0; " \
+ ".machine pop" : : "f" (fr)); \
+ fr = 0.0; \
+ }
+
+# define _GET_SI_FPSCR(__fpscr) \
+ ({union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ __asm__ ("mffs %0" : "=f" (fr)); \
+ u.d = fr; \
+ (__fpscr) = (si_fpscr_t) u.fpscr; \
+ (si_fpscr_t) u.fpscr; \
+ })
+
+/* We make sure to zero fp after we use it in order to prevent stale data
in an fp register from making a test-case pass erroneously. */
-# define _SET_SI_FPSCR(__fpscr) { \
- union { double d; si_fpscr_t fpscr[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- /* More-or-less arbitrary; this is a QNaN. */ \
- tmp.fpscr[0] = 0xFFF80000; \
- tmp.fpscr[1] = __fpscr; \
- __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \
- tmp.d = 0; \
- __asm__("lfd%U0 0,%0" : : "m" (tmp.d) : "fr0"); \
-}
+# define _SET_SI_FPSCR(__fpscr) \
+ { union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ /* More-or-less arbitrary; this is a QNaN. */ \
+ u.fpscr = 0xfff80000ULL << 32; \
+ u.fpscr |= __fpscr & 0xffffffffULL; \
+ fr = u.d; \
+ __asm__ ("mtfsf 255,%0" : : "f" (fr)); \
+ fr = 0.0; \
+ }
void prime_special_regs(int which)
{
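
Editor's note: a sketch, not part of this patch, of the packing done by the new _SET_SI_FPSCR above: the high word of the 64-bit image is a QNaN pattern, so the double moved through the floating-point register is a valid value, and the low word carries the FPSCR bits handed to mtfsf.

#include <stdint.h>

static inline uint64_t
pack_fpscr_image (uint32_t fpscr)
{
  uint64_t image = 0xfff80000ULL << 32;   /* more-or-less arbitrary QNaN */
  image |= fpscr;                         /* status/control bits, low word */
  return image;
}
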
diff --git a/libc/sysdeps/powerpc/fpu_control.h b/libc/sysdeps/powerpc/fpu_control.h
index d03b8eb80..e82e7913c 100644
--- a/libc/sysdeps/powerpc/fpu_control.h
+++ b/libc/sysdeps/powerpc/fpu_control.h
@@ -28,7 +28,7 @@ typedef unsigned int fpu_control_t;
# define _FPU_SETCW(cw) (void) (cw)
extern fpu_control_t __fpu_control;
-#elif defined __NO_FPRS__ /* E500 */
+#elif defined __NO_FPRS__ /* e500 */
/* rounding control */
# define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */
@@ -37,33 +37,28 @@ extern fpu_control_t __fpu_control;
# define _FPU_RC_ZERO 0x01
/* masking of interrupts */
-#define _FPU_MASK_ZM 0x10 /* zero divide */
-#define _FPU_MASK_OM 0x40 /* overflow */
-#define _FPU_MASK_UM 0x80 /* underflow */
-#define _FPU_MASK_XM 0x40 /* inexact */
-#define _FPU_MASK_IM 0x20 /* invalid operation */
+# define _FPU_MASK_ZM 0x10 /* zero divide */
+# define _FPU_MASK_OM 0x04 /* overflow */
+# define _FPU_MASK_UM 0x08 /* underflow */
+# define _FPU_MASK_XM 0x40 /* inexact */
+# define _FPU_MASK_IM 0x20 /* invalid operation */
-#define _FPU_RESERVED 0xff3fff7f /* These bits are reserved are not changed. */
+# define _FPU_RESERVED 0x00c10080 /* These bits are reserved and not changed. */
-/* The fdlibm code requires no interrupts for exceptions. */
-#define _FPU_DEFAULT 0x00000000 /* Default value. */
-
-/* IEEE: same as above, but (some) exceptions;
- we leave the 'inexact' exception off.
- */
-#define _FPU_IEEE 0x000003c0
+/* Correct IEEE semantics require traps to be enabled at the hardware
+ level; the kernel then does the emulation and determines whether
+ generation of signals from those traps was enabled using prctl. */
+# define _FPU_DEFAULT 0x0000003c /* Default value. */
+# define _FPU_IEEE _FPU_DEFAULT
/* Type of the control word. */
typedef unsigned int fpu_control_t;
/* Macros for accessing the hardware control word. */
-#define _FPU_GETCW(__cw) ({ \
- unsigned int env; \
- asm volatile ("mfspefscr %0" : "=r" (env)); \
- (__cw) = env; })
-#define _FPU_SETCW(__cw) ({ \
- unsigned int env = __cw; \
- asm volatile ("mtspefscr %0" : : "r" (env)); })
+# define _FPU_GETCW(cw) \
+ __asm__ volatile ("mfspefscr %0" : "=r" (cw))
+# define _FPU_SETCW(cw) \
+ __asm__ volatile ("mtspefscr %0" : : "r" (cw))
/* Default control word set at startup. */
extern fpu_control_t __fpu_control;
@@ -96,22 +91,26 @@ extern fpu_control_t __fpu_control;
# define _FPU_IEEE 0x000000f0
/* Type of the control word. */
-typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__SI__)));
+typedef unsigned int fpu_control_t;
/* Macros for accessing the hardware control word. */
-# define _FPU_GETCW(__cw) ( { \
- union { double d; fpu_control_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__cw)=tmp.cw[1]; \
- tmp.cw[1]; } )
-# define _FPU_SETCW(__cw) { \
- union { double d; fpu_control_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- tmp.cw[0] = 0xFFF80000; /* More-or-less arbitrary; this is a QNaN. */ \
- tmp.cw[1] = __cw; \
- __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \
-}
+# define _FPU_GETCW(cw) \
+ ({union { double __d; unsigned long long __ll; } __u; \
+ register double __fr; \
+ __asm__ ("mffs %0" : "=f" (__fr)); \
+ __u.__d = __fr; \
+ (cw) = (fpu_control_t) __u.__ll; \
+ (fpu_control_t) __u.__ll; \
+ })
+
+# define _FPU_SETCW(cw) \
+ { union { double __d; unsigned long long __ll; } __u; \
+ register double __fr; \
+ __u.__ll = 0xfff80000LL << 32; /* This is a QNaN. */ \
+ __u.__ll |= (cw) & 0xffffffffLL; \
+ __fr = __u.__d; \
+ __asm__ ("mtfsf 255,%0" : : "f" (__fr)); \
+ }
/* Default control word set at startup. */
extern fpu_control_t __fpu_control;
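
Editor's note: a usage sketch, not part of this patch, for the <fpu_control.h> macros rewritten above: read the control word, keep only the reserved bits, merge in the default settings, and write it back. _FPU_GETCW, _FPU_SETCW, _FPU_RESERVED and _FPU_DEFAULT all come from the header; the function name is illustrative.

#include <fpu_control.h>

void
reset_fpu_control (void)
{
  fpu_control_t cw;
  _FPU_GETCW (cw);
  cw = (cw & _FPU_RESERVED) | _FPU_DEFAULT;
  _FPU_SETCW (cw);
}
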
diff --git a/libc/sysdeps/powerpc/jmpbuf-offsets.h b/libc/sysdeps/powerpc/jmpbuf-offsets.h
index 64c658a58..f2116bd70 100644
--- a/libc/sysdeps/powerpc/jmpbuf-offsets.h
+++ b/libc/sysdeps/powerpc/jmpbuf-offsets.h
@@ -21,12 +21,10 @@
#define JB_LR 2 /* The address we will return to */
#if __WORDSIZE == 64
# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */
-# define JB_CR 21 /* Condition code registers with the VRSAVE at */
- /* offset 172 (low half of the double word. */
+# define JB_CR 21 /* Shared dword with VRSAVE. CR word at offset 172. */
# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */
# define JB_SIZE (64 * 8) /* As per PPC64-VMX ABI. */
-# define JB_VRSAVE 21 /* VRSAVE shares a double word with the CR at offset */
- /* 168 (high half of the double word). */
+# define JB_VRSAVE 21 /* Shared dword with CR. VRSAVE word at offset 168. */
# define JB_VRS 40 /* VRs 20 through 31 are saved, 12*4 words total. */
#else
# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */
diff --git a/libc/sysdeps/powerpc/longjmp.c b/libc/sysdeps/powerpc/longjmp.c
index 198c89420..189fc03ab 100644
--- a/libc/sysdeps/powerpc/longjmp.c
+++ b/libc/sysdeps/powerpc/longjmp.c
@@ -55,6 +55,6 @@ weak_alias (__vmx__libc_siglongjmp, __vmxsiglongjmp)
default_symbol_version (__vmx__libc_longjmp, __libc_longjmp, GLIBC_PRIVATE);
default_symbol_version (__vmx__libc_siglongjmp, __libc_siglongjmp, GLIBC_PRIVATE);
-default_symbol_version (__vmx_longjmp, _longjmp, GLIBC_2.3.4);
-default_symbol_version (__vmxlongjmp, longjmp, GLIBC_2.3.4);
-default_symbol_version (__vmxsiglongjmp, siglongjmp, GLIBC_2.3.4);
+versioned_symbol (libc, __vmx_longjmp, _longjmp, GLIBC_2_3_4);
+versioned_symbol (libc, __vmxlongjmp, longjmp, GLIBC_2_3_4);
+versioned_symbol (libc, __vmxsiglongjmp, siglongjmp, GLIBC_2_3_4);
diff --git a/libc/sysdeps/powerpc/nofpu/Makefile b/libc/sysdeps/powerpc/nofpu/Makefile
new file mode 100644
index 000000000..b9cbf8023
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/Makefile
@@ -0,0 +1,30 @@
+# Makefile fragment for PowerPC with no FPU.
+
+ifeq ($(subdir),soft-fp)
+sysdep_routines += $(gcc-single-routines) $(gcc-double-routines) \
+ sim-full
+endif
+
+ifeq ($(subdir),math)
+libm-support += fenv_const
+CPPFLAGS += -I../soft-fp/
+# The following CFLAGS are a workaround for GCC Bugzilla Bug 29253
+# "expand_abs wrong default code for floating point".
+# As this is not a regression, a fix is not likely to go into
+# gcc-4.1.1 and may be too late for gcc-4.2.  So we need these flags
+# until the fix is in a gcc release and glibc drops support for
+# earlier versions of gcc.
+CFLAGS-e_hypotl.c += -fno-builtin-fabsl
+CFLAGS-e_powl.c += -fno-builtin-fabsl
+CFLAGS-s_ccoshl.c += -fno-builtin-fabsl
+CFLAGS-s_csinhl.c += -fno-builtin-fabsl
+CFLAGS-s_clogl.c += -fno-builtin-fabsl
+CFLAGS-s_clog10l.c += -fno-builtin-fabsl
+CFLAGS-s_csinl.c += -fno-builtin-fabsl
+CFLAGS-s_csqrtl.c += -fno-builtin-fabsl
+CFLAGS-w_acosl.c += -fno-builtin-fabsl
+CFLAGS-w_asinl.c += -fno-builtin-fabsl
+CFLAGS-w_atanhl.c += -fno-builtin-fabsl
+CFLAGS-w_j0l.c += -fno-builtin-fabsl
+CFLAGS-w_j1l.c += -fno-builtin-fabsl
+endif
diff --git a/libc/sysdeps/powerpc/nofpu/Subdirs b/libc/sysdeps/powerpc/nofpu/Subdirs
new file mode 100644
index 000000000..87eadf302
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/Subdirs
@@ -0,0 +1 @@
+soft-fp
diff --git a/libc/sysdeps/powerpc/nofpu/Versions b/libc/sysdeps/powerpc/nofpu/Versions
new file mode 100644
index 000000000..1a29319d5
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/Versions
@@ -0,0 +1,20 @@
+libc {
+ GLIBC_2.3.2 {
+ __sim_exceptions; __sim_disabled_exceptions; __sim_round_mode;
+ __adddf3; __addsf3; __divdf3; __divsf3; __eqdf2; __eqsf2;
+ __extendsfdf2; __fixdfdi; __fixdfsi; __fixsfdi; __fixsfsi;
+ __fixunsdfdi; __fixunsdfsi; __fixunssfdi; __fixunssfsi;
+ __floatdidf; __floatdisf; __floatsidf; __floatsisf;
+ __gedf2; __gesf2; __ledf2; __lesf2; __muldf3; __mulsf3;
+ __negdf2; __negsf2; __sqrtdf2; __sqrtsf2; __subdf3;
+ __subsf3; __truncdfsf2;
+ }
+ GLIBC_2.4 {
+ __floatundidf; __floatundisf;
+ __floatunsidf; __floatunsisf;
+ __unorddf2; __unordsf2;
+ __nedf2; __nesf2;
+ __gtdf2; __gtsf2;
+ __ltdf2; __ltsf2;
+ }
+}
diff --git a/libc/sysdeps/powerpc/nofpu/fclrexcpt.c b/libc/sysdeps/powerpc/nofpu/fclrexcpt.c
new file mode 100644
index 000000000..fabda0ab9
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fclrexcpt.c
@@ -0,0 +1,37 @@
+/* Clear floating-point exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__feclearexcept (int x)
+{
+ __sim_exceptions &= ~x;
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feclearexcept, __old_feclearexcept)
+compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feclearexcept, feclearexcept)
+versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/fedisblxcpt.c b/libc/sysdeps/powerpc/nofpu/fedisblxcpt.c
new file mode 100644
index 000000000..e06c8f767
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fedisblxcpt.c
@@ -0,0 +1,32 @@
+/* Disable exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+#include <fenv.h>
+
+int
+fedisableexcept (int x)
+{
+ int old_exceptions = ~__sim_disabled_exceptions & FE_ALL_EXCEPT;
+
+ __sim_disabled_exceptions |= x;
+
+ return old_exceptions;
+}
diff --git a/libc/sysdeps/powerpc/nofpu/feenablxcpt.c b/libc/sysdeps/powerpc/nofpu/feenablxcpt.c
new file mode 100644
index 000000000..93249abf6
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/feenablxcpt.c
@@ -0,0 +1,32 @@
+/* Enable exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+
+extern int __sim_disabled_exceptions;
+
+int
+feenableexcept (int exceptions)
+{
+ int old_exceptions = ~__sim_disabled_exceptions & FE_ALL_EXCEPT;
+
+ __sim_disabled_exceptions &= ~exceptions;
+
+ return old_exceptions;
+}
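
Editor's note: a usage sketch, not part of this patch. fedisableexcept and feenableexcept are GNU extensions declared in <fenv.h>; both implementations above return the set of exceptions that were enabled before the call, so a caller can disable trapping around a region and then restore the previous state.

#define _GNU_SOURCE
#include <fenv.h>

void
run_with_invalid_trap_disabled (void (*fn) (void))
{
  int previously_enabled = fedisableexcept (FE_INVALID);
  fn ();
  if (previously_enabled & FE_INVALID)
    feenableexcept (FE_INVALID);    /* re-enable only if it was on before */
}
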
diff --git a/libc/sysdeps/powerpc/nofpu/fegetenv.c b/libc/sysdeps/powerpc/nofpu/fegetenv.c
new file mode 100644
index 000000000..51bcef30a
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fegetenv.c
@@ -0,0 +1,48 @@
+/* Store current floating-point environment (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002, 2010.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+extern int __sim_exceptions;
+extern int __sim_disabled_exceptions;
+extern int __sim_round_mode;
+
+int
+__fegetenv (fenv_t *envp)
+{
+ fenv_union_t u;
+
+ u.l[0] = __sim_exceptions;
+ u.l[0] |= __sim_round_mode;
+ u.l[1] = __sim_disabled_exceptions;
+
+ *envp = u.fenv;
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetenv, __old_fegetenv)
+compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__fegetenv, fegetenv)
+versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
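
Editor's note: a standalone sketch, not part of this patch, of the soft-float environment layout __fegetenv packs above: exception flags and the two-bit rounding mode share the first word, and the disabled-exception mask occupies the second. The struct and names here are illustrative only.

#include <fenv.h>

struct sim_env_sketch
{
  unsigned int flags_and_round;   /* FE_* flags | rounding mode (0..3) */
  unsigned int disabled;          /* exceptions that do not raise SIGFPE */
};

static struct sim_env_sketch
pack_sim_env (int exceptions, int round_mode, int disabled)
{
  struct sim_env_sketch e;
  e.flags_and_round = (exceptions & FE_ALL_EXCEPT) | (round_mode & 0x3);
  e.disabled = disabled;
  return e;
}
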
diff --git a/libc/sysdeps/powerpc/nofpu/fegetexcept.c b/libc/sysdeps/powerpc/nofpu/fegetexcept.c
new file mode 100644
index 000000000..ea39a82b7
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fegetexcept.c
@@ -0,0 +1,27 @@
+/* Get floating-point exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+fegetexcept (void)
+{
+ return (__sim_disabled_exceptions ^ FE_ALL_EXCEPT) & FE_ALL_EXCEPT;
+}
diff --git a/libc/sysdeps/powerpc/nofpu/fegetround.c b/libc/sysdeps/powerpc/nofpu/fegetround.c
new file mode 100644
index 000000000..c232ae379
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fegetround.c
@@ -0,0 +1,28 @@
+/* Return current rounding mode (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+#undef fegetround
+int
+fegetround (void)
+{
+ return __sim_round_mode;
+}
diff --git a/libc/sysdeps/powerpc/nofpu/feholdexcpt.c b/libc/sysdeps/powerpc/nofpu/feholdexcpt.c
new file mode 100644
index 000000000..ba6a53acc
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/feholdexcpt.c
@@ -0,0 +1,43 @@
+/* Store current floating-point environment and clear exceptions
+ (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+feholdexcept (fenv_t *envp)
+{
+ fenv_union_t u;
+
+ /* Get the current state. */
+ __fegetenv (envp);
+
+ u.fenv = *envp;
+ /* Clear everything except the rounding mode. */
+ u.l[0] &= 0x3;
+ /* Disable exceptions */
+ u.l[1] = FE_ALL_EXCEPT;
+
+ /* Put the new state in effect. */
+ fesetenv (&u.fenv);
+
+ return 0;
+}
+libm_hidden_def (feholdexcept)
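
Editor's note: a usage sketch, not part of this patch, of the feholdexcept/feupdateenv pairing these soft-float files implement: run a computation in non-stop mode, then merge any exceptions it accrued back into the caller's environment.

#include <fenv.h>

double
guarded_divide (double a, double b)
{
  fenv_t env;
  feholdexcept (&env);     /* save env, clear flags, disable trapping */
  double r = a / b;        /* may accrue FE_DIVBYZERO without a SIGFPE */
  feupdateenv (&env);      /* restore env, re-raise accrued exceptions */
  return r;
}
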
diff --git a/libc/sysdeps/powerpc/nofpu/fenv_const.c b/libc/sysdeps/powerpc/nofpu/fenv_const.c
new file mode 100644
index 000000000..291b1accc
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fenv_const.c
@@ -0,0 +1,34 @@
+/* Constants for fenv_bits.h (soft float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* We want to specify the bit pattern of the __fe_*_env constants, so
+ pretend they're really `long long' instead of `double'. */
+
+/* If the default argument is used we use this value. Disable all
+ signalling exceptions as default. */
+const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) =
+0x000000003e000000ULL;
+
+/* Floating-point environment where none of the exceptions are masked. */
+const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) =
+0xfff80000000000f8ULL;
+
+/* Floating-point environment with the NI bit set. */
+const unsigned long long __fe_nonieee_env __attribute__ ((aligned (8))) =
+0xfff8000000000004ULL;
diff --git a/libc/sysdeps/powerpc/nofpu/fenv_libc.h b/libc/sysdeps/powerpc/nofpu/fenv_libc.h
new file mode 100644
index 000000000..14a2d04a2
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fenv_libc.h
@@ -0,0 +1,28 @@
+/* Internal libc stuff for floating point environment routines.
+ Copyright (C) 2007-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _FENV_LIBC_H
+#define _FENV_LIBC_H 1
+
+/* fenv_libc.h is used in the libm implementations for ldbl-128ibm, so this
+   soft-fp version needs to at least include fenv.h to get the fegetround
+   definition. */
+
+#include <fenv.h>
+
+#endif /* fenv_libc.h */
diff --git a/libc/sysdeps/powerpc/nofpu/fesetenv.c b/libc/sysdeps/powerpc/nofpu/fesetenv.c
new file mode 100644
index 000000000..3f35909b6
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fesetenv.c
@@ -0,0 +1,42 @@
+/* Set floating point environment (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__fesetenv (const fenv_t *envp)
+{
+ fenv_union_t u;
+
+ u.fenv = *envp;
+ __sim_exceptions = u.l[0] & FE_ALL_EXCEPT;
+ __sim_round_mode = u.l[0] & 0x3;
+ __sim_disabled_exceptions = u.l[1];
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetenv, __old_fesetenv)
+compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__fesetenv, fesetenv)
+versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/fesetround.c b/libc/sysdeps/powerpc/nofpu/fesetround.c
new file mode 100644
index 000000000..028c1300c
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fesetround.c
@@ -0,0 +1,33 @@
+/* Set rounding mode (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+fesetround (int round)
+{
+ if ((unsigned int) round > FE_DOWNWARD)
+ return 1;
+
+ __sim_round_mode = round;
+
+ return 0;
+}
+libm_hidden_def (fesetround)
diff --git a/libc/sysdeps/powerpc/nofpu/feupdateenv.c b/libc/sysdeps/powerpc/nofpu/feupdateenv.c
new file mode 100644
index 000000000..163f67310
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/feupdateenv.c
@@ -0,0 +1,51 @@
+/* Install given floating-point environment and raise exceptions
+ (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+#include <signal.h>
+
+int
+__feupdateenv (const fenv_t *envp)
+{
+ int saved_exceptions;
+
+ /* Save currently set exceptions. */
+ saved_exceptions = __sim_exceptions;
+
+ /* Set environment. */
+ fesetenv (envp);
+
+ /* Raise old exceptions. */
+ __sim_exceptions |= saved_exceptions;
+ if (saved_exceptions & ~__sim_disabled_exceptions)
+ raise (SIGFPE);
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feupdateenv, __old_feupdateenv)
+compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feupdateenv, feupdateenv)
+versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/fgetexcptflg.c b/libc/sysdeps/powerpc/nofpu/fgetexcptflg.c
new file mode 100644
index 000000000..2373fa400
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fgetexcptflg.c
@@ -0,0 +1,37 @@
+/* Store current representation for exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__fegetexceptflag (fexcept_t *flagp, int excepts)
+{
+ *flagp = (fexcept_t) __sim_exceptions & excepts & FE_ALL_EXCEPT;
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetexceptflag, __old_fegetexceptflag)
+compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/fraiseexcpt.c b/libc/sysdeps/powerpc/nofpu/fraiseexcpt.c
new file mode 100644
index 000000000..cd142b60b
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fraiseexcpt.c
@@ -0,0 +1,41 @@
+/* Raise given exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+#include <signal.h>
+
+#undef feraiseexcept
+int
+__feraiseexcept (int x)
+{
+ __sim_exceptions |= x;
+ if (x & ~__sim_disabled_exceptions)
+ raise (SIGFPE);
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feraiseexcept, __old_feraiseexcept)
+compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feraiseexcept, feraiseexcept)
+versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2);
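
Editor's note: a usage sketch, not part of this patch. With the soft-float __feraiseexcept above, raising a flag also delivers SIGFPE unless that exception is in __sim_disabled_exceptions; testing the flag afterwards goes through fetestexcept as usual.

#include <fenv.h>

int
flag_overflow_if (int condition)
{
  if (condition)
    feraiseexcept (FE_OVERFLOW);
  return fetestexcept (FE_OVERFLOW) != 0;
}
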
diff --git a/libc/sysdeps/powerpc/nofpu/fsetexcptflg.c b/libc/sysdeps/powerpc/nofpu/fsetexcptflg.c
new file mode 100644
index 000000000..3dc368fdd
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fsetexcptflg.c
@@ -0,0 +1,38 @@
+/* Set floating-point environment exception handling (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__fesetexceptflag (const fexcept_t *flagp, int excepts)
+{
+ /* Ignore exceptions not listed in 'excepts'. */
+ __sim_exceptions = (__sim_exceptions & ~excepts) | (*flagp & excepts);
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetexceptflag, __old_fesetexceptflag)
+compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/ftestexcept.c b/libc/sysdeps/powerpc/nofpu/ftestexcept.c
new file mode 100644
index 000000000..f5d01e881
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/ftestexcept.c
@@ -0,0 +1,28 @@
+/* Test floating-point exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+fetestexcept (int x)
+{
+ return __sim_exceptions & x;
+}
+libm_hidden_def (fetestexcept)
diff --git a/libc/sysdeps/powerpc/nofpu/get-rounding-mode.h b/libc/sysdeps/powerpc/nofpu/get-rounding-mode.h
new file mode 100644
index 000000000..20eb81030
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/get-rounding-mode.h
@@ -0,0 +1,35 @@
+/* Determine floating-point rounding mode within libc. PowerPC
+ soft-float version.
+ Copyright (C) 2012-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _POWERPC_NOFPU_GET_ROUNDING_MODE_H
+#define _POWERPC_NOFPU_GET_ROUNDING_MODE_H 1
+
+#include <fenv.h>
+
+#include "soft-supp.h"
+
+/* Return the floating-point rounding mode. */
+
+static inline int
+get_rounding_mode (void)
+{
+ return __sim_round_mode;
+}
+
+#endif /* get-rounding-mode.h */
diff --git a/libc/sysdeps/powerpc/nofpu/libm-test-ulps b/libc/sysdeps/powerpc/nofpu/libm-test-ulps
new file mode 100644
index 000000000..ad5a9cd42
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/libm-test-ulps
@@ -0,0 +1,7297 @@
+# Begin of automatic generation
+
+# acos
+Test "acos (-0x0.ffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "acos (-0x0.ffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "acos (2e-17)":
+ildouble: 1
+ldouble: 1
+
+# acos_downward
+Test "acos_downward (-0)":
+float: 1
+ifloat: 1
+Test "acos_downward (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "acos_downward (-1)":
+float: 1
+ifloat: 1
+Test "acos_downward (0)":
+float: 1
+ifloat: 1
+Test "acos_downward (0.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# acos_towardzero
+Test "acos_towardzero (-0)":
+float: 1
+ifloat: 1
+Test "acos_towardzero (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "acos_towardzero (-1)":
+float: 1
+ifloat: 1
+Test "acos_towardzero (0)":
+float: 1
+ifloat: 1
+Test "acos_towardzero (0.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# acos_upward
+Test "acos_upward (-0)":
+ildouble: 2
+ldouble: 2
+Test "acos_upward (-1)":
+ildouble: 2
+ldouble: 2
+Test "acos_upward (0)":
+ildouble: 2
+ldouble: 2
+
+# asin
+Test "asin (-0x0.ffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "asin (-0x0.ffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "asin (0.75)":
+ildouble: 2
+ldouble: 2
+Test "asin (0x0.ffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "asin (0x0.ffffffp0)":
+ildouble: 1
+ldouble: 1
+
+# asin_downward
+Test "asin_downward (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_downward (-1.0)":
+ildouble: 1
+ldouble: 1
+Test "asin_downward (0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_downward (1.0)":
+float: 1
+ifloat: 1
+
+# asin_towardzero
+Test "asin_towardzero (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_towardzero (-1.0)":
+float: 1
+ifloat: 1
+Test "asin_towardzero (0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_towardzero (1.0)":
+float: 1
+ifloat: 1
+
+# asin_upward
+Test "asin_upward (-1.0)":
+float: 1
+ifloat: 1
+Test "asin_upward (1.0)":
+ildouble: 1
+ldouble: 1
+
+# atan2
+Test "atan2 (-0.00756827042671106339, -.001792735857538728036)":
+ildouble: 1
+ldouble: 1
+Test "atan2 (-0.75, -1.0)":
+float: 1
+ifloat: 1
+Test "atan2 (-inf, -inf)":
+ildouble: 1
+ldouble: 1
+Test "atan2 (-max_value, -min_value)":
+float: 1
+ifloat: 1
+Test "atan2 (0.75, -1.0)":
+float: 1
+ifloat: 1
+Test "atan2 (1.390625, 0.9296875)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "atan2 (inf, -inf)":
+ildouble: 1
+ldouble: 1
+
+# atanh
+Test "atanh (0.75)":
+float: 1
+ifloat: 1
+
+# cabs
+Test "cabs (0.75 + 1.25 i)":
+ildouble: 1
+ldouble: 1
+
+# cacos
+Test "Imaginary part of: cacos (+0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (+0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.25 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.25 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + +0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (-0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (-0x1.0000000000000002p0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.0000000000000002p0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (-0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacos (-0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-100 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-100 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 1.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 1.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-30 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-30 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-105 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-105 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-23 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-23 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-1.0 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 + 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 + 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-1.0 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 - 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 - 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 + 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (0x0.ffffffp0 - 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (0x1.0000000000001p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1.0000000000001p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1.000002p0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacos (0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (0x1.000002p0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacos (0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x1.fp-100 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (0x1.fp-100 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (0x1p-52 + 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (0x1p-52 - 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (1.0 + 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (1.0 + 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (1.0 - 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (1.0 - 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+
+# cacosh
+Test "Real part of: cacosh (+0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (+0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.25 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (-0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.25 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0.5 + +0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0.5 - 0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (-0x1.0000000000000002p0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1.0000000000000002p0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (-0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (-0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-100 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-100 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 1.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 1.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-30 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-30 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-105 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-105 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-105 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-105 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-52 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-52 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-1.0 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 + 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 + 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-1.0 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 - 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 - 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 + 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 - 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1.0000000000001p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1.0000000000001p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (0x1.000002p0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacosh (0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (0x1.000002p0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacosh (0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1.fp-100 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1.fp-100 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-52 + 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-52 - 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 + 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 + 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 - 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 - 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+
+# carg
+Test "carg (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "carg (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+
+# casin
+Test "Imaginary part of: casin (+0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (+0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: casin (-0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casin (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (-0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casin (-0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-106 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-106 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-23 + 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (-0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x1p-23 + 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-23 - 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (-0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x1p-23 - 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-63 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-63 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-1.0 + 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-1.0 - 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-2 - 3 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: casin (0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casin (0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casin (0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-106 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-106 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-23 + 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x1p-23 + 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-23 - 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x1p-23 - 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-63 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-63 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (1.0 + 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (1.0 - 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# casinh
+Test "Imaginary part of: casinh (-0.0 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.0 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.0 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.0 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.25 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0.25 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0.5 + +0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 - 0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x0.ffffffp0 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x0.ffffffp0 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000000000000000000000000008p0 + 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000000000000000000000000008p0 - 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000002p0 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000002p0 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-1025 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-1025 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.fp-129 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-129 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.fp-129 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-129 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-105 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-105 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-112 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-112 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-23 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-23 + 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (-0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-23 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x1p-23 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-23 - 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (-0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-23 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-52 + 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-52 - 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-63 + 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-63 - 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 + +0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 + 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 - 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0.0 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.0 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.0 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.0 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.25 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0.25 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0.5 + +0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 - 0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x0.ffffffp0 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x0.ffffffp0 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000000000000000000000000008p0 + 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000000000000000000000000008p0 - 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000002p0 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000002p0 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-1025 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-1025 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp-129 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x1.fp-129 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp-129 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x1.fp-129 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-105 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-105 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-112 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-112 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-23 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-23 + 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-23 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x1p-23 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-23 - 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-23 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-52 + 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-52 - 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-63 + 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-63 - 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 + +0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 + 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 - 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+
+# catan
+Test "Imaginary part of: catan (-0x0.fffffffffffff8p0 + 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (-0x0.fffffffffffff8p0 - 0x1p-27 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x0.ffffffp0 + 0x1p-13 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000000000000000000000000008p0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (-0x1.000000000000000000000000008p0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.0000000000001p0 - 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (-0x1.000002p0 + 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000002p0 - 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1.000002p0 - 0x1p-13 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (-0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1p-1020 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (-0x1p-1020 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (-0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-54 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-54 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-57 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-57 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-1.0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (-1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-1.0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (-2 - 3 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-2 - 3 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x0.fffffffffffff8p0 + 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (0x0.fffffffffffff8p0 - 0x1p-27 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x0.ffffffp0 + 0x1p-13 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000000000000000000000000008p0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (0x1.000000000000000000000000008p0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.0000000000001p0 - 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (0x1.000002p0 + 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000002p0 - 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1.000002p0 - 0x1p-13 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1p-1020 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (0x1p-1020 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-54 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-54 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-57 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-57 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (1.0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (1.0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+
+# catanh
+Test "Real part of: catanh (-0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-126 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1p-126 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1p-13 + 0x1.000002p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1p-13 - 0x1.000002p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1p-27 + 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1p-27 + 0x1.0000000000001p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catanh (-0x1p-27 - 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1p-27 - 0x1.0000000000001p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-1.0 + 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 + 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 + 0x1p-57 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 - 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 - 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 - 0x1p-57 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-2 - 3 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-126 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-126 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1p-13 + 0x0.ffffffp0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1p-13 - 0x0.ffffffp0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1p-27 + 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catanh (0x1p-27 - 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (0x1p-54 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-54 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-54 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-54 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (1.0 + 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 + 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 + 0x1p-57 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 - 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 - 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 - 0x1p-57 i)":
+float: 1
+ifloat: 1
+
+# cbrt
+Test "cbrt (-27.0)":
+double: 1
+idouble: 1
+Test "cbrt (0.75)":
+double: 1
+idouble: 1
+Test "cbrt (0.9921875)":
+double: 1
+idouble: 1
+
+# ccos
+Test "Imaginary part of: ccos (-0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (-0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (-0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (-0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Real part of: ccos (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: ccos (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: ccos (0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (0x1p-1074 + 1440 i)":
+double: 1
+idouble: 1
+
+# ccosh
+Test "Real part of: ccosh (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: ccosh (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: ccosh (-710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (-710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (-89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccosh (-89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: ccosh (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ccosh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccosh (1440 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccosh (89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# cexp
+Test "Imaginary part of: cexp (-2.0 - 3.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cexp (-95 + 0.75 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cexp (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cexp (0.75 + 1.25 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cexp (1440 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Real part of: cexp (50 + 0x1p127 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cexp (50 + 0x1p127 i)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cexp (500 + 0x1p1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cexp (500 + 0x1p1023 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cexp (709.8125 + 0.75 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cexp (709.8125 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Real part of: cexp (88.75 + 0.75 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cexp (88.75 + 0.75 i)":
+float: 2
+ifloat: 2
+
+# clog
+Test "Real part of: clog (-0x1.0000000123456p0 + 0x1.2345678p-1000 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (-0x1.0000000123456p0 + 0x1.2345678p-30 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog (-0x1.234566p-40 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1.fp+127 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1.fp+127 - 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1p-149 + 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (-0x1p-149 + 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1p-149 - 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (-0x1p-149 - 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (-2 - 3 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0.75 + 1.25 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (0x0.ffffffp0 + 0x0.ffffffp-100 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1.000566p0 + 0x1.234p-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1.fp+127 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1.fp+127 - 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (0x11682p-23 + 0x7ffed1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0x13836d58a13448d750b4b9p-85 + 0x195ca7bc3ab4f9161edbe6p-85 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0x155f8afc4c48685bf63610p-85 + 0x17d0cf2652cdbeb1294e19p-85 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: clog (0x15cfbd1990d1ffp-53 + 0x176a3973e09a9ap-53 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (0x1p-1074 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog (0x1p-147 + 0x1p-147 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1p-149 + 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1p-149 - 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (0x2818p-15 + 0x798fp-15 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (0x4d9c37e2b5cb4533p-63 + 0x65c98be2385a042ep-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0x6241ef0da53f539f02fad67dabp-106 + 0x3fb46641182f7efd9caa769dac0p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0xa1f2c1p-24 + 0xc643aep-24 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0xa4722f19346cp-51 + 0x7f9631c5e7f07p-51 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0xf2p-10 + 0x3e3p-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (1.0 + 0x1.234566p-10 i)":
+float: 1
+ifloat: 1
+
+# clog10
+Test "Imaginary part of: clog10 (-0 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-0 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-1000 i)":
+double: 2
+idouble: 2
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-30 i)":
+double: 2
+idouble: 2
+Test "Imaginary part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-30 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1.fp+1023 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1.fp+1023 - 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1.fp+127 + 0x1p-149 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-0x1.fp+127 - 0x1p-149 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-0x1p-1074 + 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1p-1074 - 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1p-149 + 0x1.fp+127 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1p-149 - 0x1.fp+127 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-1.0 + 0x1.234566p-20 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-2 - 3 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (-3 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-3 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf + 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf + 1 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf + inf i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-inf - 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf - 1 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (0x0.fffffffffffff8p0 + 0x0.fffffffffffff8p-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x0.ffffffp0 + 0x0.ffffffp-100 i)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "Real part of: clog10 (0x1.000566p0 + 0x1.234p-10 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.000566p0 + 0x1.234p-10 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0x1.000566p0 + 0x1.234p-100 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.234566p-30 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.234566p-50 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.234566p-60 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x1.fffffep+127 + 0x1.fffffep+127 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (0x1.fffffep+127 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.fffffffffffffp+1023 + 0x1.fffffffffffffp+1023 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x10673dd0f2481p-51 + 0x7ef1d17cefbd2p-51 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x1367a310575591p-54 + 0x3cfcc0a0541f60p-54 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1367a310575591p-54 + 0x3cfcc0a0541f60p-54 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x13836d58a13448d750b4b9p-85 + 0x195ca7bc3ab4f9161edbe6p-85 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x155f8afc4c48685bf63610p-85 + 0x17d0cf2652cdbeb1294e19p-85 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: clog10 (0x15d8ab6ed05ca514086ac3a1e84p-105 + 0x1761e480aa094c0b10b34b09ce9p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x164c74eea876p-45 + 0x16f393482f77p-45 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1a6p-10 + 0x3a5p-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1df515eb171a808b9e400266p-95 + 0x7c71eb0cd4688dfe98581c77p-95 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x1p-1073 + 0x1p-1073 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-1074 + 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x1p-1074 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-1074 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-1074 - 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-147 + 0x1p-147 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-149 + 0x1.fp+127 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-149 + 0x1p-149 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-149 - 0x1.fp+127 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-509 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-510 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-511 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-61 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x1p-62 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-63 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (0x2818p-15 + 0x798fp-15 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x2818p-15 + 0x798fp-15 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x298c62cb546588a7p-63 + 0x7911b1dfcc4ecdaep-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0x2dd46725bp-35 + 0x7783a1284p-35 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x2ede88p-23 + 0x771c3fp-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x2ede88p-23 + 0x771c3fp-23 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0x4447d7175p-35 + 0x6c445e00ap-35 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x4d4ep-15 + 0x6605p-15 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x5b06b680ea2ccp-52 + 0xef452b965da9fp-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x81b7efa81fc35ad1p-65 + 0x1ef4b835f1c79d812p-65 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x9b57bp-20 + 0xcb7b4p-20 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0xe33f66c9542ca25cc43c867p-95 + 0x7f35a68ebd3704a43c465864p-95 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0xf2p-10 + 0x3e3p-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0xf2p-10 + 0x3e3p-10 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0xfe961079616p-45 + 0x1bc37e09e6d1p-45 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (1.0 + 0x1.234566p-10 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (3 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (3 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (inf + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (inf - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+
+# cos
+Test "cos (0x1p+120)":
+float: 1
+ifloat: 1
+Test "cos (0x1p+127)":
+float: 1
+ifloat: 1
+Test "cos (M_PI_6l * 2.0)":
+double: 1
+idouble: 1
+Test "cos (M_PI_6l * 4.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# cos_downward
+Test "cos_downward (1)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cos_downward (10)":
+ildouble: 1
+ldouble: 1
+Test "cos_downward (2)":
+float: 1
+ifloat: 1
+Test "cos_downward (3)":
+float: 1
+ifloat: 1
+Test "cos_downward (4)":
+float: 1
+ifloat: 1
+Test "cos_downward (5)":
+float: 1
+ifloat: 1
+Test "cos_downward (6)":
+ildouble: 1
+ldouble: 1
+Test "cos_downward (7)":
+float: 1
+ifloat: 1
+Test "cos_downward (8)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "cos_downward (9)":
+ildouble: 1
+ldouble: 1
+
+# cos_tonearest
+Test "cos_tonearest (7)":
+float: 1
+ifloat: 1
+
+# cos_towardzero
+Test "cos_towardzero (1)":
+ildouble: 1
+ldouble: 1
+Test "cos_towardzero (10)":
+ildouble: 1
+ldouble: 1
+Test "cos_towardzero (2)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (3)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (5)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (7)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (8)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# cos_upward
+Test "cos_upward (10)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cos_upward (4)":
+ildouble: 1
+ldouble: 1
+Test "cos_upward (5)":
+ildouble: 1
+ldouble: 1
+Test "cos_upward (6)":
+float: 1
+ifloat: 1
+Test "cos_upward (7)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cos_upward (9)":
+float: 2
+ifloat: 2
+
+# cosh_downward
+Test "cosh_downward (22)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_downward (23)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_downward (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# cosh_tonearest
+Test "cosh_tonearest (24)":
+ildouble: 1
+ldouble: 1
+
+# cosh_towardzero
+Test "cosh_towardzero (22)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_towardzero (23)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_towardzero (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# cosh_upward
+Test "cosh_upward (22)":
+ildouble: 2
+ldouble: 2
+Test "cosh_upward (23)":
+ildouble: 2
+ldouble: 2
+Test "cosh_upward (24)":
+ildouble: 2
+ldouble: 2
+
+# cpow
+Test "Real part of: cpow (0.75 + 1.25 i, 0.0 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cpow (0.75 + 1.25 i, 0.75 + 1.25 i)":
+double: 1
+float: 4
+idouble: 1
+ifloat: 4
+ildouble: 2
+ldouble: 2
+Test "Real part of: cpow (0.75 + 1.25 i, 1.0 + 0.0 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: cpow (0.75 + 1.25 i, 1.0 + 1.0 i)":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+Test "Real part of: cpow (2 + 0 i, 10 + 0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cpow (2 + 3 i, 4 + 0 i)":
+double: 1
+float: 4
+idouble: 1
+ifloat: 4
+Test "Imaginary part of: cpow (2 + 3 i, 4 + 0 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+# csin
+Test "Real part of: csin (-0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (-0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (-0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (-0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (0x1p-1074 + 1440 i)":
+double: 1
+idouble: 1
+
+# csinh
+Test "Imaginary part of: csinh (-2 - 3 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csinh (-710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (-710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (-89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: csinh (-89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csinh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csinh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csinh (1440 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: csinh (89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# csqrt
+Test "Real part of: csqrt (-0x1.000002p-126 - 0x1.000002p-126 i)":
+double: 1
+idouble: 1
+Test "Real part of: csqrt (-2 + 3 i)":
+float: 1
+ifloat: 1
+Test "Real part of: csqrt (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: csqrt (0x1.000002p-126 + 0x1.000002p-126 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csqrt (0x1.fffffep+127 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: csqrt (0x1.fffffffffffffp+1023 + 0x1.fffffffffffffp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csqrt (0x1.fffffffffffffp+1023 + 0x1.fffffffffffffp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csqrt (0x1.fffffffffffffp+1023 + 0x1p+1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csqrt (0x1p-1074 + 0x1p-1074 i)":
+ildouble: 1
+ldouble: 1
+
+# ctan
+Test "Real part of: ctan (-2 - 3 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctan (-2 - 3 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ctan (0.75 + 1.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: ctan (0x1p1023 + 1 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ctan (0x1p1023 + 1 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: ctan (0x1p127 + 1 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctan (0x1p127 + 1 i)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: ctan (0x3.243f6cp-1 + 0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: ctan (1 + 47 i)":
+ildouble: 2
+ldouble: 2
+
+# ctan_downward
+Test "Real part of: ctan_downward (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 3
+ldouble: 3
+Test "Real part of: ctan_downward (0x1.921fb6p+0 + 0x1p-149 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctan_downward (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+# ctan_tonearest
+Test "Real part of: ctan_tonearest (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctan_tonearest (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# ctan_towardzero
+Test "Real part of: ctan_towardzero (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 4
+ldouble: 4
+Test "Imaginary part of: ctan_towardzero (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 13
+ldouble: 13
+Test "Real part of: ctan_towardzero (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctan_towardzero (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 8
+ldouble: 8
+
+# ctan_upward
+Test "Real part of: ctan_upward (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+ildouble: 6
+ldouble: 6
+Test "Imaginary part of: ctan_upward (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 10
+ldouble: 10
+Test "Real part of: ctan_upward (0x1.921fb6p+0 + 0x1p-149 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 5
+ldouble: 5
+Test "Imaginary part of: ctan_upward (0x1.921fb6p+0 + 0x1p-149 i)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+# ctanh
+Test "Real part of: ctanh (-2 - 3 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctanh (-2 - 3 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: ctanh (0 + 0x3.243f6cp-1 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh (0 + pi/4 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: ctanh (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctanh (0.75 + 1.25 i)":
+float: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+Test "Real part of: ctanh (1 + 0x1p1023 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh (1 + 0x1p1023 i)":
+double: 1
+idouble: 1
+Test "Real part of: ctanh (1 + 0x1p127 i)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctanh (1 + 0x1p127 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh (47 + 1 i)":
+ildouble: 2
+ldouble: 2
+
+# ctanh_downward
+Test "Imaginary part of: ctanh_downward (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 3
+ldouble: 3
+Test "Real part of: ctanh_downward (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+Test "Imaginary part of: ctanh_downward (0x1p-149 + 0x1.921fb6p+0 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# ctanh_tonearest
+Test "Real part of: ctanh_tonearest (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh_tonearest (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# ctanh_towardzero
+Test "Real part of: ctanh_towardzero (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 13
+ldouble: 13
+Test "Imaginary part of: ctanh_towardzero (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 4
+ldouble: 4
+Test "Real part of: ctanh_towardzero (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 8
+ldouble: 8
+Test "Imaginary part of: ctanh_towardzero (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# ctanh_upward
+Test "Real part of: ctanh_upward (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 10
+ldouble: 10
+Test "Imaginary part of: ctanh_upward (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+double: 1
+idouble: 1
+ildouble: 6
+ldouble: 6
+Test "Real part of: ctanh_upward (0x1p-149 + 0x1.921fb6p+0 i)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 5
+ldouble: 5
+Test "Imaginary part of: ctanh_upward (0x1p-149 + 0x1.921fb6p+0 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+# erf
+Test "erf (1.25)":
+double: 1
+idouble: 1
+
+# erfc
+Test "erfc (0x1.f7303cp+1)":
+double: 1
+idouble: 1
+Test "erfc (0x1.ffa002p+2)":
+float: 1
+ifloat: 1
+Test "erfc (0x1.ffff56789abcdef0123456789a8p+2)":
+ildouble: 1
+ldouble: 1
+Test "erfc (2.0)":
+double: 1
+idouble: 1
+Test "erfc (4.125)":
+double: 1
+idouble: 1
+
+# exp
+Test "exp (0.75)":
+ildouble: 1
+ldouble: 1
+Test "exp (50.0)":
+ildouble: 1
+ldouble: 1
+
+# exp10
+Test "exp10 (-1)":
+double: 1
+idouble: 1
+Test "exp10 (-305)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "exp10 (-36)":
+double: 1
+idouble: 1
+Test "exp10 (3)":
+double: 1
+idouble: 1
+Test "exp10 (36)":
+double: 1
+idouble: 1
+
+# exp_downward
+Test "exp_downward (2)":
+float: 1
+ifloat: 1
+Test "exp_downward (3)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# exp_towardzero
+Test "exp_towardzero (2)":
+float: 1
+ifloat: 1
+Test "exp_towardzero (3)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# exp_upward
+Test "exp_upward (1)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# expm1
+Test "expm1 (0.75)":
+double: 1
+idouble: 1
+Test "expm1 (1)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "expm1 (500.0)":
+double: 1
+idouble: 1
+
+# gamma
+Test "gamma (0.7)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "gamma (1.2)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+# hypot
+Test "hypot (-0.7, -12.4)":
+float: 1
+ifloat: 1
+Test "hypot (-0.7, 12.4)":
+float: 1
+ifloat: 1
+Test "hypot (-12.4, -0.7)":
+float: 1
+ifloat: 1
+Test "hypot (-12.4, 0.7)":
+float: 1
+ifloat: 1
+Test "hypot (0.7, -12.4)":
+float: 1
+ifloat: 1
+Test "hypot (0.7, 12.4)":
+float: 1
+ifloat: 1
+Test "hypot (0.75, 1.25)":
+ildouble: 1
+ldouble: 1
+Test "hypot (12.4, -0.7)":
+float: 1
+ifloat: 1
+Test "hypot (12.4, 0.7)":
+float: 1
+ifloat: 1
+
+# j0
+Test "j0 (-0x1.001000001p+593)":
+ildouble: 2
+ldouble: 2
+Test "j0 (-4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "j0 (0.75)":
+float: 1
+ifloat: 1
+Test "j0 (0x1.d7ce3ap+107)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "j0 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "j0 (10.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "j0 (2.0)":
+float: 2
+ifloat: 2
+Test "j0 (4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "j0 (8.0)":
+float: 1
+ifloat: 1
+
+# j1
+Test "j1 (0x1.3ffp+74)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "j1 (0x1.ff00000000002p+840)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "j1 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "j1 (10.0)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "j1 (2.0)":
+double: 1
+idouble: 1
+Test "j1 (8.0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+# jn
+Test "jn (0, -4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (0, 0.75)":
+float: 1
+ifloat: 1
+Test "jn (0, 10.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "jn (0, 2.0)":
+float: 2
+ifloat: 2
+Test "jn (0, 4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (0, 8.0)":
+float: 1
+ifloat: 1
+Test "jn (1, 10.0)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "jn (1, 2.0)":
+double: 1
+idouble: 1
+Test "jn (1, 8.0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "jn (10, -1.0)":
+ildouble: 1
+ldouble: 1
+Test "jn (10, 0.125)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "jn (10, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (10, 1.0)":
+ildouble: 1
+ldouble: 1
+Test "jn (10, 10.0)":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 4
+ldouble: 4
+Test "jn (10, 2.0)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "jn (2, 0x1.ffff62p+99)":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+Test "jn (2, 2.4048255576957729)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "jn (3, 0.125)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (3, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (3, 10.0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "jn (3, 2.0)":
+float: 1
+ifloat: 1
+Test "jn (3, 2.4048255576957729)":
+double: 3
+idouble: 3
+ildouble: 1
+ldouble: 1
+Test "jn (4, 2.4048255576957729)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "jn (5, 2.4048255576957729)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "jn (6, 2.4048255576957729)":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 4
+ldouble: 4
+Test "jn (7, 2.4048255576957729)":
+double: 3
+float: 5
+idouble: 3
+ifloat: 5
+ildouble: 2
+ldouble: 2
+Test "jn (8, 2.4048255576957729)":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 4
+ldouble: 4
+Test "jn (9, 2.4048255576957729)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 7
+ldouble: 7
+
+# lgamma
+Test "lgamma (0.7)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "lgamma (1.2)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+# log10
+Test "log10 (0.75)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "log10 (e)":
+float: 1
+ifloat: 1
+
+# log1p
+Test "log1p (-0.25)":
+float: 1
+ifloat: 1
+
+# log2
+Test "log2 (e)":
+ildouble: 1
+ldouble: 1
+
+# pow
+Test "pow (0x0.ffffffp0, -0x1p24)":
+float: 1
+ifloat: 1
+Test "pow (0x0.ffffffp0, 0x1p24)":
+float: 1
+ifloat: 1
+Test "pow (0x1.000002p0, 0x1p24)":
+float: 1
+ifloat: 1
+
+# pow10
+Test "pow10 (-1)":
+double: 1
+idouble: 1
+Test "pow10 (-305)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "pow10 (-36)":
+double: 1
+idouble: 1
+Test "pow10 (3)":
+double: 1
+idouble: 1
+Test "pow10 (36)":
+double: 1
+idouble: 1
+
+# pow_downward
+Test "pow_downward (1.0625, 1.125)":
+ildouble: 1
+ldouble: 1
+Test "pow_downward (1.5, 1.03125)":
+float: 1
+ifloat: 1
+
+# pow_towardzero
+Test "pow_towardzero (1.0625, 1.125)":
+ildouble: 1
+ldouble: 1
+Test "pow_towardzero (1.5, 1.03125)":
+float: 1
+ifloat: 1
+
+# pow_upward
+Test "pow_upward (1.0625, 1.125)":
+float: 1
+ifloat: 1
+
+# sin_downward
+Test "sin_downward (1)":
+ildouble: 1
+ldouble: 1
+Test "sin_downward (10)":
+float: 1
+ifloat: 1
+Test "sin_downward (2)":
+ildouble: 1
+ldouble: 1
+Test "sin_downward (3)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "sin_downward (4)":
+ildouble: 1
+ldouble: 1
+Test "sin_downward (5)":
+float: 1
+ifloat: 1
+Test "sin_downward (6)":
+float: 1
+ifloat: 1
+Test "sin_downward (8)":
+ildouble: 1
+ldouble: 1
+
+# sin_tonearest
+Test "sin_tonearest (1)":
+float: 1
+ifloat: 1
+
+# sin_towardzero
+Test "sin_towardzero (1)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "sin_towardzero (10)":
+float: 1
+ifloat: 1
+Test "sin_towardzero (2)":
+ildouble: 1
+ldouble: 1
+Test "sin_towardzero (3)":
+ildouble: 1
+ldouble: 1
+Test "sin_towardzero (4)":
+float: 1
+ifloat: 1
+Test "sin_towardzero (5)":
+float: 1
+ifloat: 1
+Test "sin_towardzero (8)":
+ildouble: 1
+ldouble: 1
+Test "sin_towardzero (9)":
+float: 1
+ifloat: 1
+
+# sin_upward
+Test "sin_upward (1)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "sin_upward (2)":
+float: 2
+ifloat: 2
+Test "sin_upward (3)":
+ildouble: 1
+ldouble: 1
+Test "sin_upward (4)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "sin_upward (6)":
+ildouble: 1
+ldouble: 1
+Test "sin_upward (9)":
+float: 1
+ifloat: 1
+
+# sincos
+Test "sincos (0x1p+120) extra output 2":
+float: 1
+ifloat: 1
+Test "sincos (0x1p+127) extra output 2":
+float: 1
+ifloat: 1
+Test "sincos (M_PI_6l*2.0) extra output 1":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "sincos (M_PI_6l*2.0) extra output 2":
+double: 1
+idouble: 1
+Test "sincos (pi/6) extra output 2":
+float: 1
+ifloat: 1
+
+# sinh
+Test "sinh (0.75)":
+ildouble: 1
+ldouble: 1
+
+# sinh_downward
+Test "sinh_downward (22)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "sinh_downward (23)":
+float: 1
+ifloat: 1
+Test "sinh_downward (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# sinh_towardzero
+Test "sinh_towardzero (22)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "sinh_towardzero (23)":
+float: 1
+ifloat: 1
+Test "sinh_towardzero (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# sinh_upward
+Test "sinh_upward (23)":
+ildouble: 1
+ldouble: 1
+Test "sinh_upward (24)":
+ildouble: 1
+ldouble: 1
+
+# tan
+Test "tan (-0xc.908p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90cp-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90ep-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90f8p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90fcp-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.90fd8p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.90fdap-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.92p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.9p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.908p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90cp-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90ep-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90f8p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90fcp-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.90fd8p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.90fdap-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.92p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.9p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (pi/4)":
+ildouble: 1
+ldouble: 1
+
+# tan_downward
+Test "tan_downward (1)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_downward (10)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "tan_downward (2)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_downward (6)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_downward (8)":
+float: 1
+ifloat: 1
+Test "tan_downward (9)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# tan_tonearest
+Test "tan_tonearest (10)":
+ildouble: 1
+ldouble: 1
+Test "tan_tonearest (4)":
+ildouble: 1
+ldouble: 1
+Test "tan_tonearest (7)":
+ildouble: 1
+ldouble: 1
+
+# tan_towardzero
+Test "tan_towardzero (10)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "tan_towardzero (3)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "tan_towardzero (4)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_towardzero (5)":
+float: 1
+ifloat: 1
+Test "tan_towardzero (6)":
+ildouble: 1
+ldouble: 1
+Test "tan_towardzero (7)":
+ildouble: 2
+ldouble: 2
+Test "tan_towardzero (9)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# tan_upward
+Test "tan_upward (1)":
+float: 1
+ifloat: 1
+Test "tan_upward (10)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_upward (3)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "tan_upward (5)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_upward (6)":
+ildouble: 1
+ldouble: 1
+Test "tan_upward (7)":
+ildouble: 1
+ldouble: 1
+Test "tan_upward (9)":
+ildouble: 1
+ldouble: 1
+
+# tanh
+Test "tanh (-0.75)":
+ildouble: 1
+ldouble: 1
+Test "tanh (0.75)":
+ildouble: 1
+ldouble: 1
+
+# tgamma
+Test "tgamma (-0.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x0.fffffffffffff8p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x0.ffffffp0)":
+float: 1
+ifloat: 1
+Test "tgamma (-0x1.000002p0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x1.0a32a2p+5)":
+float: 2
+ifloat: 2
+Test "tgamma (-0x1.fffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x13.ffffep0)":
+float: 2
+ifloat: 2
+Test "tgamma (-0x13.ffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x14.000000000001p0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x14.00002p0)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x1d.ffffep0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x1d.fffffffffffffffffffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x1d.ffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x1e.00000000000000000000000008p0)":
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x1e.000000000001p0)":
+double: 3
+idouble: 3
+ildouble: 3
+ldouble: 3
+Test "tgamma (-0x1e.00002p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x2.0000000000002p0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x2.000004p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x2.fffffcp0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x27.fffffffffffep0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x27.fffffffffffffffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x28.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x28.000000000002p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x28.00004p0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x29.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x29.000000000002p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x29.00004p0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x29.ffffcp0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x29.fffffffffffep0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x2a.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x3.000004p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "tgamma (-0x3.fffffcp0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x3.ffffffffffffep0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x31.fffffffffffep0)":
+double: 3
+idouble: 3
+Test "tgamma (-0x32.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x32.000000000002p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x4.000008p0)":
+float: 1
+ifloat: 1
+Test "tgamma (-0x4.fffff8p0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x4.ffffffffffffcp0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x5.0000000000004p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x5.000008p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x5.ffffffffffffcp0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x6.000008p0)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x6.fffff8p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x6.ffffffffffffcp0)":
+double: 4
+idouble: 4
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x63.fffffffffffcp0)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x63.ffffffffffffffffffffffffep0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x64.0000000000000000000000002p0)":
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x64.000000000004p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x7.00000000000000000000000002p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x7.0000000000004p0)":
+double: 3
+idouble: 3
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x7.000008p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x7.fffff8p0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+Test "tgamma (-0x7.ffffffffffffcp0)":
+double: 3
+idouble: 3
+ildouble: 3
+ldouble: 3
+Test "tgamma (-0x8.00000000000000000000000004p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x8.00001p0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x9.ffffffffffff8p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x9.fffffp0)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x96.000000000008p0)":
+double: 1
+idouble: 1
+Test "tgamma (-0xa.00001p0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-2.5)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (-3.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-4.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-5.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-6.5)":
+float: 1
+ifloat: 1
+Test "tgamma (-7.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "tgamma (-8.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-9.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0.5)":
+float: 1
+ifloat: 1
+Test "tgamma (0.7)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0x1.fffffep0)":
+float: 1
+ifloat: 1
+Test "tgamma (0x1.fffffffffffffp0)":
+double: 1
+idouble: 1
+Test "tgamma (0x1p-24)":
+float: 1
+ifloat: 1
+Test "tgamma (0x1p-53)":
+double: 1
+idouble: 1
+Test "tgamma (0x2.30a43cp+4)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "tgamma (0x2.fffffcp0)":
+float: 3
+ifloat: 3
+Test "tgamma (0x3.fffffcp0)":
+float: 1
+ifloat: 1
+Test "tgamma (0x3.ffffffffffffep0)":
+double: 1
+idouble: 1
+Test "tgamma (0x4.0000000000004p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x4.ffffffffffffcp0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (0x5.0000000000004p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x5.000008p0)":
+float: 2
+ifloat: 2
+Test "tgamma (0x5.fffff8p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0x6.0000000000004p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x6.000008p0)":
+float: 2
+ifloat: 2
+Test "tgamma (0x6.fffff8p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x6.ffffffffffffcp0)":
+double: 4
+idouble: 4
+ildouble: 1
+ldouble: 1
+Test "tgamma (0x7.0000000000004p0)":
+double: 4
+idouble: 4
+Test "tgamma (0x7.000008p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0x7.fffff8p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "tgamma (0x7.ffffffffffffcp0)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (0x8.00001p0)":
+double: 2
+idouble: 2
+Test "tgamma (0xa.b9fd72b0fb238p+4)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (0xa.b9fd72b0fb23a9ddbf0d3804f4p+4)":
+ildouble: 2
+ldouble: 2
+Test "tgamma (10)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (18.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (19.5)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (2.5)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (23.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (29.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (3)":
+float: 1
+ifloat: 1
+Test "tgamma (3.5)":
+float: 2
+ifloat: 2
+Test "tgamma (30.5)":
+float: 1
+ifloat: 1
+Test "tgamma (33.5)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (34.5)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "tgamma (4)":
+float: 1
+ifloat: 1
+Test "tgamma (4.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (6)":
+float: 1
+ifloat: 1
+Test "tgamma (6.5)":
+float: 1
+ifloat: 1
+Test "tgamma (7)":
+double: 1
+idouble: 1
+Test "tgamma (7.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (8)":
+double: 1
+idouble: 1
+Test "tgamma (8.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (9)":
+double: 1
+idouble: 1
+Test "tgamma (9.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+# y0
+Test "y0 (0.125)":
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1.3ffp+74)":
+double: 1
+idouble: 1
+Test "y0 (0x1.ff00000000002p+840)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-10)":
+double: 1
+idouble: 1
+Test "y0 (0x1p-100)":
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-110)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-20)":
+float: 1
+ifloat: 1
+Test "y0 (0x1p-30)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-40)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "y0 (0x1p-50)":
+float: 1
+ifloat: 1
+Test "y0 (0x1p-70)":
+double: 1
+idouble: 1
+Test "y0 (0x1p-80)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "y0 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "y0 (1.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (1.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "y0 (10.0)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (8.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# y1
+Test "y1 (0.125)":
+double: 1
+idouble: 1
+Test "y1 (0x1.001000001p+593)":
+ildouble: 2
+ldouble: 2
+Test "y1 (0x1.27e204p+99)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "y1 (0x1p-10)":
+double: 1
+idouble: 1
+Test "y1 (0x1p-20)":
+ildouble: 1
+ldouble: 1
+Test "y1 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "y1 (1.5)":
+float: 1
+ifloat: 1
+Test "y1 (10.0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "y1 (2.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "y1 (8.0)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+
+# yn
+Test "yn (0, 0.125)":
+ildouble: 1
+ldouble: 1
+Test "yn (0, 1.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (0, 1.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "yn (0, 10.0)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (0, 8.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (1, 0.125)":
+double: 1
+idouble: 1
+Test "yn (1, 1.5)":
+float: 1
+ifloat: 1
+Test "yn (1, 10.0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "yn (1, 2.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "yn (1, 8.0)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "yn (10, 0.125)":
+double: 1
+idouble: 1
+Test "yn (10, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "yn (10, 1.0)":
+double: 1
+idouble: 1
+Test "yn (10, 10.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (10, 2.0)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "yn (3, 0.125)":
+double: 1
+idouble: 1
+Test "yn (3, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "yn (3, 10.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (3, 2.0)":
+double: 1
+idouble: 1
+
+# Maximal error of functions:
+Function: "acos":
+ildouble: 1
+ldouble: 1
+
+Function: "acos_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acos_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "acos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acos_upward":
+ildouble: 2
+ldouble: 2
+
+Function: "acosh":
+ildouble: 1
+ldouble: 1
+
+Function: "asin":
+ildouble: 2
+ldouble: 2
+
+Function: "asin_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "asin_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_upward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asinh":
+ildouble: 1
+ldouble: 1
+
+Function: "atan2":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atanh":
+float: 1
+ifloat: 1
+
+Function: "cabs":
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cacos":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacosh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacosh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "carg":
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "casin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casin":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "casinh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "catan":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catanh":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cbrt":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cexp":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cexp":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "clog":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog10":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "clog10":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cos_towardzero":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_upward":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "cosh":
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_downward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_upward":
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cpow":
+double: 2
+float: 4
+idouble: 2
+ifloat: 4
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "cpow":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csin":
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csinh":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csqrt":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csqrt":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ctan":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ctan_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ctan_downward":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: Real part of "ctan_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ctan_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctan_towardzero":
+float: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "ctan_towardzero":
+float: 1
+ifloat: 1
+ildouble: 13
+ldouble: 13
+
+Function: Real part of "ctan_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "ctan_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 10
+ldouble: 10
+
+Function: Real part of "ctanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ctanh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ctanh_downward":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "ctanh_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ctanh_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ctanh_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctanh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 13
+ldouble: 13
+
+Function: Imaginary part of "ctanh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctanh_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 10
+ldouble: 10
+
+Function: Imaginary part of "ctanh_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: "erf":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erfc":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp":
+ildouble: 1
+ldouble: 1
+
+Function: "exp10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_downward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "exp_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_upward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "expm1":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "gamma":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "hypot":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "j0":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "j1":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "jn":
+double: 4
+float: 5
+idouble: 4
+ifloat: 5
+ildouble: 7
+ldouble: 7
+
+Function: "lgamma":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "log":
+ildouble: 1
+ldouble: 1
+
+Function: "log10":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log1p":
+float: 1
+ifloat: 1
+
+Function: "log2":
+ildouble: 1
+ldouble: 1
+
+Function: "pow":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_downward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_upward":
+float: 1
+ifloat: 1
+
+Function: "sin":
+ildouble: 1
+ldouble: 1
+
+Function: "sin_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sin_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "sin_towardzero":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sin_upward":
+float: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "sincos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "sinh":
+ildouble: 1
+ldouble: 1
+
+Function: "sinh_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sinh_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "sinh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sinh_upward":
+ildouble: 1
+ldouble: 1
+
+Function: "tan":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "tan_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "tan_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "tan_towardzero":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "tan_upward":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "tanh":
+ildouble: 1
+ldouble: 1
+
+Function: "tgamma":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "y0":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "y1":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "yn":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+# end of automatic generation
diff --git a/libc/sysdeps/powerpc/nofpu/shlib-versions b/libc/sysdeps/powerpc/nofpu/shlib-versions
new file mode 100644
index 000000000..72085ddf4
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/shlib-versions
@@ -0,0 +1 @@
+powerpc.*-.*-.* ABI powerpcsoft-@OS@
diff --git a/libc/sysdeps/powerpc/nofpu/sim-full.c b/libc/sysdeps/powerpc/nofpu/sim-full.c
new file mode 100644
index 000000000..e16703323
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/sim-full.c
@@ -0,0 +1,46 @@
+/* Software floating-point exception handling emulation.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <signal.h>
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+/* FIXME: these variables should be thread specific (see bugzilla bug
+ 15483) and ideally preserved across signal handlers, like hardware
+ FP status words, but the latter is quite difficult to accomplish in
+ userland. */
+
+/* Global to store sticky exceptions. */
+int __sim_exceptions __attribute__ ((nocommon));
+libc_hidden_data_def (__sim_exceptions);
+
+/* By default, no exceptions should trap. */
+int __sim_disabled_exceptions = 0xffffffff;
+libc_hidden_data_def (__sim_disabled_exceptions);
+
+int __sim_round_mode __attribute__ ((nocommon));
+libc_hidden_data_def (__sim_round_mode);
+
+void
+__simulate_exceptions (int x)
+{
+ __sim_exceptions |= x;
+ if (x & ~__sim_disabled_exceptions)
+ raise (SIGFPE);
+}
diff --git a/libc/sysdeps/powerpc/nofpu/soft-supp.h b/libc/sysdeps/powerpc/nofpu/soft-supp.h
new file mode 100644
index 000000000..18b4550e3
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/soft-supp.h
@@ -0,0 +1,48 @@
+/* Internal support stuff for complete soft float.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if defined __NO_FPRS__ && !defined _SOFT_FLOAT
+
+# include <fenv_libc.h>
+
+#else
+
+# include <fenv.h>
+
+typedef union
+{
+ fenv_t fenv;
+ unsigned int l[2];
+} fenv_union_t;
+
+#endif
+
+/* FIXME: these variables should be thread specific (see bugzilla bug
+ 15483) and ideally preserved across signal handlers, like hardware
+ FP status words, but the latter is quite difficult to accomplish in
+ userland. */
+
+extern int __sim_exceptions;
+libc_hidden_proto (__sim_exceptions);
+extern int __sim_disabled_exceptions;
+libc_hidden_proto (__sim_disabled_exceptions);
+extern int __sim_round_mode;
+libc_hidden_proto (__sim_round_mode);
+
+extern void __simulate_exceptions (int x) attribute_hidden;
diff --git a/libc/sysdeps/powerpc/novmx-longjmp.c b/libc/sysdeps/powerpc/novmx-longjmp.c
index 8f6ea357d..b2c0e4cf5 100644
--- a/libc/sysdeps/powerpc/novmx-longjmp.c
+++ b/libc/sysdeps/powerpc/novmx-longjmp.c
@@ -50,13 +50,7 @@ weak_alias (__novmx__libc_siglongjmp, __novmx_longjmp)
weak_alias (__novmx__libc_siglongjmp, __novmxlongjmp)
weak_alias (__novmx__libc_siglongjmp, __novmxsiglongjmp)
-# if __WORDSIZE == 64
-symbol_version (__novmx_longjmp,_longjmp,GLIBC_2.3);
-symbol_version (__novmxlongjmp,longjmp,GLIBC_2.3);
-symbol_version (__novmxsiglongjmp,siglongjmp,GLIBC_2.3);
-# else
-symbol_version (__novmx_longjmp,_longjmp,GLIBC_2.0);
-symbol_version (__novmxlongjmp,longjmp,GLIBC_2.0);
-symbol_version (__novmxsiglongjmp,siglongjmp,GLIBC_2.0);
-# endif
+compat_symbol (libc, __novmx_longjmp, _longjmp, GLIBC_2_0);
+compat_symbol (libc, __novmxlongjmp, longjmp, GLIBC_2_0);
+compat_symbol (libc, __novmxsiglongjmp, siglongjmp, GLIBC_2_0);
#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)) */
diff --git a/libc/sysdeps/powerpc/powerpc32/405/memcmp.S b/libc/sysdeps/powerpc/powerpc32/405/memcmp.S
new file mode 100644
index 000000000..2849461cd
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/memcmp.S
@@ -0,0 +1,128 @@
+/* Optimized memcmp implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memcmp
+
+ r3:source1 address, return equality
+ r4:source2 address
+ r5:byte count
+
+   Compare two words at a time from src1 and src2.  If they differ, jump
+   to the end and return src1 > src2 or src1 < src2.
+   When the count runs out, compare any remaining bytes, then jump to the
+   end and return src1 > src2, src1 < src2 or src1 = src2.
+   If the words are equal and the count is not exhausted, repeat.  */
+
+EALIGN (memcmp, 5, 0)
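+/* Split the byte count into 32-byte blocks (loaded into CTR) and a 0-31
+   byte remainder kept in r5.  */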
+ srwi. r6,r5,5
+ beq L(preword2_count_loop)
+ mtctr r6
+ clrlwi r5,r5,27
+
+L(word8_compare_loop):
+ lwz r10,0(r3)
+ lwz r6,4(r3)
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ lwz r10,8(r3)
+ lwz r6,12(r3)
+ lwz r8,8(r4)
+ lwz r9,12(r4)
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ lwz r10,16(r3)
+ lwz r6,20(r3)
+ lwz r8,16(r4)
+ lwz r9,20(r4)
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ lwz r10,24(r3)
+ lwz r6,28(r3)
+ addi r3,r3,0x20
+ lwz r8,24(r4)
+ lwz r9,28(r4)
+ addi r4,r4,0x20
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ bdnz L(word8_compare_loop)
+
+L(preword2_count_loop):
+ srwi. r6,r5,3
+ beq L(prebyte_count_loop)
+ mtctr r6
+ clrlwi r5,r5,29
+
+L(word2_count_loop):
+ lwz r10,0(r3)
+ lwz r6,4(r3)
+ addi r3,r3,0x08
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ addi r4,r4,0x08
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ bdnz L(word2_count_loop)
+
+L(prebyte_count_loop):
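+/* Compare the remaining 0-7 bytes one at a time; the count is incremented
+   first so that bdz also handles a zero remainder.  */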
+ addi r5,r5,1
+ mtctr r5
+ bdz L(end_memcmp)
+
+L(byte_count_loop):
+ lbz r6,0(r3)
+ addi r3,r3,0x01
+ lbz r8,0(r4)
+ addi r4,r4,0x01
+ cmplw cr5,r8,r6
+ bne cr5,L(st2)
+ bdnz L(byte_count_loop)
+
+L(end_memcmp):
+ addi r3,r0,0
+ blr
+
+L(l_r):
+ addi r3,r0,1
+ blr
+
+L(st1):
+ blt cr1,L(l_r)
+ addi r3,r0,-1
+ blr
+
+L(st2):
+ blt cr5,L(l_r)
+ addi r3,r0,-1
+ blr
+END (memcmp)
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp,bcmp)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/memcpy.S b/libc/sysdeps/powerpc/powerpc32/405/memcpy.S
new file mode 100644
index 000000000..b01d53920
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/memcpy.S
@@ -0,0 +1,130 @@
+/* Optimized memcpy implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memcpy
+
+   r0:saved destination address (return value)
+ r3:destination address
+ r4:source address
+ r5:byte count
+
+   Save the destination address (the return value) in r0.
+   If the destination and source are both unaligned and the copy count is
+   greater than 256, copy 0-3 bytes to make the destination word aligned.
+   If there are 32 or more bytes to copy we use a 32-byte copy loop.
+   Finally we copy the remaining 0-31 bytes.  */
+
+EALIGN (memcpy, 5, 0)
+/* Check if bytes to copy are greater than 256 and if
+ source and destination are unaligned */
+ cmpwi r5,0x0100
+ addi r0,r3,0
+ ble L(string_count_loop)
+ neg r6,r3
+ clrlwi. r6,r6,30
+ beq L(string_count_loop)
+ neg r6,r4
+ clrlwi. r6,r6,30
+ beq L(string_count_loop)
+ mtctr r6
+ subf r5,r6,r5
+
+L(unaligned_bytecopy_loop): /* Align destination by copying 0-3 bytes */
+ lbz r8,0x0(r4)
+ addi r4,r4,1
+ stb r8,0x0(r3)
+ addi r3,r3,1
+ bdnz L(unaligned_bytecopy_loop)
+ srwi. r7,r5,5
+ beq L(preword2_count_loop)
+ mtctr r7
+
+L(word8_count_loop_no_dcbt): /* Copy 32 bytes at a time */
+ lwz r6,0(r4)
+ lwz r7,4(r4)
+ lwz r8,8(r4)
+ lwz r9,12(r4)
+ subi r5,r5,0x20
+ stw r6,0(r3)
+ stw r7,4(r3)
+ stw r8,8(r3)
+ stw r9,12(r3)
+ lwz r6,16(r4)
+ lwz r7,20(r4)
+ lwz r8,24(r4)
+ lwz r9,28(r4)
+ addi r4,r4,0x20
+ stw r6,16(r3)
+ stw r7,20(r3)
+ stw r8,24(r3)
+ stw r9,28(r3)
+ addi r3,r3,0x20
+ bdnz L(word8_count_loop_no_dcbt)
+
+L(preword2_count_loop): /* Copy remaining 0-31 bytes */
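+/* mtxer sets the transfer length, so one lswx/stswx pair moves all of the
+   remaining bytes in a single string operation.  */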
+ clrlwi. r12,r5,27
+ beq L(end_memcpy)
+ mtxer r12
+ lswx r5,0,r4
+ stswx r5,0,r3
+ mr r3,r0
+ blr
+
+L(string_count_loop): /* Copy the odd 0-15 tail bytes first */
+ clrlwi. r12,r5,28
+ add r3,r3,r5
+ add r4,r4,r5
+ beq L(pre_string_copy)
+ mtxer r12
+ subf r4,r12,r4
+ subf r3,r12,r3
+ lswx r6,0,r4
+ stswx r6,0,r3
+
+L(pre_string_copy): /* Check how many 16 byte chunks to copy */
+ srwi. r7,r5,4
+ beq L(end_memcpy)
+ mtctr r7
+
+L(word4_count_loop_no_dcbt): /* Copy 32 bytes at a time */
+ lwz r6,-4(r4)
+ lwz r7,-8(r4)
+ lwz r8,-12(r4)
+ lwzu r9,-16(r4)
+ stw r6,-4(r3)
+ stw r7,-8(r3)
+ stw r8,-12(r3)
+ stwu r9,-16(r3)
+ bdz L(end_memcpy)
+ lwz r6,-4(r4)
+ lwz r7,-8(r4)
+ lwz r8,-12(r4)
+ lwzu r9,-16(r4)
+ stw r6,-4(r3)
+ stw r7,-8(r3)
+ stw r8,-12(r3)
+ stwu r9,-16(r3)
+ bdnz L(word4_count_loop_no_dcbt)
+
+L(end_memcpy):
+ mr r3,r0
+ blr
+END (memcpy)
+libc_hidden_builtin_def (memcpy)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/memset.S b/libc/sysdeps/powerpc/powerpc32/405/memset.S
new file mode 100644
index 000000000..b73dba887
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/memset.S
@@ -0,0 +1,152 @@
+/* Optimized memset for PowerPC405,440,464 (32-byte cacheline).
+ Copyright (C) 2012-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memset
+
+   r3:destination address and return value
+   r4:source integer to copy
+   r5:byte count
+   r11:fill byte from r4 replicated into all 32 bits of the register
+   r12:saved copy of the destination address (the return value)
+
+   Save the destination address (the return value) in r12.
+   If the destination is unaligned and the count is greater than 255 bytes,
+   set 0-3 bytes to make the destination word aligned.
+   If the count is greater than 255 bytes and the fill value is zero,
+   use dcbz to clear whole cache lines where we can;
+   otherwise do the following:
+   If 16 or more bytes remain we use a loop that stores four words at a time.
+   Finally we set the remaining 0-15 bytes with a string store.  */
+
+EALIGN (memset, 5, 0)
+ rlwinm r11,r4,0,24,31
+ rlwimi r11,r4,8,16,23
+ rlwimi r11,r11,16,0,15
+ addi r12,r3,0
+ cmpwi r5,0x00FF
+ ble L(preword8_count_loop)
+ cmpwi r4,0x00
+ beq L(use_dcbz)
+ neg r6,r3
+ clrlwi. r6,r6,30
+ beq L(preword8_count_loop)
+ addi r8,0,1
+ mtctr r6
+ subi r3,r3,1
+
+L(unaligned_bytecopy_loop):
+ stbu r11,0x1(r3)
+ subf. r5,r8,r5
+ beq L(end_memset)
+ bdnz L(unaligned_bytecopy_loop)
+ addi r3,r3,1
+
+L(preword8_count_loop):
+ srwi. r6,r5,4
+ beq L(preword2_count_loop)
+ mtctr r6
+ addi r3,r3,-4
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+
+L(word8_count_loop_no_dcbt):
+ stwu r8,4(r3)
+ stwu r9,4(r3)
+ subi r5,r5,0x10
+ stwu r10,4(r3)
+ stwu r11,4(r3)
+ bdnz L(word8_count_loop_no_dcbt)
+ addi r3,r3,4
+
+L(preword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+
+L(end_memset):
+ addi r3,r12,0
+ blr
+
+L(use_dcbz):
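+/* Zero fill of a large block: first align the destination to a cache-line
+   boundary, then clear whole 32-byte lines with dcbz.  */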
+ neg r6,r3
+ clrlwi. r7,r6,28
+ beq L(skip_string_loop)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ subf r5,r7,r5
+ mtxer r7
+ stswx r8,0,r3
+ add r3,r3,r7
+
+L(skip_string_loop):
+ clrlwi r8,r6,27
+ srwi. r8,r8,4
+ beq L(dcbz_pre_loop)
+ mtctr r8
+
+L(word_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(word_loop)
+
+L(dcbz_pre_loop):
+ srwi r6,r5,5
+ mtctr r6
+ addi r7,0,0
+
+L(dcbz_loop):
+ dcbz r3,r7
+ addi r3,r3,0x20
+ subi r5,r5,0x20
+ bdnz L(dcbz_loop)
+ srwi. r6,r5,4
+ beq L(postword2_count_loop)
+ mtctr r6
+
+L(postword8_count_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(postword8_count_loop)
+
+L(postword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+ b L(end_memset)
+END (memset)
+libc_hidden_builtin_def (memset)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strcmp.S b/libc/sysdeps/powerpc/powerpc32/405/strcmp.S
new file mode 100644
index 000000000..c0b21907b
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strcmp.S
@@ -0,0 +1,134 @@
+/* Optimized strcmp implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strcmp
+
+ Register Use
+ r0:temp return equality
+ r3:source1 address, return equality
+ r4:source2 address
+
+ Implementation description
+   Compare two words at a time from src1 and src2.  If they differ, jump
+   to the end and return src1 > src2 or src1 < src2.
+   If a null is found, compare only the bytes up to the null, then jump to
+   the end and return src1 > src2, src1 < src2 or src1 = src2.
+   If src1 = src2 and no null has been seen, repeat.  */
+
+EALIGN (strcmp,5,0)
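+/* Limit the 32-byte compare loop so that neither string is read across a
+   4K page boundary; any leftover is handled by the byte loop.  */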
+ neg r7,r3
+ clrlwi r7,r7,20
+ neg r8,r4
+ clrlwi r8,r8,20
+ srwi. r7,r7,5
+ beq L(byte_loop)
+ srwi. r8,r8,5
+ beq L(byte_loop)
+ cmplw r7,r8
+ mtctr r7
+ ble L(big_loop)
+ mtctr r8
+
+L(big_loop):
+ lwz r5,0(r3)
+ lwz r6,4(r3)
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ lwz r5,8(r3)
+ lwz r6,12(r3)
+ lwz r8,8(r4)
+ lwz r9,12(r4)
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ lwz r5,16(r3)
+ lwz r6,20(r3)
+ lwz r8,16(r4)
+ lwz r9,20(r4)
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ lwz r5,24(r3)
+ lwz r6,28(r3)
+ addi r3,r3,0x20
+ lwz r8,24(r4)
+ lwz r9,28(r4)
+ addi r4,r4,0x20
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ bdnz L(big_loop)
+ b L(byte_loop)
+
+L(end_check):
+ subfic r12,r12,4
+ blt L(end_check2)
+ rlwinm r12,r12,3,0,31
+ srw r5,r5,r12
+ srw r8,r8,r12
+ cmplw r5,r8
+ bne L(st1)
+ b L(end_strcmp)
+
+L(end_check2):
+ addi r12,r12,4
+ cmplw r5,r8
+ rlwinm r12,r12,3,0,31
+ bne L(st1)
+ srw r6,r6,r12
+ srw r9,r9,r12
+ cmplw r6,r9
+ bne L(st1)
+
+L(end_strcmp):
+ addi r3,r0,0
+ blr
+
+L(st1):
+ mfcr r3
+ blr
+
+L(byte_loop):
+ lbz r5,0(r3)
+ addi r3,r3,1
+ lbz r6,0(r4)
+ addi r4,r4,1
+ cmplw r5,r6
+ bne L(st1)
+ cmpwi r5,0
+ beq L(end_strcmp)
+ b L(byte_loop)
+END (strcmp)
+libc_hidden_builtin_def (strcmp)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strcpy.S b/libc/sysdeps/powerpc/powerpc32/405/strcpy.S
new file mode 100644
index 000000000..d7c84569d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strcpy.S
@@ -0,0 +1,107 @@
+/* Optimized strcpy implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strcpy
+
+ Register Use
+ r3:destination and return address
+ r4:source address
+ r10:temp destination address
+
+ Implementation description
+   Loop by checking two words at a time, using dlmzb to test for a null in
+   either word.  If a null is found, jump to the end checks to determine
+   where in the last 8 bytes it lies, then copy the appropriate bytes of
+   those last 8 according to the null position.  */
+
+EALIGN (strcpy, 5, 0)
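+/* Copy single bytes until the source is 8-byte aligned, then copy two
+   words at a time, with dlmzb detecting the terminating null.  */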
+ neg r7,r4
+ subi r4,r4,1
+ clrlwi. r8,r7,29
+ subi r10,r3,1
+ beq L(pre_word8_loop)
+ mtctr r8
+
+L(loop):
+ lbzu r5,0x01(r4)
+ cmpi cr5,r5,0x0
+ stbu r5,0x01(r10)
+ beq cr5,L(end_strcpy)
+ bdnz L(loop)
+
+L(pre_word8_loop):
+ subi r4,r4,3
+ subi r10,r10,3
+
+L(word8_loop):
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ b L(word8_loop)
+
+L(last_bytes_copy):
+ stwu r5,0x04(r10)
+ subi r11,r11,4
+ mtctr r11
+ addi r10,r10,3
+ subi r4,r4,1
+
+L(last_bytes_copy_loop):
+ lbzu r5,0x01(r4)
+ stbu r5,0x01(r10)
+ bdnz L(last_bytes_copy_loop)
+ blr
+
+L(byte_copy):
+ blt L(last_bytes_copy)
+ mtctr r11
+ addi r10,r10,3
+ subi r4,r4,5
+
+L(last_bytes_copy_loop2):
+ lbzu r5,0x01(r4)
+ stbu r5,0x01(r10)
+ bdnz L(last_bytes_copy_loop2)
+
+L(end_strcpy):
+ blr
+END (strcpy)
+libc_hidden_builtin_def (strcpy)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strlen.S b/libc/sysdeps/powerpc/powerpc32/405/strlen.S
new file mode 100644
index 000000000..77d22ea67
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strlen.S
@@ -0,0 +1,75 @@
+/* Optimized strlen implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strlen
+
+ Register Use
+ r3:source address and return length of string
+ r4:byte counter
+
+ Implementation description
+   Load two words at a time and count bytes; if we find a null we subtract
+   one from the count and return it.  We need to subtract one because the
+   null terminator is not counted as a byte of the string.  */
+
+EALIGN (strlen,5,0)
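+/* Count single bytes until the pointer is 8-byte aligned, then scan two
+   words per iteration with dlmzb, which also spots the terminating null.  */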
+ neg r7,r3
+ clrlwi. r8,r7,29
+ addi r4,0,0
+ beq L(byte_count_loop)
+ mtctr r8
+
+L(loop):
+ lbz r5,0(r3)
+ cmpi cr5,r5,0x0
+ addi r3,r3,0x1
+ addi r4,r4,0x1
+ beq cr5,L(end_strlen)
+ bdnz L(loop)
+
+L(byte_count_loop):
+ lwz r5,0(r3)
+ lwz r6,4(r3)
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ lwz r5,8(r3)
+ lwz r6,12(r3)
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ lwz r5,16(r3)
+ lwz r6,20(r3)
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ lwz r5,24(r3)
+ lwz r6,28(r3)
+ addi r3,r3,0x20
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ b L(byte_count_loop)
+
+L(end_strlen):
+ addi r3,r4,-1
+ blr
+END (strlen)
+libc_hidden_builtin_def (strlen)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strncmp.S b/libc/sysdeps/powerpc/powerpc32/405/strncmp.S
new file mode 100644
index 000000000..3e2ba5f85
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strncmp.S
@@ -0,0 +1,128 @@
+/* Optimized strncmp implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strncmp
+
+ Register Use
+ r0:temp return equality
+ r3:source1 address, return equality
+ r4:source2 address
+ r5:byte count
+
+ Implementation description
+   Limit the word loop so that neither string is read past the end of its
+   4K page.
+   Compare two words at a time from src1 and src2.  If they differ, jump
+   to the end and return src1 > src2 or src1 < src2.
+   If a null is found, compare only the bytes up to the null, then jump to
+   the end and return src1 > src2, src1 < src2 or src1 = src2.
+   If the count runs out, compare only the bytes within the count, then
+   jump to the end and return src1 > src2, src1 < src2 or src1 = src2.
+   If src1 = src2 with no null found and count remaining, repeat.  */
+
+EALIGN (strncmp,5,0)
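+/* CTR is limited to the number of 8-byte blocks before the nearer 4K page
+   boundary, so neither string is read across a page end.  */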
+ neg r7,r3
+ clrlwi r7,r7,20
+ neg r8,r4
+ clrlwi r8,r8,20
+ srwi. r7,r7,3
+ beq L(prebyte_count_loop)
+ srwi. r8,r8,3
+ beq L(prebyte_count_loop)
+ cmplw r7,r8
+ mtctr r7
+ ble L(preword2_count_loop)
+ mtctr r8
+
+L(preword2_count_loop):
+ srwi. r6,r5,3
+ beq L(prebyte_count_loop)
+ mfctr r7
+ cmplw r6,r7
+ bgt L(set_count_loop)
+ mtctr r6
+ clrlwi r5,r5,29
+
+L(word2_count_loop):
+ lwz r10,0(r3)
+ lwz r6,4(r3)
+ addi r3,r3,0x08
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ addi r4,r4,0x08
+ dlmzb. r12,r10,r6
+ bne L(end_check)
+ cmplw r10,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ bdnz L(word2_count_loop)
+
+L(prebyte_count_loop):
+ addi r5,r5,1
+ mtctr r5
+ bdz L(end_strncmp)
+
+L(byte_count_loop):
+ lbz r6,0(r3)
+ addi r3,r3,1
+ lbz r7,0(r4)
+ addi r4,r4,1
+ cmplw r6,r7
+ bne L(st1)
+ cmpwi r6,0
+ beq L(end_strncmp)
+ bdnz L(byte_count_loop)
+ b L(end_strncmp)
+
+L(set_count_loop):
+ slwi r7,r7,3
+ subf r5,r7,r5
+ b L(word2_count_loop)
+
+L(end_check):
+ subfic r12,r12,4
+ blt L(end_check2)
+ rlwinm r12,r12,3,0,31
+ srw r10,r10,r12
+ srw r8,r8,r12
+ cmplw r10,r8
+ bne L(st1)
+ b L(end_strncmp)
+
+L(end_check2):
+ addi r12,r12,4
+ cmplw r10,r8
+ rlwinm r12,r12,3,0,31
+ bne L(st1)
+ srw r6,r6,r12
+ srw r9,r9,r12
+ cmplw r6,r9
+ bne L(st1)
+
+L(end_strncmp):
+ addi r3,r0,0
+ blr
+
+L(st1):
+ mfcr r3
+ blr
+END (strncmp)
+libc_hidden_builtin_def (strncmp)
diff --git a/libc/sysdeps/powerpc/powerpc32/440/Implies b/libc/sysdeps/powerpc/powerpc32/440/Implies
new file mode 100644
index 000000000..70c0d2eda
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/440/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/405/fpu
+powerpc/powerpc32/405
diff --git a/libc/sysdeps/powerpc/powerpc32/464/Implies b/libc/sysdeps/powerpc/powerpc32/464/Implies
new file mode 100644
index 000000000..c3e52c550
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/464/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/440/fpu
+powerpc/powerpc32/440
diff --git a/libc/sysdeps/powerpc/powerpc32/476/Implies b/libc/sysdeps/powerpc/powerpc32/476/Implies
new file mode 100644
index 000000000..2829f9cca
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/476/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/464/fpu
+powerpc/powerpc32/464
diff --git a/libc/sysdeps/powerpc/powerpc32/476/memset.S b/libc/sysdeps/powerpc/powerpc32/476/memset.S
new file mode 100644
index 000000000..48c21d620
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/476/memset.S
@@ -0,0 +1,152 @@
+/* Optimized memset for PowerPC476 (128-byte cacheline).
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memset
+
+   r3:destination address and return value
+   r4:source integer to copy
+   r5:byte count
+   r11:fill byte from r4 replicated into all 32 bits of the register
+   r12:saved copy of the destination address (the return value)
+
+   Save the destination address (the return value) in r12.
+   If the destination is unaligned and the count is greater than 255 bytes,
+   set 0-3 bytes to make the destination word aligned.
+   If the count is greater than 255 bytes and the fill value is zero,
+   use dcbz to clear whole cache lines where we can;
+   otherwise do the following:
+   If 16 or more bytes remain we use a loop that stores four words at a time.
+   Finally we set the remaining 0-15 bytes with a string store.  */
+
+EALIGN (memset, 5, 0)
+ rlwinm r11,r4,0,24,31
+ rlwimi r11,r4,8,16,23
+ rlwimi r11,r11,16,0,15
+ addi r12,r3,0
+ cmpwi r5,0x00FF
+ ble L(preword8_count_loop)
+ cmpwi r4,0x00
+ beq L(use_dcbz)
+ neg r6,r3
+ clrlwi. r6,r6,30
+ beq L(preword8_count_loop)
+ addi r8,0,1
+ mtctr r6
+ subi r3,r3,1
+
+L(unaligned_bytecopy_loop):
+ stbu r11,0x1(r3)
+ subf. r5,r8,r5
+ beq L(end_memset)
+ bdnz L(unaligned_bytecopy_loop)
+ addi r3,r3,1
+
+L(preword8_count_loop):
+ srwi. r6,r5,4
+ beq L(preword2_count_loop)
+ mtctr r6
+ addi r3,r3,-4
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+
+L(word8_count_loop_no_dcbt):
+ stwu r8,4(r3)
+ stwu r9,4(r3)
+ subi r5,r5,0x10
+ stwu r10,4(r3)
+ stwu r11,4(r3)
+ bdnz L(word8_count_loop_no_dcbt)
+ addi r3,r3,4
+
+L(preword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+
+L(end_memset):
+ addi r3,r12,0
+ blr
+
+L(use_dcbz):
+ neg r6,r3
+ clrlwi. r7,r6,28
+ beq L(skip_string_loop)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ subf r5,r7,r5
+ mtxer r7
+ stswx r8,0,r3
+ add r3,r3,r7
+
+L(skip_string_loop):
+ clrlwi r8,r6,25
+ srwi. r8,r8,4
+ beq L(dcbz_pre_loop)
+ mtctr r8
+
+L(word_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(word_loop)
+
+L(dcbz_pre_loop):
+ srwi r6,r5,7
+ mtctr r6
+ addi r7,0,0
+
+L(dcbz_loop):
+ dcbz r3,r7
+ addi r3,r3,0x80
+ subi r5,r5,0x80
+ bdnz L(dcbz_loop)
+ srwi. r6,r5,4
+ beq L(postword2_count_loop)
+ mtctr r6
+
+L(postword8_count_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(postword8_count_loop)
+
+L(postword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+ b L(end_memset)
+END (memset)
+libc_hidden_builtin_def (memset)
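[Editorial note: a minimal C-level sketch of the strategy the memset.S header comment above describes — replicate the fill byte into a word, align for large counts, clear whole 128-byte cache lines in the zero-fill case, run a 16-byte store loop, then finish the byte tail. It is illustrative only: the name sketch_memset, the CACHE_LINE constant and the use of memcpy in place of the word and dcbz stores are assumptions of this sketch, not part of the patch.]

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define CACHE_LINE 128          /* assumed PPC476 data cache line size */

void *
sketch_memset (void *dst, int c, size_t n)
{
  unsigned char *p = dst;
  uint32_t word = (unsigned char) c;
  word |= word << 8;
  word |= word << 16;           /* replicate the fill byte into a word */

  if (n > 255)
    {
      /* Set 0-3 leading bytes so the pointer is word aligned.  */
      while (((uintptr_t) p & 3) != 0 && n > 0)
        {
          *p++ = (unsigned char) c;
          n--;
        }

      if (c == 0)
        {
          /* Align further to a cache line, then clear whole 128-byte
             lines; the assembly issues one dcbz per line instead.  */
          while (((uintptr_t) p & (CACHE_LINE - 1)) != 0 && n > 0)
            {
              *p++ = 0;
              n--;
            }
          while (n >= CACHE_LINE)
            {
              for (size_t i = 0; i < CACHE_LINE; i += 4)
                memcpy (p + i, &word, 4);       /* word is 0 on this path */
              p += CACHE_LINE;
              n -= CACHE_LINE;
            }
        }
    }

  /* Store 16 bytes (four words) per iteration while possible.  */
  while (n >= 16)
    {
      memcpy (p, &word, 4);
      memcpy (p + 4, &word, 4);
      memcpy (p + 8, &word, 4);
      memcpy (p + 12, &word, 4);
      p += 16;
      n -= 16;
    }

  /* Final 0-15 bytes (the assembly uses the string store stswx).  */
  while (n > 0)
    {
      *p++ = (unsigned char) c;
      n--;
    }

  return dst;
}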
diff --git a/libc/sysdeps/powerpc/powerpc32/Makefile b/libc/sysdeps/powerpc/powerpc32/Makefile
index 64f79003a..cf620c826 100644
--- a/libc/sysdeps/powerpc/powerpc32/Makefile
+++ b/libc/sysdeps/powerpc/powerpc32/Makefile
@@ -1,8 +1,12 @@
# Powerpc32 specific build options.
-ifeq ($(with-fp),no)
-+cflags += -msoft-float
-sysdep-LDFLAGS += -msoft-float
+# Some Powerpc32 variants assume soft-fp is the default even though there is
+# an fp variant, so provide -mhard-float if --with-fp is explicitly passed.
+
+ifeq ($(with-fp),yes)
++cflags += -mhard-float
+ASFLAGS += -mhard-float
+sysdep-LDFLAGS += -mhard-float
endif
ifeq ($(subdir),gmon)
diff --git a/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S
index 787447363..df1d5195f 100644
--- a/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S
@@ -24,6 +24,12 @@
# include <jmpbuf-offsets.h>
#endif
+#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT)
+# define LOAD_GP(N) evldd r##N,((JB_FPRS+((N)-14)*2)*4)(r3)
+#else
+# define LOAD_GP(N) lwz r##N,((JB_GPRS+(N)-14)*4)(r3)
+#endif
+
ENTRY (__longjmp)
#if defined PTR_DEMANGLE || defined CHECK_SP
@@ -39,13 +45,13 @@ ENTRY (__longjmp)
lwz r1,(JB_GPR1*4)(r3)
#endif
lwz r0,(JB_LR*4)(r3)
- lwz r14,((JB_GPRS+0)*4)(r3)
- lwz r15,((JB_GPRS+1)*4)(r3)
- lwz r16,((JB_GPRS+2)*4)(r3)
- lwz r17,((JB_GPRS+3)*4)(r3)
- lwz r18,((JB_GPRS+4)*4)(r3)
- lwz r19,((JB_GPRS+5)*4)(r3)
- lwz r20,((JB_GPRS+6)*4)(r3)
+ LOAD_GP (14)
+ LOAD_GP (15)
+ LOAD_GP (16)
+ LOAD_GP (17)
+ LOAD_GP (18)
+ LOAD_GP (19)
+ LOAD_GP (20)
#ifdef PTR_DEMANGLE
# ifndef CHECK_SP
PTR_DEMANGLE3 (r1, r24, r25)
@@ -53,19 +59,19 @@ ENTRY (__longjmp)
PTR_DEMANGLE2 (r0, r25)
#endif
mtlr r0
- lwz r21,((JB_GPRS+7)*4)(r3)
- lwz r22,((JB_GPRS+8)*4)(r3)
+ LOAD_GP (21)
+ LOAD_GP (22)
lwz r0,(JB_CR*4)(r3)
- lwz r23,((JB_GPRS+9)*4)(r3)
- lwz r24,((JB_GPRS+10)*4)(r3)
- lwz r25,((JB_GPRS+11)*4)(r3)
+ LOAD_GP (23)
+ LOAD_GP (24)
+ LOAD_GP (25)
mtcrf 0xFF,r0
- lwz r26,((JB_GPRS+12)*4)(r3)
- lwz r27,((JB_GPRS+13)*4)(r3)
- lwz r28,((JB_GPRS+14)*4)(r3)
- lwz r29,((JB_GPRS+15)*4)(r3)
- lwz r30,((JB_GPRS+16)*4)(r3)
- lwz r31,((JB_GPRS+17)*4)(r3)
+ LOAD_GP (26)
+ LOAD_GP (27)
+ LOAD_GP (28)
+ LOAD_GP (29)
+ LOAD_GP (30)
+ LOAD_GP (31)
mr r3,r4
blr
END (__longjmp)
diff --git a/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S b/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S
index 95e8a5aa1..ad2b5ffdb 100644
--- a/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S
@@ -30,7 +30,7 @@ libc_hidden_def (_setjmp)
/* Build a versioned object for libc. */
# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
-symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.0);
+compat_symbol (libc, __novmx_setjmp, _setjmp, GLIBC_2_0);
ENTRY (__novmx_setjmp)
li r4,0 /* Set second argument to 0. */
@@ -39,7 +39,7 @@ END (__novmx_setjmp)
libc_hidden_def (__novmx_setjmp)
# endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) */
-default_symbol_version (__vmx_setjmp,_setjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4)
/* __GI__setjmp prototype is needed for ntpl i.e. _setjmp is defined
as a libc_hidden_proto & is used in sysdeps/generic/libc-start.c
if HAVE_CLEANUP_JMP_BUF is defined */
diff --git a/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S b/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S
index 1113ea533..5e1e860d8 100644
--- a/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S
@@ -26,7 +26,7 @@ ENTRY (__novmxsetjmp)
b __novmx__sigsetjmp@local
END (__novmxsetjmp)
strong_alias (__novmxsetjmp, __novmx__setjmp)
-symbol_version (__novmxsetjmp, setjmp, GLIBC_2.0)
+compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_0)
#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) ) */
@@ -36,4 +36,4 @@ ENTRY (__vmxsetjmp)
END (__vmxsetjmp)
strong_alias (__vmxsetjmp, __vmx__setjmp)
strong_alias (__vmx__setjmp, __setjmp)
-default_symbol_version (__vmxsetjmp,setjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4)
diff --git a/libc/sysdeps/powerpc/powerpc32/dl-machine.c b/libc/sysdeps/powerpc/powerpc32/dl-machine.c
index 3e7202d86..aba361856 100644
--- a/libc/sysdeps/powerpc/powerpc32/dl-machine.c
+++ b/libc/sysdeps/powerpc/powerpc32/dl-machine.c
@@ -416,6 +416,12 @@ __process_machine_rela (struct link_map *map,
Elf32_Addr const finaladdr,
int rinfo)
{
+ union unaligned
+ {
+ uint16_t u2;
+ uint32_t u4;
+ } __attribute__((__packed__));
+
switch (rinfo)
{
case R_PPC_NONE:
@@ -432,10 +438,7 @@ __process_machine_rela (struct link_map *map,
return;
case R_PPC_UADDR32:
- ((char *) reloc_addr)[0] = finaladdr >> 24;
- ((char *) reloc_addr)[1] = finaladdr >> 16;
- ((char *) reloc_addr)[2] = finaladdr >> 8;
- ((char *) reloc_addr)[3] = finaladdr;
+ ((union unaligned *) reloc_addr)->u4 = finaladdr;
break;
case R_PPC_ADDR24:
@@ -453,8 +456,7 @@ __process_machine_rela (struct link_map *map,
case R_PPC_UADDR16:
if (__builtin_expect (finaladdr > 0x7fff && finaladdr < 0xffff8000, 0))
_dl_reloc_overflow (map, "R_PPC_UADDR16", reloc_addr, refsym);
- ((char *) reloc_addr)[0] = finaladdr >> 8;
- ((char *) reloc_addr)[1] = finaladdr;
+ ((union unaligned *) reloc_addr)->u2 = finaladdr;
break;
case R_PPC_ADDR16_LO:
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile
new file mode 100644
index 000000000..adf556870
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile
@@ -0,0 +1,9 @@
+ifeq ($(subdir),math)
+libm-routines += fexcepts_to_spe fexcepts_from_spe
+libm-routines += fexcepts_to_prctl fexcepts_from_prctl
+libm-routines += fe_note_change
+endif
+
+ifeq ($(subdir),soft-fp)
+sysdep_routines += fraiseexcept-soft
+endif
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c
new file mode 100644
index 000000000..92a7dd1e0
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c
@@ -0,0 +1,53 @@
+/* Clear given exceptions in current floating-point environment. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+#undef feclearexcept
+int
+__feclearexcept (int excepts)
+{
+ unsigned int fpescr;
+ int excepts_spe = __fexcepts_to_spe (excepts);
+
+ /* Get the current state. */
+ fpescr = fegetenv_register ();
+
+ /* Clear the relevant bits. */
+ fpescr &= ~excepts_spe;
+
+ /* Put the new state in effect. */
+ fesetenv_register (fpescr);
+
+ /* Let the kernel know if the "invalid" or "underflow" bit was
+ cleared. */
+ if (excepts & (FE_INVALID | FE_UNDERFLOW))
+ __fe_note_change ();
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feclearexcept, __old_feclearexcept)
+compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feclearexcept, feclearexcept)
+versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2);
diff --git a/libc/sysdeps/x86_64/multiarch/strend-sse4.S b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c
index c5a7ae28a..43a570626 100644
--- a/libc/sysdeps/x86_64/multiarch/strend-sse4.S
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c
@@ -1,6 +1,5 @@
-/* Return the pointer to the end of string, using SSE4.2
- Copyright (C) 2009-2013 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
+/* Note a change to floating-point exceptions.
+ Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,32 +16,24 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <fenv_libc.h>
#include <sysdep.h>
-#include "asm-syntax.h"
-
- .section .text.sse4.2,"ax",@progbits
-ENTRY (__strend_sse4)
- pxor %xmm2, %xmm2
- movq %rdi, %rcx
- andq $~15, %rdi
- movdqa %xmm2, %xmm1
- pcmpeqb (%rdi), %xmm2
- orl $0xffffffff, %esi
- subq %rdi, %rcx
- shll %cl, %esi
- pmovmskb %xmm2, %edx
- andl %esi, %edx
- jnz 1f
-
-2: pcmpistri $0x08, 16(%rdi), %xmm1
- leaq 16(%rdi), %rdi
- jnz 2b
-
- leaq (%rdi,%rcx), %rax
- ret
-
-1: bsfl %edx, %eax
- addq %rdi, %rax
- ret
-
-END (__strend_sse4)
+#include <sys/prctl.h>
+
+/* Inform the kernel of a change to floating-point exceptions. */
+
+void
+__fe_note_change (void)
+{
+ int pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return;
+ if ((pflags & PR_FP_EXC_SW_ENABLE) == 0)
+ INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ pflags | PR_FP_EXC_SW_ENABLE);
+}
+
+libm_hidden_def (__fe_note_change)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c
new file mode 100644
index 000000000..7cc963c01
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c
@@ -0,0 +1,54 @@
+/* Disable floating-point exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+fedisableexcept (int excepts)
+{
+ int result = 0, pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* Save old enable bits. */
+ result = __fexcepts_from_prctl (pflags);
+
+ pflags &= ~__fexcepts_to_prctl (excepts);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ pflags | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* If disabling signals for "inexact", also disable trapping to the
+ kernel. */
+ if ((excepts & FE_INEXACT) != 0)
+ {
+ unsigned long fpescr;
+
+ fpescr = fegetenv_register ();
+ fpescr &= ~SPEFSCR_FINXE;
+ fesetenv_register (fpescr);
+ }
+
+ return result;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c
new file mode 100644
index 000000000..133dde7b3
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c
@@ -0,0 +1,54 @@
+/* Enable floating-point exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+feenableexcept (int excepts)
+{
+ unsigned int result = 0, pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* Save old enable bits. */
+ result = __fexcepts_from_prctl (pflags);
+
+ pflags |= __fexcepts_to_prctl (excepts);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ pflags | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* If enabling signals for "inexact", also enable trapping to the
+ kernel. */
+ if ((excepts & FE_INEXACT) != 0)
+ {
+ unsigned long fpescr;
+
+ fpescr = fegetenv_register ();
+ fpescr |= SPEFSCR_FINXE;
+ fesetenv_register (fpescr);
+ }
+
+ return result;
+}
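[Editorial note, not part of the patch: feenableexcept and fedisableexcept are the GNU extensions declared in <fenv.h> under _GNU_SOURCE and, as in the e500 implementations above, return the previously enabled exception set or -1 on failure. A minimal usage sketch:]

#define _GNU_SOURCE
#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  /* Enable trapping for divide-by-zero and invalid operations.  The
     return value is the set that was enabled before, or -1 on error
     (the e500 version above returns -1 if the prctl call fails).  */
  int old = feenableexcept (FE_DIVBYZERO | FE_INVALID);
  if (old == -1)
    fputs ("feenableexcept failed\n", stderr);

  /* ... code that should trap on those exceptions ... */

  /* Disable only what was newly enabled here.  */
  fedisableexcept ((FE_DIVBYZERO | FE_INVALID) & ~old);
  return 0;
}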
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c
new file mode 100644
index 000000000..bfcbca2ad
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c
@@ -0,0 +1,47 @@
+/* Store current floating-point environment. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+__fegetenv (fenv_t *envp)
+{
+ fenv_union_t u;
+ INTERNAL_SYSCALL_DECL (err);
+ int r;
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ u.l[1] = fegetenv_register ();
+ *envp = u.fenv;
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetenv, __old_fegetenv)
+compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c
new file mode 100644
index 000000000..9c7afc74f
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c
@@ -0,0 +1,36 @@
+/* Get floating-point exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+fegetexcept (void)
+{
+ int result = 0, pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ result = __fexcepts_from_prctl (pflags);
+
+ return result;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c
new file mode 100644
index 000000000..f69e9a5bd
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c
@@ -0,0 +1,29 @@
+/* Return current rounding direction. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+#undef fegetround
+int
+fegetround (void)
+{
+ unsigned long fpescr;
+
+ fpescr = fegetenv_register ();
+ return fpescr & 3;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c
new file mode 100644
index 000000000..bd05ebd3c
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c
@@ -0,0 +1,57 @@
+/* Store current floating-point environment and clear exceptions.
+ e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+feholdexcept (fenv_t *envp)
+{
+ fenv_union_t u;
+ INTERNAL_SYSCALL_DECL (err);
+ int r;
+
+ /* Get the current state. */
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ u.l[1] = fegetenv_register ();
+ *envp = u.fenv;
+
+ /* Clear everything except for the rounding mode and trapping to the
+ kernel. */
+ u.l[0] &= ~(PR_FP_EXC_DIV
+ | PR_FP_EXC_OVF
+ | PR_FP_EXC_UND
+ | PR_FP_EXC_RES
+ | PR_FP_EXC_INV);
+ u.l[1] &= SPEFSCR_FRMC | (SPEFSCR_ALL_EXCEPT_ENABLE & ~SPEFSCR_FINXE);
+
+ /* Put the new state in effect. */
+ fesetenv_register (u.l[1]);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ u.l[0] | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ return 0;
+}
+libm_hidden_def (feholdexcept)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c
new file mode 100644
index 000000000..3a85f1810
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c
@@ -0,0 +1,41 @@
+/* Constant floating-point environments for e500.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* The use of "unsigned long long" as the type to define the
+ bit-pattern explicitly, rather than the type "double" used in
+ <bits/fenv.h>, means that we cannot include <fenv_libc.h> here to
+ get the enum constants for the SPEFSCR bits to enable
+ exceptions. */
+
+#include <sys/prctl.h>
+
+/* This value is used if the default environment (FE_DFL_ENV) is requested. */
+const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) =
+ 0x3cULL;
+
+/* Floating-point environment where none of the exceptions are masked. */
+const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) =
+ (((unsigned long long) (PR_FP_EXC_DIV
+ | PR_FP_EXC_OVF
+ | PR_FP_EXC_UND
+ | PR_FP_EXC_RES
+ | PR_FP_EXC_INV)) << 32) | 0x7cULL;
+
+/* Non-IEEE mode. */
+const unsigned long long __fe_nonieee_env __attribute__ ((aligned (8))) =
+ 0x0ULL;
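[Editorial note: since fenv_libc.h cannot be included here, the low words above are written as raw hex. As a cross-check against the SPEFSCR_* values defined in the fenv_libc.h added later in this patch: 0x3c is every trap-enable bit except FINXE (inexact), the same enable mask feholdexcept keeps, and 0x7c is SPEFSCR_ALL_EXCEPT_ENABLE. The snippet below is illustrative only and assumes a C11 compiler for _Static_assert.]

/* Values copied from the fenv_libc.h added later in this patch.  */
#define SPEFSCR_FINXE 0x00000040
#define SPEFSCR_FINVE 0x00000020
#define SPEFSCR_FDBZE 0x00000010
#define SPEFSCR_FUNFE 0x00000008
#define SPEFSCR_FOVFE 0x00000004
#define SPEFSCR_ALL_EXCEPT_ENABLE 0x0000007c

/* Low word of __fe_dfl_env: all trap-enable bits except "inexact".  */
_Static_assert ((SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE)
                == 0x3c, "default environment low word");
_Static_assert ((SPEFSCR_ALL_EXCEPT_ENABLE & ~SPEFSCR_FINXE) == 0x3c,
                "equivalently, the enable mask feholdexcept keeps");

/* Low word of __fe_enabled_env: all five trap-enable bits.  */
_Static_assert (SPEFSCR_ALL_EXCEPT_ENABLE == 0x7c,
                "enabled environment low word");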
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h
new file mode 100644
index 000000000..96375808d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h
@@ -0,0 +1,96 @@
+/* Internal libc stuff for floating point environment routines. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _FENV_LIBC_H
+#define _FENV_LIBC_H 1
+
+#include <fenv.h>
+
+int __feraiseexcept_spe (int);
+libm_hidden_proto (__feraiseexcept_spe)
+
+int __fexcepts_to_spe (int);
+libm_hidden_proto (__fexcepts_to_spe)
+
+int __fexcepts_from_spe (int);
+libm_hidden_proto (__fexcepts_from_spe)
+
+int __fexcepts_to_prctl (int);
+libm_hidden_proto (__fexcepts_to_prctl)
+
+int __fexcepts_from_prctl (int);
+libm_hidden_proto (__fexcepts_from_prctl)
+
+void __fe_note_change (void);
+libm_hidden_proto (__fe_note_change)
+
+/* Equivalent to fegetenv, but returns an unsigned int instead of
+ taking a pointer. */
+#define fegetenv_register() \
+ ({ unsigned int fscr; asm volatile ("mfspefscr %0" : "=r" (fscr)); fscr; })
+
+/* Equivalent to fesetenv, but takes an unsigned int instead of a
+ pointer. */
+#define fesetenv_register(fscr) \
+ ({ asm volatile ("mtspefscr %0" : : "r" (fscr)); })
+
+typedef union
+{
+ fenv_t fenv;
+ unsigned int l[2];
+} fenv_union_t;
+
+/* Definitions of all the SPEFSCR bit masks.  */
+enum {
+ SPEFSCR_SOVH = 0x80000000,
+ SPEFSCR_OVH = 0x40000000,
+ SPEFSCR_FGH = 0x20000000,
+ SPEFSCR_FXH = 0x10000000,
+ SPEFSCR_FINVH = 0x08000000,
+ SPEFSCR_FDBZH = 0x04000000,
+ SPEFSCR_FUNFH = 0x02000000,
+ SPEFSCR_FOVFH = 0x01000000,
+ /* 2 unused bits. */
+ SPEFSCR_FINXS = 0x00200000,
+ SPEFSCR_FINVS = 0x00100000,
+ SPEFSCR_FDBZS = 0x00080000,
+ SPEFSCR_FUNFS = 0x00040000,
+ SPEFSCR_FOVFS = 0x00020000,
+ /* Combination of the exception bits. */
+ SPEFSCR_ALL_EXCEPT = 0x003e0000,
+ SPEFSCR_MODE = 0x00010000,
+ SPEFSCR_SOV = 0x00008000,
+ SPEFSCR_OV = 0x00004000,
+ SPEFSCR_FG = 0x00002000,
+ SPEFSCR_FX = 0x00001000,
+ SPEFSCR_FINV = 0x00000800,
+ SPEFSCR_FDBZ = 0x00000400,
+ SPEFSCR_FUNF = 0x00000200,
+ SPEFSCR_FOVF = 0x00000100,
+ /* 1 unused bit. */
+ SPEFSCR_FINXE = 0x00000040,
+ SPEFSCR_FINVE = 0x00000020,
+ SPEFSCR_FDBZE = 0x00000010,
+ SPEFSCR_FUNFE = 0x00000008,
+ SPEFSCR_FOVFE = 0x00000004,
+ /* Combination of the exception trap enable bits. */
+ SPEFSCR_ALL_EXCEPT_ENABLE = 0x0000007c,
+ SPEFSCR_FRMC = 0x00000003
+};
+
+#endif /* fenv_libc.h */
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c
new file mode 100644
index 000000000..411e6be8d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c
@@ -0,0 +1,49 @@
+/* Install given floating-point environment. e500 version.
+ Copyright (C) 1997-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+__fesetenv (const fenv_t *envp)
+{
+ fenv_union_t u;
+ INTERNAL_SYSCALL_DECL (err);
+ int r;
+
+ u.fenv = *envp;
+
+ fesetenv_register (u.l[1]);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ u.l[0] | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetenv, __old_fesetenv)
+compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__fesetenv, fesetenv)
+versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c
new file mode 100644
index 000000000..805008e0c
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c
@@ -0,0 +1,35 @@
+/* Set current rounding direction. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+fesetround (int round)
+{
+ unsigned long fpescr;
+
+ if ((unsigned int) round > 3)
+ return 1;
+
+ fpescr = fegetenv_register ();
+ fpescr = (fpescr & ~SPEFSCR_FRMC) | (round & 3);
+ fesetenv_register (fpescr);
+
+ return 0;
+}
+libm_hidden_def (fesetround)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c
new file mode 100644
index 000000000..505c92363
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c
@@ -0,0 +1,47 @@
+/* Install given floating-point environment and raise exceptions.
+ e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__feupdateenv (const fenv_t *envp)
+{
+ int exc;
+
+ /* Save the currently set exceptions. */
+ exc = fegetenv_register () & SPEFSCR_ALL_EXCEPT;
+
+ /* Install new environment. */
+ fesetenv (envp);
+
+ /* Raise (if appropriate) saved exceptions. */
+ __feraiseexcept_spe (exc);
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feupdateenv, __old_feupdateenv)
+compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feupdateenv, feupdateenv)
+versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c
new file mode 100644
index 000000000..c094a04cb
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c
@@ -0,0 +1,42 @@
+/* Convert floating-point exceptions from prctl form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sys/prctl.h>
+
+/* Convert EXCEPTS from prctl bits to FE_* form, returning the
+ converted value. */
+
+int
+__fexcepts_from_prctl (int excepts)
+{
+ int result = 0;
+ if (excepts & PR_FP_EXC_OVF)
+ result |= FE_OVERFLOW;
+ if (excepts & PR_FP_EXC_UND)
+ result |= FE_UNDERFLOW;
+ if (excepts & PR_FP_EXC_INV)
+ result |= FE_INVALID;
+ if (excepts & PR_FP_EXC_DIV)
+ result |= FE_DIVBYZERO;
+ if (excepts & PR_FP_EXC_RES)
+ result |= FE_INEXACT;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_from_prctl)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c
new file mode 100644
index 000000000..3ec939d18
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c
@@ -0,0 +1,41 @@
+/* Convert floating-point exceptions from SPEFSCR form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+/* Convert EXCEPTS from SPEFSCR bits to FE_* form, returning the
+ converted value. */
+
+int
+__fexcepts_from_spe (int excepts)
+{
+ int result = 0;
+ if (excepts & SPEFSCR_FINXS)
+ result |= FE_INEXACT;
+ if (excepts & SPEFSCR_FDBZS)
+ result |= FE_DIVBYZERO;
+ if (excepts & SPEFSCR_FUNFS)
+ result |= FE_UNDERFLOW;
+ if (excepts & SPEFSCR_FOVFS)
+ result |= FE_OVERFLOW;
+ if (excepts & SPEFSCR_FINVS)
+ result |= FE_INVALID;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_from_spe)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c
new file mode 100644
index 000000000..b9c51b125
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c
@@ -0,0 +1,42 @@
+/* Convert floating-point exceptions to prctl form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sys/prctl.h>
+
+/* Convert EXCEPTS from FE_* form to prctl bits, returning the
+ converted value. */
+
+int
+__fexcepts_to_prctl (int excepts)
+{
+ int result = 0;
+ if (excepts & FE_INEXACT)
+ result |= PR_FP_EXC_RES;
+ if (excepts & FE_DIVBYZERO)
+ result |= PR_FP_EXC_DIV;
+ if (excepts & FE_UNDERFLOW)
+ result |= PR_FP_EXC_UND;
+ if (excepts & FE_OVERFLOW)
+ result |= PR_FP_EXC_OVF;
+ if (excepts & FE_INVALID)
+ result |= PR_FP_EXC_INV;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_to_prctl)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c
new file mode 100644
index 000000000..570934d15
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c
@@ -0,0 +1,41 @@
+/* Convert floating-point exceptions to SPEFSCR form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+/* Convert EXCEPTS from FE_* form to SPEFSCR bits, returning the
+ converted value. */
+
+int
+__fexcepts_to_spe (int excepts)
+{
+ int result = 0;
+ if (excepts & FE_INEXACT)
+ result |= SPEFSCR_FINXS;
+ if (excepts & FE_DIVBYZERO)
+ result |= SPEFSCR_FDBZS;
+ if (excepts & FE_UNDERFLOW)
+ result |= SPEFSCR_FUNFS;
+ if (excepts & FE_OVERFLOW)
+ result |= SPEFSCR_FOVFS;
+ if (excepts & FE_INVALID)
+ result |= SPEFSCR_FINVS;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_to_spe)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c
new file mode 100644
index 000000000..b01cadeff
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c
@@ -0,0 +1,41 @@
+/* Store current representation for exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__fegetexceptflag (fexcept_t *flagp, int excepts)
+{
+ unsigned long fpescr;
+
+ /* Get the current state. */
+ fpescr = fegetenv_register ();
+
+ *flagp = fpescr & SPEFSCR_ALL_EXCEPT;
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetexceptflag, __old_fegetexceptflag)
+compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c
new file mode 100644
index 000000000..0aed72ff3
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c
@@ -0,0 +1,28 @@
+/* Raise given exceptions. e500 version for use from soft-fp.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2004.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <libc-symbols.h>
+
+int __feraiseexcept_soft (int);
+libc_hidden_proto (__feraiseexcept_soft)
+
+#define __FERAISEEXCEPT_INTERNAL __feraiseexcept_soft
+#include "spe-raise.c"
+libc_hidden_def (__feraiseexcept_soft)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c
new file mode 100644
index 000000000..0eca9ffff
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c
@@ -0,0 +1,40 @@
+/* Raise given exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+#define __FERAISEEXCEPT_INTERNAL __feraiseexcept_spe
+#include "spe-raise.c"
+
+libm_hidden_def (__feraiseexcept_spe)
+
+#undef feraiseexcept
+int
+__feraiseexcept (int excepts)
+{
+ return __feraiseexcept_spe (__fexcepts_to_spe (excepts));
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feraiseexcept, __old_feraiseexcept)
+compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feraiseexcept, feraiseexcept)
+versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c
new file mode 100644
index 000000000..43f2d19d1
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c
@@ -0,0 +1,55 @@
+/* Set floating-point environment exception handling. e500 version.
+ Copyright (C) 1997-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__fesetexceptflag (const fexcept_t *flagp, int excepts)
+{
+ unsigned long old_spefscr, spefscr;
+ fexcept_t flag;
+ int excepts_spe = __fexcepts_to_spe (excepts);
+
+ /* Get the current state. */
+ old_spefscr = fegetenv_register ();
+
+ /* Ignore exceptions not listed in 'excepts'. */
+ flag = *flagp & excepts_spe;
+
+ /* Replace the exception status */
+ spefscr = (old_spefscr & ~excepts_spe) | flag;
+
+ /* Store the new status word (along with the rest of the environment). */
+ fesetenv_register (spefscr);
+
+ /* If the state of the "invalid" or "underflow" flag has changed,
+ inform the kernel. */
+ if (((spefscr ^ old_spefscr) & (SPEFSCR_FINVS | SPEFSCR_FUNFS)) != 0)
+ __fe_note_change ();
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetexceptflag, __old_fesetexceptflag)
+compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c
new file mode 100644
index 000000000..f4f547d5f
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c
@@ -0,0 +1,31 @@
+/* Test exception in current environment. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+fetestexcept (int excepts)
+{
+ unsigned long f;
+
+ /* Get the current state. */
+ f = fegetenv_register ();
+
+ return __fexcepts_from_spe (f) & excepts;
+}
+libm_hidden_def (fetestexcept)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h
new file mode 100644
index 000000000..117e7331e
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h
@@ -0,0 +1,4 @@
+/* The generic version of get-rounding-mode.h using fpu_control.h, not
+ the one using the software rounding mode, is correct for e500. */
+
+#include <sysdeps/generic/get-rounding-mode.h>
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S
new file mode 100644
index 000000000..823f748ba
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S
@@ -0,0 +1,27 @@
+/* Floating-point absolute value. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ENTRY (__fabsf)
+/* float [r3] fabsf (float [r3] x) ; */
+ efsabs r3,r3
+ blr
+END (__fabsf)
+
+weak_alias (__fabsf, fabsf)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c
new file mode 100644
index 000000000..4394ddc7c
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c
@@ -0,0 +1,53 @@
+/* Raise given exceptions, given the SPEFSCR bits for those exceptions.
+ Copyright (C) 1997-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__FERAISEEXCEPT_INTERNAL (int excepts)
+{
+ unsigned long f;
+
+ f = fegetenv_register ();
+ f |= (excepts & SPEFSCR_ALL_EXCEPT);
+ fesetenv_register (f);
+
+ /* Force the operations that cause the exceptions. */
+ if ((SPEFSCR_FINVS & excepts) != 0)
+ /* 0 / 0 */
+ asm volatile ("efsdiv %0,%0,%1" : : "r" (0), "r" (0));
+
+ if ((SPEFSCR_FDBZS & excepts) != 0)
+ /* 1.0 / 0.0 */
+ asm volatile ("efsdiv %0,%0,%1" : : "r" (1.0F), "r" (0));
+
+ if ((SPEFSCR_FOVFS & excepts) != 0)
+ /* Largest normalized number plus itself. */
+ asm volatile ("efsadd %0,%0,%1" : : "r" (0x7f7fffff), "r" (0x7f7fffff));
+
+ if ((SPEFSCR_FUNFS & excepts) != 0)
+ /* Smallest normalized number times itself. */
+ asm volatile ("efsmul %0,%0,%1" : : "r" (0x800000), "r" (0x800000));
+
+ if ((SPEFSCR_FINXS & excepts) != 0)
+ /* Smallest normalized minus 1.0 raises the inexact flag. */
+ asm volatile ("efssub %0,%0,%1" : : "r" (0x00800000), "r" (1.0F));
+
+ /* Success. */
+ return 0;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
index 9d34cd916..d02aa5754 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
@@ -43,16 +43,16 @@ ENTRY (__longjmp)
# endif
mtlr r6
cfi_same_value (lr)
- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
# else
lwz r5,_dl_hwcap@got(r5)
mtlr r6
cfi_same_value (lr)
- lwz r5,4(r5)
+ lwz r5,LOWORD(r5)
# endif
# else
- lis r5,(_dl_hwcap+4)@ha
- lwz r5,(_dl_hwcap+4)@l(r5)
+ lis r5,(_dl_hwcap+LOWORD)@ha
+ lwz r5,(_dl_hwcap+LOWORD)@l(r5)
# endif
andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
beq L(no_vmx)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S
index 96e50de37..27166c454 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S
@@ -26,14 +26,14 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__longjmp,__longjmp,GLIBC_2.3.4);
+versioned_symbol (libc, __vmx__longjmp, __longjmp, GLIBC_2_3_4);
# define __longjmp __vmx__longjmp
# include "__longjmp-common.S"
# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
# define __NO_VMX__
# undef JB_SIZE
-symbol_version (__novmx__longjmp,__longjmp,GLIBC_2.0);
+compat_symbol (libc, __novmx__longjmp, __longjmp, GLIBC_2_0);
# undef __longjmp
# define __longjmp __novmx__longjmp
# include "__longjmp-common.S"
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S
index 840891f1c..1da24f492 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S
@@ -29,7 +29,7 @@ ENTRY(__copysign)
stwu r1,-16(r1)
cfi_adjust_cfa_offset (16)
stfd fp2,8(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
cmpwi r3,0
addi r1,r1,16
cfi_adjust_cfa_offset (-16)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S
index 4ec8389b5..2ad6de273 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S
@@ -30,7 +30,7 @@ ENTRY(__copysignl)
fmr fp0,fp1
fabs fp1,fp1
fcmpu cr7,fp0,fp1
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
cmpwi cr6,r3,0
addi r1,r1,16
cfi_adjust_cfa_offset (-16)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S
index 27881f8cc..249fda501 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S
@@ -24,10 +24,10 @@ ENTRY (__lrint)
stwu r1,-16(r1)
fctiw fp13,fp1
stfd fp13,8(r1)
- nop /* Insure the following load is in a different dispatch group */
+ nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1)
+ lwz r3,8+LOWORD(r1)
addi r1,r1,16
blr
END (__lrint)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
index 92dc3787d..6309f864b 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
@@ -67,7 +67,7 @@ ENTRY (__lround)
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1) /* Load return as integer. */
+ lwz r3,8+LOWORD(r1) /* Load return as integer. */
.Lout:
addi r1,r1,16
blr
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
index 2ed9ca7b4..8cff1563a 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
@@ -19,7 +19,7 @@
#include <sysdep.h>
.section .rodata.cst8,"aM",@progbits,8
- .align 2
+ .align 3
.LC0: /* 2**23 */
.long 0x4b000000
.LC1: /* 0.5 */
@@ -60,7 +60,6 @@ ENTRY (__roundf )
#ifdef SHARED
lfs fp10,.LC1-.LC0(r9)
#else
- lis r9,.LC1@ha
lfs fp10,.LC1@l(r9)
#endif
ble- cr6,.L4
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
index 46ea2b00f..f3244060e 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
@@ -94,14 +94,14 @@ ENTRY (__sigsetjmp)
# else
lwz r5,_rtld_global_ro@got(r5)
# endif
- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
# else
lwz r5,_dl_hwcap@got(r5)
- lwz r5,4(r5)
+ lwz r5,LOWORD(r5)
# endif
# else
- lis r6,(_dl_hwcap+4)@ha
- lwz r5,(_dl_hwcap+4)@l(r6)
+ lis r6,(_dl_hwcap+LOWORD)@ha
+ lwz r5,(_dl_hwcap+LOWORD)@l(r6)
# endif
andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
beq L(no_vmx)
@@ -111,44 +111,43 @@ ENTRY (__sigsetjmp)
stw r0,((JB_VRSAVE)*4)(3)
addi r6,r5,16
beq+ L(aligned_save_vmx)
- lvsr v0,0,r5
- vspltisb v1,-1 /* set v1 to all 1's */
- vspltisb v2,0 /* set v2 to all 0's */
- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes on left = misalignment */
+ lvsr v0,0,r5
+ lvsl v1,0,r5
+ addi r6,r5,-16
- /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */
- lvx v5,0,r5
- vperm v20,v20,v20,v0
- vsel v5,v5,v20,v3
- vsel v20,v20,v2,v3
- stvx v5,0,r5
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+ addi addgpr,addgpr,32; \
+ vperm tmpvr,prevvr,savevr,shiftvr; \
+ stvx tmpvr,0,savegpr
-#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
- addi addgpr,addgpr,32; \
- vperm savevr,savevr,savevr,shiftvr; \
- vsel hivr,prev_savevr,savevr,maskvr; \
- stvx hivr,0,savegpr;
+ /*
+ * We have to be careful not to corrupt the data below v20 and
+ * above v31. To keep things simple we just rotate both ends in
+ * the opposite direction to our main permute so we can use
+ * the common macro.
+ */
- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
+ /* load and rotate data below v20 */
+ lvx v2,0,r5
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+ save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+ save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+ save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+ save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+ save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+ save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+ save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+ save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+ save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+ save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+ save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+ /* load and rotate data above v31 */
+ lvx v2,0,r6
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
- /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */
- addi r5,r5,32
- vperm v31,v31,v31,v0
- lvx v4,0,r5
- vsel v5,v30,v31,v3
- stvx v5,0,r6
- vsel v4,v31,v4,v3
- stvx v4,0,r5
b L(no_vmx)
L(aligned_save_vmx):
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S
index 60cd35052..92acff1e6 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S
@@ -26,7 +26,7 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define __sigsetjmp __vmx__sigsetjmp
# define __sigjmp_save __vmx__sigjmp_save
# include "setjmp-common.S"
@@ -36,7 +36,7 @@ default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
# undef __sigsetjmp
# undef __sigjmp_save
# undef JB_SIZE
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0)
+compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0)
# define __sigsetjmp __novmx__sigsetjmp
# define __sigjmp_save __novmx__sigjmp_save
# include "setjmp-common.S"
diff --git a/libc/sysdeps/powerpc/powerpc32/mcount.c b/libc/sysdeps/powerpc/powerpc32/mcount.c
index 0476bf61d..d8c063222 100644
--- a/libc/sysdeps/powerpc/powerpc32/mcount.c
+++ b/libc/sysdeps/powerpc/powerpc32/mcount.c
@@ -9,7 +9,7 @@
/* __mcount_internal was added in glibc 2.15 with version GLIBC_PRIVATE,
but it should have been put in version GLIBC_2.15. Mark the
GLIBC_PRIVATE version obsolete and add it to GLIBC_2.16 instead. */
-default_symbol_version (___mcount_internal, __mcount_internal, GLIBC_2.16);
+versioned_symbol (libc, ___mcount_internal, __mcount_internal, GLIBC_2_16);
#if SHLIB_COMPAT (libc, GLIBC_2_15, GLIBC_2_16)
strong_alias (___mcount_internal, ___mcount_internal_private);
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
index 55b2850fd..e7a88feb4 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
@@ -29,8 +29,8 @@ ENTRY (__llrint)
nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrint)
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
index cc80fcb02..da24ad38d 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
@@ -28,8 +28,8 @@ ENTRY (__llrintf)
nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrintf)
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
index 631180f07..7246ca4d1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
@@ -19,12 +19,10 @@
#include <sysdep.h>
#include <math_ldbl_opt.h>
- .section .rodata.cst12,"aM",@progbits,12
+ .section .rodata.cst8,"aM",@progbits,8
.align 3
- .LC0: /* 0x1.0000000000000p+52 == 2^52 */
- .long 0x43300000
- .long 0x00000000
- .long 0x3f000000 /* Use this for 0.5 */
+ .LC0: .long (52+127)<<23 /* 0x1p+52 */
+ .long (-1+127)<<23 /* 0.5 */
.section ".text"
@@ -57,12 +55,12 @@ ENTRY (__llround)
addi r9,r9,.LC0-got_label@l
mtlr r11
cfi_same_value (lr)
- lfd fp9,0(r9)
- lfs fp10,8(r9)
+ lfs fp9,0(r9)
+ lfs fp10,4(r9)
#else
lis r9,.LC0@ha
- lfd fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */
- lfs fp10,.LC0@l+8(r9) /* Load 0.5 into fpr10. */
+ lfs fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */
+ lfs fp10,.LC0@l+4(r9) /* Load 0.5 into fpr10. */
#endif
fabs fp2,fp1 /* Get the absolute value of x. */
fsub fp12,fp10,fp10 /* Compute 0.0 into fpr12. */
@@ -80,8 +78,8 @@ ENTRY (__llround)
nop
nop
nop
- lwz r4,12(r1) /* Load return as integer. */
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1) /* Load return as integer. */
+ lwz r4,8+LOWORD(r1)
.Lout:
addi r1,r1,16
blr
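The new .LC0 packs both constants as single-precision bit patterns built directly from the biased exponent: for a power of two 2^e with a zero mantissa, the IEEE-754 binary32 encoding is simply (e+127)<<23, so (52+127)<<23 is 2^52 and (-1+127)<<23 is 0.5, and both can be fetched with lfs instead of lfd. A small C check of that identity (a sketch, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Bit pattern of 2^e as an IEEE-754 binary32 with a zero mantissa. */
static float pow2f_from_bits(int e)
{
  uint32_t bits = (uint32_t)(e + 127) << 23;
  float f;
  memcpy(&f, &bits, sizeof f);
  return f;
}

int main(void)
{
  assert(pow2f_from_bits(52) == 0x1p52f);   /* the 2^52 constant */
  assert(pow2f_from_bits(-1) == 0.5f);      /* the 0.5 constant */
  return 0;
}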
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h b/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h
index 7d6c96e9e..4e42374ea 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h
+++ b/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h
@@ -87,18 +87,15 @@ typedef unsigned long long int hp_timing_t;
#define HP_TIMING_NOW(Var) \
do { \
- union { long long ll; long ii[2]; } _var; \
- long tmp; \
- __asm__ __volatile__ ( \
- "1: mfspr %0,269;" \
- " mfspr %1,268;" \
- " mfspr %2,269;" \
- " cmpw %0,%2;" \
- " bne 1b;" \
- : "=r" (_var.ii[0]), "=r" (_var.ii[1]) , "=r" (tmp) \
- : : "cr0" \
- ); \
- Var = _var.ll; \
+ unsigned int hi, lo, tmp; \
+ __asm__ __volatile__ ("1: mfspr %0,269;" \
+ " mfspr %1,268;" \
+ " mfspr %2,269;" \
+ " cmpw %0,%2;" \
+ " bne 1b;" \
+ : "=&r" (hi), "=&r" (lo), "=&r" (tmp) \
+ : : "cr0"); \
+ Var = ((hp_timing_t) hi << 32) | lo; \
} while (0)
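The rewritten HP_TIMING_NOW keeps the classic 32-bit time-base read: load the upper half (SPR 269), the lower half (SPR 268), then the upper half again, and retry if the two upper reads differ, which catches a carry from TBL into TBU between the loads; the result is then assembled with a shift and OR instead of going through a union. The same retry pattern in C, with hypothetical read_tbu()/read_tbl() accessors standing in for the mfspr instructions:

#include <stdint.h>

/* Hypothetical wrappers for "mfspr 269" (TBU) and "mfspr 268" (TBL). */
extern uint32_t read_tbu(void);
extern uint32_t read_tbl(void);

static uint64_t read_timebase(void)
{
  uint32_t hi, lo, tmp;
  do
    {
      hi  = read_tbu();  /* upper 32 bits */
      lo  = read_tbl();  /* lower 32 bits */
      tmp = read_tbu();  /* reread the upper half */
    }
  while (hi != tmp);     /* retry if TBL wrapped into TBU in between */
  return ((uint64_t) hi << 32) | lo;
}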
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S b/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S
index 9a455a3c6..35e162667 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S
@@ -1,4 +1,4 @@
-/* Optimized strcmp implementation for PowerPC64.
+/* Optimized strcmp implementation for PowerPC32.
Copyright (C) 2003-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,13 +18,14 @@
#include <sysdep.h>
-/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+/* int [r3] memcmp (const char *s1 [r3],
+ const char *s2 [r4],
+ size_t size [r5]) */
.machine power4
EALIGN (memcmp, 4, 0)
CALL_MCOUNT
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,33 +36,32 @@ EALIGN (memcmp, 4, 0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP, rSTR2, rSTR1
+ xor r0, rSTR2, rSTR1
cmplwi cr6, rN, 0
cmplwi cr1, rN, 12
- clrlwi. rTMP, rTMP, 30
- clrlwi rBITDIF, rSTR1, 30
- cmplwi cr5, rBITDIF, 0
+ clrlwi. r0, r0, 30
+ clrlwi r12, rSTR1, 30
+ cmplwi cr5, r12, 0
beq- cr6, L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
blt cr1, L(bytealigned)
- stwu 1,-64(1)
+ stwu 1, -64(r1)
cfi_adjust_cfa_offset(64)
- stw r31,48(1)
- cfi_offset(31,(48-64))
- stw r30,44(1)
- cfi_offset(30,(44-64))
+ stw rWORD8, 48(r1)
+ cfi_offset(rWORD8, (48-64))
+ stw rWORD7, 44(r1)
+ cfi_offset(rWORD7, (44-64))
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already word
+ of r12 to 0. If r12 == 0 then we are already word
aligned and can perform the word aligned loop.
Otherwise we know the two strings have the same alignment (but not
@@ -70,74 +70,95 @@ EALIGN (memcmp, 4, 0)
eliminate bits preceding the first byte. Since we want to join the
normal (word aligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first word. This insures that the loop count is
+ versioning for the first word. This ensures that the loop count is
correct and the first word (shifted) is in the expected register pair. */
- .align 4
+ .align 4
L(samealignment):
clrrwi rSTR1, rSTR1, 2
clrrwi rSTR2, rSTR2, 2
beq cr5, L(Waligned)
- add rN, rN, rBITDIF
- slwi r11, rBITDIF, 3
- srwi rTMP, rN, 4 /* Divide by 16 */
- andi. rBITDIF, rN, 12 /* Get the word remainder */
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the word remainder */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 0(rSTR1)
lwz rWORD2, 0(rSTR2)
- cmplwi cr1, rBITDIF, 8
+#endif
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
clrlwi rN, rN, 30
beq L(dPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(dPs3)
beq cr1, L(dPs2)
/* Remainder is 4 */
- .align 3
+ .align 3
L(dsP1):
- slw rWORD5, rWORD1, r11
- slw rWORD6, rWORD2, r11
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
cmplw cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 8 */
- .align 4
+ .align 4
L(dPs2):
- slw rWORD5, rWORD1, r11
- slw rWORD6, rWORD2, r11
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
cmplw cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 4(rSTR1)
lwz rWORD8, 4(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 12 */
- .align 4
+ .align 4
L(dPs3):
- slw rWORD3, rWORD1, r11
- slw rWORD4, rWORD2, r11
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD2, rWORD6
cmplw cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
L(dPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- slw rWORD1, rWORD1, r11
- slw rWORD2, rWORD2, r11
- cmplw cr0, rWORD1, rWORD2
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD2, rWORD6
+ cmplw cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(Waligned):
- andi. rBITDIF, rN, 12 /* Get the word remainder */
- srwi rTMP, rN, 4 /* Divide by 16 */
- cmplwi cr1, rBITDIF, 8
+ andi. r12, rN, 12 /* Get the word remainder */
+ srwi r0, rN, 4 /* Divide by 16 */
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
clrlwi rN, rN, 30
beq L(dP4)
@@ -145,177 +166,352 @@ L(Waligned):
beq cr1, L(dP2)
/* Remainder is 4 */
- .align 4
+ .align 4
L(dP1):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 0(rSTR1)
lwz rWORD6, 0(rSTR2)
+#endif
cmplw cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP1e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
- bne cr5, L(dLcr5)
- bne cr0, L(dLcr0)
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 16(rSTR1)
lwzu rWORD8, 16(rSTR2)
+#endif
bne cr1, L(dLcr1)
cmplw cr5, rWORD7, rWORD8
bdnz L(dLoop)
bne cr6, L(dLcr6)
- lwz r30,44(1)
- lwz r31,48(1)
- .align 3
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+ .align 3
L(dP1x):
slwi. r12, rN, 3
- bne cr5, L(dLcr5)
+ bne cr5, L(dLcr5x)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
li rRTN, 0
blr
/* Remainder is 8 */
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dP2):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 0(rSTR1)
lwz rWORD6, 0(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 4(rSTR1)
lwz rWORD8, 4(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
L(dP2e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 8(rSTR1)
lwz rWORD2, 8(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 12(rSTR1)
lwz rWORD4, 12(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
+#endif
bne cr6, L(dLcr6)
bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP2x):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 4(rSTR1)
lwz rWORD4, 4(rSTR2)
- cmplw cr5, rWORD3, rWORD4
+#endif
+ cmplw cr1, rWORD3, rWORD4
slwi. r12, rN, 3
- bne cr6, L(dLcr6)
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
- bne cr5, L(dLcr5)
+#endif
+ bne cr1, L(dLcr1x)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
li rRTN, 0
blr
/* Remainder is 12 */
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dP3):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 0(rSTR1)
lwz rWORD4, 0(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
L(dP3e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 4(rSTR1)
lwz rWORD6, 4(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 8(rSTR1)
lwz rWORD8, 8(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 12(rSTR1)
lwz rWORD2, 12(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
bne cr1, L(dLcr1)
bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP3x):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 8(rSTR1)
lwz rWORD2, 8(rSTR2)
- cmplw cr5, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
slwi. r12, rN, 3
- bne cr1, L(dLcr1)
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
- bne cr6, L(dLcr6)
+#endif
+ bne cr6, L(dLcr6x)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
- bne cr5, L(dLcr5)
- lwz 1,0(1)
+ bne cr7, L(dLcr7x)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
li rRTN, 0
blr
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 0(rSTR1)
lwz rWORD2, 0(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP4e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 4(rSTR1)
lwz rWORD4, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 8(rSTR1)
lwz rWORD6, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 12(rSTR1)
lwzu rWORD8, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(dLoop):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
bne cr6, L(dLcr6)
L(dLoop1):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr5, L(dLcr5)
L(dLoop2):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(dLoop3):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 16(rSTR1)
lwzu rWORD8, 16(rSTR2)
+#endif
bne- cr1, L(dLcr1)
- cmplw cr0, rWORD1, rWORD2
+ cmplw cr7, rWORD1, rWORD2
bdnz+ L(dLoop)
L(dL4):
@@ -325,7 +521,7 @@ L(dL4):
bne cr5, L(dLcr5)
cmplw cr5, rWORD7, rWORD8
L(d44):
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
bne cr1, L(dLcr1)
L(d24):
@@ -334,69 +530,82 @@ L(d14):
slwi. r12, rN, 3
bne cr5, L(dLcr5)
L(d04):
- lwz r30,44(1)
- lwz r31,48(1)
- lwz 1,0(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 3 bytes to compare. Since
we are aligned it is safe to load the whole word, and use
- shift right to eliminate bits beyond the compare length. */
+ shift right to eliminate bits beyond the compare length. */
L(d00):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
srw rWORD1, rWORD1, rN
srw rWORD2, rWORD2, rN
- cmplw rWORD1,rWORD2
- li rRTN,0
- beqlr
- li rRTN,1
- bgtlr
- li rRTN,-1
- blr
-
- .align 4
-L(dLcr0):
- lwz r30,44(1)
- lwz r31,48(1)
+ sub rRTN, rWORD1, rWORD2
+ blr
+
+ .align 4
+ cfi_adjust_cfa_offset(64)
+L(dLcr7):
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr7x):
li rRTN, 1
- lwz 1,0(1)
- bgtlr cr0
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
+ bgtlr cr7
li rRTN, -1
blr
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr1):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr1x):
li rRTN, 1
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr1
li rRTN, -1
blr
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr6):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr6x):
li rRTN, 1
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr6
li rRTN, -1
blr
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr5):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
L(dLcr5x):
li rRTN, 1
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr5
li rRTN, -1
blr
- .align 4
+ .align 4
L(bytealigned):
- cfi_adjust_cfa_offset(-64)
- mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
/* We need to prime this loop. This loop is swing modulo scheduled
to avoid pipe delays. The dependent instruction latencies (load to
@@ -411,7 +620,7 @@ L(bytealigned):
lbz rWORD1, 0(rSTR1)
lbz rWORD2, 0(rSTR2)
bdz- L(b11)
- cmplw cr0, rWORD1, rWORD2
+ cmplw cr7, rWORD1, rWORD2
lbz rWORD3, 1(rSTR1)
lbz rWORD4, 1(rSTR2)
bdz- L(b12)
@@ -419,11 +628,11 @@ L(bytealigned):
lbzu rWORD5, 2(rSTR1)
lbzu rWORD6, 2(rSTR2)
bdz- L(b13)
- .align 4
+ .align 4
L(bLoop):
lbzu rWORD1, 1(rSTR1)
lbzu rWORD2, 1(rSTR2)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
cmplw cr6, rWORD5, rWORD6
bdz- L(b3i)
@@ -432,7 +641,7 @@ L(bLoop):
lbzu rWORD4, 1(rSTR2)
bne- cr1, L(bLcr1)
- cmplw cr0, rWORD1, rWORD2
+ cmplw cr7, rWORD1, rWORD2
bdz- L(b2i)
lbzu rWORD5, 1(rSTR1)
@@ -449,23 +658,23 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
bne- cr1, L(bLcr1)
b L(bx56)
- .align 4
+ .align 4
L(b2i):
bne- cr6, L(bLcr6)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
b L(bx34)
- .align 4
+ .align 4
L(b3i):
bne- cr1, L(bLcr1)
bne- cr6, L(bLcr6)
b L(bx12)
- .align 4
-L(bLcr0):
+ .align 4
+L(bLcr7):
li rRTN, 1
- bgtlr cr0
+ bgtlr cr7
li rRTN, -1
blr
L(bLcr1):
@@ -480,36 +689,31 @@ L(bLcr6):
blr
L(b13):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
bne- cr1, L(bx34)
L(bx56):
sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
L(bx34):
sub rRTN, rWORD3, rWORD4
blr
-
L(b11):
L(bx12):
sub rRTN, rWORD1, rWORD2
blr
-
- .align 4
-L(zeroLengthReturn):
-
+ .align 4
L(zeroLength):
li rRTN, 0
blr
- cfi_adjust_cfa_offset(64)
- .align 4
+ .align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is word aligned and can
+ of r12 to 0. If r12 == 0 then rStr1 is word aligned and can
perform the Wunaligned loop.
Otherwise we know that rSTR1 is not already word aligned yet.
@@ -518,79 +722,88 @@ L(zeroLength):
eliminate bits preceding the first byte. Since we want to join the
normal (Wunaligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first W. This insures that the loop count is
+ versioning for the first W. This ensures that the loop count is
correct and the first W (shifted) is in the expected register pair. */
#define rSHL r29 /* Unaligned shift left count. */
#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
+ cfi_adjust_cfa_offset(64)
L(unaligned):
- stw r29,40(r1)
- cfi_offset(r29,(40-64))
+ stw rSHL, 40(r1)
+ cfi_offset(rSHL, (40-64))
clrlwi rSHL, rSTR2, 30
- stw r28,36(r1)
- cfi_offset(r28,(36-64))
+ stw rSHR, 36(r1)
+ cfi_offset(rSHR, (36-64))
beq cr5, L(Wunaligned)
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 W. */
- sub r27, rSTR2, rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the W before that W that contains
the actual start of rSTR2. */
clrrwi rSTR2, rSTR2, 2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
-/* Compute the left/right shift counts for the unalign rSTR2,
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (W aligned) start of rSTR1. */
- clrlwi rSHL, r27, 30
+ clrlwi rSHL, rWORD8_SHIFT, 30
clrrwi rSTR1, rSTR1, 2
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
slwi rSHL, rSHL, 3
- cmplw cr5, r27, rSTR2
- add rN, rN, rBITDIF
- slwi r11, rBITDIF, 3
- stw r24,20(r1)
- cfi_offset(r24,(20-64))
+ cmplw cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
subfic rSHR, rSHL, 32
- srwi rTMP, rN, 4 /* Divide by 16 */
- andi. rBITDIF, rN, 12 /* Get the W remainder */
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the W remainder */
/* We normally need to load 2 Ws to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a W where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
li rWORD8, 0
blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD8, 0(rSTR2)
- la rSTR2, 4(rSTR2)
+ addi rSTR2, rSTR2, 4
+#endif
slw rWORD8, rWORD8, rSHL
L(dus0):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 0(rSTR1)
lwz rWORD2, 0(rSTR2)
- cmplwi cr1, rBITDIF, 8
+#endif
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
- srw rG, rWORD2, rSHR
+ srw r12, rWORD2, rSHR
clrlwi rN, rN, 30
beq L(duPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
bgt cr1, L(duPs3)
beq cr1, L(duPs2)
/* Remainder is 4 */
- .align 4
+ .align 4
L(dusP1):
- slw rB, rWORD2, rSHL
- slw rWORD7, rWORD1, r11
- slw rWORD8, rWORD8, r11
+ slw rWORD8_SHIFT, rWORD2, rSHL
+ slw rWORD7, rWORD1, rWORD6
+ slw rWORD8, rWORD8, rWORD6
bge cr7, L(duP1e)
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
@@ -600,95 +813,133 @@ L(dusP1):
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
- .align 4
+ .align 4
L(duPs2):
- slw rH, rWORD2, rSHL
- slw rWORD5, rWORD1, r11
- slw rWORD6, rWORD8, r11
+ slw rWORD6_SHIFT, rWORD2, rSHL
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 12 */
- .align 4
+ .align 4
L(duPs3):
- slw rF, rWORD2, rSHL
- slw rWORD3, rWORD1, r11
- slw rWORD4, rWORD8, r11
+ slw rWORD4_SHIFT, rWORD2, rSHL
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
L(duPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
- slw rD, rWORD2, rSHL
- slw rWORD1, rWORD1, r11
- slw rWORD2, rWORD8, r11
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(Wunaligned):
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
clrrwi rSTR2, rSTR2, 2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
- srwi rTMP, rN, 4 /* Divide by 16 */
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
- andi. rBITDIF, rN, 12 /* Get the W remainder */
- stw r24,20(r1)
- cfi_offset(r24,(20-64))
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+ srwi r0, rN, 4 /* Divide by 16 */
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
+ andi. r12, rN, 12 /* Get the W remainder */
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
slwi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD6, 0(rSTR2)
lwzu rWORD8, 4(rSTR2)
- cmplwi cr1, rBITDIF, 8
+#endif
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
clrlwi rN, rN, 30
subfic rSHR, rSHL, 32
- slw rH, rWORD6, rSHL
+ slw rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(duP3)
beq cr1, L(duP2)
/* Remainder is 4 */
- .align 4
+ .align 4
L(duP1):
- srw rG, rWORD8, rSHR
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD7, 0(rSTR1)
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+#endif
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP1x)
L(duP1e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
- cmplw cr0, rWORD1, rWORD2
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
bne cr5, L(duLcr5)
- or rWORD4, rC, rD
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- bne cr0, L(duLcr0)
- or rWORD6, rE, rF
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
cmplw cr6, rWORD5, rWORD6
b L(duLoop3)
- .align 4
+ .align 4
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
how we handle the remaining bytes. */
@@ -698,186 +949,321 @@ L(duP1x):
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
- ld rWORD2, 8(rSTR2)
- srw rA, rWORD2, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 8(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
- .align 4
+ .align 4
L(duP2):
- srw rE, rWORD8, rSHR
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD5, 0(rSTR1)
- or rWORD6, rE, rH
- slw rH, rWORD8, rSHL
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ slw rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 4(rSTR1)
lwz rWORD8, 4(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 8(rSTR1)
lwz rWORD2, 8(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 12(rSTR1)
lwz rWORD4, 12(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
bne cr5, L(duLcr5)
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
+#endif
cmplw cr1, rWORD3, rWORD4
b L(duLoop2)
- .align 4
+ .align 4
L(duP2x):
cmplw cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
+#endif
bne cr6, L(duLcr6)
slwi. rN, rN, 3
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 12 */
- .align 4
+ .align 4
L(duP3):
- srw rC, rWORD8, rSHR
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD3, 0(rSTR1)
- slw rF, rWORD8, rSHL
- or rWORD4, rC, rH
+#endif
+ slw rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 4(rSTR1)
lwz rWORD6, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 8(rSTR1)
lwz rWORD8, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 12(rSTR1)
lwz rWORD2, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(duLoop1)
- .align 4
+ .align 4
L(duP3x):
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmplw cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
slwi. rN, rN, 3
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
L(duP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- srw rA, rWORD8, rSHR
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD1, 0(rSTR1)
- slw rD, rWORD8, rSHL
- or rWORD2, rA, rH
+#endif
+ slw rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 4(rSTR1)
lwz rWORD4, 4(rSTR2)
- cmplw cr0, rWORD1, rWORD2
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
- or rWORD4, rC, rD
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 8(rSTR1)
lwz rWORD6, 8(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
- bne cr0, L(duLcr0)
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 12(rSTR1)
lwzu rWORD8, 12(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
cmplw cr5, rWORD7, rWORD8
bdz- L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(duLoop):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- bne cr0, L(duLcr0)
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 16(rSTR1)
lwzu rWORD8, 16(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
bne- cr1, L(duLcr1)
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz+ L(duLoop)
L(duL4):
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmplw cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
cmplw cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
cmplw cr5, rWORD7, rWORD8
L(du44):
- bne cr0, L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
bne cr1, L(duLcr1)
L(du24):
@@ -887,95 +1273,101 @@ L(du14):
bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 3 bytes to compare. We use
shift right to eliminate bits beyond the compare length.
+ This allows the use of word subtract to compute the final result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
+ rWORD8_SHIFT). */
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
- .align 4
+#endif
+ srw r0, rWORD2, rSHR
+ .align 4
L(dutrim):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+#else
lwz rWORD1, 4(rSTR1)
- lwz r31,48(1)
+#endif
+ lwz rWORD8, 48(r1)
subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */
- or rWORD2, rA, rB
- lwz r30,44(1)
- lwz r29,40(r1)
+ or rWORD2, r0, rWORD8_SHIFT
+ lwz rWORD7, 44(r1)
+ lwz rSHL, 40(r1)
srw rWORD1, rWORD1, rN
srw rWORD2, rWORD2, rN
- lwz r28,36(r1)
- lwz r27,32(r1)
- cmplw rWORD1,rWORD2
- li rRTN,0
- beq L(dureturn26)
- li rRTN,1
- bgt L(dureturn26)
- li rRTN,-1
- b L(dureturn26)
- .align 4
-L(duLcr0):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rSHR, 36(r1)
+ lwz rWORD8_SHIFT, 32(r1)
+ sub rRTN, rWORD1, rWORD2
+ b L(dureturn26)
+ .align 4
+L(duLcr7):
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
- bgt cr0, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ bgt cr7, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr1):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
bgt cr1, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr6):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
bgt cr6, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr5):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
bgt cr5, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
L(dureturn29):
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
L(dureturn27):
- lwz r27,32(r1)
+ lwz rWORD8_SHIFT, 32(r1)
L(dureturn26):
- lwz r26,28(r1)
+ lwz rWORD2_SHIFT, 28(r1)
L(dureturn25):
- lwz r25,24(r1)
- lwz r24,20(r1)
- lwz 1,0(1)
+ lwz rWORD4_SHIFT, 24(r1)
+ lwz rWORD6_SHIFT, 20(r1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
blr
END (memcmp)
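The little-endian additions above replace each lwz with lwbrx, a byte-reversed load, so every 4-byte chunk reaches the register in big-endian order; an unsigned word compare (or, for the final partial word, a plain subtract after shifting off the bytes beyond the compare length) then agrees with memcmp's byte-by-byte ordering on either endianness. A sketch of why that works, using __builtin_bswap32 in place of lwbrx (assumed helper, not the patch's code):

#include <stdint.h>
#include <string.h>

/* Compare one aligned 4-byte chunk the way the word loop does. */
static int compare_word(const unsigned char *s1, const unsigned char *s2)
{
  uint32_t w1, w2;
  memcpy(&w1, s1, 4);
  memcpy(&w2, s2, 4);
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Like lwbrx: put the first byte in memory into the most
     significant byte of the register.  */
  w1 = __builtin_bswap32 (w1);
  w2 = __builtin_bswap32 (w2);
#endif
  /* Now an unsigned word comparison matches byte order. */
  if (w1 == w2)
    return 0;
  return w1 > w2 ? 1 : -1;
}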
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S
index d9146631e..338d3cce3 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S
@@ -203,15 +203,28 @@ EALIGN (memcpy, 5, 0)
blt cr6,5f
srwi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmplwi cr1,10,16
@@ -339,13 +352,23 @@ EALIGN (memcpy, 5, 0)
bf 30,1f
/* there are at least two words to copy, so copy them */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10 /* shift 1st src word to left align it in R0 */
srw 8,7,9 /* shift 2nd src word to right align it in R8 */
+#endif
or 0,0,8 /* or them to get word to store */
lwz 6,8(5) /* load the 3rd src word */
stw 0,0(4) /* store the 1st dst word */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,7,10
+ slw 8,6,9
+#else
slw 0,7,10 /* now left align 2nd src word into R0 */
srw 8,6,9 /* shift 3rd src word to right align it in R8 */
+#endif
or 0,0,8 /* or them to get word to store */
lwz 7,12(5)
stw 0,4(4) /* store the 2nd dst word */
@@ -353,8 +376,13 @@ EALIGN (memcpy, 5, 0)
addi 5,5,16
bf 31,4f
/* there is a third word to copy, so copy it */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10 /* shift 3rd src word to left align it in R0 */
srw 8,7,9 /* shift 4th src word to right align it in R8 */
+#endif
or 0,0,8 /* or them to get word to store */
stw 0,0(4) /* store 3rd dst word */
mr 6,7
@@ -364,8 +392,13 @@ EALIGN (memcpy, 5, 0)
b 4f
.align 4
1:
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10 /* shift 1st src word to left align it in R0 */
srw 8,7,9 /* shift 2nd src word to right align it in R8 */
+#endif
addi 5,5,8
or 0,0,8 /* or them to get word to store */
bf 31,4f
@@ -378,23 +411,43 @@ EALIGN (memcpy, 5, 0)
.align 4
4:
/* copy 16 bytes at a time */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10
srw 8,7,9
+#endif
or 0,0,8
lwz 6,0(5)
stw 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srw 0,7,10
+ slw 8,6,9
+#else
slw 0,7,10
srw 8,6,9
+#endif
or 0,0,8
lwz 7,4(5)
stw 0,4(4)
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10
srw 8,7,9
+#endif
or 0,0,8
lwz 6,8(5)
stw 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srw 0,7,10
+ slw 8,6,9
+#else
slw 0,7,10
srw 8,6,9
+#endif
or 0,0,8
lwz 7,12(5)
stw 0,12(4)
@@ -403,8 +456,13 @@ EALIGN (memcpy, 5, 0)
bdnz+ 4b
8:
/* calculate and store the final word */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10
srw 8,7,9
+#endif
or 0,0,8
stw 0,0(4)
3:
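For the word-unaligned cases the copy keeps two consecutive source words in registers and merges them into one aligned destination word with a pair of shifts; the ifdefs above only swap the shift directions, because the bytes that come first in memory sit at the low-order end of a little-endian register rather than the high-order end. A C sketch of that merge under those assumptions (generic, not the exact kernel of the assembly loop):

#include <stdint.h>

/* Merge two naturally aligned source words into the next aligned
   destination word when the source is 'off' bytes (1..3) past a word
   boundary.  w1 covers the current source position, w2 is the following
   word, both fetched with plain word loads. */
static uint32_t merge_words(uint32_t w1, uint32_t w2, unsigned off)
{
  unsigned sh1 = 8 * off;       /* bits already consumed from w1 */
  unsigned sh2 = 32 - sh1;      /* bits taken from w2 */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return (w1 >> sh1) | (w2 << sh2);  /* earlier bytes live at the low end */
#else
  return (w1 << sh1) | (w2 >> sh2);  /* earlier bytes live at the high end */
#endif
}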
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/memset.S b/libc/sysdeps/powerpc/powerpc32/power4/memset.S
index c2d288b38..4fd9d8cb4 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/memset.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/memset.S
@@ -50,7 +50,7 @@ L(_memset):
/* Align to word boundary. */
cmplwi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 4
@@ -65,7 +65,7 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned):
mtcrf 0x01, rLEN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x1C
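Both the old rlwimi and the insrdi that replaces it perform byte replication: copy the fill byte into the adjacent byte to make a halfword, then copy the halfword to make a full word (insrdi does the insert in the 64-bit register image, which leaves the low 32 bits with the same value). In C the whole replication is just two shift-and-OR steps:

#include <stdint.h>

/* Replicate the low byte of c across a 32-bit word, as the memset
   prologue does before issuing word stores. */
static uint32_t replicate_byte(uint32_t c)
{
  c &= 0xff;
  c |= c << 8;    /* byte -> halfword */
  c |= c << 16;   /* halfword -> word */
  return c;
}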
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
index 724d9084a..89b961e78 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
@@ -24,7 +24,7 @@
EALIGN (strncmp, 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -37,6 +37,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -75,12 +76,45 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzw rBITDIF, rBITDIF
@@ -88,28 +122,20 @@ L(endstring):
addi rNEG, rNEG, 7
cmpw cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
- blt- cr1, L(equal)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+ cr1
L(equal):
li rRTN, 0
blr
L(different):
- lwzu rWORD1, -4(rSTR1)
+ lwz rWORD1, -4(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+
L(highbit):
- srwi rWORD2, rWORD2, 24
- srwi rWORD1, rWORD1, 24
- sub rRTN, rWORD1, rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
index ecd37c3cd..49c8a0866 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
@@ -39,8 +39,8 @@ ENTRY (__llround)
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r4,12(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llround)
diff --git a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
index d4da625bb..780dd9ca4 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
@@ -38,7 +38,7 @@ ENTRY (__lround)
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1)
+ lwz r3,8+LOWORD(r1)
addi r1,r1,16
blr
END (__lround)
diff --git a/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S
index f2417fdf4..5f7ba43a2 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S
@@ -27,8 +27,8 @@ EALIGN (__isnan, 4, 0)
ori r1,r1,0
stfd fp1,24(r1) /* copy FPR to GPR */
ori r1,r1,0
- lwz r4,24(r1)
- lwz r5,28(r1)
+ lwz r4,24+HIWORD(r1)
+ lwz r5,24+LOWORD(r1)
lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */
clrlwi r4,r4,1 /* x = fabs(x) */
cmpw cr7,r4,r0 /* if (fabs(x) <= inf) */
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S
index 2c095db1d..3ea18589c 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S
@@ -27,8 +27,8 @@ EALIGN (__isnan, 4, 0)
ori r1,r1,0
stfd fp1,24(r1) /* copy FPR to GPR */
ori r1,r1,0
- lwz r4,24(r1)
- lwz r5,28(r1)
+ lwz r4,24+HIWORD(r1)
+ lwz r5,24+LOWORD(r1)
lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */
clrlwi r4,r4,1 /* x = fabs(x) */
cmpw cr7,r4,r0 /* if (fabs(x) <= inf) */
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
index 3344b312e..c0660cf6e 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
@@ -29,8 +29,8 @@ ENTRY (__llrint)
/* Ensure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrint)
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
index 7f64f8d12..ce298905c 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
@@ -28,8 +28,8 @@ ENTRY (__llrintf)
/* Ensure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrintf)
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
index 0ff04cb71..abb0840d1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
@@ -39,8 +39,8 @@ ENTRY (__llround)
/* Ensure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r4,12(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llround)
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
index a76f71e04..f58114a0c 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
@@ -219,15 +219,28 @@ L(word_unaligned_short):
blt cr6,5f
srwi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmplwi cr1,10,16
@@ -577,7 +590,11 @@ L(wdu1_32):
lwz 6,-1(4)
cmplwi cr6,31,4
srwi 8,31,5 /* calculate the 32 byte loop count */
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,6,8
+#else
slwi 6,6,8
+#endif
clrlwi 31,31,27 /* The remaining bytes, < 32. */
blt cr5,L(wdu1_32tail)
mtctr 8
@@ -585,8 +602,12 @@ L(wdu1_32):
lwz 8,3(4)
lwz 7,4(4)
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,24,32
+#else
/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
rlwimi 6,8,8,(32-8),31
+#endif
b L(wdu1_loop32x)
.align 4
L(wdu1_loop32):
@@ -595,8 +616,12 @@ L(wdu1_loop32):
lwz 7,4(4)
stw 10,-8(3)
stw 11,-4(3)
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,24,32
+#else
/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
rlwimi 6,8,8,(32-8),31
+#endif
L(wdu1_loop32x):
lwz 10,8(4)
lwz 11,12(4)
@@ -613,7 +638,11 @@ L(wdu1_loop32x):
stw 6,16(3)
stw 7,20(3)
addi 3,3,32
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,8,8
+#else
slwi 6,8,8
+#endif
bdnz+ L(wdu1_loop32)
stw 10,-8(3)
stw 11,-4(3)
@@ -624,8 +653,12 @@ L(wdu1_32tail):
blt cr6,L(wdu_4tail)
/* calculate and store the final word */
lwz 8,3(4)
-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,24,32
+#else
+/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
rlwimi 6,8,8,(32-8),31
+#endif
b L(wdu_32tailx)
L(wdu2_32):
@@ -633,7 +666,11 @@ L(wdu2_32):
lwz 6,-2(4)
cmplwi cr6,31,4
srwi 8,31,5 /* calculate the 32 byte loop count */
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,6,16
+#else
slwi 6,6,16
+#endif
clrlwi 31,31,27 /* The remaining bytes, < 32. */
blt cr5,L(wdu2_32tail)
mtctr 8
@@ -641,8 +678,11 @@ L(wdu2_32):
lwz 8,2(4)
lwz 7,4(4)
-/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,16,32
+#else
rlwimi 6,8,16,(32-16),31
+#endif
b L(wdu2_loop32x)
.align 4
L(wdu2_loop32):
@@ -651,8 +691,11 @@ L(wdu2_loop32):
lwz 7,4(4)
stw 10,-8(3)
stw 11,-4(3)
-/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,16,32
+#else
rlwimi 6,8,16,(32-16),31
+#endif
L(wdu2_loop32x):
lwz 10,8(4)
lwz 11,12(4)
@@ -670,7 +713,11 @@ L(wdu2_loop32x):
stw 6,16(3)
stw 7,20(3)
addi 3,3,32
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,8,16
+#else
slwi 6,8,16
+#endif
bdnz+ L(wdu2_loop32)
stw 10,-8(3)
stw 11,-4(3)
@@ -681,8 +728,11 @@ L(wdu2_32tail):
blt cr6,L(wdu_4tail)
/* calculate and store the final word */
lwz 8,2(4)
-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,16,32
+#else
rlwimi 6,8,16,(32-16),31
+#endif
b L(wdu_32tailx)
L(wdu3_32):
@@ -690,7 +740,11 @@ L(wdu3_32):
lwz 6,-3(4)
cmplwi cr6,31,4
srwi 8,31,5 /* calculate the 32 byte loop count */
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,6,24
+#else
slwi 6,6,24
+#endif
clrlwi 31,31,27 /* The remaining bytes, < 32. */
blt cr5,L(wdu3_32tail)
mtctr 8
@@ -698,8 +752,11 @@ L(wdu3_32):
lwz 8,1(4)
lwz 7,4(4)
-/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,8,32
+#else
rlwimi 6,8,24,(32-24),31
+#endif
b L(wdu3_loop32x)
.align 4
L(wdu3_loop32):
@@ -708,8 +765,11 @@ L(wdu3_loop32):
lwz 7,4(4)
stw 10,-8(3)
stw 11,-4(3)
-/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,8,32
+#else
rlwimi 6,8,24,(32-24),31
+#endif
L(wdu3_loop32x):
lwz 10,8(4)
lwz 11,12(4)
@@ -726,7 +786,11 @@ L(wdu3_loop32x):
stw 6,16(3)
stw 7,20(3)
addi 3,3,32
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,8,24
+#else
slwi 6,8,24
+#endif
bdnz+ L(wdu3_loop32)
stw 10,-8(3)
stw 11,-4(3)
@@ -737,8 +801,11 @@ L(wdu3_32tail):
blt cr6,L(wdu_4tail)
/* calculate and store the final word */
lwz 8,1(4)
-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,8,32
+#else
rlwimi 6,8,24,(32-24),31
+#endif
b L(wdu_32tailx)
.align 4
L(wdu_32tailx):
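The wdu ("word, destination unaligned") paths above stitch each aligned destination word together from two source words that straddle a word boundary. On big-endian the first wanted source byte sits at the most-significant end of the earlier load, so the kept bytes are shifted left and the next word is merged in at the bottom with rlwimi; on little-endian the register byte order is reversed, so the shifts flip direction and rldimi places the next word's low byte above the kept bytes. A minimal C sketch of the 1-byte-misaligned case, not part of the patch (plain shifts stand in for the rotate-and-insert instructions, and the __LITTLE_ENDIAN__ test mirrors the one the patch uses):

    #include <stdint.h>

    /* lo holds the aligned word containing the first wanted byte,
       hi holds the following aligned word, both as loaded by lwz.  */
    uint32_t stitch_off1 (uint32_t lo, uint32_t hi)
    {
    #ifdef __LITTLE_ENDIAN__
      return (lo >> 8) | (hi << 24);   /* srwi 6,6,8 ... rldimi 6,8,24,32 */
    #else
      return (lo << 8) | (hi >> 24);   /* slwi 6,6,8 ... rlwimi 6,8,8,24,31 */
    #endif
    }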
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/memset.S b/libc/sysdeps/powerpc/powerpc32/power6/memset.S
index 8c23c8d13..a4b002a96 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/memset.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/memset.S
@@ -48,7 +48,7 @@ L(_memset):
ble- cr1, L(small)
/* Align to word boundary. */
cmplwi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 4
@@ -64,7 +64,7 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned):
mtcrf 0x01, rLEN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x1C
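The two insrdi instructions above replicate the fill byte first into a halfword and then into a word; register contents are endian-neutral, only loads and stores see byte order, so the splat itself is the same either way. A throwaway C model of what the pair computes (assuming rCHR starts out holding the zero-extended fill byte):

    #include <stdint.h>

    uint32_t splat_byte (uint8_t c)
    {
      uint32_t w = c;
      w |= w << 8;    /* insrdi rCHR,rCHR,8,48  : byte -> halfword */
      w |= w << 16;   /* insrdi rCHR,rCHR,16,32 : halfword -> word */
      return w;
    }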
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
index b2ab5bfe7..095c15547 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
@@ -54,9 +54,8 @@ ENTRY (__finite)
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r0,8(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r0,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
clrlwi r0,r0,17 /* r0 = abs(r0). */
addi r1,r1,16 /* Reset the stack pointer. */
cmpwi cr7,r0,0x7ff0 /* r4 == 0x7ff0?. */
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
index 3f8af60a5..0101c8fa1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
@@ -48,14 +48,13 @@ ENTRY (__isinf)
li r3,0
bflr 29 /* If not INF, return. */
- /* Either we have -INF/+INF or a denormal. */
+ /* Either we have +INF or -INF. */
stwu r1,-16(r1) /* Allocate stack space. */
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r4,8(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
addi r1,r1,16 /* Reset the stack pointer. */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
li r3,1
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
index 99ff12696..0ad1dcf1f 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
@@ -53,8 +53,8 @@ ENTRY (__isnan)
stwu r1,-16(r1) /* Allocate stack space. */
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lwz r4,8(r1) /* Load the upper half of the FP value. */
- lwz r5,12(r1) /* Load the lower half of the FP value. */
+ lwz r4,8+HIWORD(r1) /* Load the upper half of the FP value. */
+ lwz r5,8+LOWORD(r1) /* Load the lower half of the FP value. */
addi r1,r1,16 /* Reset the stack pointer. */
lis r0,0x7ff0 /* Load the upper portion for an INF/NaN. */
clrlwi r4,r4,1 /* r4 = abs(r4). */
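The three hunks above switch the GPR-side inspection of the double from hard-coded stack offsets to the HISHORT/HIWORD/LOWORD macros, so the same lhz/lwz picks up the sign-and-exponent half of the value on either byte order. The macros themselves live in the port's sysdep headers and are not shown in this diff; the sketch below uses hypothetical stand-in values to illustrate what they select:

    #include <stdint.h>
    #include <string.h>

    /* Offsets of the 16-bit sign+exponent field of a double in memory;
       illustrative stand-ins for the patch's HISHORT macro.  */
    #ifdef __LITTLE_ENDIAN__
    # define MY_HISHORT 6
    #else
    # define MY_HISHORT 0
    #endif

    uint16_t sign_and_exponent (double x)
    {
      unsigned char b[8];
      uint16_t hi;
      memcpy (b, &x, sizeof x);
      memcpy (&hi, b + MY_HISHORT, sizeof hi);
      return hi;   /* (hi & 0x7fff) >= 0x7ff0 means Inf or NaN */
    }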
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
index e008ed0c3..1c82577f5 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
@@ -35,14 +35,14 @@ static const union {
long double
__logbl (long double x)
{
- double xh, xl;
+ double xh;
double ret;
if (__builtin_expect (x == 0.0L, 0))
/* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */
return -1.0L / __builtin_fabsl (x);
- ldbl_unpack (x, &xh, &xl);
+ xh = ldbl_high (x);
/* ret = x & 0x7ff0000000000000; */
asm (
"xxland %x0,%x1,%x2\n"
@@ -58,9 +58,9 @@ __logbl (long double x)
{
/* POSIX specifies that a denormal number is treated as
though it were normalized. */
- int64_t lx, hx;
+ int64_t hx;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ EXTRACT_WORDS64 (hx, xh);
return (long double) (-1023 - (__builtin_clzll (hx) - 12));
}
/* Test to avoid logb_downward (0.0) == -0.0. */
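For the subnormal branch the change only needs the high double of the IBM long double pair, so a single EXTRACT_WORDS64 on xh replaces the two-word extraction, and the exponent is recovered from the position of the leading mantissa bit. A hedged C restatement of that arithmetic (EXTRACT_WORDS64 modelled with memcpy; sign masking added here for clarity):

    #include <stdint.h>
    #include <string.h>

    /* xh is the high double of the long double, with 0 < |xh| < DBL_MIN.  */
    double logb_of_subnormal (double xh)
    {
      int64_t hx;
      memcpy (&hx, &xh, sizeof hx);        /* EXTRACT_WORDS64 (hx, xh) */
      hx &= 0x7fffffffffffffffLL;          /* drop the sign bit */
      /* 12 bits of sign+exponent precede the mantissa.  With no leading
         zero mantissa bits the value lies in [2^-1023, 2^-1022); each
         further leading zero halves it once more.  */
      return (double) (-1023 - (__builtin_clzll (hx) - 12));
    }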
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memchr.S b/libc/sysdeps/powerpc/powerpc32/power7/memchr.S
index 369e5e048..85754f3f1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memchr.S
@@ -25,107 +25,111 @@ ENTRY (__memchr)
CALL_MCOUNT
dcbt 0,r3
clrrwi r8,r3,2
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
add r7,r3,r5 /* Calculate the last acceptable address. */
+ insrdi r4,r4,16,32
cmplwi r5,16
+ li r9, -1
+ rlwinm r6,r3,3,27,28 /* Calculate padding. */
+ addi r7,r7,-1
+#ifdef __LITTLE_ENDIAN__
+ slw r9,r9,r6
+#else
+ srw r9,r9,r6
+#endif
ble L(small_range)
- cmplw cr7,r3,r7 /* Compare the starting address (r3) with the
- ending address (r7). If (r3 >= r7), the size
- passed in is zero or negative. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Artificially set our ending address (r7)
- such that we will exit early. */
-L(proceed):
- rlwinm r6,r3,3,27,28 /* Calculate padding. */
- cmpli cr6,r6,0 /* cr6 == Do we have padding? */
lwz r12,0(r8) /* Load word from memory. */
- cmpb r10,r12,r4 /* Check for BYTEs in WORD1. */
- beq cr6,L(proceed_no_padding)
- slw r10,r10,r6
- srw r10,r10,r6
-L(proceed_no_padding):
- cmplwi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
+ cmpb r3,r12,r4 /* Check for BYTEs in WORD1. */
+ and r3,r3,r9
+ clrlwi r5,r7,30 /* Byte count - 1 in last word. */
+ clrrwi r7,r7,2 /* Address of last word. */
+ cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(null)
-
mtcrf 0x01,r8
/* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
-
bt 29,L(loop_setup)
/* Handle WORD2 of pair. */
lwzu r12,4(r8)
- cmpb r10,r12,r4
- cmplwi cr7,r10,0
+ cmpb r3,r12,r4
+ cmplwi cr7,r3,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(null)
-
L(loop_setup):
- sub r5,r7,r9
- srwi r6,r5,3 /* Number of loop iterations. */
+ /* The last word we want to read in the loop below is the one
+ containing the last byte of the string, i.e. the word at
+ (s + size - 1) & ~3, or r7. The first word read is at
+ r8 + 4, we read 2 * cnt words, so the last word read will
+ be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives
+ cnt = (r7 - r8) / 8 */
+ sub r6,r7,r8
+ srwi r6,r6,3 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since
- it's a small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE in the string. Since
+ it's a small loop (8 instructions), align it to 32-bytes. */
+ .align 5
L(loop):
/* Load two words, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
lwz r12,4(r8)
lwzu r11,8(r8)
- cmpb r10,r12,r4
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one word. */
- cmplwi cr7,r5,0
+ or r6,r9,r3 /* Merge everything in one word. */
+ cmplwi cr7,r6,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. */
- subi r11,r7,4
- cmplw cr6,r8,r11
- blt cr6,L(loop_small)
- b L(null)
+ /* We may have one more word to read. */
+ cmplw r8,r7
+ beqlr
+
+ lwzu r12,4(r8)
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ bne cr6,L(done)
+ blr
+ .align 4
+L(found):
/* OK, one (or both) of the words contains BYTE. Check
the first word and decrement the address in case the first
word really contains BYTE. */
- .align 4
-L(found):
- cmplwi cr6,r10,0
+ cmplwi cr6,r3,0
addi r8,r8,-4
bne cr6,L(done)
/* BYTE must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,4
- /* r10 has the output of the cmpb instruction, that is, it contains
+ /* r3 has the output of the cmpb instruction, that is, it contains
0xff in the same position as BYTE in the original
word from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the range. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r3,-1
+ andc r0,r0,r3
+ popcntw r0,r0 /* Count trailing zeros. */
+#else
+ cntlzw r0,r3 /* Count leading zeros before the match. */
+#endif
+ cmplw r8,r7 /* Are we on the last word? */
+ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
add r3,r8,r0
- cmplw r3,r7
- bge L(null)
+ cmplw cr7,r0,r5 /* If on the last word, check byte offset. */
+ bnelr
+ blelr cr7
+ li r3,0
blr
.align 4
@@ -137,67 +141,42 @@ L(null):
.align 4
L(small_range):
cmplwi r5,0
- rlwinm r6,r3,3,27,28 /* Calculate padding. */
- beq L(null) /* This branch is for the cmplwi r5,0 above */
+ beq L(null)
lwz r12,0(r8) /* Load word from memory. */
- cmplwi cr6,r6,0 /* cr6 == Do we have padding? */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- beq cr6,L(small_no_padding)
- slw r10,r10,r6
- srw r10,r10,r6
-L(small_no_padding):
- cmplwi cr7,r10,0
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmplwi cr7,r3,0
+ clrlwi r5,r7,30 /* Byte count - 1 in last word. */
+ clrrwi r7,r7,2 /* Address of last word. */
+ cmplw r8,r7 /* Are we done already? */
bne cr7,L(done)
+ beqlr
- /* Are we done already? */
- addi r9,r8,4
- cmplw r9,r7
- bge L(null)
-
-L(loop_small): /* loop_small has been unrolled. */
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ cmplw r8,r7
bne cr6,L(done)
- bge L(null)
+ beqlr
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ cmplw r8,r7
bne cr6,L(done)
- bge L(null)
+ beqlr
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ cmplw r8,r7
bne cr6,L(done)
- bge L(null)
+ beqlr
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
bne cr6,L(done)
- bge L(null)
-
- /* For most cases we will never get here. Under some combinations of
- padding + length there is a leftover word that still needs to be
- checked. */
- lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- bne cr6,L(done)
-
- /* save a branch and exit directly */
- li r3,0
blr
END (__memchr)
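In the rewritten tail, cmpb leaves 0xff in every byte of r3 that matched the search byte, and the first match in memory corresponds to a leading-zero count on big-endian but a trailing-zero count on little-endian; since there is no count-trailing-zeros instruction here, the patch derives it from popcntw of (x - 1) & ~x. A small C sketch of that conversion, not taken from the file:

    #include <stdint.h>

    /* mask is the nonzero cmpb result: 0xff per matching byte.  */
    unsigned first_match_offset (uint32_t mask)
    {
    #ifdef __LITTLE_ENDIAN__
      /* (mask - 1) & ~mask sets exactly the bits below the lowest set bit,
         so its popcount is the trailing-zero count.  */
      return (unsigned) __builtin_popcount ((mask - 1) & ~mask) >> 3;
    #else
      return (unsigned) __builtin_clz (mask) >> 3;
    #endif
    }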
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S b/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S
index 075e19f14..f160ddebf 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S
@@ -23,10 +23,9 @@
size_t size [r5]) */
.machine power7
-EALIGN (memcmp,4,0)
+EALIGN (memcmp, 4, 0)
CALL_MCOUNT
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -37,35 +36,32 @@ EALIGN (memcmp,4,0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP,rSTR2,rSTR1
- cmplwi cr6,rN,0
- cmplwi cr1,rN,12
- clrlwi. rTMP,rTMP,30
- clrlwi rBITDIF,rSTR1,30
- cmplwi cr5,rBITDIF,0
- beq- cr6,L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
-
- /* If less than 8 bytes or not aligned, use the unaligned
- byte loop. */
-
- blt cr1,L(bytealigned)
- stwu 1,-64(1)
+ xor r0, rSTR2, rSTR1
+ cmplwi cr6, rN, 0
+ cmplwi cr1, rN, 12
+ clrlwi. r0, r0, 30
+ clrlwi r12, rSTR1, 30
+ cmplwi cr5, r12, 0
+ beq- cr6, L(zeroLength)
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
+/* If less than 8 bytes or not aligned, use the unaligned
+ byte loop. */
+ blt cr1, L(bytealigned)
+ stwu 1, -64(r1)
cfi_adjust_cfa_offset(64)
- stw r31,48(1)
- cfi_offset(31,(48-64))
- stw r30,44(1)
- cfi_offset(30,(44-64))
+ stw rWORD8, 48(r1)
+ cfi_offset(rWORD8, (48-64))
+ stw rWORD7, 44(r1)
+ cfi_offset(rWORD7, (44-64))
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already word
+ of r12 to 0. If r12 == 0 then we are already word
aligned and can perform the word aligned loop.
Otherwise we know the two strings have the same alignment (but not
@@ -74,332 +70,541 @@ EALIGN (memcmp,4,0)
eliminate bits preceding the first byte. Since we want to join the
normal (word aligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first word. This insures that the loop count is
+ versioning for the first word. This ensures that the loop count is
correct and the first word (shifted) is in the expected register pair. */
.align 4
L(samealignment):
- clrrwi rSTR1,rSTR1,2
- clrrwi rSTR2,rSTR2,2
- beq cr5,L(Waligned)
- add rN,rN,rBITDIF
- slwi r11,rBITDIF,3
- srwi rTMP,rN,4 /* Divide by 16 */
- andi. rBITDIF,rN,12 /* Get the word remainder */
- lwz rWORD1,0(rSTR1)
- lwz rWORD2,0(rSTR2)
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- clrlwi rN,rN,30
+ clrrwi rSTR1, rSTR1, 2
+ clrrwi rSTR2, rSTR2, 2
+ beq cr5, L(Waligned)
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the word remainder */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+#endif
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
beq L(dPs4)
- mtctr rTMP
- bgt cr1,L(dPs3)
- beq cr1,L(dPs2)
+ mtctr r0
+ bgt cr1, L(dPs3)
+ beq cr1, L(dPs2)
/* Remainder is 4 */
.align 3
L(dsP1):
- slw rWORD5,rWORD1,r11
- slw rWORD6,rWORD2,r11
- cmplw cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
+ cmplw cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 8 */
.align 4
L(dPs2):
- slw rWORD5,rWORD1,r11
- slw rWORD6,rWORD2,r11
- cmplw cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
- lwz rWORD7,4(rSTR1)
- lwz rWORD8,4(rSTR2)
- cmplw cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 12 */
.align 4
L(dPs3):
- slw rWORD3,rWORD1,r11
- slw rWORD4,rWORD2,r11
- cmplw cr1,rWORD3,rWORD4
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD2, rWORD6
+ cmplw cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 16, remainder is 0 */
.align 4
L(dPs4):
- mtctr rTMP
- slw rWORD1,rWORD1,r11
- slw rWORD2,rWORD2,r11
- cmplw cr0,rWORD1,rWORD2
+ mtctr r0
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD2, rWORD6
+ cmplw cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are word aligned and the
compare length is at least 8 bytes. */
.align 4
L(Waligned):
- andi. rBITDIF,rN,12 /* Get the word remainder */
- srwi rTMP,rN,4 /* Divide by 16 */
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- clrlwi rN,rN,30
+ andi. r12, rN, 12 /* Get the word remainder */
+ srwi r0, rN, 4 /* Divide by 16 */
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
beq L(dP4)
- bgt cr1,L(dP3)
- beq cr1,L(dP2)
+ bgt cr1, L(dP3)
+ beq cr1, L(dP2)
/* Remainder is 4 */
.align 4
L(dP1):
- mtctr rTMP
+ mtctr r0
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
- lwz rWORD5,0(rSTR1)
- lwz rWORD6,0(rSTR2)
- cmplw cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 0(rSTR1)
+ lwz rWORD6, 0(rSTR2)
+#endif
+ cmplw cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP1e):
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- bne cr0,L(dLcr0)
-
- lwzu rWORD7,16(rSTR1)
- lwzu rWORD8,16(rSTR2)
- bne cr1,L(dLcr1)
- cmplw cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmplw cr5, rWORD7, rWORD8
bdnz L(dLoop)
- bne cr6,L(dLcr6)
- lwz r30,44(1)
- lwz r31,48(1)
+ bne cr6, L(dLcr6)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
.align 3
L(dP1x):
- slwi. r12,rN,3
- bne cr5,L(dLcr5)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+ slwi. r12, rN, 3
+ bne cr5, L(dLcr5x)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 8 */
.align 4
+ cfi_adjust_cfa_offset(64)
L(dP2):
- mtctr rTMP
- lwz rWORD5,0(rSTR1)
- lwz rWORD6,0(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
- lwz rWORD7,4(rSTR1)
- lwz rWORD8,4(rSTR2)
- cmplw cr5,rWORD7,rWORD8
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 0(rSTR1)
+ lwz rWORD6, 0(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
L(dP2e):
- lwz rWORD1,8(rSTR1)
- lwz rWORD2,8(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- lwz rWORD3,12(rSTR1)
- lwz rWORD4,12(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- bne cr6,L(dLcr6)
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 12(rSTR1)
+ lwz rWORD4, 12(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ bne cr6, L(dLcr6)
+ bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), so we want to
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP2x):
- lwz rWORD3,4(rSTR1)
- lwz rWORD4,4(rSTR2)
- cmplw cr5,rWORD3,rWORD4
- slwi. r12,rN,3
- bne cr6,L(dLcr6)
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- bne cr5,L(dLcr5)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ slwi. r12, rN, 3
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ bne cr1, L(dLcr1x)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 12 */
.align 4
+ cfi_adjust_cfa_offset(64)
L(dP3):
- mtctr rTMP
- lwz rWORD3,0(rSTR1)
- lwz rWORD4,0(rSTR2)
- cmplw cr1,rWORD3,rWORD4
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 0(rSTR1)
+ lwz rWORD4, 0(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
L(dP3e):
- lwz rWORD5,4(rSTR1)
- lwz rWORD6,4(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- blt cr7,L(dP3x)
- lwz rWORD7,8(rSTR1)
- lwz rWORD8,8(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- lwz rWORD1,12(rSTR1)
- lwz rWORD2,12(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr1,L(dLcr1)
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 4(rSTR1)
+ lwz rWORD6, 4(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 8(rSTR1)
+ lwz rWORD8, 8(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 12(rSTR1)
+ lwz rWORD2, 12(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), so we want to
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP3x):
- lwz rWORD1,8(rSTR1)
- lwz rWORD2,8(rSTR2)
- cmplw cr5,rWORD1,rWORD2
- slwi. r12,rN,3
- bne cr1,L(dLcr1)
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr6,L(dLcr6)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
- bne cr5,L(dLcr5)
- lwz 1,0(1)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ slwi. r12, rN, 3
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr6, L(dLcr6x)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ bne cr7, L(dLcr7x)
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Count is a multiple of 16, remainder is 0 */
.align 4
+ cfi_adjust_cfa_offset(64)
L(dP4):
- mtctr rTMP
- lwz rWORD1,0(rSTR1)
- lwz rWORD2,0(rSTR2)
- cmplw cr0,rWORD1,rWORD2
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP4e):
- lwz rWORD3,4(rSTR1)
- lwz rWORD4,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- lwz rWORD5,8(rSTR1)
- lwz rWORD6,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- lwzu rWORD7,12(rSTR1)
- lwzu rWORD8,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
- bne cr1,L(dLcr1)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 8(rSTR1)
+ lwz rWORD6, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 12(rSTR1)
+ lwzu rWORD8, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
+ bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(dLoop):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
L(dLoop1):
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
L(dLoop2):
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
L(dLoop3):
- lwzu rWORD7,16(rSTR1)
- lwzu rWORD8,16(rSTR2)
- bne cr1,L(dLcr1)
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmplw cr7, rWORD1, rWORD2
bdnz L(dLoop)
L(dL4):
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- cmplw cr5,rWORD7,rWORD8
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ cmplw cr5, rWORD7, rWORD8
L(d44):
- bne cr0,L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
- bne cr1,L(dLcr1)
+ bne cr1, L(dLcr1)
L(d24):
- bne cr6,L(dLcr6)
+ bne cr6, L(dLcr6)
L(d14):
- slwi. r12,rN,3
- bne cr5,L(dLcr5)
+ slwi. r12, rN, 3
+ bne cr5, L(dLcr5)
L(d04):
- lwz r30,44(1)
- lwz r31,48(1)
- lwz 1,0(1)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 3 bytes to compare. Since
we are aligned it is safe to load the whole word, and use
- shift right to eliminate bits beyond the compare length. */
+ shift right to eliminate bits beyond the compare length. */
L(d00):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- srw rWORD1,rWORD1,rN
- srw rWORD2,rWORD2,rN
- cmplw rWORD1,rWORD2
- li rRTN,0
- beqlr
- li rRTN,1
- bgtlr
- li rRTN,-1
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw rWORD1, rWORD1, rN
+ srw rWORD2, rWORD2, rN
+ sub rRTN, rWORD1, rWORD2
blr
.align 4
-L(dLcr0):
- lwz r30,44(1)
- lwz r31,48(1)
- li rRTN,1
- lwz 1,0(1)
- bgtlr cr0
- li rRTN,-1
+ cfi_adjust_cfa_offset(64)
+L(dLcr7):
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr7x):
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
+ bgtlr cr7
+ li rRTN, -1
blr
.align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr1):
- lwz r30,44(1)
- lwz r31,48(1)
- li rRTN,1
- lwz 1,0(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr1x):
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr6):
- lwz r30,44(1)
- lwz r31,48(1)
- li rRTN,1
- lwz 1,0(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr6x):
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr5):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
L(dLcr5x):
- li rRTN,1
- lwz 1,0(1)
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr5
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(bytealigned):
- cfi_adjust_cfa_offset(-64)
mtctr rN
/* We need to prime this loop. This loop is swing modulo scheduled
@@ -411,38 +616,39 @@ L(bytealigned):
So we must precondition some registers and condition codes so that
we don't exit the loop early on the first iteration. */
- lbz rWORD1,0(rSTR1)
- lbz rWORD2,0(rSTR2)
+
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
bdz L(b11)
- cmplw cr0,rWORD1,rWORD2
- lbz rWORD3,1(rSTR1)
- lbz rWORD4,1(rSTR2)
+ cmplw cr7, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
bdz L(b12)
- cmplw cr1,rWORD3,rWORD4
- lbzu rWORD5,2(rSTR1)
- lbzu rWORD6,2(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
bdz L(b13)
.align 4
L(bLoop):
- lbzu rWORD1,1(rSTR1)
- lbzu rWORD2,1(rSTR2)
- bne cr0,L(bLcr0)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne cr7, L(bLcr7)
- cmplw cr6,rWORD5,rWORD6
+ cmplw cr6, rWORD5, rWORD6
bdz L(b3i)
- lbzu rWORD3,1(rSTR1)
- lbzu rWORD4,1(rSTR2)
- bne cr1,L(bLcr1)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne cr1, L(bLcr1)
- cmplw cr0,rWORD1,rWORD2
+ cmplw cr7, rWORD1, rWORD2
bdz L(b2i)
- lbzu rWORD5,1(rSTR1)
- lbzu rWORD6,1(rSTR2)
- bne cr6,L(bLcr6)
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne cr6, L(bLcr6)
- cmplw cr1,rWORD3,rWORD4
+ cmplw cr1, rWORD3, rWORD4
bdnz L(bLoop)
/* We speculatively load bytes before we have tested the previous
@@ -452,67 +658,62 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne cr0,L(bLcr0)
- bne cr1,L(bLcr1)
+ bne cr7, L(bLcr7)
+ bne cr1, L(bLcr1)
b L(bx56)
.align 4
L(b2i):
- bne cr6,L(bLcr6)
- bne cr0,L(bLcr0)
+ bne cr6, L(bLcr6)
+ bne cr7, L(bLcr7)
b L(bx34)
.align 4
L(b3i):
- bne cr1,L(bLcr1)
- bne cr6,L(bLcr6)
+ bne cr1, L(bLcr1)
+ bne cr6, L(bLcr6)
b L(bx12)
.align 4
-L(bLcr0):
- li rRTN,1
- bgtlr cr0
- li rRTN,-1
+L(bLcr7):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
blr
L(bLcr1):
- li rRTN,1
+ li rRTN, 1
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
L(bLcr6):
- li rRTN,1
+ li rRTN, 1
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
L(b13):
- bne cr0,L(bx12)
- bne cr1,L(bx34)
+ bne cr7, L(bx12)
+ bne cr1, L(bx34)
L(bx56):
- sub rRTN,rWORD5,rWORD6
+ sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne cr0,L(bx12)
+ bne cr7, L(bx12)
L(bx34):
- sub rRTN,rWORD3,rWORD4
+ sub rRTN, rWORD3, rWORD4
blr
-
L(b11):
L(bx12):
- sub rRTN,rWORD1,rWORD2
+ sub rRTN, rWORD1, rWORD2
blr
-
.align 4
-L(zeroLengthReturn):
-
L(zeroLength):
- li rRTN,0
+ li rRTN, 0
blr
- cfi_adjust_cfa_offset(64)
.align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is word aligned and can
+ of r12 to 0. If r12 == 0 then rSTR1 is word aligned and we can
perform the Wunaligned loop.
Otherwise we know that rSTR1 is not yet word aligned.
@@ -521,465 +722,654 @@ L(zeroLength):
eliminate bits preceding the first byte. Since we want to join the
normal (Wunaligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first W. This insures that the loop count is
+ versioning for the first W. This ensures that the loop count is
correct and the first W (shifted) is in the expected register pair. */
#define rSHL r29 /* Unaligned shift left count. */
#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
+ cfi_adjust_cfa_offset(64)
L(unaligned):
- stw r29,40(r1)
- cfi_offset(r29,(40-64))
- clrlwi rSHL,rSTR2,30
- stw r28,36(r1)
- cfi_offset(r28,(36-64))
- beq cr5,L(Wunaligned)
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
+ stw rSHL, 40(r1)
+ cfi_offset(rSHL, (40-64))
+ clrlwi rSHL, rSTR2, 30
+ stw rSHR, 36(r1)
+ cfi_offset(rSHR, (36-64))
+ beq cr5, L(Wunaligned)
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 W. */
- sub r27,rSTR2,rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the W before that W that contains
the actual start of rSTR2. */
- clrrwi rSTR2,rSTR2,2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
-/* Compute the left/right shift counts for the unalign rSTR2,
+ clrrwi rSTR2, rSTR2, 2
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (W aligned) start of rSTR1. */
- clrlwi rSHL,r27,30
- clrrwi rSTR1,rSTR1,2
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
- slwi rSHL,rSHL,3
- cmplw cr5,r27,rSTR2
- add rN,rN,rBITDIF
- slwi r11,rBITDIF,3
- stw r24,20(r1)
- cfi_offset(r24,(20-64))
- subfic rSHR,rSHL,32
- srwi rTMP,rN,4 /* Divide by 16 */
- andi. rBITDIF,rN,12 /* Get the W remainder */
+ clrlwi rSHL, rWORD8_SHIFT, 30
+ clrrwi rSTR1, rSTR1, 2
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
+ slwi rSHL, rSHL, 3
+ cmplw cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
+ subfic rSHR, rSHL, 32
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the W remainder */
/* We normally need to load 2 Ws to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a W where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
- li rWORD8,0
- blt cr5,L(dus0)
- lwz rWORD8,0(rSTR2)
- la rSTR2,4(rSTR2)
- slw rWORD8,rWORD8,rSHL
+ li rWORD8, 0
+ blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD8, 0(rSTR2)
+ addi rSTR2, rSTR2, 4
+#endif
+ slw rWORD8, rWORD8, rSHL
L(dus0):
- lwz rWORD1,0(rSTR1)
- lwz rWORD2,0(rSTR2)
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- srw rG,rWORD2,rSHR
- clrlwi rN,rN,30
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+#endif
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ srw r12, rWORD2, rSHR
+ clrlwi rN, rN, 30
beq L(duPs4)
- mtctr rTMP
- or rWORD8,rG,rWORD8
- bgt cr1,L(duPs3)
- beq cr1,L(duPs2)
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
/* Remainder is 4 */
.align 4
L(dusP1):
- slw rB,rWORD2,rSHL
- slw rWORD7,rWORD1,r11
- slw rWORD8,rWORD8,r11
- bge cr7,L(duP1e)
+ slw rWORD8_SHIFT, rWORD2, rSHL
+ slw rWORD7, rWORD1, rWORD6
+ slw rWORD8, rWORD8, rWORD6
+ bge cr7, L(duP1e)
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
how we handle the remaining bytes. */
- cmplw cr5,rWORD7,rWORD8
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+ cmplw cr5, rWORD7, rWORD8
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
.align 4
L(duPs2):
- slw rH,rWORD2,rSHL
- slw rWORD5,rWORD1,r11
- slw rWORD6,rWORD8,r11
+ slw rWORD6_SHIFT, rWORD2, rSHL
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 12 */
.align 4
L(duPs3):
- slw rF,rWORD2,rSHL
- slw rWORD3,rWORD1,r11
- slw rWORD4,rWORD8,r11
+ slw rWORD4_SHIFT, rWORD2, rSHL
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 16, remainder is 0 */
.align 4
L(duPs4):
- mtctr rTMP
- or rWORD8,rG,rWORD8
- slw rD,rWORD2,rSHL
- slw rWORD1,rWORD1,r11
- slw rWORD2,rWORD8,r11
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is word aligned and the
compare length is at least 8 bytes. */
.align 4
L(Wunaligned):
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
- clrrwi rSTR2,rSTR2,2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
- srwi rTMP,rN,4 /* Divide by 16 */
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
- andi. rBITDIF,rN,12 /* Get the W remainder */
- stw r24,20(r1)
- cfi_offset(r24,(24-64))
- slwi rSHL,rSHL,3
- lwz rWORD6,0(rSTR2)
- lwzu rWORD8,4(rSTR2)
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- clrlwi rN,rN,30
- subfic rSHR,rSHL,32
- slw rH,rWORD6,rSHL
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
+ clrrwi rSTR2, rSTR2, 2
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+ srwi r0, rN, 4 /* Divide by 16 */
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
+ andi. r12, rN, 12 /* Get the W remainder */
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
+ slwi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD6, 0(rSTR2)
+ lwzu rWORD8, 4(rSTR2)
+#endif
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
+ subfic rSHR, rSHL, 32
+ slw rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP
- bgt cr1,L(duP3)
- beq cr1,L(duP2)
+ mtctr r0
+ bgt cr1, L(duP3)
+ beq cr1, L(duP2)
/* Remainder is 4 */
.align 4
L(duP1):
- srw rG,rWORD8,rSHR
- lwz rWORD7,0(rSTR1)
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP1x)
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD7, 0(rSTR1)
+#endif
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP1x)
L(duP1e):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- bne cr5,L(duLcr5)
- or rWORD4,rC,rD
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- bne cr0,L(duLcr0)
- or rWORD6,rE,rF
- cmplw cr6,rWORD5,rWORD6
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ bne cr5, L(duLcr5)
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
+ cmplw cr6, rWORD5, rWORD6
b L(duLoop3)
.align 4
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
how we handle the remaining bytes. */
L(duP1x):
- cmplw cr5,rWORD7,rWORD8
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+ cmplw cr5, rWORD7, rWORD8
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 8(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
.align 4
L(duP2):
- srw rE,rWORD8,rSHR
- lwz rWORD5,0(rSTR1)
- or rWORD6,rE,rH
- slw rH,rWORD8,rSHL
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD5, 0(rSTR1)
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ slw rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
- lwz rWORD7,4(rSTR1)
- lwz rWORD8,4(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP2x)
- lwz rWORD1,8(rSTR1)
- lwz rWORD2,8(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
- lwz rWORD3,12(rSTR1)
- lwz rWORD4,12(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- bne cr5,L(duLcr5)
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- or rWORD4,rC,rD
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- cmplw cr1,rWORD3,rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 12(rSTR1)
+ lwz rWORD4, 12(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ bne cr5, L(duLcr5)
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ cmplw cr1, rWORD3, rWORD4
b L(duLoop2)
.align 4
L(duP2x):
- cmplw cr5,rWORD7,rWORD8
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- bne cr6,L(duLcr6)
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+ cmplw cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ bne cr6, L(duLcr6)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 12 */
.align 4
L(duP3):
- srw rC,rWORD8,rSHR
- lwz rWORD3,0(rSTR1)
- slw rF,rWORD8,rSHL
- or rWORD4,rC,rH
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD3, 0(rSTR1)
+#endif
+ slw rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
- lwz rWORD5,4(rSTR1)
- lwz rWORD6,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- or rWORD6,rE,rF
- lwz rWORD7,8(rSTR1)
- lwz rWORD8,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP3x)
- lwz rWORD1,12(rSTR1)
- lwz rWORD2,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 4(rSTR1)
+ lwz rWORD6, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 8(rSTR1)
+ lwz rWORD8, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 12(rSTR1)
+ lwz rWORD2, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(duLoop1)
.align 4
L(duP3x):
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr1,L(duLcr1)
- cmplw cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 16, remainder is 0 */
.align 4
L(duP4):
- mtctr rTMP
- srw rA,rWORD8,rSHR
- lwz rWORD1,0(rSTR1)
- slw rD,rWORD8,rSHL
- or rWORD2,rA,rH
+ mtctr r0
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+#endif
+ slw rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
- lwz rWORD3,4(rSTR1)
- lwz rWORD4,4(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- or rWORD4,rC,rD
- lwz rWORD5,8(rSTR1)
- lwz rWORD6,8(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- bne cr0,L(duLcr0)
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- or rWORD6,rE,rF
- lwzu rWORD7,12(rSTR1)
- lwzu rWORD8,12(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- cmplw cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 8(rSTR1)
+ lwz rWORD6, 8(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 12(rSTR1)
+ lwzu rWORD8, 12(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ cmplw cr5, rWORD7, rWORD8
bdz L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(duLoop):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- or rWORD4,rC,rD
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr0,L(duLcr0)
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- or rWORD6,rE,rF
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
- lwzu rWORD7,16(rSTR1)
- lwzu rWORD8,16(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- bne cr1,L(duLcr1)
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ bne cr1, L(duLcr1)
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz L(duLoop)
L(duL4):
- bne cr1,L(duLcr1)
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- cmplw cr5,rWORD7,rWORD8
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ cmplw cr5, rWORD7, rWORD8
L(du44):
- bne cr0,L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
- bne cr1,L(duLcr1)
+ bne cr1, L(duLcr1)
L(du24):
- bne cr6,L(duLcr6)
+ bne cr6, L(duLcr6)
L(du14):
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 3 bytes to compare. We use
shift right to eliminate bits beyond the compare length.
+ This allows the use of word subtract to compute the final result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
- cmplw cr7,rN,rSHR
+ rWORD8_SHIFT). */
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
.align 4
L(dutrim):
- lwz rWORD1,4(rSTR1)
- lwz r31,48(1)
- subfic rN,rN,32 /* Shift count is 32 - (rN * 8). */
- or rWORD2,rA,rB
- lwz r30,44(1)
- lwz r29,40(r1)
- srw rWORD1,rWORD1,rN
- srw rWORD2,rWORD2,rN
- lwz r28,36(r1)
- lwz r27,32(r1)
- cmplw rWORD1,rWORD2
- li rRTN,0
- beq L(dureturn26)
- li rRTN,1
- bgt L(dureturn26)
- li rRTN,-1
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+#else
+ lwz rWORD1, 4(rSTR1)
+#endif
+ lwz rWORD8, 48(r1)
+ subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */
+ or rWORD2, r0, rWORD8_SHIFT
+ lwz rWORD7, 44(r1)
+ lwz rSHL, 40(r1)
+ srw rWORD1, rWORD1, rN
+ srw rWORD2, rWORD2, rN
+ lwz rSHR, 36(r1)
+ lwz rWORD8_SHIFT, 32(r1)
+ sub rRTN, rWORD1, rWORD2
b L(dureturn26)
.align 4
-L(duLcr0):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr0,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+L(duLcr7):
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr7, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr1):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr1,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr1, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr6):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr6,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr6, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr5):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr5,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr5, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
L(dureturn29):
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
L(dureturn27):
- lwz r27,32(r1)
+ lwz rWORD8_SHIFT, 32(r1)
L(dureturn26):
- lwz r26,28(r1)
+ lwz rWORD2_SHIFT, 28(r1)
L(dureturn25):
- lwz r25,24(r1)
- lwz r24,20(r1)
- lwz 1,0(1)
+ lwz rWORD4_SHIFT, 24(r1)
+ lwz rWORD6_SHIFT, 20(r1)
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
blr
END (memcmp)
+
libc_hidden_builtin_def (memcmp)
-weak_alias (memcmp,bcmp)
+weak_alias (memcmp, bcmp)
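On little-endian this memcmp loads every word of both strings with lwbrx, so each register holds the bytes in the order a big-endian load would give: the first byte in memory is the most significant. An unsigned word comparison then orders the words exactly as a byte-wise memcmp would, and the shared tail can finish with a plain subtract once the bytes beyond the compare length have been shifted out (at most three bytes remain, so the 32-bit difference cannot wrap). A C sketch of the idea, with bswap standing in for lwbrx:

    #include <stdint.h>
    #include <string.h>

    int compare_one_word (const unsigned char *a, const unsigned char *b)
    {
      uint32_t wa, wb;
      memcpy (&wa, a, sizeof wa);
      memcpy (&wb, b, sizeof wb);
    #ifdef __LITTLE_ENDIAN__
      wa = __builtin_bswap32 (wa);   /* the effect of lwbrx on LE */
      wb = __builtin_bswap32 (wb);
    #endif
      /* The first differing byte in memory is now the most significant
         differing byte in the register, so unsigned order == memcmp order.  */
      return (wa > wb) - (wa < wb);
    }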
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 7f0077823..acf3c1019 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -383,7 +383,7 @@ L(copy_GE_32_unaligned):
beq L(copy_GE_32_unaligned_cont)
- /* SRC is not quadword aligned, get it aligned. */
+ /* DST is not quadword aligned, get it aligned. */
mtcrf 0x01,0
subf 31,0,5
@@ -435,13 +435,21 @@ L(copy_GE_32_unaligned_cont):
mr 11,12
mtcrf 0x01,9
cmplwi cr6,9,1
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32-bytes due to the loop . */
lvx 4,12,6
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
@@ -461,11 +469,17 @@ L(unaligned_loop):
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
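The VMX path builds each aligned 16-byte store by selecting a 16-byte window out of two consecutive aligned loads. vperm indexes the concatenation of its two source vectors in big-endian byte order, so on little-endian the patch generates the permute control with lvsr instead of lvsl and swaps the two vperm source operands to select the same window. A scalar C model of the selection (off is the source misalignment, src & 15; the function name is illustrative only):

    #include <stdint.h>

    void select_window (uint8_t out[16], const uint8_t prev[16],
                        const uint8_t next[16], unsigned off)
    {
      /* out = (prev ++ next)[off .. off + 15], which is what the
         lvsl/vperm (BE) or lvsr/swapped-vperm (LE) pairs compute.  */
      for (unsigned i = 0; i < 16; i++)
        out[i] = (off + i < 16) ? prev[off + i] : next[(off + i) - 16];
    }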
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S b/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S
index 5ad4edb58..4610ec5b5 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S
@@ -325,7 +325,7 @@ L(copy_GE_32_unaligned):
beq L(copy_GE_32_unaligned_cont)
- /* SRC is not quadword aligned, get it aligned. */
+ /* DST is not quadword aligned, get it aligned. */
mtcrf 0x01,0
subf 31,0,5
@@ -377,13 +377,21 @@ L(copy_GE_32_unaligned_cont):
mr 11,12
mtcrf 0x01,9
cmplwi cr6,9,1
- lvsl 5,0,12
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
+ lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32-bytes due to the loop . */
lvx 4,12,6
- vperm 6,3,4,5
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
@@ -403,11 +411,17 @@ L(unaligned_loop):
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S b/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S
index defd832b0..9601aa799 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S
@@ -23,117 +23,131 @@
.machine power7
ENTRY (__memrchr)
CALL_MCOUNT
- dcbt 0,r3
- mr r7,r3
- add r3,r7,r5 /* Calculate the last acceptable address. */
- cmplw cr7,r3,r7 /* Is the address equal or less than r3? */
+ add r7,r3,r5 /* Calculate the last acceptable address. */
+ neg r0,r7
+ addi r7,r7,-1
+ mr r10,r3
+ clrrwi r6,r7,7
+ li r9,3<<5
+ dcbt r9,r6,16 /* Stream hint, decreasing addresses. */
/* Replicate BYTE to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
- bge cr7,L(proceed)
-
- li r3,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
+ rldimi r4,r4,8,48
+ rldimi r4,r4,16,32
li r6,-4
- addi r9,r3,-1
- clrrwi r8,r9,2
- addi r8,r8,4
- neg r0,r3
+ li r9,-1
rlwinm r0,r0,3,27,28 /* Calculate padding. */
-
+ clrrwi r8,r7,2
+ srw r9,r9,r0
cmplwi r5,16
+ clrrwi r0,r10,2
ble L(small_range)
- lwbrx r12,r8,r6 /* Load reversed word from memory. */
- cmpb r10,r12,r4 /* Check for BYTE in WORD1. */
- slw r10,r10,r0
- srw r10,r10,r0
- cmplwi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8 /* Load reversed word from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */
+ and r3,r3,r9
+ cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,-4
- cmplw cr6,r9,r7
- ble cr6,L(null)
-
mtcrf 0x01,r8
/* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
- mr r8,r9
- bt 29,L(loop_setup)
+ bf 29,L(loop_setup)
/* Handle WORD2 of pair. */
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,r8,r6
+#else
lwbrx r12,r8,r6
- cmpb r10,r12,r4
- cmplwi cr7,r10,0
- bne cr7,L(done)
-
- /* Are we done already? */
+#endif
addi r8,r8,-4
- cmplw cr6,r8,r7
- ble cr6,L(null)
+ cmpb r3,r12,r4
+ cmplwi cr7,r3,0
+ bne cr7,L(done)
L(loop_setup):
- li r0,-8
- sub r5,r8,r7
- srwi r9,r5,3 /* Number of loop iterations. */
+ /* The last word we want to read in the loop below is the one
+ containing the first byte of the string, ie. the word at
+ s & ~3, or r0. The first word read is at r8 - 4, we
+ read 2 * cnt words, so the last word read will be at
+ r8 - 4 - 8 * cnt + 4. Solving for cnt gives
+ cnt = (r8 - r0) / 8 */
+ sub r5,r8,r0
+ addi r8,r8,-4
+ srwi r9,r5,3 /* Number of loop iterations. */
mtctr r9 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since it's a
- small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE backwards in the string.
+ FIXME: Investigate whether 32 byte align helps with this
+ 9 instruction loop. */
+ .align 5
L(loop):
/* Load two words, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
- lwbrx r12,r8,r6
- lwbrx r11,r8,r0
- addi r8,r8,-4
- cmpb r10,r12,r4
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+ lwzx r11,r8,r6
+#else
+ lwbrx r12,0,r8
+ lwbrx r11,r8,r6
+#endif
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one word. */
+ or r5,r9,r3 /* Merge everything in one word. */
cmplwi cr7,r5,0
bne cr7,L(found)
- addi r8,r8,-4
+ addi r8,r8,-8
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. Just return
- the original range. */
- addi r8,r8,4
- cmplw cr6,r8,r7
- bgt cr6,L(loop_small)
- b L(null)
- /* OK, one (or both) of the words contains BYTE. Check
- the first word and decrement the address in case the first
- word really contains BYTE. */
+ /* We may have one more word to read. */
+ cmplw r8,r0
+ bnelr
+
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmplwi cr7,r3,0
+ bne cr7,L(done)
+ blr
+
.align 4
L(found):
- cmplwi cr6,r10,0
- addi r8,r8,4
+ /* OK, one (or both) of the words contains BYTE. Check
+ the first word. */
+ cmplwi cr6,r3,0
bne cr6,L(done)
/* BYTE must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,-4
- /* r10 has the output of the cmpb instruction, that is, it contains
+ /* r3 has the output of the cmpb instruction, that is, it contains
0xff in the same position as BYTE in the original
word from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the
range. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
- srwi r6,r0,3 /* Convert leading zeroes to bytes. */
- addi r0,r6,1
+ cntlzw r9,r3 /* Count leading zeros before the match. */
+ cmplw r8,r0 /* Are we on the last word? */
+ srwi r6,r9,3 /* Convert leading zeros to bytes. */
+ addi r0,r6,-3
sub r3,r8,r0
- cmplw r3,r7
- blt L(null)
+ cmplw cr7,r3,r10
+ bnelr
+ bgelr cr7
+ li r3,0
blr
.align 4
@@ -147,28 +161,35 @@ L(small_range):
cmplwi r5,0
beq L(null)
- lwbrx r12,r8,r6 /* Load reversed word from memory. */
- cmpb r10,r12,r4 /* Check for null bytes in WORD1. */
- slw r10,r10,r0
- srw r10,r10,r0
- cmplwi cr7,r10,0
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8 /* Load reversed word from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */
+ and r3,r3,r9
+ cmplwi cr7,r3,0
bne cr7,L(done)
+ /* Are we done already? */
+ cmplw r8,r0
addi r8,r8,-4
- cmplw r8,r7
- ble L(null)
- b L(loop_small)
+ beqlr
- .p2align 5
+ .align 5
L(loop_small):
- lwbrx r12,r8,r6
- cmpb r10,r12,r4
- cmplwi cr6,r10,0
- bne cr6,L(done)
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmplw r8,r0
+ cmplwi cr7,r3,0
+ bne cr7,L(done)
addi r8,r8,-4
- cmplw r8,r7
- ble L(null)
- b L(loop_small)
+ bne L(loop_small)
+ blr
END (__memrchr)
weak_alias (__memrchr, memrchr)
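
For the backwards search the word is loaded so that the byte at the highest
address ends up in the most significant position, letting cntlzw locate the
last match in memory order: a byte-reversed lwbrx on big-endian, a plain lwzx
on little-endian.  A C sketch of that orientation (illustration only; uses
GCC builtins, and the helper name is made up):

    #include <stdint.h>
    #include <string.h>

    /* Offset (0-3) of the highest-addressed byte equal to c in a 4-byte
       block; the caller guarantees at least one byte matches.  */
    static unsigned
    last_match_offset (const unsigned char bytes[4], unsigned char c)
    {
      uint32_t w, m, t;
      memcpy (&w, bytes, 4);
    #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
      w = __builtin_bswap32 (w);          /* what lwbrx does on big-endian */
    #endif
      m = w ^ (0x01010101u * c);          /* zero byte wherever bytes[i] == c */
      /* Exact zero-byte test: 0x80 in each byte of t that matched.  */
      t = ~(((m & 0x7f7f7f7fu) + 0x7f7f7f7fu) | m | 0x7f7f7f7fu);
      return 3 - __builtin_clz (t) / 8;   /* most significant marker wins */
    }
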
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memset.S b/libc/sysdeps/powerpc/powerpc32/power7/memset.S
index 360ea717f..aadda2558 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memset.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memset.S
@@ -35,8 +35,8 @@ L(_memset):
cfi_offset(31,-8)
/* Replicate byte to word. */
- rlwimi 4,4,8,16,23
- rlwimi 4,4,16,0,15
+ insrdi 4,4,8,48
+ insrdi 4,4,16,32
ble cr6,L(small) /* If length <= 8, use short copy code. */
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S b/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S
index a80c74a09..c2d8c4b7b 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S
@@ -27,16 +27,21 @@ ENTRY (__rawmemchr)
clrrwi r8,r3,2 /* Align the address to word boundary. */
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ rldimi r4,r4,8,48
+ rldimi r4,r4,16,32
/* Now r4 has a word of c bytes. */
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r5,r12,r4 /* Compare each byte against c byte. */
+#ifdef __LITTLE_ENDIAN__
+ srw r5,r5,r6
+ slw r5,r5,r6
+#else
slw r5,r5,r6 /* Move left to discard ignored bits. */
srw r5,r5,r6 /* Bring the bits back as zeros. */
+#endif
cmpwi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
bne cr7,L(done)
@@ -90,8 +95,14 @@ L(loop):
word from the string. Use that fact to find out what is
the position of the byte inside the string. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
+#endif
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching char. */
blr
END (__rawmemchr)
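
The little-endian L(done) path above needs the position of the least
significant 0x80 marker, i.e. a count of trailing zero bits, and there is no
ctz instruction here: (x - 1) & ~x turns the trailing zeros into a run of
ones that popcntw can count.  The same idiom in C (sketch; assumes a nonzero
mask and uses a GCC builtin):

    #include <stdint.h>

    static unsigned
    trailing_zeros (uint32_t x)           /* x != 0 */
    {
      return __builtin_popcount ((x - 1) & ~x);
    }

The byte offset of the first match in memory order is then
trailing_zeros (mask) / 8 on little-endian versus __builtin_clz (mask) / 8 on
big-endian, where mask is the cmpb result computed above.
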
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strchr.S b/libc/sysdeps/powerpc/powerpc32/power7/strchr.S
index 0ecadb271..b66265967 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strchr.S
@@ -35,8 +35,8 @@ ENTRY (strchr)
beq cr7,L(null_match)
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
/* Now r4 has a word of c bytes and r0 has
a word of null bytes. */
@@ -46,11 +46,17 @@ ENTRY (strchr)
/* Move the words left and right to discard the bits that are
not part of the string and to bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ srw r11,r11,r6
+ slw r10,r10,r6
+ slw r11,r11,r6
+#else
slw r10,r10,r6
slw r11,r11,r6
srw r10,r10,r6
srw r11,r11,r6
+#endif
or r5,r10,r11 /* OR the results to speed things up. */
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -65,7 +71,7 @@ ENTRY (strchr)
/* Handle WORD2 of pair. */
lwzu r12,4(r8)
- cmpb r10,r12,r4
+ cmpb r10,r12,r4
cmpb r11,r12,r0
or r5,r10,r11
cmpwi cr7,r5,0
@@ -100,22 +106,31 @@ L(loop):
bne cr6,L(done)
/* The c/null byte must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
- pointer. */
+ again and move the result of cmpb to r10/r11 so we can calculate
+ the pointer. */
mr r10,r6
mr r11,r7
addi r8,r8,4
- /* r5 has the output of the cmpb instruction, that is, it contains
+ /* r10/r11 have the output of the cmpb instructions, that is,
0xff in the same position as the c/null byte in the original
word from the string. Use that to calculate the pointer. */
L(done):
- cntlzw r4,r10 /* Count leading zeroes before c matches. */
- cntlzw r0,r11 /* Count leading zeroes before null matches. */
- cmplw cr7,r4,r0
+#ifdef __LITTLE_ENDIAN__
+ addi r3,r10,-1
+ andc r3,r3,r10
+ popcntw r0,r3
+ addi r4,r11,-1
+ andc r4,r4,r11
+ cmplw cr7,r3,r4
+ bgt cr7,L(no_match)
+#else
+ cntlzw r0,r10 /* Count leading zeros before c matches. */
+ cmplw cr7,r11,r10
bgt cr7,L(no_match)
- srwi r0,r4,3 /* Convert leading zeroes to bytes. */
+#endif
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching c byte
or null in case c was not found. */
blr
@@ -133,10 +148,14 @@ L(null_match):
cmpb r5,r12,r0 /* Compare each byte against null bytes. */
/* Move the words left and right to discard the bits that are
- not part of the string and to bring them back as zeros. */
-
+ not part of the string and bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srw r5,r5,r6
+ slw r5,r5,r6
+#else
slw r5,r5,r6
srw r5,r5,r6
+#endif
cmpwi cr7,r5,0 /* If r10 == 0, no c or null bytes
have been found. */
bne cr7,L(done_null)
@@ -191,7 +210,13 @@ L(loop_null):
0xff in the same position as the null byte in the original
word from the string. Use that to calculate the pointer. */
L(done_null):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
+#endif
srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching null byte. */
blr
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S b/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S
index d4cacab60..f5d24d434 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S
@@ -27,8 +27,8 @@ ENTRY (__strchrnul)
clrrwi r8,r3,2 /* Align the address to word boundary. */
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
@@ -43,10 +43,17 @@ ENTRY (__strchrnul)
/* Move the words left and right to discard the bits that are
not part of the string and bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ srw r9,r9,r6
+ slw r10,r10,r6
+ slw r9,r9,r6
+#else
slw r10,r10,r6
slw r9,r9,r6
srw r10,r10,r6
srw r9,r9,r6
+#endif
or r5,r9,r10 /* OR the results to speed things up. */
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -54,7 +61,7 @@ ENTRY (__strchrnul)
mtcrf 0x01,r8
- /* Are we now aligned to a quadword boundary? If so, skip to
+ /* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
bt 29,L(loop)
@@ -76,7 +83,7 @@ L(loop):
single register for speed. This is an attempt
to speed up the null-checking process for bigger strings. */
lwz r12,4(r8)
- lwzu r11,8(r8)
+ lwzu r11,8(r8)
cmpb r10,r12,r0
cmpb r9,r12,r4
cmpb r6,r11,r0
@@ -95,9 +102,9 @@ L(loop):
addi r8,r8,-4
bne cr6,L(done)
- /* The c/null byte must be in the second word. Adjust the
- address again and move the result of cmpb to r10 so we can calculate
- the pointer. */
+ /* The c/null byte must be in the second word. Adjust the address
+ again and move the result of cmpb to r5 so we can calculate the
+ pointer. */
mr r5,r10
addi r8,r8,4
@@ -105,7 +112,13 @@ L(loop):
0xff in the same position as the c/null byte in the original
word from the string. Use that to calculate the pointer. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
+#endif
srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of matching c/null byte. */
blr
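
The swapped srw/slw pair in the little-endian path exists because the bytes
below the aligned load address (the ones before the start of the string) sit
at the opposite end of the register: most significant bits on big-endian,
least significant on little-endian.  A C sketch of the masking (illustration
only; helper name made up):

    #include <stdint.h>

    /* Clear match markers falling in the (addr & 3) garbage bytes that
       precede the string in the first aligned word; pad is at most 24.  */
    static uint32_t
    drop_leading_garbage (uint32_t markers, uintptr_t addr)
    {
      unsigned pad = (addr & 3) * 8;
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      return (markers >> pad) << pad;     /* garbage is in the low bits */
    #else
      return (markers << pad) >> pad;     /* garbage is in the high bits */
    #endif
    }
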
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strlen.S b/libc/sysdeps/powerpc/powerpc32/power7/strlen.S
index b71a10f5c..b08d6c028 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strlen.S
@@ -29,7 +29,11 @@ ENTRY (strlen)
li r0,0 /* Word with null chars to use with cmpb. */
li r5,-1 /* MASK = 0xffffffffffffffff. */
lwz r12,0(r4) /* Load word from memory. */
+#ifdef __LITTLE_ENDIAN__
+ slw r5,r5,r6
+#else
srw r5,r5,r6 /* MASK = MASK >> padding. */
+#endif
orc r9,r12,r5 /* Mask bits that are not part of the string. */
cmpb r10,r9,r0 /* Check for null bytes in WORD1. */
cmpwi cr7,r10,0 /* If r10 == 0, no null's have been found. */
@@ -47,9 +51,6 @@ ENTRY (strlen)
cmpb r10,r12,r0
cmpwi cr7,r10,0
bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
/* Main loop to look for the end of the string. Since it's a
small loop (< 8 instructions), align it to 32-bytes. */
@@ -86,9 +87,15 @@ L(loop):
0xff in the same position as the null byte in the original
word from the string. Use that to calculate the length. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
+#ifdef __LITTLE_ENDIAN__
+ addi r9, r10, -1 /* Form a mask from trailing zeros. */
+ andc r9, r9, r10
+ popcntw r0, r9 /* Count the bits in the mask. */
+#else
+ cntlzw r0,r10 /* Count leading zeros before the match. */
+#endif
subf r5,r3,r4
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r5,r0 /* Compute final length. */
blr
END (strlen)
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
index fdae44d26..10c9d251b 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
@@ -26,7 +26,7 @@
EALIGN (strncmp,5,0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -39,6 +39,7 @@ EALIGN (strncmp,5,0)
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
nop
@@ -78,13 +79,45 @@ L(g1): add rTMP,rFEFE,rWORD1
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr
+ ori rRTN, rTMP2, 1
+ blr
+#else
L(endstring):
and rTMP,r7F7F,rWORD1
beq cr1,L(equal)
add rTMP,rTMP,r7F7F
xor. rBITDIF,rWORD1,rWORD2
-
andc rNEG,rNEG,rTMP
blt L(highbit)
cntlzw rBITDIF,rBITDIF
@@ -92,28 +125,20 @@ L(endstring):
addi rNEG,rNEG,7
cmpw cr1,rNEG,rBITDIF
sub rRTN,rWORD1,rWORD2
- blt cr1,L(equal)
- srawi rRTN,rRTN,31
- ori rRTN,rRTN,1
- blr
+ bgelr cr1
L(equal):
li rRTN,0
blr
L(different):
- lwzu rWORD1,-4(rSTR1)
+ lwz rWORD1,-4(rSTR1)
xor. rBITDIF,rWORD1,rWORD2
sub rRTN,rWORD1,rWORD2
- blt L(highbit)
- srawi rRTN,rRTN,31
- ori rRTN,rRTN,1
- blr
+ bgelr
L(highbit):
- srwi rWORD2,rWORD2,24
- srwi rWORD1,rWORD1,24
- sub rRTN,rWORD1,rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
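
The rlwinm/rldimi/rlwimi triples in the little-endian L(endstring) and
L(different) blocks above byte-reverse each word (as their comments say) so
that an unsigned subtract orders the strings in memory order.  The same
32-bit reversal written with shifts and masks in C (illustrative; in practice
__builtin_bswap32 would do):

    #include <stdint.h>

    static uint32_t
    byte_reverse32 (uint32_t x)
    {
      uint32_t odd = x & 0x00ff00ffu;         /* bytes 0 and 2 */
      uint32_t even = x & 0xff00ff00u;        /* bytes 1 and 3 */
      return ((odd << 24) | (odd >> 8))       /* rotate left by 24 */
             | ((even << 8) | (even >> 24));  /* rotate left by 8 */
    }
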
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S b/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S
index ed088366a..eb52afd1a 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S
@@ -28,51 +28,47 @@ ENTRY (__strnlen)
add r7,r3,r4 /* Calculate the last acceptable address. */
cmplwi r4,16
li r0,0 /* Word with null chars. */
+ addi r7,r7,-1
ble L(small_range)
- cmplw cr7,r3,r7 /* Is the address equal or less than r3? If
- it's equal or less, it means size is either 0
- or a negative number. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ slw r10,r10,r6
+#else
slw r10,r10,r6
srw r10,r10,r6
+#endif
cmplwi cr7,r10,0 /* If r10 == 0, no null's have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(end_max)
-
+ clrrwi r7,r7,2 /* Address of last word. */
mtcrf 0x01,r8
/* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
bt 29,L(loop_setup)
- /* Handle DWORD2 of pair. */
+ /* Handle WORD2 of pair. */
lwzu r12,4(r8)
cmpb r10,r12,r0
cmplwi cr7,r10,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(end_max)
-
L(loop_setup):
- sub r5,r7,r9
+ /* The last word we want to read in the loop below is the one
+ containing the last byte of the string, ie. the word at
+ (s + size - 1) & ~3, or r7. The first word read is at
+ r8 + 4, we read 2 * cnt words, so the last word read will
+ be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives
+ cnt = (r7 - r8) / 8 */
+ sub r5,r7,r8
srwi r6,r5,3 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for the null byte backwards in the string. Since
+
+ /* Main loop to look for the null byte in the string. Since
it's a small loop (< 8 instructions), align it to 32-bytes. */
.p2align 5
L(loop):
@@ -88,15 +84,18 @@ L(loop):
cmplwi cr7,r5,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for null in the whole range. Just return
- the original size. */
- addi r9,r8,4
- cmplw cr6,r9,r7
- blt cr6,L(loop_small)
+
+ /* We may have one more word to read. */
+ cmplw cr6,r8,r7
+ beq cr6,L(end_max)
+
+ lwzu r12,4(r8)
+ cmpb r10,r12,r0
+ cmplwi cr6,r10,0
+ bne cr6,L(done)
L(end_max):
- sub r3,r7,r3
+ mr r3,r4
blr
/* OK, one (or both) of the words contains a null byte. Check
@@ -121,49 +120,56 @@ L(found):
We need to make sure the null char is *before* the end of the
range. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
- add r9,r8,r0
- sub r6,r9,r3 /* Length until the match. */
- cmplw r9,r7
- bgt L(end_max)
- mr r3,r6
- blr
-
- .align 4
-L(zero):
- li r3,0
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r10,-1
+ andc r0,r0,r10
+ popcntw r0,r0
+#else
+ cntlzw r0,r10 /* Count leading zeros before the match. */
+#endif
+ sub r3,r8,r3
+ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r3,r0 /* Length until the match. */
+ cmplw r3,r4
+ blelr
+ mr r3,r4
blr
-/* Deals with size <= 32. */
+/* Deals with size <= 16. */
.align 4
L(small_range):
cmplwi r4,0
- beq L(zero)
+ beq L(end_max)
+
+ clrrwi r7,r7,2 /* Address of last word. */
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r10,r12,r0 /* Check for null bytes in WORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ slw r10,r10,r6
+#else
slw r10,r10,r6
srw r10,r10,r6
+#endif
cmplwi cr7,r10,0
bne cr7,L(done)
- addi r9,r8,4
- cmplw r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmplw r8,r7
+ beq L(end_max)
.p2align 5
L(loop_small):
lwzu r12,4(r8)
cmpb r10,r12,r0
- addi r9,r8,4
cmplwi cr6,r10,0
bne cr6,L(done)
- cmplw r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmplw r8,r7
+ bne L(loop_small)
+ mr r3,r4
+ blr
+
END (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_builtin_def (strnlen)
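
The new comment above derives the loop trip count; restated in C (a sketch,
not glibc code, with made-up names):

    #include <stddef.h>
    #include <stdint.h>

    /* On its i-th pass (1-based) the unrolled loop reads the words at
       p + 8*i - 4 and p + 8*i, where p is r8 at loop entry.  The last read
       must not go past last_word = (s + size - 1) & ~3, so the trip count
       is the largest cnt with p + 8*cnt <= last_word.  */
    static size_t
    trip_count (uintptr_t p, uintptr_t last_word)
    {
      return (last_word - p) / 8;
    }
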
diff --git a/libc/sysdeps/powerpc/powerpc32/setjmp-common.S b/libc/sysdeps/powerpc/powerpc32/setjmp-common.S
index 60b0026fa..3fb65b5f7 100644
--- a/libc/sysdeps/powerpc/powerpc32/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/setjmp-common.S
@@ -24,6 +24,11 @@
# include <jmpbuf-offsets.h>
#endif
+#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT)
+# define SAVE_GP(N) evstdd r##N,((JB_FPRS+((N)-14)*2)*4)(3)
+#else
+# define SAVE_GP(N) stw r##N,((JB_GPRS+(N)-14)*4)(3)
+#endif
ENTRY (__sigsetjmp)
@@ -35,31 +40,31 @@ ENTRY (__sigsetjmp)
stw r1,(JB_GPR1*4)(3)
#endif
mflr r0
- stw r14,((JB_GPRS+0)*4)(3)
+ SAVE_GP (14)
#ifdef PTR_MANGLE
PTR_MANGLE2 (r0, r10)
li r10,0
#endif
stw r0,(JB_LR*4)(3)
- stw r15,((JB_GPRS+1)*4)(3)
+ SAVE_GP (15)
mfcr r0
- stw r16,((JB_GPRS+2)*4)(3)
+ SAVE_GP (16)
stw r0,(JB_CR*4)(3)
- stw r17,((JB_GPRS+3)*4)(3)
- stw r18,((JB_GPRS+4)*4)(3)
- stw r19,((JB_GPRS+5)*4)(3)
- stw r20,((JB_GPRS+6)*4)(3)
- stw r21,((JB_GPRS+7)*4)(3)
- stw r22,((JB_GPRS+8)*4)(3)
- stw r23,((JB_GPRS+9)*4)(3)
- stw r24,((JB_GPRS+10)*4)(3)
- stw r25,((JB_GPRS+11)*4)(3)
- stw r26,((JB_GPRS+12)*4)(3)
- stw r27,((JB_GPRS+13)*4)(3)
- stw r28,((JB_GPRS+14)*4)(3)
- stw r29,((JB_GPRS+15)*4)(3)
- stw r30,((JB_GPRS+16)*4)(3)
- stw r31,((JB_GPRS+17)*4)(3)
+ SAVE_GP (17)
+ SAVE_GP (18)
+ SAVE_GP (19)
+ SAVE_GP (20)
+ SAVE_GP (21)
+ SAVE_GP (22)
+ SAVE_GP (23)
+ SAVE_GP (24)
+ SAVE_GP (25)
+ SAVE_GP (26)
+ SAVE_GP (27)
+ SAVE_GP (28)
+ SAVE_GP (29)
+ SAVE_GP (30)
+ SAVE_GP (31)
#if defined NOT_IN_libc && defined IS_IN_rtld
li r3,0
blr
diff --git a/libc/sysdeps/powerpc/powerpc32/setjmp.S b/libc/sysdeps/powerpc/powerpc32/setjmp.S
index 8a8cf0d6e..49b64ecf0 100644
--- a/libc/sysdeps/powerpc/powerpc32/setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/setjmp.S
@@ -25,7 +25,7 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define __sigsetjmp __vmx__sigsetjmp
# define __sigjmp_save __vmx__sigjmp_save
# include "setjmp-common.S"
@@ -35,7 +35,7 @@ default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
# undef __sigsetjmp
# undef __sigjmp_save
# undef JB_SIZE
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0)
+compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0)
# define __sigsetjmp __novmx__sigsetjmp
# define __sigjmp_save __novmx__sigjmp_save
# include "setjmp-common.S"
diff --git a/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h b/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h
index 839f6a4b9..b3d0af830 100644
--- a/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h
+++ b/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h
@@ -2,3 +2,13 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("lwz %0,-28680(2)" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("lwz %0,%1(2)" \
+ : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) \
+ ); \
+ x; \
+ })
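
The new POINTER_CHK_GUARD macro only fetches the per-thread pointer guard
from the TCB; the usual way such a guard is consumed is to XOR-mangle
sensitive saved pointers so that a corrupted value cannot be aimed without
knowing the guard.  A hypothetical illustration (this is not glibc's
PTR_MANGLE definition):

    #include <stdint.h>

    /* `guard' would come from POINTER_CHK_GUARD in real code; applying the
       same XOR again recovers the original pointer.  */
    static uintptr_t
    mangle_pointer (uintptr_t ptr, uintptr_t guard)
    {
      return ptr ^ guard;
    }
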
diff --git a/libc/sysdeps/powerpc/powerpc32/stpcpy.S b/libc/sysdeps/powerpc/powerpc32/stpcpy.S
index 03c6dddc3..7e106e0e6 100644
--- a/libc/sysdeps/powerpc/powerpc32/stpcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/stpcpy.S
@@ -62,7 +62,22 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stbu rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stbu rTMP, 1(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stbu rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -73,6 +88,7 @@ L(g1): rlwinm. rTMP, rALT, 8, 24, 31
beqlr-
stbu rALT, 1(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
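
The #ifdef block above (and the matching one in strcpy.S further down) stores
the final word one byte at a time in memory order, stopping after the NUL;
the only endian difference is which end of the register holds the
lowest-addressed byte.  A C sketch (illustration only, helper name made up):

    #include <stdint.h>

    /* Store the bytes of w to dst in memory order up to and including the
       terminating NUL.  w was fetched with a normal word load, so its byte
       order is the host's.  */
    static void
    store_tail (unsigned char *dst, uint32_t w)
    {
      unsigned i;
      for (i = 0; i < 4; i++)
        {
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
          unsigned char b = (unsigned char) (w >> (8 * i));       /* LSB first */
    #else
          unsigned char b = (unsigned char) (w >> (24 - 8 * i));  /* MSB first */
    #endif
          dst[i] = b;
          if (b == 0)
            break;
        }
    }
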
diff --git a/libc/sysdeps/powerpc/powerpc32/strchr.S b/libc/sysdeps/powerpc/powerpc32/strchr.S
index c9952eecc..605056577 100644
--- a/libc/sysdeps/powerpc/powerpc32/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/strchr.S
@@ -36,6 +36,8 @@ ENTRY (strchr)
#define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */
#define rTMP3 r12
+#define rTMP4 rIGN
+#define rTMP5 rMASK
rlwimi rCHR, rCHR, 8, 16, 23
@@ -49,64 +51,93 @@ ENTRY (strchr)
addi r7F7F, r7F7F, 0x7f7f
/* Test the first (partial?) word. */
lwz rWORD, 0(rSTR)
+#ifdef __LITTLE_ENDIAN__
+ slw rMASK, rMASK, rIGN
+#else
srw rMASK, rMASK, rIGN
+#endif
orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2
xor rTMP3, rCHR, rWORD
orc rTMP3, rTMP3, rMASK
b L(loopentry)
/* The loop. */
-L(loop):lwzu rWORD, 4(rSTR)
- and. rTMP1, rTMP1, rTMP2
+L(loop):
+ lwzu rWORD, 4(rSTR)
+ and. rTMP5, rTMP1, rTMP2
/* Test for 0. */
- add rTMP1, rFEFE, rWORD
- nor rTMP2, r7F7F, rWORD
+ add rTMP1, rFEFE, rWORD /* x - 0x01010101. */
+ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */
bne L(foundit)
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */
/* Start test for the bytes we're looking for. */
xor rTMP3, rCHR, rWORD
L(loopentry):
add rTMP1, rFEFE, rTMP3
nor rTMP2, r7F7F, rTMP3
beq L(loop)
+
/* There is a zero byte in the word, but there may also be a matching byte (either
before or after the zero byte). In fact, we may be looking for a
- zero byte, in which case we return a match. We guess that this hasn't
- happened, though. */
-L(missed):
- and. rTMP1, rTMP1, rTMP2
+ zero byte, in which case we return a match. */
+ and. rTMP5, rTMP1, rTMP2
li rRTN, 0
beqlr
-/* It did happen. Decide which one was first...
- I'm not sure if this is actually faster than a sequence of
- rotates, compares, and branches (we use it anyway because it's shorter). */
+/* At this point:
+ rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+ But there may be false matches in the next most significant byte from
+ a true match due to carries. This means we need to recalculate the
+ matches using a longer method for big-endian. */
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzw rCLZB, rTMP1
+ addi rTMP2, rTMP4, -1
+ andc rTMP2, rTMP2, rTMP4
+ cmplw rTMP1, rTMP2
+ bgtlr
+ subfic rCLZB, rCLZB, 32-7
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+ results from the loop for reuse here. See strlen.S tail. Similarly
+ one instruction could be pruned from L(foundit). */
and rFEFE, r7F7F, rWORD
- or rMASK, r7F7F, rWORD
+ or rTMP5, r7F7F, rWORD
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rFEFE, rFEFE, r7F7F
add rTMP1, rTMP1, r7F7F
- nor rWORD, rMASK, rFEFE
- nor rTMP2, rIGN, rTMP1
+ nor rWORD, rTMP5, rFEFE
+ nor rTMP2, rTMP4, rTMP1
+ cntlzw rCLZB, rTMP2
cmplw rWORD, rTMP2
bgtlr
- cntlzw rCLZB, rTMP2
+#endif
srwi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
blr
L(foundit):
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzw rCLZB, rTMP1
+ subfic rCLZB, rCLZB, 32-7-32
+ srawi rCLZB, rCLZB, 3
+#else
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F
- nor rTMP2, rIGN, rTMP1
+ nor rTMP2, rTMP4, rTMP1
cntlzw rCLZB, rTMP2
subi rSTR, rSTR, 4
srwi rCLZB, rCLZB, 3
+#endif
add rRTN, rSTR, rCLZB
blr
END (strchr)
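
On little-endian the code above isolates the bits below the lowest 0x80
marker and converts their count into a byte index; the
subfic rCLZB,rCLZB,32-7 / srwi rCLZB,rCLZB,3 pair is the arithmetic sketched
below (the POWER7 variants reach the same index with popcntw instead of
cntlzw).  Illustrative C with a made-up name and a GCC builtin:

    #include <stdint.h>

    /* Byte index (0-3, memory order on little-endian) of the lowest 0x80
       marker in t; caller guarantees t != 0 and that markers occur only in
       bit 7 of each byte.  */
    static unsigned
    first_marker_byte (uint32_t t)
    {
      uint32_t below = (t - 1) & ~t;               /* 8k+7 trailing ones */
      return (25u - __builtin_clz (below)) / 8;    /* ((32-7) - clz) >> 3 */
    }
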
diff --git a/libc/sysdeps/powerpc/powerpc32/strcmp.S b/libc/sysdeps/powerpc/powerpc32/strcmp.S
index 297ca3c1b..91d60c905 100644
--- a/libc/sysdeps/powerpc/powerpc32/strcmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/strcmp.S
@@ -24,7 +24,7 @@
EALIGN (strcmp, 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -34,6 +34,7 @@ EALIGN (strcmp, 4, 0)
#define r7F7F r8 /* constant 0x7f7f7f7f */
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
+#define rTMP r11
or rTMP, rSTR2, rSTR1
@@ -56,10 +57,45 @@ L(g1): add rTMP, rFEFE, rWORD1
and. rTMP, rTMP, rNEG
cmpw cr1, rWORD1, rWORD2
beq+ L(g0)
-L(endstring):
+
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ rlwimi rTMP2, rTMP2, 1, 0, 30
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
+L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
@@ -84,7 +120,7 @@ L(different):
L(highbit):
ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
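
The little-endian L(endstring) above masks both words off just past the first
NUL in rWORD1, byte-reverses them, and lets an unsigned comparison produce
the sign.  The same steps in C, assuming a little-endian host and that w1 is
known to contain the terminating NUL (illustration only, not the asm's exact
register dance):

    #include <stdint.h>

    static int
    final_word_cmp (uint32_t w1, uint32_t w2)
    {
      /* Cheap zero-byte test; any spurious 0x80 markers land above the
         first real one, which is harmless since only the lowest is used.  */
      uint32_t z = (w1 + 0xfefefeffu) & ~(w1 | 0x7f7f7f7fu);
      uint32_t keep = (z - 1) & ~z;          /* everything below the marker */
      keep |= keep << 1;                     /* ...plus the NUL byte itself */
      w1 = __builtin_bswap32 (w1 & keep);    /* memory order -> numeric order */
      w2 = __builtin_bswap32 (w2 & keep);
      return (w1 > w2) - (w1 < w2);
    }
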
diff --git a/libc/sysdeps/powerpc/powerpc32/strcpy.S b/libc/sysdeps/powerpc/powerpc32/strcpy.S
index 4ae577dbb..e938cc42a 100644
--- a/libc/sysdeps/powerpc/powerpc32/strcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/strcpy.S
@@ -62,7 +62,22 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stb rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stb rTMP, 5(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stb rTMP, 6(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stb rTMP, 7(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stb rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -73,6 +88,7 @@ L(g1): rlwinm. rTMP, rALT, 8, 24, 31
beqlr-
stb rALT, 7(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc32/strlen.S b/libc/sysdeps/powerpc/powerpc32/strlen.S
index 9a6eafc38..a7153ed7a 100644
--- a/libc/sysdeps/powerpc/powerpc32/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc32/strlen.S
@@ -29,7 +29,12 @@
1 is subtracted you get a value in the range 0x00-0x7f, none of which
have their high bit set. The expression here is
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
- there were no 0x00 bytes in the word.
+ there were no 0x00 bytes in the word. You get 0x80 in bytes that
+ match, but possibly false 0x80 matches in the next more significant
+ byte to a true match due to carries. For little-endian this is
+ of no consequence since the least significant match is the one
+ we're interested in, but big-endian needs method 2 to find which
+ byte matches.
2) Given a word 'x', we can test to see _which_ byte was zero by
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
@@ -72,7 +77,7 @@
ENTRY (strlen)
-#define rTMP1 r0
+#define rTMP4 r0
#define rRTN r3 /* incoming STR arg, outgoing result */
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
@@ -82,9 +87,9 @@ ENTRY (strlen)
#define rWORD1 r8 /* current string word */
#define rWORD2 r9 /* next string word */
#define rMASK r9 /* mask for first string word */
-#define rTMP2 r10
-#define rTMP3 r11
-#define rTMP4 r12
+#define rTMP1 r10
+#define rTMP2 r11
+#define rTMP3 r12
clrrwi rSTR, rRTN, 2
@@ -93,15 +98,20 @@ ENTRY (strlen)
lwz rWORD1, 0(rSTR)
li rMASK, -1
addi r7F7F, r7F7F, 0x7f7f
-/* That's the setup done, now do the first pair of words.
- We make an exception and use method (2) on the first two words, to reduce
- overhead. */
+/* We use method (2) on the first two words, because rFEFE isn't
+ required which reduces setup overhead. Also gives a faster return
+ for small strings on big-endian due to needing to recalculate with
+ method (2) anyway. */
+#ifdef __LITTLE_ENDIAN__
+ slw rMASK, rMASK, rPADN
+#else
srw rMASK, rMASK, rPADN
+#endif
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor rTMP1, rTMP2, rTMP1
- and. rWORD1, rTMP1, rMASK
+ nor rTMP3, rTMP2, rTMP1
+ and. rTMP3, rTMP3, rMASK
mtcrf 0x01, rRTN
bne L(done0)
lis rFEFE, -0x101
@@ -110,11 +120,12 @@ ENTRY (strlen)
bt 29, L(loop)
/* Handle second word of pair. */
+/* Perhaps use method (1) here for little-endian, saving one instruction? */
lwzu rWORD1, 4(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor. rWORD1, rTMP2, rTMP1
+ nor. rTMP3, rTMP2, rTMP1
bne L(done0)
/* The loop. */
@@ -128,28 +139,52 @@ L(loop):
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
- and. rTMP1, rTMP3, rTMP4
+ and. rTMP3, rTMP3, rTMP4
beq L(loop)
+#ifndef __LITTLE_ENDIAN__
and rTMP1, r7F7F, rWORD2
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP4, rTMP1
+ andc rTMP3, rTMP4, rTMP1
b L(done0)
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 4
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP2, rTMP1
+ andc rTMP3, rTMP2, rTMP1
/* When we get to here, rSTR points to the first word in the string that
- contains a zero byte, and the most significant set bit in rWORD1 is in that
- byte. */
+ contains a zero byte, and rTMP3 has 0x80 for bytes that are zero,
+ and 0x00 otherwise. */
L(done0):
- cntlzw rTMP3, rWORD1
+ cntlzw rTMP3, rTMP3
subf rTMP1, rRTN, rSTR
srwi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
blr
+#else
+
+L(done0):
+ addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */
+ andc rTMP1, rTMP1, rTMP3
+ cntlzw rTMP1, rTMP1 /* Count bits not in the mask. */
+ subf rTMP3, rRTN, rSTR
+ subfic rTMP1, rTMP1, 32-7
+ srwi rTMP1, rTMP1, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+
+L(done1):
+ addi rTMP3, rTMP1, -1
+ andc rTMP3, rTMP3, rTMP1
+ cntlzw rTMP3, rTMP3
+ subf rTMP1, rRTN, rSTR
+ subfic rTMP3, rTMP3, 32-7-32
+ srawi rTMP3, rTMP3, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+#endif
+
END (strlen)
libc_hidden_builtin_def (strlen)
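
The expanded comment above is the crux of most of the little-endian changes
in this patch: method (1) is cheaper, but its carry can leak a marker into
the next more significant byte, which only matters when the wanted match is
the most significant one (the big-endian case).  A self-contained
demonstration (illustrative):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      uint32_t x = 0x01000000u;  /* bytes, most significant first: 01 00 00 00 */
      /* Method (1): (x + 0xfefefeff) & ~(x | 0x7f7f7f7f).  */
      uint32_t fast = (x + 0xfefefeffu) & ~(x | 0x7f7f7f7fu);
      /* Method (2): ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).  */
      uint32_t exact = ~(((x & 0x7f7f7f7fu) + 0x7f7f7f7fu) | x | 0x7f7f7f7fu);
      assert (fast == 0x80808080u);   /* spurious marker in the 0x01 byte */
      assert (exact == 0x00808080u);  /* markers only in the zero bytes */
      return 0;
    }
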
diff --git a/libc/sysdeps/powerpc/powerpc32/strncmp.S b/libc/sysdeps/powerpc/powerpc32/strncmp.S
index fa345d293..e36a160a8 100644
--- a/libc/sysdeps/powerpc/powerpc32/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/strncmp.S
@@ -24,7 +24,7 @@
EALIGN (strncmp, 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,6 +35,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -73,12 +74,45 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzw rBITDIF, rBITDIF
@@ -86,28 +120,20 @@ L(endstring):
addi rNEG, rNEG, 7
cmpw cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
- blt- cr1, L(equal)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+ cr1
L(equal):
li rRTN, 0
blr
L(different):
- lwzu rWORD1, -4(rSTR1)
+ lwz rWORD1, -4(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+
L(highbit):
- srwi rWORD2, rWORD2, 24
- srwi rWORD1, rWORD1, 24
- sub rRTN, rWORD1, rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
index 70c370439..4f1e3c88d 100644
--- a/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
@@ -57,7 +57,7 @@ ENTRY (__longjmp)
beq L(no_vmx)
la r5,((JB_VRS)*8)(3)
andi. r6,r5,0xf
- lwz r0,((JB_VRSAVE)*8)(3)
+ lwz r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */
mtspr VRSAVE,r0
beq+ L(aligned_restore_vmx)
addi r6,r5,16
@@ -153,7 +153,7 @@ L(no_vmx):
lfd fp21,((JB_FPRS+7)*8)(r3)
ld r22,((JB_GPRS+8)*8)(r3)
lfd fp22,((JB_FPRS+8)*8)(r3)
- ld r0,(JB_CR*8)(r3)
+ lwz r0,((JB_CR*8)+4)(r3) /* 32-bit CR. */
ld r23,((JB_GPRS+9)*8)(r3)
lfd fp23,((JB_FPRS+9)*8)(r3)
ld r24,((JB_GPRS+10)*8)(r3)
diff --git a/libc/sysdeps/powerpc/powerpc64/dl-machine.h b/libc/sysdeps/powerpc/powerpc64/dl-machine.h
index 059fdafd5..18cf15738 100644
--- a/libc/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/libc/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -561,6 +561,12 @@ elf_machine_rela (struct link_map *map,
Elf64_Addr *const reloc_addr = reloc_addr_arg;
const int r_type = ELF64_R_TYPE (reloc->r_info);
const Elf64_Sym *const refsym = sym;
+ union unaligned
+ {
+ uint16_t u2;
+ uint32_t u4;
+ uint64_t u8;
+ } __attribute__ ((__packed__));
if (r_type == R_PPC64_RELATIVE)
{
@@ -741,23 +747,11 @@ elf_machine_rela (struct link_map *map,
return;
case R_PPC64_UADDR64:
- /* We are big-endian. */
- ((char *) reloc_addr_arg)[0] = (value >> 56) & 0xff;
- ((char *) reloc_addr_arg)[1] = (value >> 48) & 0xff;
- ((char *) reloc_addr_arg)[2] = (value >> 40) & 0xff;
- ((char *) reloc_addr_arg)[3] = (value >> 32) & 0xff;
- ((char *) reloc_addr_arg)[4] = (value >> 24) & 0xff;
- ((char *) reloc_addr_arg)[5] = (value >> 16) & 0xff;
- ((char *) reloc_addr_arg)[6] = (value >> 8) & 0xff;
- ((char *) reloc_addr_arg)[7] = (value >> 0) & 0xff;
+ ((union unaligned *) reloc_addr)->u8 = value;
return;
case R_PPC64_UADDR32:
- /* We are big-endian. */
- ((char *) reloc_addr_arg)[0] = (value >> 24) & 0xff;
- ((char *) reloc_addr_arg)[1] = (value >> 16) & 0xff;
- ((char *) reloc_addr_arg)[2] = (value >> 8) & 0xff;
- ((char *) reloc_addr_arg)[3] = (value >> 0) & 0xff;
+ ((union unaligned *) reloc_addr)->u4 = value;
return;
case R_PPC64_ADDR32:
@@ -781,10 +775,8 @@ elf_machine_rela (struct link_map *map,
case R_PPC64_UADDR16:
if (dont_expect ((value + 0x8000) >= 0x10000))
_dl_reloc_overflow (map, "R_PPC64_UADDR16", reloc_addr, refsym);
- /* We are big-endian. */
- ((char *) reloc_addr_arg)[0] = (value >> 8) & 0xff;
- ((char *) reloc_addr_arg)[1] = (value >> 0) & 0xff;
- break;
+ ((union unaligned *) reloc_addr)->u2 = value;
+ return;
case R_PPC64_ADDR16_DS:
if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0))
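
The new union lets the compiler emit whatever unaligned, native-order store
the target needs (the packed attribute is what licenses the misaligned
access), replacing the hand-written big-endian byte stores and making the
R_PPC64_UADDR* cases endian-neutral.  A stand-alone sketch of the idiom
(illustration only):

    #include <stdint.h>

    union unaligned
    {
      uint16_t u2;
      uint32_t u4;
      uint64_t u8;
    } __attribute__ ((__packed__));

    /* Store a 32-bit value at a possibly misaligned address in host byte
       order, as the R_PPC64_UADDR32 case now does.  */
    static void
    store_u4 (void *addr, uint32_t value)
    {
      ((union unaligned *) addr)->u4 = value;
    }
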
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
index 801af5d0a..45f71d7ac 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__ceilf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
index a0a22e7eb..e85b820b2 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__floorf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
index 876707c76..b1a2b8cd6 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
@@ -26,8 +26,10 @@
/* float [fp1] nearbyintf(float [fp1]) */
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__nearbyintf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
index cb28ec748..188771742 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__rintf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
index 980a77bde..4f2c85163 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
@@ -19,10 +19,12 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
.LC1: /* 0.5 */
- .tc FD_3f000000_0[TC],0x3f00000000000000
+ .long 0x3f000000
+
.section ".text"
/* float [fp1] roundf (float x [fp1])
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
index 5ea5f3d04..b8fd05031 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
/* float [fp1] truncf (float x [fp1])
diff --git a/libc/sysdeps/powerpc/powerpc64/memcpy.S b/libc/sysdeps/powerpc/powerpc64/memcpy.S
index b8c4cc8b1..5fc7401c9 100644
--- a/libc/sysdeps/powerpc/powerpc64/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/memcpy.S
@@ -212,15 +212,28 @@ EALIGN (memcpy, 5, 0)
blt cr6,5f
srdi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmpldi cr1,10,16
@@ -328,7 +341,11 @@ EALIGN (memcpy, 5, 0)
ld 7,8(5)
subfic 9,10,64
beq 2f
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+#else
sld 0,6,10
+#endif
cmpldi 11,1
mr 6,7
addi 4,4,-8
@@ -336,15 +353,25 @@ EALIGN (memcpy, 5, 0)
b 1f
2: addi 5,5,8
.align 4
+#ifdef __LITTLE_ENDIAN__
+0: srd 0,6,10
+ sld 8,7,9
+#else
0: sld 0,6,10
srd 8,7,9
+#endif
cmpldi 11,2
ld 6,8(5)
or 0,0,8
addi 11,11,-2
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+1: sld 8,6,9
+#else
sld 0,7,10
1: srd 8,6,9
+#endif
or 0,0,8
beq 8f
ld 7,16(5)
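
When source and destination disagree on doubleword alignment, each stored
doubleword is built from the tail bytes of one aligned load and the head
bytes of the next; which end of the register those bytes occupy depends on
endianness, so the sld/srd directions swap, which is all the #ifdef blocks
above change.  One combine step in C (sketch; shift = 8 * misalignment,
assumed nonzero):

    #include <stdint.h>

    static uint64_t
    combine (uint64_t earlier, uint64_t later, unsigned shift)  /* 0 < shift < 64 */
    {
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      return (earlier >> shift) | (later << (64 - shift));
    #else
      return (earlier << shift) | (later >> (64 - shift));
    #endif
    }
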
diff --git a/libc/sysdeps/powerpc/powerpc64/memset.S b/libc/sysdeps/powerpc/powerpc64/memset.S
index 6acf149c8..1027a592c 100644
--- a/libc/sysdeps/powerpc/powerpc64/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/memset.S
@@ -55,14 +55,14 @@ L(_memset):
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -84,14 +84,14 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -212,7 +212,7 @@ L(le4):
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
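
The insrdi sequence replicates the fill byte across a doubleword by doubling
its width at each step; in C that is three shifted ORs (or a single multiply
by 0x0101010101010101).  Sketch (illustration only):

    #include <stdint.h>

    static uint64_t
    replicate_byte (uint64_t c)
    {
      c &= 0xff;     /* memset uses only the low byte of its int argument */
      c |= c << 8;   /* byte     -> halfword    (insrdi rCHR,rCHR,8,48)  */
      c |= c << 16;  /* halfword -> word        (insrdi rCHR,rCHR,16,32) */
      c |= c << 32;  /* word     -> doubleword  (insrdi rCHR,rCHR,32,0)  */
      return c;      /* == 0x0101010101010101 * (original low byte) */
    }
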
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
index 69caedc9f..80d67c9aa 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
@@ -1,4 +1,4 @@
-/* Optimized strcmp implementation for PowerPC64.
+/* Optimized memcmp implementation for PowerPC64.
Copyright (C) 2003-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,13 +18,14 @@
#include <sysdep.h>
-/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+/* int [r3] memcmp (const char *s1 [r3],
+ const char *s2 [r4],
+ size_t size [r5]) */
.machine power4
EALIGN (memcmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,107 +36,127 @@ EALIGN (memcmp, 4, 0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP, rSTR2, rSTR1
+ xor r0, rSTR2, rSTR1
cmpldi cr6, rN, 0
cmpldi cr1, rN, 12
- clrldi. rTMP, rTMP, 61
- clrldi rBITDIF, rSTR1, 61
- cmpldi cr5, rBITDIF, 0
+ clrldi. r0, r0, 61
+ clrldi r12, rSTR1, 61
+ cmpldi cr5, r12, 0
beq- cr6, L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
blt cr1, L(bytealigned)
- std rWORD8,-8(r1)
- cfi_offset(rWORD8,-8)
- std rWORD7,-16(r1)
- cfi_offset(rWORD7,-16)
+ std rWORD8, -8(r1)
+ cfi_offset(rWORD8, -8)
+ std rWORD7, -16(r1)
+ cfi_offset(rWORD7, -16)
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already double word
- aligned and can perform the DWaligned loop.
+ of r12 to 0. If r12 == 0 then we are already double word
+ aligned and can perform the DW aligned loop.
Otherwise we know the two strings have the same alignment (but not
- yet DW). So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ yet DW). So we force the string addresses to the next lower DW
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
- normal (DWaligned) compare loop, starting at the second double word,
+ normal (DW aligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
- correct and the first DW (shifted) is in the expected resister pair. */
- .align 4
+ versioning for the first DW. This ensures that the loop count is
+ correct and the first DW (shifted) is in the expected register pair. */
+ .align 4
L(samealignment):
clrrdi rSTR1, rSTR1, 3
clrrdi rSTR2, rSTR2, 3
beq cr5, L(DWaligned)
- add rN, rN, rBITDIF
- sldi r11, rBITDIF, 3
- srdi rTMP, rN, 5 /* Divide by 32 */
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 0(rSTR1)
ld rWORD2, 0(rSTR2)
- cmpldi cr1, rBITDIF, 16
+#endif
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
clrldi rN, rN, 61
beq L(dPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(dPs3)
beq cr1, L(dPs2)
/* Remainder is 8 */
- .align 3
+ .align 3
L(dsP1):
- sld rWORD5, rWORD1, r11
- sld rWORD6, rWORD2, r11
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
cmpld cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 16 */
- .align 4
+ .align 4
L(dPs2):
- sld rWORD5, rWORD1, r11
- sld rWORD6, rWORD2, r11
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
cmpld cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 8(rSTR1)
ld rWORD8, 8(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 24 */
- .align 4
+ .align 4
L(dPs3):
- sld rWORD3, rWORD1, r11
- sld rWORD4, rWORD2, r11
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD2, rWORD6
cmpld cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(dPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- sld rWORD1, rWORD1, r11
- sld rWORD2, rWORD2, r11
- cmpld cr0, rWORD1, rWORD2
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD2, rWORD6
+ cmpld cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are double word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(DWaligned):
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
- srdi rTMP, rN, 5 /* Divide by 32 */
- cmpldi cr1, rBITDIF, 16
+ andi. r12, rN, 24 /* Get the DW remainder */
+ srdi r0, rN, 5 /* Divide by 32 */
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
clrldi rN, rN, 61
beq L(dP4)
@@ -143,174 +164,343 @@ L(DWaligned):
beq cr1, L(dP2)
/* Remainder is 8 */
- .align 4
+ .align 4
L(dP1):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 0(rSTR1)
ld rWORD6, 0(rSTR2)
+#endif
cmpld cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP1e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
- bne cr5, L(dLcr5)
- bne cr0, L(dLcr0)
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 32(rSTR1)
ldu rWORD8, 32(rSTR2)
+#endif
bne cr1, L(dLcr1)
cmpld cr5, rWORD7, rWORD8
bdnz L(dLoop)
bne cr6, L(dLcr6)
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- .align 3
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ .align 3
L(dP1x):
sldi. r12, rN, 3
- bne cr5, L(dLcr5)
+ bne cr5, L(dLcr5x)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
li rRTN, 0
blr
/* Remainder is 16 */
- .align 4
+ .align 4
L(dP2):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 0(rSTR1)
ld rWORD6, 0(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 8(rSTR1)
ld rWORD8, 8(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
L(dP2e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 16(rSTR1)
ld rWORD2, 16(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 24(rSTR1)
ld rWORD4, 24(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
bne cr6, L(dLcr6)
bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP2x):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 8(rSTR1)
ld rWORD4, 8(rSTR2)
- cmpld cr5, rWORD3, rWORD4
+#endif
+ cmpld cr1, rWORD3, rWORD4
sldi. r12, rN, 3
- bne cr6, L(dLcr6)
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
- bne cr5, L(dLcr5)
+#endif
+ bne cr1, L(dLcr1x)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
li rRTN, 0
blr
/* Remainder is 24 */
- .align 4
+ .align 4
L(dP3):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 0(rSTR1)
ld rWORD4, 0(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
L(dP3e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 8(rSTR1)
ld rWORD6, 8(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 16(rSTR1)
ld rWORD8, 16(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 24(rSTR1)
ld rWORD2, 24(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
+#endif
bne cr1, L(dLcr1)
bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP3x):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 16(rSTR1)
ld rWORD2, 16(rSTR2)
- cmpld cr5, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
sldi. r12, rN, 3
- bne cr1, L(dLcr1)
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
- bne cr6, L(dLcr6)
+#endif
+ bne cr6, L(dLcr6x)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
- bne cr5, L(dLcr5)
+ bne cr7, L(dLcr7x)
bne L(d00)
li rRTN, 0
blr
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(dP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 0(rSTR1)
ld rWORD2, 0(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP4e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 8(rSTR1)
ld rWORD4, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 16(rSTR1)
ld rWORD6, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 24(rSTR1)
ldu rWORD8, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(dLoop):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
bne cr6, L(dLcr6)
L(dLoop1):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr5, L(dLcr5)
L(dLoop2):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(dLoop3):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 32(rSTR1)
ldu rWORD8, 32(rSTR2)
+#endif
bne- cr1, L(dLcr1)
- cmpld cr0, rWORD1, rWORD2
+ cmpld cr7, rWORD1, rWORD2
bdnz+ L(dLoop)
L(dL4):
@@ -320,7 +510,7 @@ L(dL4):
bne cr5, L(dLcr5)
cmpld cr5, rWORD7, rWORD8
L(d44):
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
bne cr1, L(dLcr1)
L(d24):
@@ -329,60 +519,74 @@ L(d14):
sldi. r12, rN, 3
bne cr5, L(dLcr5)
L(d04):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 7 bytes to compare. Since
we are aligned it is safe to load the whole double word, and use
shift right double to eliminate bits beyond the compare length. */
L(d00):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
srd rWORD1, rWORD1, rN
srd rWORD2, rWORD2, rN
- cmpld cr5, rWORD1, rWORD2
- bne cr5, L(dLcr5x)
+ cmpld cr7, rWORD1, rWORD2
+ bne cr7, L(dLcr7x)
li rRTN, 0
blr
- .align 4
-L(dLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+
+ .align 4
+L(dLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr7x):
li rRTN, 1
- bgtlr cr0
+ bgtlr cr7
li rRTN, -1
blr
- .align 4
+ .align 4
L(dLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr1x):
li rRTN, 1
bgtlr cr1
li rRTN, -1
blr
- .align 4
+ .align 4
L(dLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr6x):
li rRTN, 1
bgtlr cr6
li rRTN, -1
blr
- .align 4
+ .align 4
L(dLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dLcr5x):
li rRTN, 1
bgtlr cr5
li rRTN, -1
blr
- .align 4
+ .align 4
L(bytealigned):
- mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+#if 0
+/* Huh? We've already branched on cr6! */
beq- cr6, L(zeroLength)
+#endif
/* We need to prime this loop. This loop is swing modulo scheduled
to avoid pipe delays. The dependent instruction latencies (load to
@@ -397,7 +601,7 @@ L(bytealigned):
lbz rWORD1, 0(rSTR1)
lbz rWORD2, 0(rSTR2)
bdz- L(b11)
- cmpld cr0, rWORD1, rWORD2
+ cmpld cr7, rWORD1, rWORD2
lbz rWORD3, 1(rSTR1)
lbz rWORD4, 1(rSTR2)
bdz- L(b12)
@@ -405,11 +609,11 @@ L(bytealigned):
lbzu rWORD5, 2(rSTR1)
lbzu rWORD6, 2(rSTR2)
bdz- L(b13)
- .align 4
+ .align 4
L(bLoop):
lbzu rWORD1, 1(rSTR1)
lbzu rWORD2, 1(rSTR2)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
cmpld cr6, rWORD5, rWORD6
bdz- L(b3i)
@@ -418,7 +622,7 @@ L(bLoop):
lbzu rWORD4, 1(rSTR2)
bne- cr1, L(bLcr1)
- cmpld cr0, rWORD1, rWORD2
+ cmpld cr7, rWORD1, rWORD2
bdz- L(b2i)
lbzu rWORD5, 1(rSTR1)
@@ -435,23 +639,23 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
bne- cr1, L(bLcr1)
b L(bx56)
- .align 4
+ .align 4
L(b2i):
bne- cr6, L(bLcr6)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
b L(bx34)
- .align 4
+ .align 4
L(b3i):
bne- cr1, L(bLcr1)
bne- cr6, L(bLcr6)
b L(bx12)
- .align 4
-L(bLcr0):
+ .align 4
+L(bLcr7):
li rRTN, 1
- bgtlr cr0
+ bgtlr cr7
li rRTN, -1
blr
L(bLcr1):
@@ -466,14 +670,14 @@ L(bLcr6):
blr
L(b13):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
bne- cr1, L(bx34)
L(bx56):
sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
L(bx34):
sub rRTN, rWORD3, rWORD4
blr
@@ -481,101 +685,106 @@ L(b11):
L(bx12):
sub rRTN, rWORD1, rWORD2
blr
- .align 4
-L(zeroLengthReturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ .align 4
L(zeroLength):
li rRTN, 0
blr
- .align 4
+ .align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word
+ of r12 to 0. If r12 == 0 then rStr1 is double word
aligned and can perform the DWunaligned loop.
Otherwise we know that rSTR1 is not yet DW aligned.
So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
+ versioning for the first DW. This ensures that the loop count is
correct and the first DW (shifted) is in the expected register pair.  */
-#define rSHL r29 /* Unaligned shift left count. */
-#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rSHL r29 /* Unaligned shift left count. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
L(unaligned):
- std r29,-24(r1)
- cfi_offset(r29,-24)
+ std rSHL, -24(r1)
+ cfi_offset(rSHL, -24)
clrldi rSHL, rSTR2, 61
beq- cr6, L(duzeroLength)
- std r28,-32(r1)
- cfi_offset(r28,-32)
+ std rSHR, -32(r1)
+ cfi_offset(rSHR, -32)
beq cr5, L(DWunaligned)
- std r27,-40(r1)
- cfi_offset(r27,-40)
-/* Adjust the logical start of rSTR2 ro compensate for the extra bits
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 DW. */
- sub r27, rSTR2, rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the DW before that DW that contains
the actual start of rSTR2. */
clrrdi rSTR2, rSTR2, 3
- std r26,-48(r1)
- cfi_offset(r26,-48)
-/* Compute the left/right shift counts for the unalign rSTR2,
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
+/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (DW aligned) start of rSTR1. */
- clrldi rSHL, r27, 61
+ clrldi rSHL, rWORD8_SHIFT, 61
clrrdi rSTR1, rSTR1, 3
- std r25,-56(r1)
- cfi_offset(r25,-56)
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
sldi rSHL, rSHL, 3
- cmpld cr5, r27, rSTR2
- add rN, rN, rBITDIF
- sldi r11, rBITDIF, 3
- std r24,-64(r1)
- cfi_offset(r24,-64)
+ cmpld cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
subfic rSHR, rSHL, 64
- srdi rTMP, rN, 5 /* Divide by 32 */
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a DW where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
li rWORD8, 0
blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD8, 0(rSTR2)
- la rSTR2, 8(rSTR2)
+ addi rSTR2, rSTR2, 8
+#endif
sld rWORD8, rWORD8, rSHL
L(dus0):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 0(rSTR1)
ld rWORD2, 0(rSTR2)
- cmpldi cr1, rBITDIF, 16
+#endif
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
- srd rG, rWORD2, rSHR
+ srd r12, rWORD2, rSHR
clrldi rN, rN, 61
beq L(duPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
bgt cr1, L(duPs3)
beq cr1, L(duPs2)
/* Remainder is 8 */
- .align 4
+ .align 4
L(dusP1):
- sld rB, rWORD2, rSHL
- sld rWORD7, rWORD1, r11
- sld rWORD8, rWORD8, r11
+ sld rWORD8_SHIFT, rWORD2, rSHL
+ sld rWORD7, rWORD1, rWORD6
+ sld rWORD8, rWORD8, rWORD6
bge cr7, L(duP1e)
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
@@ -585,95 +794,133 @@ L(dusP1):
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
- .align 4
+ .align 4
L(duPs2):
- sld rH, rWORD2, rSHL
- sld rWORD5, rWORD1, r11
- sld rWORD6, rWORD8, r11
+ sld rWORD6_SHIFT, rWORD2, rSHL
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 24 */
- .align 4
+ .align 4
L(duPs3):
- sld rF, rWORD2, rSHL
- sld rWORD3, rWORD1, r11
- sld rWORD4, rWORD8, r11
+ sld rWORD4_SHIFT, rWORD2, rSHL
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(duPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
- sld rD, rWORD2, rSHL
- sld rWORD1, rWORD1, r11
- sld rWORD2, rWORD8, r11
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is double word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(DWunaligned):
- std r27,-40(r1)
- cfi_offset(r27,-40)
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
clrrdi rSTR2, rSTR2, 3
- std r26,-48(r1)
- cfi_offset(r26,-48)
- srdi rTMP, rN, 5 /* Divide by 32 */
- std r25,-56(r1)
- cfi_offset(r25,-56)
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
- std r24,-64(r1)
- cfi_offset(r24,-64)
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
+ srdi r0, rN, 5 /* Divide by 32 */
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
+ andi. r12, rN, 24 /* Get the DW remainder */
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
sldi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD6, 0(rSTR2)
ldu rWORD8, 8(rSTR2)
- cmpldi cr1, rBITDIF, 16
+#endif
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
clrldi rN, rN, 61
subfic rSHR, rSHL, 64
- sld rH, rWORD6, rSHL
+ sld rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(duP3)
beq cr1, L(duP2)
/* Remainder is 8 */
- .align 4
+ .align 4
L(duP1):
- srd rG, rWORD8, rSHR
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD7, 0(rSTR1)
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+#endif
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP1x)
L(duP1e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
- cmpld cr0, rWORD1, rWORD2
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
bne cr5, L(duLcr5)
- or rWORD4, rC, rD
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- bne cr0, L(duLcr0)
- or rWORD6, rE, rF
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
cmpld cr6, rWORD5, rWORD6
b L(duLoop3)
- .align 4
+ .align 4
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
how we handle the remaining bytes. */
@@ -683,186 +930,321 @@ L(duP1x):
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
- .align 4
+ .align 4
L(duP2):
- srd rE, rWORD8, rSHR
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD5, 0(rSTR1)
- or rWORD6, rE, rH
- sld rH, rWORD8, rSHL
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ sld rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 8(rSTR1)
ld rWORD8, 8(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 16(rSTR1)
ld rWORD2, 16(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 24(rSTR1)
ld rWORD4, 24(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
bne cr5, L(duLcr5)
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
cmpld cr1, rWORD3, rWORD4
b L(duLoop2)
- .align 4
+ .align 4
L(duP2x):
cmpld cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
bne cr6, L(duLcr6)
sldi. rN, rN, 3
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 24 */
- .align 4
+ .align 4
L(duP3):
- srd rC, rWORD8, rSHR
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD3, 0(rSTR1)
- sld rF, rWORD8, rSHL
- or rWORD4, rC, rH
+#endif
+ sld rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 8(rSTR1)
ld rWORD6, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 16(rSTR1)
ld rWORD8, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 24(rSTR1)
ld rWORD2, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(duLoop1)
- .align 4
+ .align 4
L(duP3x):
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmpld cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
sldi. rN, rN, 3
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(duP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- srd rA, rWORD8, rSHR
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD1, 0(rSTR1)
- sld rD, rWORD8, rSHL
- or rWORD2, rA, rH
+#endif
+ sld rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 8(rSTR1)
ld rWORD4, 8(rSTR2)
- cmpld cr0, rWORD1, rWORD2
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
- or rWORD4, rC, rD
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 16(rSTR1)
ld rWORD6, 16(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
- bne cr0, L(duLcr0)
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 24(rSTR1)
ldu rWORD8, 24(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
cmpld cr5, rWORD7, rWORD8
bdz- L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(duLoop):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- bne cr0, L(duLcr0)
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 32(rSTR1)
ldu rWORD8, 32(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
bne- cr1, L(duLcr1)
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz+ L(duLoop)
L(duL4):
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmpld cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
cmpld cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
cmpld cr5, rWORD7, rWORD8
L(du44):
- bne cr0, L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
bne cr1, L(duLcr1)
L(du24):
@@ -872,103 +1254,110 @@ L(du14):
bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 7 bytes to compare. We use
shift right double to eliminate bits beyond the compare length.
- This allows the use of double word subtract to compute the final
- result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
+ rWORD8_SHIFT). */
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
- .align 4
+#endif
+ srd r0, rWORD2, rSHR
+ .align 4
L(dutrim):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+#else
ld rWORD1, 8(rSTR1)
- ld rWORD8,-8(r1)
+#endif
+ ld rWORD8, -8(r1)
subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */
- or rWORD2, rA, rB
- ld rWORD7,-16(r1)
- ld r29,-24(r1)
+ or rWORD2, r0, rWORD8_SHIFT
+ ld rWORD7, -16(r1)
+ ld rSHL, -24(r1)
srd rWORD1, rWORD1, rN
srd rWORD2, rWORD2, rN
- ld r28,-32(r1)
- ld r27,-40(r1)
+ ld rSHR, -32(r1)
+ ld rWORD8_SHIFT, -40(r1)
li rRTN, 0
- cmpld cr0, rWORD1, rWORD2
- ld r26,-48(r1)
- ld r25,-56(r1)
- beq cr0, L(dureturn24)
+ cmpld cr7, rWORD1, rWORD2
+ ld rWORD2_SHIFT, -48(r1)
+ ld rWORD4_SHIFT, -56(r1)
+ beq cr7, L(dureturn24)
li rRTN, 1
- ld r24,-64(r1)
- bgtlr cr0
+ ld rWORD6_SHIFT, -64(r1)
+ bgtlr cr7
li rRTN, -1
blr
- .align 4
-L(duLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ .align 4
+L(duLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
- bgt cr0, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ bgt cr7, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
bgt cr1, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
bgt cr6, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
bgt cr5, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dureturn29):
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
L(dureturn27):
- ld r27,-40(r1)
+ ld rWORD8_SHIFT, -40(r1)
L(dureturn26):
- ld r26,-48(r1)
+ ld rWORD2_SHIFT, -48(r1)
L(dureturn25):
- ld r25,-56(r1)
+ ld rWORD4_SHIFT, -56(r1)
L(dureturn24):
- ld r24,-64(r1)
+ ld rWORD6_SHIFT, -64(r1)
blr
L(duzeroLength):
- li rRTN,0
+ li rRTN, 0
blr
END (memcmp)
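
The little-endian support added above replaces each pair of ld loads with ldbrx (byte-reversed indexed loads).  The reason is that memcmp's result must follow memory order, and an unsigned doubleword compare only gives that directly when the byte at the lowest address ends up most significant in the register.  A minimal C sketch of the idea (editor's illustration, not part of the patch; load_be64 and memcmp_dw_sketch are made-up names):

    #include <stdint.h>
    #include <string.h>

    /* Model of what ld gives on big-endian and ldbrx gives on little-endian:
       the byte at the lowest address becomes the most significant byte.  */
    static uint64_t load_be64 (const unsigned char *p)
    {
      uint64_t v = 0;
      for (int i = 0; i < 8; i++)
        v = (v << 8) | p[i];
      return v;
    }

    /* Doubleword-at-a-time compare of two 8-byte aligned buffers.  */
    static int memcmp_dw_sketch (const void *s1, const void *s2, size_t n)
    {
      const unsigned char *p1 = s1, *p2 = s2;
      while (n >= 8)
        {
          uint64_t w1 = load_be64 (p1), w2 = load_be64 (p2);
          if (w1 != w2)
            return w1 > w2 ? 1 : -1;
          p1 += 8; p2 += 8; n -= 8;
        }
      return n ? memcmp (p1, p2, n) : 0;
    }
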
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
index 4317c7e78..f9a7260dc 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
@@ -214,15 +214,28 @@ EALIGN (memcpy, 5, 0)
blt cr6,5f
srdi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmpldi cr1,10,16
@@ -334,13 +347,23 @@ EALIGN (memcpy, 5, 0)
bf 30,1f
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
sld 0,7,10
srd 8,6,9
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -349,8 +372,13 @@ EALIGN (memcpy, 5, 0)
blt cr6,8f /* if total DWs = 3, then bypass loop */
bf 31,4f
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -361,8 +389,13 @@ EALIGN (memcpy, 5, 0)
b 4f
.align 4
1:
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
addi 5,5,16
or 0,0,8
bf 31,4f
@@ -373,23 +406,44 @@ EALIGN (memcpy, 5, 0)
addi 4,4,8
.align 4
/* copy 32 bytes at a time */
-4: sld 0,6,10
+4:
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
+ sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
sld 0,7,10
srd 8,6,9
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
sld 0,7,10
srd 8,6,9
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -399,8 +453,13 @@ EALIGN (memcpy, 5, 0)
.align 4
8:
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
std 0,0(4)
3:
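
The little-endian blocks above swap each sld/srd pair.  A brief sketch of why (illustration only, not from the patch; merge_dw is a hypothetical helper name): the unaligned copy merges two adjacent source doublewords, and the bytes that come first in memory sit at the most significant end of a register on big-endian but at the least significant end on little-endian, so the shift directions reverse.

    #include <stdint.h>

    /* Merge two adjacent source doublewords into one aligned destination
       doubleword.  'first' is the doubleword at the lower address, 'second'
       the next one; misalign is the source misalignment in bytes (1..7).  */
    static uint64_t merge_dw (uint64_t first, uint64_t second, unsigned misalign)
    {
      unsigned s = misalign * 8;            /* 8 .. 56 bits */
    #ifdef __LITTLE_ENDIAN__
      return (first >> s) | (second << (64 - s));
    #else
      return (first << s) | (second >> (64 - s));
    #endif
    }
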
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memset.S b/libc/sysdeps/powerpc/powerpc64/power4/memset.S
index dbecee8b9..ad0d38128 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memset.S
@@ -50,14 +50,14 @@ L(_memset):
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -79,14 +79,14 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -146,24 +146,24 @@ L(zloopstart):
L(getCacheAligned):
cmpldi cr1,rLEN,32
andi. rTMP,rMEMP,127
- blt cr1,L(handletail32)
- beq L(cacheAligned)
+ blt cr1,L(handletail32)
+ beq L(cacheAligned)
addi rMEMP,rMEMP,32
addi rLEN,rLEN,-32
- std rCHR,-32(rMEMP)
- std rCHR,-24(rMEMP)
- std rCHR,-16(rMEMP)
- std rCHR,-8(rMEMP)
- b L(getCacheAligned)
+ std rCHR,-32(rMEMP)
+ std rCHR,-24(rMEMP)
+ std rCHR,-16(rMEMP)
+ std rCHR,-8(rMEMP)
+ b L(getCacheAligned)
/* Now we are aligned to the cache line and can use dcbz. */
L(cacheAligned):
cmpld cr1,rLEN,rCLS
- blt cr1,L(handletail32)
+ blt cr1,L(handletail32)
dcbz 0,rMEMP
subf rLEN,rCLS,rLEN
- add rMEMP,rMEMP,rCLS
- b L(cacheAligned)
+ add rMEMP,rMEMP,rCLS
+ b L(cacheAligned)
/* We are here because the cache line size was set and was not 32-bytes
and the remainder (rLEN) is less than the actual cache line size.
@@ -200,7 +200,7 @@ L(le4):
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
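
The byte-replication steps above build the fill pattern up from a byte to a full doubleword so memset can store 8 bytes (or a cache line) at a time.  What the three insrdi steps compute, as a C sketch (illustration only, not from the patch; replicate_byte is a made-up name):

    #include <stdint.h>

    /* Byte -> halfword -> word -> doubleword fill pattern.  */
    static uint64_t replicate_byte (unsigned char c)
    {
      uint64_t v = c;
      v |= v << 8;    /* replicate byte to halfword */
      v |= v << 16;   /* replicate halfword to word */
      v |= v << 32;   /* replicate word to doubleword */
      return v;
    }
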
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
index 1276e16a5..5d136cfa2 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
@@ -25,7 +25,7 @@
EALIGN (strncmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -38,6 +38,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -79,12 +80,59 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -93,7 +141,7 @@ L(endstring):
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -101,7 +149,7 @@ L(equal):
blr
L(different):
- ldu rWORD1, -8(rSTR1)
+ ld rWORD1, -8(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
blt- L(highbit)
@@ -109,11 +157,10 @@ L(different):
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
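
In the little-endian L(endstring)/L(different) paths above, the first byte that differs in memory order is the least significant differing byte of the loaded doublewords, so the code isolates the lowest set bit of the xor and works from there (using cntlzd on the isolated bit, presumably because a count-trailing-zeros instruction is not assumed on this target).  A C sketch of the same idea (illustration only, assuming GCC builtins; first_diff_byte_le is a made-up name and it omits the null-terminator masking done in L(endstring)):

    #include <stdint.h>

    /* Given two unequal doublewords loaded from little-endian memory,
       return the strcmp-style sign of the first differing byte pair.  */
    static int first_diff_byte_le (uint64_t w1, uint64_t w2)
    {
      uint64_t diff = w1 ^ w2;               /* caller ensures w1 != w2 */
      int k = __builtin_ctzll (diff) & ~7;   /* bit offset of first differing byte */
      unsigned char b1 = (w1 >> k) & 0xff;
      unsigned char b2 = (w2 >> k) & 0xff;
      return b1 < b2 ? -1 : 1;
    }
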
diff --git a/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
index d6d242d29..e3f3d8a30 100644
--- a/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
@@ -400,15 +400,28 @@ L(das_tail2):
blt cr6,5f
srdi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmpldi cr1,10,16
@@ -595,13 +608,24 @@ L(du1_do):
bf 30,L(du1_1dw)
/* there are at least two DWs to copy */
+ /* FIXME: can combine last shift and "or" into "rldimi" */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 8
+ sldi 8,6, 64-8
+#else
sldi 0,7, 8
srdi 8,6, 64-8
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -610,8 +634,13 @@ L(du1_do):
blt cr6,L(du1_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du1_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -622,8 +651,13 @@ L(du1_do):
b L(du1_loop)
.align 4
L(du1_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du1_loop)
@@ -635,23 +669,43 @@ L(du1_1dw):
.align 4
/* copy 32 bytes at a time */
L(du1_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 8
+ sldi 8,6, 64-8
+#else
sldi 0,7, 8
srdi 8,6, 64-8
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 8
+ sldi 8,6, 64-8
+#else
sldi 0,7, 8
srdi 8,6, 64-8
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -661,8 +715,13 @@ L(du1_loop):
.align 4
L(du1_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -672,13 +731,23 @@ L(du2_do):
bf 30,L(du2_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 16
+ sldi 8,6, 64-16
+#else
sldi 0,7, 16
srdi 8,6, 64-16
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -687,8 +756,13 @@ L(du2_do):
blt cr6,L(du2_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du2_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -699,8 +773,13 @@ L(du2_do):
b L(du2_loop)
.align 4
L(du2_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du2_loop)
@@ -712,23 +791,43 @@ L(du2_1dw):
.align 4
/* copy 32 bytes at a time */
L(du2_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 16
+ sldi 8,6, 64-16
+#else
sldi 0,7, 16
srdi 8,6, 64-16
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 16
+ sldi 8,6, 64-16
+#else
sldi 0,7, 16
srdi 8,6, 64-16
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -738,8 +837,13 @@ L(du2_loop):
.align 4
L(du2_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -749,13 +853,23 @@ L(du3_do):
bf 30,L(du3_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 24
+ sldi 8,6, 64-24
+#else
sldi 0,7, 24
srdi 8,6, 64-24
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -764,8 +878,13 @@ L(du3_do):
blt cr6,L(du3_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du3_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -776,8 +895,13 @@ L(du3_do):
b L(du3_loop)
.align 4
L(du3_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du3_loop)
@@ -789,23 +913,43 @@ L(du3_1dw):
.align 4
/* copy 32 bytes at a time */
L(du3_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 24
+ sldi 8,6, 64-24
+#else
sldi 0,7, 24
srdi 8,6, 64-24
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 24
+ sldi 8,6, 64-24
+#else
sldi 0,7, 24
srdi 8,6, 64-24
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -815,8 +959,13 @@ L(du3_loop):
.align 4
L(du3_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -832,13 +981,23 @@ L(du4_dox):
bf 30,L(du4_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 32
+ sldi 8,6, 64-32
+#else
sldi 0,7, 32
srdi 8,6, 64-32
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -847,8 +1006,13 @@ L(du4_dox):
blt cr6,L(du4_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du4_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -859,8 +1023,13 @@ L(du4_dox):
b L(du4_loop)
.align 4
L(du4_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du4_loop)
@@ -872,23 +1041,43 @@ L(du4_1dw):
.align 4
/* copy 32 bytes at a time */
L(du4_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 32
+ sldi 8,6, 64-32
+#else
sldi 0,7, 32
srdi 8,6, 64-32
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 32
+ sldi 8,6, 64-32
+#else
sldi 0,7, 32
srdi 8,6, 64-32
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -898,8 +1087,13 @@ L(du4_loop):
.align 4
L(du4_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -909,13 +1103,23 @@ L(du5_do):
bf 30,L(du5_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 40
+ sldi 8,6, 64-40
+#else
sldi 0,7, 40
srdi 8,6, 64-40
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -924,8 +1128,13 @@ L(du5_do):
blt cr6,L(du5_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du5_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -936,8 +1145,13 @@ L(du5_do):
b L(du5_loop)
.align 4
L(du5_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du5_loop)
@@ -949,23 +1163,43 @@ L(du5_1dw):
.align 4
/* copy 32 bytes at a time */
L(du5_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 40
+ sldi 8,6, 64-40
+#else
sldi 0,7, 40
srdi 8,6, 64-40
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 40
+ sldi 8,6, 64-40
+#else
sldi 0,7, 40
srdi 8,6, 64-40
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -975,8 +1209,13 @@ L(du5_loop):
.align 4
L(du5_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -986,13 +1225,23 @@ L(du6_do):
bf 30,L(du6_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 48
+ sldi 8,6, 64-48
+#else
sldi 0,7, 48
srdi 8,6, 64-48
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -1001,8 +1250,13 @@ L(du6_do):
blt cr6,L(du6_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du6_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -1013,8 +1267,13 @@ L(du6_do):
b L(du6_loop)
.align 4
L(du6_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du6_loop)
@@ -1026,23 +1285,43 @@ L(du6_1dw):
.align 4
/* copy 32 bytes at a time */
L(du6_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 48
+ sldi 8,6, 64-48
+#else
sldi 0,7, 48
srdi 8,6, 64-48
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 48
+ sldi 8,6, 64-48
+#else
sldi 0,7, 48
srdi 8,6, 64-48
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -1052,8 +1331,13 @@ L(du6_loop):
.align 4
L(du6_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -1063,13 +1347,23 @@ L(du7_do):
bf 30,L(du7_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 56
+ sldi 8,6, 64-56
+#else
sldi 0,7, 56
srdi 8,6, 64-56
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -1078,8 +1372,13 @@ L(du7_do):
blt cr6,L(du7_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du7_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -1090,8 +1389,13 @@ L(du7_do):
b L(du7_loop)
.align 4
L(du7_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du7_loop)
@@ -1103,23 +1407,43 @@ L(du7_1dw):
.align 4
/* copy 32 bytes at a time */
L(du7_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 56
+ sldi 8,6, 64-56
+#else
sldi 0,7, 56
srdi 8,6, 64-56
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 56
+ sldi 8,6, 64-56
+#else
sldi 0,7, 56
srdi 8,6, 64-56
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -1129,8 +1453,13 @@ L(du7_loop):
.align 4
L(du7_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
diff --git a/libc/sysdeps/powerpc/powerpc64/power6/memset.S b/libc/sysdeps/powerpc/powerpc64/power6/memset.S
index 3e8ae2d25..d61988a19 100644
--- a/libc/sysdeps/powerpc/powerpc64/power6/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power6/memset.S
@@ -47,14 +47,14 @@ L(_memset):
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -76,14 +76,14 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -344,7 +344,7 @@ L(le4):
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
index d0071c765..ebec0e0ba 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
@@ -39,10 +39,8 @@ EALIGN (__finite, 4, 0)
stfd fp1,-16(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
-
- lhz r4,-16(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
clrlwi r4,r4,17 /* r4 = abs(r4). */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
bltlr cr7 /* LT means finite, other non-finite. */
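
The lhz change above fetches just the 16 bits holding the sign and biased exponent; HISHORT is presumably the endian-dependent offset of that halfword within the stored double.  The test itself is simply "biased exponent all ones means Inf or NaN".  A C sketch (illustration only, not from the patch; finite_sketch is a made-up name):

    #include <stdint.h>
    #include <string.h>

    static int finite_sketch (double x)
    {
      uint64_t bits;
      memcpy (&bits, &x, sizeof bits);        /* like stfd followed by a reload */
      unsigned hi = (bits >> 48) & 0x7fff;    /* top 16 bits, sign dropped */
      return hi < 0x7ff0;                     /* 0x7ffx means Inf or NaN */
    }
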
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
index 1aea12383..8d088db5a 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
@@ -38,9 +38,8 @@ EALIGN (__isinf, 4, 0)
stfd fp1,-16(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r4,-16(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
li r3,1
beqlr cr7 /* EQ means INF, otherwise -INF. */
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memchr.S b/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
index 3416897f5..5076dd0c1 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
@@ -25,109 +25,112 @@ ENTRY (__memchr)
CALL_MCOUNT 2
dcbt 0,r3
clrrdi r8,r3,3
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
add r7,r3,r5 /* Calculate the last acceptable address. */
+ insrdi r4,r4,16,32
cmpldi r5,32
+ li r9, -1
+ rlwinm r6,r3,3,26,28 /* Calculate padding. */
insrdi r4,r4,32,0
+ addi r7,r7,-1
+#ifdef __LITTLE_ENDIAN__
+ sld r9,r9,r6
+#else
+ srd r9,r9,r6
+#endif
ble L(small_range)
- cmpld cr7,r3,r7 /* Compare the starting address (r3) with the
- ending address (r7). If (r3 >= r7),
- the size passed in was zero or negative. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Artificially set our ending address (r7)
- such that we will exit early. */
-
-L(proceed):
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- cmpldi cr6,r6,0 /* cr6 == Do we have padding? */
ld r12,0(r8) /* Load doubleword from memory. */
- cmpb r10,r12,r4 /* Check for BYTEs in DWORD1. */
- beq cr6,L(proceed_no_padding)
- sld r10,r10,r6
- srd r10,r10,r6
-L(proceed_no_padding):
- cmpldi cr7,r10,0 /* Does r10 indicate we got a hit? */
+ cmpb r3,r12,r4 /* Check for BYTEs in DWORD1. */
+ and r3,r3,r9
+ clrldi r5,r7,61 /* Byte count - 1 in last dword. */
+ clrrdi r7,r7,3 /* Address of last doubleword. */
+ cmpldi cr7,r3,0 /* Does r3 indicate we got a hit? */
bne cr7,L(done)
- /* See if we are at the last acceptable address yet. */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(null)
-
mtcrf 0x01,r8
/* Are we now aligned to a quadword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
-
bt 28,L(loop_setup)
/* Handle DWORD2 of pair. */
ldu r12,8(r8)
- cmpb r10,r12,r4
- cmpldi cr7,r10,0
+ cmpb r3,r12,r4
+ cmpldi cr7,r3,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(null)
-
L(loop_setup):
- sub r5,r7,r9
- srdi r6,r5,4 /* Number of loop iterations. */
+ /* The last dword we want to read in the loop below is the one
+ containing the last byte of the string, ie. the dword at
+ (s + size - 1) & ~7, or r7. The first dword read is at
+ r8 + 8, we read 2 * cnt dwords, so the last dword read will
+ be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives
+ cnt = (r7 - r8) / 16 */
+ sub r6,r7,r8
+ srdi r6,r6,4 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since
- it's a small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE in the string. Since
+ it's a small loop (8 instructions), align it to 32-bytes. */
+ .align 5
L(loop):
/* Load two doublewords, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
ld r12,8(r8)
ldu r11,16(r8)
- cmpb r10,r12,r4
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one doubleword. */
- cmpldi cr7,r5,0
+ or r6,r9,r3 /* Merge everything in one doubleword. */
+ cmpldi cr7,r6,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. */
- subi r11,r7,8
- cmpld cr6,r8,r11
- blt cr6,L(loop_small)
- b L(null)
+ /* We may have one more dword to read. */
+ cmpld r8,r7
+ beqlr
+ ldu r12,8(r8)
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ bne cr6,L(done)
+ blr
+
+ .align 4
+L(found):
/* OK, one (or both) of the doublewords contains BYTE. Check
the first doubleword and decrement the address in case the first
doubleword really contains BYTE. */
- .align 4
-L(found):
- cmpldi cr6,r10,0
+ cmpldi cr6,r3,0
addi r8,r8,-8
bne cr6,L(done)
/* BYTE must be in the second doubleword. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,8
- /* r10 has the output of the cmpb instruction, that is, it contains
+ /* r3 has the output of the cmpb instruction, that is, it contains
0xff in the same position as BYTE in the original
doubleword from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the range. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r3,-1
+ andc r0,r0,r3
+ popcntd r0,r0 /* Count trailing zeros. */
+#else
+ cntlzd r0,r3 /* Count leading zeros before the match. */
+#endif
+ cmpld r8,r7 /* Are we on the last dword? */
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
add r3,r8,r0
- cmpld r3,r7
- bge L(null)
+ cmpld cr7,r0,r5 /* If on the last dword, check byte offset. */
+ bnelr
+ blelr cr7
+ li r3,0
blr
.align 4
@@ -139,67 +142,44 @@ L(null):
.align 4
L(small_range):
cmpldi r5,0
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- beq L(null) /* This branch is for the cmpldi r5,0 above. */
+ beq L(null)
ld r12,0(r8) /* Load word from memory. */
- cmpldi cr6,r6,0 /* cr6 == Do we have padding? */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- /* If no padding, skip the shifts. */
- beq cr6,L(small_no_padding)
- sld r10,r10,r6
- srd r10,r10,r6
-L(small_no_padding):
- cmpldi cr7,r10,0
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmpldi cr7,r3,0
+ clrldi r5,r7,61 /* Byte count - 1 in last dword. */
+ clrrdi r7,r7,3 /* Address of last doubleword. */
+ cmpld r8,r7 /* Are we done already? */
bne cr7,L(done)
+ beqlr
- /* Are we done already? */
- addi r9,r8,8
- cmpld r9,r7
- bge L(null)
- /* If we're not done, drop through into loop_small. */
-
-L(loop_small): /* loop_small has been unrolled. */
ldu r12,8(r8)
- cmpb r10,r12,r4
- addi r9,r8,8
- cmpldi cr6,r10,0
- cmpld r9,r7
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ cmpld r8,r7
bne cr6,L(done) /* Found something. */
- bge L(null) /* Hit end of string (length). */
+ beqlr /* Hit end of string (length). */
ldu r12,8(r8)
- cmpb r10,r12,r4
- addi r9,r8,8
- cmpldi cr6,r10,0
- cmpld r9,r7
- bne cr6,L(done) /* Found something. */
- bge L(null)
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ cmpld r8,r7
+ bne cr6,L(done)
+ beqlr
ldu r12,8(r8)
- subi r11,r7,8
- cmpb r10,r12,r4
- cmpldi cr6,r10,0
- ori r2,r2,0 /* Force a dispatch group. */
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ cmpld r8,r7
bne cr6,L(done)
+ beqlr
- cmpld r8,r11 /* At end of range? */
- bge L(null)
-
- /* For most cases we will never get here. Under some combinations of
- padding + length there is a leftover double that still needs to be
- checked. */
ldu r12,8(r8)
- cmpb r10,r12,r4
- addi r9,r8,8
- cmpldi cr6,r10,0
- cmpld r9,r7
- bne cr6,L(done) /* Found something. */
-
- /* Save a branch and exit directly. */
- li r3,0
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ bne cr6,L(done)
blr
-
END (__memchr)
weak_alias (__memchr, memchr)
libc_hidden_builtin_def (memchr)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
index f190c6461..6851cdc75 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
@@ -23,10 +23,9 @@
size_t size [r5]) */
.machine power7
-EALIGN (memcmp,4,0)
+EALIGN (memcmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -37,354 +36,557 @@ EALIGN (memcmp,4,0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP,rSTR2,rSTR1
- cmpldi cr6,rN,0
- cmpldi cr1,rN,12
- clrldi. rTMP,rTMP,61
- clrldi rBITDIF,rSTR1,61
- cmpldi cr5,rBITDIF,0
- beq- cr6,L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
+ xor r0, rSTR2, rSTR1
+ cmpldi cr6, rN, 0
+ cmpldi cr1, rN, 12
+ clrldi. r0, r0, 61
+ clrldi r12, rSTR1, 61
+ cmpldi cr5, r12, 0
+ beq- cr6, L(zeroLength)
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
- blt cr1,L(bytealigned)
- std rWORD8,-8(r1)
- cfi_offset(rWORD8,-8)
- std rWORD7,-16(r1)
- cfi_offset(rWORD7,-16)
+ blt cr1, L(bytealigned)
+ std rWORD8, -8(r1)
+ cfi_offset(rWORD8, -8)
+ std rWORD7, -16(r1)
+ cfi_offset(rWORD7, -16)
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already double word
- aligned and can perform the DWaligned loop.
+ of r12 to 0. If r12 == 0 then we are already double word
+ aligned and can perform the DW aligned loop.
Otherwise we know the two strings have the same alignment (but not
- yet DW). So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ yet DW). So we force the string addresses to the next lower DW
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
- normal (DWaligned) compare loop, starting at the second double word,
+ normal (DW aligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
- correct and the first DW (shifted) is in the expected resister pair. */
+ versioning for the first DW. This ensures that the loop count is
+ correct and the first DW (shifted) is in the expected register pair. */
.align 4
L(samealignment):
- clrrdi rSTR1,rSTR1,3
- clrrdi rSTR2,rSTR2,3
- beq cr5,L(DWaligned)
- add rN,rN,rBITDIF
- sldi r11,rBITDIF,3
- srdi rTMP,rN,5 /* Divide by 32 */
- andi. rBITDIF,rN,24 /* Get the DW remainder */
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- clrldi rN,rN,61
+ clrrdi rSTR1, rSTR1, 3
+ clrrdi rSTR2, rSTR2, 3
+ beq cr5, L(DWaligned)
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+#endif
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
beq L(dPs4)
- mtctr rTMP
- bgt cr1,L(dPs3)
- beq cr1,L(dPs2)
+ mtctr r0
+ bgt cr1, L(dPs3)
+ beq cr1, L(dPs2)
/* Remainder is 8 */
.align 3
L(dsP1):
- sld rWORD5,rWORD1,r11
- sld rWORD6,rWORD2,r11
- cmpld cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
+ cmpld cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 16 */
.align 4
L(dPs2):
- sld rWORD5,rWORD1,r11
- sld rWORD6,rWORD2,r11
- cmpld cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
- ld rWORD7,8(rSTR1)
- ld rWORD8,8(rSTR2)
- cmpld cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 24 */
.align 4
L(dPs3):
- sld rWORD3,rWORD1,r11
- sld rWORD4,rWORD2,r11
- cmpld cr1,rWORD3,rWORD4
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD2, rWORD6
+ cmpld cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(dPs4):
- mtctr rTMP
- sld rWORD1,rWORD1,r11
- sld rWORD2,rWORD2,r11
- cmpld cr0,rWORD1,rWORD2
+ mtctr r0
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD2, rWORD6
+ cmpld cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are double word aligned and the
compare length is at least 8 bytes. */
.align 4
L(DWaligned):
- andi. rBITDIF,rN,24 /* Get the DW remainder */
- srdi rTMP,rN,5 /* Divide by 32 */
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- clrldi rN,rN,61
+ andi. r12, rN, 24 /* Get the DW remainder */
+ srdi r0, rN, 5 /* Divide by 32 */
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
beq L(dP4)
- bgt cr1,L(dP3)
- beq cr1,L(dP2)
+ bgt cr1, L(dP3)
+ beq cr1, L(dP2)
/* Remainder is 8 */
.align 4
L(dP1):
- mtctr rTMP
+ mtctr r0
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
- ld rWORD5,0(rSTR1)
- ld rWORD6,0(rSTR2)
- cmpld cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 0(rSTR1)
+ ld rWORD6, 0(rSTR2)
+#endif
+ cmpld cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP1e):
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- bne cr0,L(dLcr0)
-
- ldu rWORD7,32(rSTR1)
- ldu rWORD8,32(rSTR2)
- bne cr1,L(dLcr1)
- cmpld cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmpld cr5, rWORD7, rWORD8
bdnz L(dLoop)
- bne cr6,L(dLcr6)
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ bne cr6, L(dLcr6)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
.align 3
L(dP1x):
- sldi. r12,rN,3
- bne cr5,L(dLcr5)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
+ sldi. r12, rN, 3
+ bne cr5, L(dLcr5x)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 16 */
.align 4
L(dP2):
- mtctr rTMP
- ld rWORD5,0(rSTR1)
- ld rWORD6,0(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
- ld rWORD7,8(rSTR1)
- ld rWORD8,8(rSTR2)
- cmpld cr5,rWORD7,rWORD8
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 0(rSTR1)
+ ld rWORD6, 0(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
L(dP2e):
- ld rWORD1,16(rSTR1)
- ld rWORD2,16(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- ld rWORD3,24(rSTR1)
- ld rWORD4,24(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr6,L(dLcr6)
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 24(rSTR1)
+ ld rWORD4, 24(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr6, L(dLcr6)
+ bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP2x):
- ld rWORD3,8(rSTR1)
- ld rWORD4,8(rSTR2)
- cmpld cr5,rWORD3,rWORD4
- sldi. r12,rN,3
- bne cr6,L(dLcr6)
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr5,L(dLcr5)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ sldi. r12, rN, 3
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr1, L(dLcr1x)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 24 */
.align 4
L(dP3):
- mtctr rTMP
- ld rWORD3,0(rSTR1)
- ld rWORD4,0(rSTR2)
- cmpld cr1,rWORD3,rWORD4
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 0(rSTR1)
+ ld rWORD4, 0(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
L(dP3e):
- ld rWORD5,8(rSTR1)
- ld rWORD6,8(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- blt cr7,L(dP3x)
- ld rWORD7,16(rSTR1)
- ld rWORD8,16(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- ld rWORD1,24(rSTR1)
- ld rWORD2,24(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- bne cr1,L(dLcr1)
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP3x):
- ld rWORD1,16(rSTR1)
- ld rWORD2,16(rSTR2)
- cmpld cr5,rWORD1,rWORD2
- sldi. r12,rN,3
- bne cr1,L(dLcr1)
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- bne cr6,L(dLcr6)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ sldi. r12, rN, 3
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ bne cr6, L(dLcr6x)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ bne cr7, L(dLcr7x)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(dP4):
- mtctr rTMP
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- cmpld cr0,rWORD1,rWORD2
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP4e):
- ld rWORD3,8(rSTR1)
- ld rWORD4,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- ld rWORD5,16(rSTR1)
- ld rWORD6,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- ldu rWORD7,24(rSTR1)
- ldu rWORD8,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
- bne cr1,L(dLcr1)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 16(rSTR1)
+ ld rWORD6, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 24(rSTR1)
+ ldu rWORD8, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
+ bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(dLoop):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
L(dLoop1):
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
L(dLoop2):
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
L(dLoop3):
- ldu rWORD7,32(rSTR1)
- ldu rWORD8,32(rSTR2)
- bne cr1,L(dLcr1)
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmpld cr7, rWORD1, rWORD2
bdnz L(dLoop)
L(dL4):
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- cmpld cr5,rWORD7,rWORD8
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ cmpld cr5, rWORD7, rWORD8
L(d44):
- bne cr0,L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
- bne cr1,L(dLcr1)
+ bne cr1, L(dLcr1)
L(d24):
- bne cr6,L(dLcr6)
+ bne cr6, L(dLcr6)
L(d14):
- sldi. r12,rN,3
- bne cr5,L(dLcr5)
+ sldi. r12, rN, 3
+ bne cr5, L(dLcr5)
L(d04):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 7 bytes to compare. Since
we are aligned it is safe to load the whole double word, and use
shift right double to eliminate bits beyond the compare length. */
L(d00):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- srd rWORD1,rWORD1,rN
- srd rWORD2,rWORD2,rN
- cmpld cr5,rWORD1,rWORD2
- bne cr5,L(dLcr5x)
- li rRTN,0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ cmpld cr7, rWORD1, rWORD2
+ bne cr7, L(dLcr7x)
+ li rRTN, 0
blr
+
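
   [Editorial note, not part of the patch: a minimal C sketch of the
   L(d00) tail compare above, assuming the two doublewords were loaded in
   big-endian byte order (plain ld on big-endian, ldbrx on little-endian)
   and that 1 to 7 bytes remain; function and parameter names are
   invented.]

   #include <stdint.h>

   static int tail_compare (uint64_t w1, uint64_t w2, unsigned remainder)
   {
     /* remainder is 1..7 here, so the shift stays below 64.  */
     unsigned shift = 64 - 8 * remainder;   /* subfic rN, r12, 64 */
     w1 >>= shift;                          /* srd rWORD1, rWORD1, rN */
     w2 >>= shift;                          /* srd rWORD2, rWORD2, rN */
     if (w1 == w2)
       return 0;
     return w1 > w2 ? 1 : -1;               /* cmpld cr7 + bgtlr/li -1 */
   }
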
.align 4
-L(dLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgtlr cr0
- li rRTN,-1
+L(dLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr7x):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
blr
.align 4
L(dLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr1x):
+ li rRTN, 1
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(dLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr6x):
+ li rRTN, 1
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(dLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dLcr5x):
- li rRTN,1
+ li rRTN, 1
bgtlr cr5
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(bytealigned):
mtctr rN
- beq cr6,L(zeroLength)
+#if 0
+/* Huh? We've already branched on cr6! */
+ beq cr6, L(zeroLength)
+#endif
/* We need to prime this loop. This loop is swing modulo scheduled
to avoid pipe delays. The dependent instruction latencies (load to
@@ -396,38 +598,38 @@ L(bytealigned):
So we must precondition some registers and condition codes so that
we don't exit the loop early on the first iteration. */
- lbz rWORD1,0(rSTR1)
- lbz rWORD2,0(rSTR2)
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
bdz L(b11)
- cmpld cr0,rWORD1,rWORD2
- lbz rWORD3,1(rSTR1)
- lbz rWORD4,1(rSTR2)
+ cmpld cr7, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
bdz L(b12)
- cmpld cr1,rWORD3,rWORD4
- lbzu rWORD5,2(rSTR1)
- lbzu rWORD6,2(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
bdz L(b13)
.align 4
L(bLoop):
- lbzu rWORD1,1(rSTR1)
- lbzu rWORD2,1(rSTR2)
- bne cr0,L(bLcr0)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne cr7, L(bLcr7)
- cmpld cr6,rWORD5,rWORD6
+ cmpld cr6, rWORD5, rWORD6
bdz L(b3i)
- lbzu rWORD3,1(rSTR1)
- lbzu rWORD4,1(rSTR2)
- bne cr1,L(bLcr1)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne cr1, L(bLcr1)
- cmpld cr0,rWORD1,rWORD2
+ cmpld cr7, rWORD1, rWORD2
bdz L(b2i)
- lbzu rWORD5,1(rSTR1)
- lbzu rWORD6,1(rSTR2)
- bne cr6,L(bLcr6)
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne cr6, L(bLcr6)
- cmpld cr1,rWORD3,rWORD4
+ cmpld cr1, rWORD3, rWORD4
bdnz L(bLoop)
/* We speculatively load bytes before we have tested the previous
@@ -437,542 +639,727 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne cr0,L(bLcr0)
- bne cr1,L(bLcr1)
+ bne cr7, L(bLcr7)
+ bne cr1, L(bLcr1)
b L(bx56)
.align 4
L(b2i):
- bne cr6,L(bLcr6)
- bne cr0,L(bLcr0)
+ bne cr6, L(bLcr6)
+ bne cr7, L(bLcr7)
b L(bx34)
.align 4
L(b3i):
- bne cr1,L(bLcr1)
- bne cr6,L(bLcr6)
+ bne cr1, L(bLcr1)
+ bne cr6, L(bLcr6)
b L(bx12)
.align 4
-L(bLcr0):
- li rRTN,1
- bgtlr cr0
- li rRTN,-1
+L(bLcr7):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
blr
L(bLcr1):
- li rRTN,1
+ li rRTN, 1
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
L(bLcr6):
- li rRTN,1
+ li rRTN, 1
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
L(b13):
- bne cr0,L(bx12)
- bne cr1,L(bx34)
+ bne cr7, L(bx12)
+ bne cr1, L(bx34)
L(bx56):
- sub rRTN,rWORD5,rWORD6
+ sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne cr0,L(bx12)
+ bne cr7, L(bx12)
L(bx34):
- sub rRTN,rWORD3,rWORD4
+ sub rRTN, rWORD3, rWORD4
blr
L(b11):
L(bx12):
- sub rRTN,rWORD1,rWORD2
+ sub rRTN, rWORD1, rWORD2
blr
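
   [Editorial note, not part of the patch: the byte-granularity path from
   L(bytealigned) through L(bx12) computes the same result as the plain C
   loop below; the assembly merely unrolls it three ways across cr7, cr1
   and cr6 and issues each pair of loads before the previous compare is
   consumed, to hide load latency.  This is a reference model, not the
   implementation.]

   #include <stddef.h>

   static int memcmp_bytes (const unsigned char *s1, const unsigned char *s2,
                            size_t n)
   {
     for (size_t i = 0; i < n; i++)
       if (s1[i] != s2[i])
         /* Matches the sub rRTN, rWORDx, rWORDy returns above: the result
            is the difference of the first differing unsigned bytes.  */
         return (int) s1[i] - (int) s2[i];
     return 0;
   }
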
.align 4
-L(zeroLengthReturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
L(zeroLength):
- li rRTN,0
+ li rRTN, 0
blr
.align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word
+ of r12 to 0. If r12 == 0 then rStr1 is double word
aligned and can perform the DWunaligned loop.
Otherwise we know that rSTR1 is not already DW aligned yet.
So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
+ versioning for the first DW. This ensures that the loop count is
correct and the first DW (shifted) is in the expected register pair. */
-#define rSHL r29 /* Unaligned shift left count. */
-#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rSHL r29 /* Unaligned shift left count. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
L(unaligned):
- std r29,-24(r1)
- cfi_offset(r29,-24)
- clrldi rSHL,rSTR2,61
- beq cr6,L(duzeroLength)
- std r28,-32(r1)
- cfi_offset(r28,-32)
- beq cr5,L(DWunaligned)
- std r27,-40(r1)
- cfi_offset(r27,-40)
-/* Adjust the logical start of rSTR2 ro compensate for the extra bits
+ std rSHL, -24(r1)
+ cfi_offset(rSHL, -24)
+ clrldi rSHL, rSTR2, 61
+ beq cr6, L(duzeroLength)
+ std rSHR, -32(r1)
+ cfi_offset(rSHR, -32)
+ beq cr5, L(DWunaligned)
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 DW. */
- sub r27,rSTR2,rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the DW before that DW that contains
the actual start of rSTR2. */
- clrrdi rSTR2,rSTR2,3
- std r26,-48(r1)
- cfi_offset(r26,-48)
+ clrrdi rSTR2, rSTR2, 3
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (DW aligned) start of rSTR1. */
- clrldi rSHL,r27,61
- clrrdi rSTR1,rSTR1,3
- std r25,-56(r1)
- cfi_offset(r25,-56)
- sldi rSHL,rSHL,3
- cmpld cr5,r27,rSTR2
- add rN,rN,rBITDIF
- sldi r11,rBITDIF,3
- std r24,-64(r1)
- cfi_offset(r24,-64)
- subfic rSHR,rSHL,64
- srdi rTMP,rN,5 /* Divide by 32 */
- andi. rBITDIF,rN,24 /* Get the DW remainder */
+ clrldi rSHL, rWORD8_SHIFT, 61
+ clrrdi rSTR1, rSTR1, 3
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
+ sldi rSHL, rSHL, 3
+ cmpld cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
+ subfic rSHR, rSHL, 64
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a DW where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
- li rWORD8,0
- blt cr5,L(dus0)
- ld rWORD8,0(rSTR2)
- la rSTR2,8(rSTR2)
- sld rWORD8,rWORD8,rSHL
+ li rWORD8, 0
+ blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD8, 0(rSTR2)
+ addi rSTR2, rSTR2, 8
+#endif
+ sld rWORD8, rWORD8, rSHL
L(dus0):
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- srd rG,rWORD2,rSHR
- clrldi rN,rN,61
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+#endif
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ srd r12, rWORD2, rSHR
+ clrldi rN, rN, 61
beq L(duPs4)
- mtctr rTMP
- or rWORD8,rG,rWORD8
- bgt cr1,L(duPs3)
- beq cr1,L(duPs2)
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
/* Remainder is 8 */
.align 4
L(dusP1):
- sld rB,rWORD2,rSHL
- sld rWORD7,rWORD1,r11
- sld rWORD8,rWORD8,r11
- bge cr7,L(duP1e)
+ sld rWORD8_SHIFT, rWORD2, rSHL
+ sld rWORD7, rWORD1, rWORD6
+ sld rWORD8, rWORD8, rWORD6
+ bge cr7, L(duP1e)
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
how we handle the remaining bytes. */
- cmpld cr5,rWORD7,rWORD8
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+ cmpld cr5, rWORD7, rWORD8
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
.align 4
L(duPs2):
- sld rH,rWORD2,rSHL
- sld rWORD5,rWORD1,r11
- sld rWORD6,rWORD8,r11
+ sld rWORD6_SHIFT, rWORD2, rSHL
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 24 */
.align 4
L(duPs3):
- sld rF,rWORD2,rSHL
- sld rWORD3,rWORD1,r11
- sld rWORD4,rWORD8,r11
+ sld rWORD4_SHIFT, rWORD2, rSHL
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(duPs4):
- mtctr rTMP
- or rWORD8,rG,rWORD8
- sld rD,rWORD2,rSHL
- sld rWORD1,rWORD1,r11
- sld rWORD2,rWORD8,r11
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is double word aligned and the
compare length is at least 8 bytes. */
.align 4
L(DWunaligned):
- std r27,-40(r1)
- cfi_offset(r27,-40)
- clrrdi rSTR2,rSTR2,3
- std r26,-48(r1)
- cfi_offset(r26,-48)
- srdi rTMP,rN,5 /* Divide by 32 */
- std r25,-56(r1)
- cfi_offset(r25,-56)
- andi. rBITDIF,rN,24 /* Get the DW remainder */
- std r24,-64(r1)
- cfi_offset(r24,-64)
- sldi rSHL,rSHL,3
- ld rWORD6,0(rSTR2)
- ldu rWORD8,8(rSTR2)
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- clrldi rN,rN,61
- subfic rSHR,rSHL,64
- sld rH,rWORD6,rSHL
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
+ clrrdi rSTR2, rSTR2, 3
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
+ srdi r0, rN, 5 /* Divide by 32 */
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
+ andi. r12, rN, 24 /* Get the DW remainder */
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
+ sldi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD6, 0(rSTR2)
+ ldu rWORD8, 8(rSTR2)
+#endif
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
+ subfic rSHR, rSHL, 64
+ sld rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP
- bgt cr1,L(duP3)
- beq cr1,L(duP2)
+ mtctr r0
+ bgt cr1, L(duP3)
+ beq cr1, L(duP2)
/* Remainder is 8 */
.align 4
L(duP1):
- srd rG,rWORD8,rSHR
- ld rWORD7,0(rSTR1)
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP1x)
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD7, 0(rSTR1)
+#endif
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP1x)
L(duP1e):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- bne cr5,L(duLcr5)
- or rWORD4,rC,rD
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- bne cr0,L(duLcr0)
- or rWORD6,rE,rF
- cmpld cr6,rWORD5,rWORD6
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ bne cr5, L(duLcr5)
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
+ cmpld cr6, rWORD5, rWORD6
b L(duLoop3)
.align 4
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
how we handle the remaining bytes. */
L(duP1x):
- cmpld cr5,rWORD7,rWORD8
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+ cmpld cr5, rWORD7, rWORD8
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
.align 4
L(duP2):
- srd rE,rWORD8,rSHR
- ld rWORD5,0(rSTR1)
- or rWORD6,rE,rH
- sld rH,rWORD8,rSHL
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD5, 0(rSTR1)
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ sld rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
- ld rWORD7,8(rSTR1)
- ld rWORD8,8(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP2x)
- ld rWORD1,16(rSTR1)
- ld rWORD2,16(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
- ld rWORD3,24(rSTR1)
- ld rWORD4,24(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- bne cr5,L(duLcr5)
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- or rWORD4,rC,rD
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- cmpld cr1,rWORD3,rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 24(rSTR1)
+ ld rWORD4, 24(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ bne cr5, L(duLcr5)
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ cmpld cr1, rWORD3, rWORD4
b L(duLoop2)
.align 4
L(duP2x):
- cmpld cr5,rWORD7,rWORD8
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr6,L(duLcr6)
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+ cmpld cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr6, L(duLcr6)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 24 */
.align 4
L(duP3):
- srd rC,rWORD8,rSHR
- ld rWORD3,0(rSTR1)
- sld rF,rWORD8,rSHL
- or rWORD4,rC,rH
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD3, 0(rSTR1)
+#endif
+ sld rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
- ld rWORD5,8(rSTR1)
- ld rWORD6,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- or rWORD6,rE,rF
- ld rWORD7,16(rSTR1)
- ld rWORD8,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP3x)
- ld rWORD1,24(rSTR1)
- ld rWORD2,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(duLoop1)
.align 4
L(duP3x):
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- bne cr1,L(duLcr1)
- cmpld cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(duP4):
- mtctr rTMP
- srd rA,rWORD8,rSHR
- ld rWORD1,0(rSTR1)
- sld rD,rWORD8,rSHL
- or rWORD2,rA,rH
+ mtctr r0
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD1, 0(rSTR1)
+#endif
+ sld rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
- ld rWORD3,8(rSTR1)
- ld rWORD4,8(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- or rWORD4,rC,rD
- ld rWORD5,16(rSTR1)
- ld rWORD6,16(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- bne cr0,L(duLcr0)
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- or rWORD6,rE,rF
- ldu rWORD7,24(rSTR1)
- ldu rWORD8,24(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- cmpld cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 16(rSTR1)
+ ld rWORD6, 16(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 24(rSTR1)
+ ldu rWORD8, 24(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ cmpld cr5, rWORD7, rWORD8
bdz L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(duLoop):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- or rWORD4,rC,rD
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr0,L(duLcr0)
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- or rWORD6,rE,rF
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
- ldu rWORD7,32(rSTR1)
- ldu rWORD8,32(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- bne- cr1,L(duLcr1)
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ bne cr1, L(duLcr1)
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz L(duLoop)
L(duL4):
- bne cr1,L(duLcr1)
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- cmpld cr5,rWORD7,rWORD8
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ cmpld cr5, rWORD7, rWORD8
L(du44):
- bne cr0,L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
- bne cr1,L(duLcr1)
+ bne cr1, L(duLcr1)
L(du24):
- bne cr6,L(duLcr6)
+ bne cr6, L(duLcr6)
L(du14):
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 7 bytes to compare. We use
shift right double to eliminate bits beyond the compare length.
- This allows the use of double word subtract to compute the final
- result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
- cmpld cr7,rN,rSHR
+ rWORD8_SHIFT). */
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
.align 4
L(dutrim):
- ld rWORD1,8(rSTR1)
- ld rWORD8,-8(r1)
- subfic rN,rN,64 /* Shift count is 64 - (rN * 8). */
- or rWORD2,rA,rB
- ld rWORD7,-16(r1)
- ld r29,-24(r1)
- srd rWORD1,rWORD1,rN
- srd rWORD2,rWORD2,rN
- ld r28,-32(r1)
- ld r27,-40(r1)
- li rRTN,0
- cmpld cr0,rWORD1,rWORD2
- ld r26,-48(r1)
- ld r25,-56(r1)
- beq cr0,L(dureturn24)
- li rRTN,1
- ld r24,-64(r1)
- bgtlr cr0
- li rRTN,-1
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+#else
+ ld rWORD1, 8(rSTR1)
+#endif
+ ld rWORD8, -8(r1)
+ subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */
+ or rWORD2, r0, rWORD8_SHIFT
+ ld rWORD7, -16(r1)
+ ld rSHL, -24(r1)
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ ld rSHR, -32(r1)
+ ld rWORD8_SHIFT, -40(r1)
+ li rRTN, 0
+ cmpld cr7, rWORD1, rWORD2
+ ld rWORD2_SHIFT, -48(r1)
+ ld rWORD4_SHIFT, -56(r1)
+ beq cr7, L(dureturn24)
+ li rRTN, 1
+ ld rWORD6_SHIFT, -64(r1)
+ bgtlr cr7
+ li rRTN, -1
blr
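
   [Editorial note, not part of the patch: the srd/sld/or dance in the
   unaligned path reduces to the helper below, a sketch assuming rSTR2 is
   genuinely misaligned so rSHL is a multiple of 8 between 8 and 56, and
   that the words are in big-endian byte order (ld on BE, ldbrx on LE);
   names are invented.]

   #include <stdint.h>

   static uint64_t merge_unaligned (uint64_t prev_aligned, uint64_t next_aligned,
                                    unsigned shl)   /* 8 * (addr & 7), 8..56 */
   {
     unsigned shr = 64 - shl;                 /* subfic rSHR, rSHL, 64 */
     /* High part comes from the earlier aligned word shifted left
        (sld ..., rSHL), low part from the next aligned word shifted right
        (srd ..., rSHR); the or rebuilds one doubleword of rSTR2 that lines
        up with the aligned rSTR1 word it is compared against.  */
     return (prev_aligned << shl) | (next_aligned >> shr);
   }
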
.align 4
-L(duLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr0,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+L(duLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr7, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr1,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr1, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr6,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr6, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr5,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr5, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dureturn29):
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
L(dureturn27):
- ld r27,-40(r1)
+ ld rWORD8_SHIFT, -40(r1)
L(dureturn26):
- ld r26,-48(r1)
+ ld rWORD2_SHIFT, -48(r1)
L(dureturn25):
- ld r25,-56(r1)
+ ld rWORD4_SHIFT, -56(r1)
L(dureturn24):
- ld r24,-64(r1)
+ ld rWORD6_SHIFT, -64(r1)
blr
L(duzeroLength):
- li rRTN,0
+ li rRTN, 0
blr
END (memcmp)
libc_hidden_builtin_def (memcmp)
-weak_alias (memcmp,bcmp)
+weak_alias (memcmp, bcmp)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
index 800a9f1bb..e8df75f59 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -23,418 +23,361 @@
/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
Returns 'dst'. */
+#define dst 11 /* Use r11 so r3 kept unchanged. */
+#define src 4
+#define cnt 5
+
.machine power7
EALIGN (memcpy, 5, 0)
CALL_MCOUNT 3
- cmpldi cr1,5,31
+ cmpldi cr1,cnt,31
neg 0,3
- std 3,-16(1)
- std 31,-8(1)
- cfi_offset(31,-8)
ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move
code. */
- andi. 11,3,7 /* Check alignment of DST. */
-
-
- clrldi 10,4,61 /* Check alignment of SRC. */
- cmpld cr6,10,11 /* SRC and DST alignments match? */
- mr 12,4
- mr 31,5
+#ifdef __LITTLE_ENDIAN__
+/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
+ or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
+ loop is only used for quadword aligned copies. */
+ andi. 10,3,15
+ clrldi 11,4,60
+#else
+ andi. 10,3,7 /* Check alignment of DST. */
+ clrldi 11,4,61 /* Check alignment of SRC. */
+#endif
+ cmpld cr6,10,11 /* SRC and DST alignments match? */
+
+ mr dst,3
bne cr6,L(copy_GE_32_unaligned)
+ beq L(aligned_copy)
- srdi 9,5,3 /* Number of full quadwords remaining. */
-
- beq L(copy_GE_32_aligned_cont)
-
- clrldi 0,0,61
- mtcrf 0x01,0
- subf 31,0,5
-
- /* Get the SRC aligned to 8 bytes. */
-
-1: bf 31,2f
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-2: bf 30,4f
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-4: bf 29,0f
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-0:
- clrldi 10,12,61 /* Check alignment of SRC again. */
- srdi 9,31,3 /* Number of full doublewords remaining. */
-
-L(copy_GE_32_aligned_cont):
-
- clrldi 11,31,61
- mtcrf 0x01,9
-
- srdi 8,31,5
- cmpldi cr1,9,4
- cmpldi cr6,11,0
- mr 11,12
-
- /* Copy 1~3 doublewords so the main loop starts
- at a multiple of 32 bytes. */
+ mtocrf 0x01,0
+#ifdef __LITTLE_ENDIAN__
+ clrldi 0,0,60
+#else
+ clrldi 0,0,61
+#endif
- bf 30,1f
- ld 6,0(12)
- ld 7,8(12)
- addi 11,12,16
- mtctr 8
- std 6,0(3)
- std 7,8(3)
- addi 10,3,16
- bf 31,4f
- ld 0,16(12)
- std 0,16(3)
- blt cr1,3f
- addi 11,12,24
- addi 10,3,24
- b 4f
-
- .align 4
-1: /* Copy 1 doubleword and set the counter. */
- mr 10,3
- mtctr 8
- bf 31,4f
- ld 6,0(12)
- addi 11,12,8
- std 6,0(3)
- addi 10,3,8
-
-L(aligned_copy):
- /* Main aligned copy loop. Copies up to 128-bytes at a time. */
- .align 4
+/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */
+1:
+ bf 31,2f
+ lbz 6,0(src)
+ addi src,src,1
+ stb 6,0(dst)
+ addi dst,dst,1
+2:
+ bf 30,4f
+ lhz 6,0(src)
+ addi src,src,2
+ sth 6,0(dst)
+ addi dst,dst,2
4:
- /* check for any 32-byte or 64-byte lumps that are outside of a
- nice 128-byte range. R8 contains the number of 32-byte
- lumps, so drop this into the CR, and use the SO/EQ bits to help
- handle the 32- or 64- byte lumps. Then handle the rest with an
- unrolled 128-bytes-at-a-time copy loop. */
- mtocrf 1,8
- li 6,16 # 16() index
- li 7,32 # 32() index
- li 8,48 # 48() index
-
-L(aligned_32byte):
- /* if the SO bit (indicating a 32-byte lump) is not set, move along. */
- bns cr7,L(aligned_64byte)
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- addi 11,11,32
- stxvd2x 6,0,10
- stxvd2x 7,10,6
- addi 10,10,32
-
-L(aligned_64byte):
- /* if the EQ bit (indicating a 64-byte lump) is not set, move along. */
- bne cr7,L(aligned_128setup)
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- lxvd2x 8,11,7
- lxvd2x 9,11,8
- addi 11,11,64
- stxvd2x 6,0,10
- stxvd2x 7,10,6
- stxvd2x 8,10,7
- stxvd2x 9,10,8
- addi 10,10,64
-
-L(aligned_128setup):
- /* Set up for the 128-byte at a time copy loop. */
- srdi 8,31,7
- cmpdi 8,0 # Any 4x lumps left?
- beq 3f # if not, move along.
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- mtctr 8 # otherwise, load the ctr and begin.
- li 8,48 # 48() index
+ bf 29,8f
+ lwz 6,0(src)
+ addi src,src,4
+ stw 6,0(dst)
+ addi dst,dst,4
+8:
+#ifdef __LITTLE_ENDIAN__
+ bf 28,16f
+ ld 6,0(src)
+ addi src,src,8
+ std 6,0(dst)
+ addi dst,dst,8
+16:
+#endif
+ subf cnt,0,cnt
+
+/* Main aligned copy loop. Copies 128 bytes at a time. */
+L(aligned_copy):
+ li 6,16
+ li 7,32
+ li 8,48
+ mtocrf 0x02,cnt
+ srdi 12,cnt,7
+ cmpdi 12,0
+ beq L(aligned_tail)
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ mtctr 12
b L(aligned_128loop)
+ .align 4
L(aligned_128head):
/* for the 2nd + iteration of this loop. */
- lxvd2x 6,0,11
- lxvd2x 7,11,6
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
L(aligned_128loop):
- lxvd2x 8,11,7
- lxvd2x 9,11,8
- stxvd2x 6,0,10
- addi 11,11,64
- stxvd2x 7,10,6
- stxvd2x 8,10,7
- stxvd2x 9,10,8
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- addi 10,10,64
- lxvd2x 8,11,7
- lxvd2x 9,11,8
- addi 11,11,64
- stxvd2x 6,0,10
- stxvd2x 7,10,6
- stxvd2x 8,10,7
- stxvd2x 9,10,8
- addi 10,10,64
+ lxvd2x 8,src,7
+ lxvd2x 9,src,8
+ stxvd2x 6,0,dst
+ addi src,src,64
+ stxvd2x 7,dst,6
+ stxvd2x 8,dst,7
+ stxvd2x 9,dst,8
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ addi dst,dst,64
+ lxvd2x 8,src,7
+ lxvd2x 9,src,8
+ addi src,src,64
+ stxvd2x 6,0,dst
+ stxvd2x 7,dst,6
+ stxvd2x 8,dst,7
+ stxvd2x 9,dst,8
+ addi dst,dst,64
bdnz L(aligned_128head)
-3:
- /* Check for tail bytes. */
- rldicr 0,31,0,60
- mtcrf 0x01,31
- beq cr6,0f
-
-.L9:
- add 3,3,0
- add 12,12,0
-
- /* At this point we have a tail of 0-7 bytes and we know that the
- destination is doubleword-aligned. */
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2 bytes. */
- bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return original DST pointer. */
- ld 31,-8(1)
- ld 3,-16(1)
+L(aligned_tail):
+ mtocrf 0x01,cnt
+ bf 25,32f
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ lxvd2x 8,src,7
+ lxvd2x 9,src,8
+ addi src,src,64
+ stxvd2x 6,0,dst
+ stxvd2x 7,dst,6
+ stxvd2x 8,dst,7
+ stxvd2x 9,dst,8
+ addi dst,dst,64
+32:
+ bf 26,16f
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ addi src,src,32
+ stxvd2x 6,0,dst
+ stxvd2x 7,dst,6
+ addi dst,dst,32
+16:
+ bf 27,8f
+ lxvd2x 6,0,src
+ addi src,src,16
+ stxvd2x 6,0,dst
+ addi dst,dst,16
+8:
+ bf 28,4f
+ ld 6,0(src)
+ addi src,src,8
+ std 6,0(dst)
+ addi dst,dst,8
+4: /* Copies 4~7 bytes. */
+ bf 29,L(tail2)
+ lwz 6,0(src)
+ stw 6,0(dst)
+ bf 30,L(tail5)
+ lhz 7,4(src)
+ sth 7,4(dst)
+ bflr 31
+ lbz 8,6(src)
+ stb 8,6(dst)
+ /* Return original DST pointer. */
blr
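
   [Editorial note, not part of the patch: as a rough outline of the
   aligned path above (names invented, the 1/2/4/8-byte alignment prologue
   omitted, and memcpy used inside the model purely for illustration), the
   code streams 128-byte blocks and then lets the low bits of the residual
   count, the same bits mtocrf places in the CR and bf tests, select the
   64/32/16/8/4/2/1-byte tail pieces.]

   #include <stddef.h>
   #include <string.h>

   static void *memcpy_aligned_outline (void *dstp, const void *srcp, size_t cnt)
   {
     unsigned char *d = dstp;
     const unsigned char *s = srcp;

     /* The real code first copies 1/2/4 (and 8 on LE) bytes so that d is
        8- or 16-byte aligned; that prologue is skipped in this model.  */

     for (; cnt >= 128; cnt -= 128, s += 128, d += 128)
       memcpy (d, s, 128);            /* eight lxvd2x/stxvd2x pairs per pass */

     for (size_t piece = 64; piece >= 1; piece /= 2)
       if (cnt & piece)               /* one CR bit per power of two */
         {
           memcpy (d, s, piece);
           s += piece;
           d += piece;
         }
     return dstp;                     /* original DST pointer */
   }
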
- /* Handle copies of 0~31 bytes. */
- .align 4
+
+/* Handle copies of 0~31 bytes. */
+ .align 4
L(copy_LT_32):
- cmpldi cr6,5,8
- mr 12,4
- mtcrf 0x01,5
+ mr dst,3
+ cmpldi cr6,cnt,8
+ mtocrf 0x01,cnt
ble cr6,L(copy_LE_8)
/* At least 9 bytes to go. */
neg 8,4
- clrrdi 11,4,2
- andi. 0,8,3
- cmpldi cr1,5,16
- mr 10,5
+ andi. 0,8,3
+ cmpldi cr1,cnt,16
beq L(copy_LT_32_aligned)
- /* Force 4-bytes alignment for SRC. */
- mtocrf 0x01,0
- subf 10,0,5
-2: bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: bf 31,L(end_4bytes_alignment)
-
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-
- .align 4
+ /* Force 4-byte alignment for SRC. */
+ mtocrf 0x01,0
+ subf cnt,0,cnt
+2:
+ bf 30,1f
+ lhz 6,0(src)
+ addi src,src,2
+ sth 6,0(dst)
+ addi dst,dst,2
+1:
+ bf 31,L(end_4bytes_alignment)
+ lbz 6,0(src)
+ addi src,src,1
+ stb 6,0(dst)
+ addi dst,dst,1
+
+ .align 4
L(end_4bytes_alignment):
- cmpldi cr1,10,16
- mtcrf 0x01,10
+ cmpldi cr1,cnt,16
+ mtocrf 0x01,cnt
L(copy_LT_32_aligned):
/* At least 6 bytes to go, and SRC is word-aligned. */
blt cr1,8f
/* Copy 16 bytes. */
- lwz 6,0(12)
- lwz 7,4(12)
- stw 6,0(3)
- lwz 8,8(12)
- stw 7,4(3)
- lwz 6,12(12)
- addi 12,12,16
- stw 8,8(3)
- stw 6,12(3)
- addi 3,3,16
+ lwz 6,0(src)
+ lwz 7,4(src)
+ stw 6,0(dst)
+ lwz 8,8(src)
+ stw 7,4(dst)
+ lwz 6,12(src)
+ addi src,src,16
+ stw 8,8(dst)
+ stw 6,12(dst)
+ addi dst,dst,16
8: /* Copy 8 bytes. */
- bf 28,4f
+ bf 28,L(tail4)
+ lwz 6,0(src)
+ lwz 7,4(src)
+ addi src,src,8
+ stw 6,0(dst)
+ stw 7,4(dst)
+ addi dst,dst,8
+
+ .align 4
+/* Copies 4~7 bytes. */
+L(tail4):
+ bf 29,L(tail2)
+ lwz 6,0(src)
+ stw 6,0(dst)
+ bf 30,L(tail5)
+ lhz 7,4(src)
+ sth 7,4(dst)
+ bflr 31
+ lbz 8,6(src)
+ stb 8,6(dst)
+ /* Return original DST pointer. */
+ blr
- lwz 6,0(12)
- lwz 7,4(12)
- addi 12,12,8
- stw 6,0(3)
- stw 7,4(3)
- addi 3,3,8
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2-3 bytes. */
+ .align 4
+/* Copies 2~3 bytes. */
+L(tail2):
bf 30,1f
-
- lhz 6,0(12)
- sth 6,0(3)
- bf 31,0f
- lbz 7,2(12)
- stb 7,2(3)
- ld 3,-16(1)
+ lhz 6,0(src)
+ sth 6,0(dst)
+ bflr 31
+ lbz 7,2(src)
+ stb 7,2(dst)
blr
- .align 4
-1: /* Copy 1 byte. */
- bf 31,0f
+ .align 4
+L(tail5):
+ bflr 31
+ lbz 6,4(src)
+ stb 6,4(dst)
+ blr
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return original DST pointer. */
- ld 3,-16(1)
+ .align 4
+1:
+ bflr 31
+ lbz 6,0(src)
+ stb 6,0(dst)
+ /* Return original DST pointer. */
blr
- /* Handles copies of 0~8 bytes. */
- .align 4
+
+/* Handles copies of 0~8 bytes. */
+ .align 4
L(copy_LE_8):
- bne cr6,4f
+ bne cr6,L(tail4)
/* Though we could've used ld/std here, they are still
slow for unaligned cases. */
- lwz 6,0(4)
- lwz 7,4(4)
- stw 6,0(3)
- stw 7,4(3)
- ld 3,-16(1) /* Return original DST pointers. */
+ lwz 6,0(src)
+ lwz 7,4(src)
+ stw 6,0(dst)
+ stw 7,4(dst)
blr
- .align 4
-4: /* Copies 4~7 bytes. */
- bf 29,2b
-
- lwz 6,0(4)
- stw 6,0(3)
- bf 30,5f
- lhz 7,4(4)
- sth 7,4(3)
- bf 31,0f
- lbz 8,6(4)
- stb 8,6(3)
- ld 3,-16(1)
- blr
-
- .align 4
-5: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,4(4)
- stb 6,4(3)
-
-0: /* Return original DST pointer. */
- ld 3,-16(1)
- blr
- /* Handle copies of 32+ bytes where DST is aligned (to quadword) but
- SRC is not. Use aligned quadword loads from SRC, shifted to realign
- the data, allowing for aligned DST stores. */
- .align 4
+/* Handle copies of 32+ bytes where DST is aligned (to quadword) but
+ SRC is not. Use aligned quadword loads from SRC, shifted to realign
+ the data, allowing for aligned DST stores. */
+ .align 4
L(copy_GE_32_unaligned):
- clrldi 0,0,60 /* Number of bytes until the 1st
- quadword. */
- andi. 11,3,15 /* Check alignment of DST (against
- quadwords). */
- srdi 9,5,4 /* Number of full quadwords remaining. */
+ clrldi 0,0,60 /* Number of bytes until the 1st dst quadword. */
+#ifndef __LITTLE_ENDIAN__
+ andi. 10,3,15 /* Check alignment of DST (against quadwords). */
+#endif
+ srdi 9,cnt,4 /* Number of full quadwords remaining. */
beq L(copy_GE_32_unaligned_cont)
- /* SRC is not quadword aligned, get it aligned. */
+ /* DST is not quadword aligned, get it aligned. */
- mtcrf 0x01,0
- subf 31,0,5
+ mtocrf 0x01,0
+ subf cnt,0,cnt
/* Vector instructions work best when proper alignment (16-bytes)
is present. Move 0~15 bytes as needed to get DST quadword-aligned. */
-1: /* Copy 1 byte. */
+1:
bf 31,2f
-
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-2: /* Copy 2 bytes. */
+ lbz 6,0(src)
+ addi src,src,1
+ stb 6,0(dst)
+ addi dst,dst,1
+2:
bf 30,4f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-4: /* Copy 4 bytes. */
+ lhz 6,0(src)
+ addi src,src,2
+ sth 6,0(dst)
+ addi dst,dst,2
+4:
bf 29,8f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-8: /* Copy 8 bytes. */
+ lwz 6,0(src)
+ addi src,src,4
+ stw 6,0(dst)
+ addi dst,dst,4
+8:
bf 28,0f
-
- ld 6,0(12)
- addi 12,12,8
- std 6,0(3)
- addi 3,3,8
+ ld 6,0(src)
+ addi src,src,8
+ std 6,0(dst)
+ addi dst,dst,8
0:
- clrldi 10,12,60 /* Check alignment of SRC. */
- srdi 9,31,4 /* Number of full quadwords remaining. */
+ srdi 9,cnt,4 /* Number of full quadwords remaining. */
/* The proper alignment is present, it is OK to copy the bytes now. */
L(copy_GE_32_unaligned_cont):
/* Setup two indexes to speed up the indexed vector operations. */
- clrldi 11,31,60
- li 6,16 /* Index for 16-bytes offsets. */
+ clrldi 10,cnt,60
+ li 6,16 /* Index for 16-bytes offsets. */
li 7,32 /* Index for 32-bytes offsets. */
- cmpldi cr1,11,0
- srdi 8,31,5 /* Setup the loop counter. */
- mr 10,3
- mr 11,12
- mtcrf 0x01,9
- cmpldi cr6,9,1
- lvsl 5,0,12
- lvx 3,0,12
- bf 31,L(setup_unaligned_loop)
-
- /* Copy another 16 bytes to align to 32-bytes due to the loop . */
- lvx 4,12,6
- vperm 6,3,4,5
- addi 11,12,16
- addi 10,3,16
- stvx 6,0,3
+ cmpldi cr1,10,0
+ srdi 8,cnt,5 /* Setup the loop counter. */
+ mtocrf 0x01,9
+ cmpldi cr6,9,1
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,src
+#else
+ lvsl 5,0,src
+#endif
+ lvx 3,0,src
+ li 0,0
+ bf 31,L(setup_unaligned_loop)
+
+ /* Copy another 16 bytes to align to 32-bytes due to the loop. */
+ lvx 4,src,6
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
+ addi src,src,16
+ stvx 6,0,dst
+ addi dst,dst,16
vor 3,4,4
+ clrrdi 0,src,60
L(setup_unaligned_loop):
- mtctr 8
- ble cr6,L(end_unaligned_loop)
+ mtctr 8
+ ble cr6,L(end_unaligned_loop)
/* Copy 32 bytes at a time using vector instructions. */
- .align 4
+ .align 4
L(unaligned_loop):
/* Note: vr6/vr10 may contain data that was already copied,
@@ -442,62 +385,55 @@ L(unaligned_loop):
some portions again. This is faster than having unaligned
vector instructions though. */
- lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
- lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
- addi 11,11,32
- stvx 6,0,10
- stvx 10,10,6
- addi 10,10,32
-
+ lvx 4,src,6
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
+ lvx 3,src,7
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
+ addi src,src,32
+ stvx 6,0,dst
+ stvx 10,dst,6
+ addi dst,dst,32
bdnz L(unaligned_loop)
- .align 4
+ clrrdi 0,src,60
+
+ .align 4
L(end_unaligned_loop):
/* Check for tail bytes. */
- rldicr 0,31,0,59
- mtcrf 0x01,31
- beq cr1,0f
+ mtocrf 0x01,cnt
+ beqlr cr1
- add 3,3,0
- add 12,12,0
+ add src,src,0
/* We have 1~15 tail bytes to copy, and DST is quadword aligned. */
-8: /* Copy 8 bytes. */
+ /* Copy 8 bytes. */
bf 28,4f
-
- lwz 6,0(12)
- lwz 7,4(12)
- addi 12,12,8
- stw 6,0(3)
- stw 7,4(3)
- addi 3,3,8
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2~3 bytes. */
- bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return original DST pointer. */
- ld 31,-8(1)
- ld 3,-16(1)
+ lwz 6,0(src)
+ lwz 7,4(src)
+ addi src,src,8
+ stw 6,0(dst)
+ stw 7,4(dst)
+ addi dst,dst,8
+4: /* Copy 4~7 bytes. */
+ bf 29,L(tail2)
+ lwz 6,0(src)
+ stw 6,0(dst)
+ bf 30,L(tail5)
+ lhz 7,4(src)
+ sth 7,4(dst)
+ bflr 31
+ lbz 8,6(src)
+ stb 8,6(dst)
+ /* Return original DST pointer. */
blr
END_GEN_TB (memcpy,TB_TOCLESS)
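
The lvsr/lvsl choice and the swapped vperm operands in the little-endian paths above exist because the permute control built by lvsl indexes bytes in big-endian order; using lvsr and reversing the vperm inputs produces the same "merge two aligned loads" result on LE. Below is a hedged C sketch of that shift-and-merge idea, using aligned 64-bit loads in place of the VMX registers; the helper name merge_copy64 is invented for the sketch and a little-endian host is assumed for the shift directions.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy NWORDS*8 bytes from a possibly misaligned SRC to an aligned DST
   using only aligned 64-bit loads, merging neighbouring words with
   shifts (the scalar analogue of lvx + vperm).  Little-endian host
   assumed.  */
static void
merge_copy64 (uint64_t *dst, const unsigned char *src, size_t nwords)
{
  size_t off = (uintptr_t) src & 7;     /* SRC misalignment in bytes */
  if (off == 0)
    {
      memcpy (dst, src, nwords * 8);    /* already aligned: nothing to merge */
      return;
    }
  const uint64_t *s = (const uint64_t *) (src - off);  /* aligned base */
  uint64_t prev = s[0];                 /* like the initial lvx 3,0,src */
  for (size_t i = 0; i < nwords; i++)
    {
      uint64_t next = s[i + 1];         /* like lvx 4,src,6 */
      /* vperm-style merge: upper bytes of PREV with lower bytes of NEXT. */
      dst[i] = (prev >> (8 * off)) | (next << (8 * (8 - off)));
      prev = next;                      /* like vor 3,4,4 */
    }
}

int
main (void)
{
  static uint64_t storage[9];           /* 8-byte aligned backing buffer */
  unsigned char *buf = (unsigned char *) storage;
  for (int i = 0; i < 72; i++)
    buf[i] = (unsigned char) i;
  uint64_t out[4];
  merge_copy64 (out, buf + 3, 4);       /* copy 32 bytes starting at buf+3 */
  printf ("%02x == %02x\n", (unsigned) (out[0] & 0xff), buf[3]);
  return 0;
}

The real loop streams 32 bytes per iteration with two vperm/stvx pairs, which is why the counter is set up from cnt >> 5 and an extra 16-byte copy is done first when the remaining length is an odd multiple of 16.
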
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
index f20be938d..b93ab7da5 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
@@ -365,13 +365,21 @@ L(copy_GE_32_unaligned_cont):
mr 11,12
mtcrf 0x01,9
cmpldi cr6,9,1
- lvsl 5,0,12
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
+ lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32-bytes due to the loop . */
lvx 4,12,6
- vperm 6,3,4,5
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
@@ -391,11 +399,17 @@ L(unaligned_loop):
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S b/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
index c49995210..a9e86cb19 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
@@ -23,118 +23,132 @@
.machine power7
ENTRY (__memrchr)
CALL_MCOUNT
- dcbt 0,r3
- mr r7,r3
- add r3,r7,r5 /* Calculate the last acceptable address. */
- cmpld cr7,r3,r7 /* Is the address equal or less than r3? */
+ add r7,r3,r5 /* Calculate the last acceptable address. */
+ neg r0,r7
+ addi r7,r7,-1
+ mr r10,r3
+ clrrdi r6,r7,7
+ li r9,3<<5
+ dcbt r9,r6,16 /* Stream hint, decreasing addresses. */
/* Replicate BYTE to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
- bge cr7,L(proceed)
-
- li r3,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
li r6,-8
- addi r9,r3,-1
- clrrdi r8,r9,3
- addi r8,r8,8
- neg r0,r3
+ li r9,-1
rlwinm r0,r0,3,26,28 /* Calculate padding. */
-
+ clrrdi r8,r7,3
+ srd r9,r9,r0
cmpldi r5,32
+ clrrdi r0,r10,3
ble L(small_range)
- ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- sld r10,r10,r0
- srd r10,r10,r0
- cmpldi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8 /* Load reversed doubleword from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmpldi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,-8
- cmpld cr6,r9,r7
- ble cr6,L(null)
-
mtcrf 0x01,r8
- /* Are we now aligned to a doubleword boundary? If so, skip to
+ /* Are we now aligned to a quadword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
- mr r8,r9
- bt 28,L(loop_setup)
+ bf 28,L(loop_setup)
/* Handle DWORD2 of pair. */
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,r8,r6
+#else
ldbrx r12,r8,r6
- cmpb r10,r12,r4
- cmpldi cr7,r10,0
- bne cr7,L(done)
-
- /* Are we done already. */
+#endif
addi r8,r8,-8
- cmpld cr6,r8,r7
- ble cr6,L(null)
+ cmpb r3,r12,r4
+ cmpldi cr7,r3,0
+ bne cr7,L(done)
L(loop_setup):
- li r0,-16
- sub r5,r8,r7
- srdi r9,r5,4 /* Number of loop iterations. */
+ /* The last dword we want to read in the loop below is the one
+ containing the first byte of the string, ie. the dword at
+ s & ~7, or r0. The first dword read is at r8 - 8, we
+ read 2 * cnt dwords, so the last dword read will be at
+ r8 - 8 - 16 * cnt + 8. Solving for cnt gives
+ cnt = (r8 - r0) / 16 */
+ sub r5,r8,r0
+ addi r8,r8,-8
+ srdi r9,r5,4 /* Number of loop iterations. */
mtctr r9 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since it's a
- small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE backwards in the string.
+ FIXME: Investigate whether 32 byte align helps with this
+ 9 instruction loop. */
+ .align 5
L(loop):
/* Load two doublewords, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
- ldbrx r12,r8,r6
- ldbrx r11,r8,r0
- addi r8,r8,-8
- cmpb r10,r12,r4
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+ ldx r11,r8,r6
+#else
+ ldbrx r12,0,r8
+ ldbrx r11,r8,r6
+#endif
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one doubleword. */
+ or r5,r9,r3 /* Merge everything in one doubleword. */
cmpldi cr7,r5,0
bne cr7,L(found)
- addi r8,r8,-8
+ addi r8,r8,-16
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. Just return
- the original range. */
- addi r8,r8,8
- cmpld cr6,r8,r7
- bgt cr6,L(loop_small)
- b L(null)
-
- /* OK, one (or both) of the words contains BYTE. Check
- the first word and decrement the address in case the first
- word really contains BYTE. */
+
+ /* We may have one more word to read. */
+ cmpld r8,r0
+ bnelr
+
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmpldi cr7,r3,0
+ bne cr7,L(done)
+ blr
+
.align 4
L(found):
- cmpldi cr6,r10,0
- addi r8,r8,8
+ /* OK, one (or both) of the dwords contains BYTE. Check
+ the first dword. */
+ cmpldi cr6,r3,0
bne cr6,L(done)
/* BYTE must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,-8
- /* r10 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as the BYTE in the original
+ /* r3 has the output of the cmpb instruction, that is, it contains
+ 0xff in the same position as BYTE in the original
word from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the
range. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
- srdi r6,r0,3 /* Convert leading zeroes to bytes. */
- addi r0,r6,1
+ cntlzd r9,r3 /* Count leading zeros before the match. */
+ cmpld r8,r0 /* Are we on the last word? */
+ srdi r6,r9,3 /* Convert leading zeros to bytes. */
+ addi r0,r6,-7
sub r3,r8,r0
- cmpld r3,r7
- blt L(null)
+ cmpld cr7,r3,r10
+ bnelr
+ bgelr cr7
+ li r3,0
blr
.align 4
@@ -148,29 +162,35 @@ L(small_range):
cmpldi r5,0
beq L(null)
- ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- sld r10,r10,r0
- srd r10,r10,r0
- cmpldi cr7,r10,0
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8 /* Load reversed doubleword from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmpldi cr7,r3,0
bne cr7,L(done)
/* Are we done already? */
+ cmpld r8,r0
addi r8,r8,-8
- cmpld r8,r7
- ble L(null)
- b L(loop_small)
+ beqlr
- .p2align 5
+ .align 5
L(loop_small):
- ldbrx r12,r8,r6
- cmpb r10,r12,r4
- cmpldi cr6,r10,0
- bne cr6,L(done)
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmpld r8,r0
+ cmpldi cr7,r3,0
+ bne cr7,L(done)
addi r8,r8,-8
- cmpld r8,r7
- ble L(null)
- b L(loop_small)
+ bne L(loop_small)
+ blr
END (__memrchr)
weak_alias (__memrchr, memrchr)
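
The core of the rewrite is the cmpb result mask: 0xff in every byte of the loaded doubleword that equals the replicated search byte, with ldbrx on big-endian versus a plain ldx on little-endian deciding how the tail turns that mask back into an address. A hedged C model of the mask and of picking the highest-address match follows; the helper names are invented and a little-endian host is assumed for the byte numbering.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t
repl8 (unsigned char c)                 /* the insrdi replication sequence */
{
  uint64_t v = c;
  v |= v << 8;
  v |= v << 16;
  v |= v << 32;
  return v;
}

static uint64_t
cmpb_model (uint64_t a, uint64_t b)     /* 0xff in each byte where a == b */
{
  uint64_t m = 0;
  for (int i = 0; i < 8; i++)
    if (((a >> (8 * i)) & 0xff) == ((b >> (8 * i)) & 0xff))
      m |= 0xffULL << (8 * i);
  return m;
}

/* Memory offset of the highest-address matching byte, or -1 if none:
   7 - (leading zero bytes), which is what the cntlzd/srdi/addi tail
   computes before subtracting from the doubleword address.  */
static int
last_match (uint64_t mask)
{
  if (mask == 0)
    return -1;
  return 7 - __builtin_clzll (mask) / 8;
}

int
main (void)
{
  uint64_t word;
  memcpy (&word, "abcabcxy", 8);        /* byte i of WORD = offset i (LE host) */
  printf ("last 'c' at offset %d\n",
          last_match (cmpb_model (word, repl8 ('c'))));   /* prints 5 */
  return 0;
}
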
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memset.S b/libc/sysdeps/powerpc/powerpc64/power7/memset.S
index b24cfa163..8b081e87c 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memset.S
@@ -32,8 +32,8 @@ L(_memset):
mr 10,3
/* Replicate byte to word. */
- rlwimi 4,4,8,16,23
- rlwimi 4,4,16,0,15
+ insrdi 4,4,8,48
+ insrdi 4,4,16,32
ble cr6,L(small) /* If length <= 8, use short copy code. */
neg 0,3
@@ -321,7 +321,7 @@ L(medium):
clrldi 0,0,62
beq L(medium_aligned)
- /* Force 4-bytes alignment for SRC. */
+ /* Force 4-bytes alignment for DST. */
mtocrf 0x01,0
subf 5,0,5
1: /* Copy 1 byte. */
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S b/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
index 50a33d8fa..547aed771 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
@@ -27,8 +27,8 @@ ENTRY (__rawmemchr)
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
/* Now r4 has a doubleword of c bytes. */
@@ -36,8 +36,13 @@ ENTRY (__rawmemchr)
rlwinm r6,r3,3,26,28 /* Calculate padding. */
ld r12,0(r8) /* Load doubleword from memory. */
cmpb r5,r12,r4 /* Compare each byte against c byte. */
+#ifdef __LITTLE_ENDIAN__
+ srd r5,r5,r6
+ sld r5,r5,r6
+#else
sld r5,r5,r6 /* Move left to discard ignored bits. */
srd r5,r5,r6 /* Bring the bits back as zeros. */
+#endif
cmpdi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
bne cr7,L(done)
@@ -91,8 +96,14 @@ L(loop):
doubleword from the string. Use that fact to find out what is
the position of the byte inside the string. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0 /* Count trailing zeros. */
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
+#endif
+ srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching char. */
blr
END (__rawmemchr)
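
POWER7 has no count-trailing-zeros instruction, so the little-endian paths build one from popcntd: (mask - 1) & ~mask sets exactly the bits below the lowest set bit, its population count is the trailing-zero count, and dividing by 8 gives the byte offset of the first match. A small self-check of that identity, purely illustrative:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static unsigned
trailing_zeros_via_popcount (uint64_t mask)
{
  /* (mask - 1) & ~mask has a 1 in exactly the bit positions below the
     lowest set bit of MASK, so its population count is the number of
     trailing zeros.  MASK must be nonzero, as it is after the bne.  */
  return __builtin_popcountll ((mask - 1) & ~mask);
}

int
main (void)
{
  for (int byte = 0; byte < 8; byte++)
    {
      uint64_t mask = 0xffULL << (8 * byte);   /* cmpb match in byte BYTE */
      unsigned tz = trailing_zeros_via_popcount (mask);
      assert (tz == (unsigned) __builtin_ctzll (mask));
      assert (tz / 8 == (unsigned) byte);      /* srdi r0,r0,3 */
    }
  puts ("trailing-zero identity holds");
  return 0;
}
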
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strchr.S b/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
index 3ffe7a188..4679a158f 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
@@ -35,8 +35,8 @@ ENTRY (strchr)
beq cr7,L(null_match)
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
/* Now r4 has a doubleword of c bytes and r0 has
@@ -47,11 +47,17 @@ ENTRY (strchr)
/* Move the doublewords left and right to discard the bits that are
not part of the string and bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ srd r11,r11,r6
+ sld r10,r10,r6
+ sld r11,r11,r6
+#else
sld r10,r10,r6
sld r11,r11,r6
srd r10,r10,r6
srd r11,r11,r6
+#endif
or r5,r10,r11 /* OR the results to speed things up. */
cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -108,15 +114,24 @@ L(loop):
mr r11,r7
addi r8,r8,8
- /* r5 has the output of the cmpb instruction, that is, it contains
+ /* r10/r11 have the output of the cmpb instructions, that is,
0xff in the same position as the c/null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done):
- cntlzd r4,r10 /* Count leading zeroes before c matches. */
- cntlzd r0,r11 /* Count leading zeroes before null matches. */
- cmpld cr7,r4,r0
+#ifdef __LITTLE_ENDIAN__
+ addi r3,r10,-1
+ andc r3,r3,r10
+ popcntd r0,r3
+ addi r4,r11,-1
+ andc r4,r4,r11
+ cmpld cr7,r3,r4
bgt cr7,L(no_match)
- srdi r0,r4,3 /* Convert leading zeroes to bytes. */
+#else
+ cntlzd r0,r10 /* Count leading zeros before c matches. */
+ cmpld cr7,r11,r10
+ bgt cr7,L(no_match)
+#endif
+ srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching c byte
or null in case c was not found. */
blr
@@ -135,9 +150,13 @@ L(null_match):
/* Move the doublewords left and right to discard the bits that are
not part of the string and bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srd r5,r5,r6
+ sld r5,r5,r6
+#else
sld r5,r5,r6
srd r5,r5,r6
+#endif
cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes
have been found. */
bne cr7,L(done_null)
@@ -192,7 +211,13 @@ L(loop_null):
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done_null):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
+#endif
srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching null byte. */
blr
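
strchr's tail has to decide whether the first match of c or the first NUL comes earlier inside the doubleword. The little-endian path compares the (m - 1) & ~m forms of the two cmpb masks directly: a larger value means more trailing zeros, i.e. a later first hit, so only the winner's popcount is needed. A hedged C sketch with invented names:

#include <stdint.h>
#include <stdio.h>

/* Decide between the first c match and the first NUL inside one
   doubleword, given the two cmpb masks.  Byte 0 is the lowest address,
   as with a plain ldx on a little-endian machine.  */
static const char *
strchr_tail (const char *dword_base, uint64_t c_mask, uint64_t nul_mask)
{
  /* Bits strictly below the lowest set bit of each mask; an empty
     c_mask becomes all ones, i.e. "c matches infinitely late".  */
  uint64_t c_below = (c_mask - 1) & ~c_mask;
  uint64_t n_below = (nul_mask - 1) & ~nul_mask;
  if (c_below > n_below)                /* the NUL comes first: no match */
    return NULL;
  return dword_base + __builtin_popcountll (c_below) / 8;
}

int
main (void)
{
  /* "ab\0cd" padded with NULs: the real NUL sits at offset 2.  */
  static const char dword[8] = { 'a', 'b', 0, 'c', 'd', 0, 0, 0 };
  uint64_t nul_mask = 0xffULL << (8 * 2);
  uint64_t d_mask = 0xffULL << (8 * 4);   /* pretend cmpb matched 'd' */
  uint64_t b_mask = 0xffULL << (8 * 1);   /* pretend cmpb matched 'b' */
  printf ("%p\n", (void *) strchr_tail (dword, d_mask, nul_mask)); /* NULL */
  printf ("%c\n", *strchr_tail (dword, b_mask, nul_mask));         /* b */
  return 0;
}
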
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S b/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
index 9dbc51b0d..df457525e 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
@@ -27,8 +27,8 @@ ENTRY (__strchrnul)
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
rlwinm r6,r3,3,26,28 /* Calculate padding. */
@@ -44,10 +44,17 @@ ENTRY (__strchrnul)
/* Move the doublewords left and right to discard the bits that are
not part of the string and to bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ srd r9,r9,r6
+ sld r10,r10,r6
+ sld r9,r9,r6
+#else
sld r10,r10,r6
sld r9,r9,r6
srd r10,r10,r6
srd r9,r9,r6
+#endif
or r5,r9,r10 /* OR the results to speed things up. */
cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -97,7 +104,7 @@ L(loop):
bne cr6,L(done)
/* The c/null byte must be in the second doubleword. Adjust the
- address again and move the result of cmpb to r10 so we can calculate
+ address again and move the result of cmpb to r5 so we can calculate
the pointer. */
mr r5,r10
addi r8,r8,8
@@ -106,7 +113,13 @@ L(loop):
0xff in the same position as the c/null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
+#endif
srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of matching c/null byte. */
blr
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strlen.S b/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
index 343216952..807ef1082 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
@@ -30,7 +30,11 @@ ENTRY (strlen)
with cmpb. */
li r5,-1 /* MASK = 0xffffffffffffffff. */
ld r12,0(r4) /* Load doubleword from memory. */
+#ifdef __LITTLE_ENDIAN__
+ sld r5,r5,r6
+#else
srd r5,r5,r6 /* MASK = MASK >> padding. */
+#endif
orc r9,r12,r5 /* Mask bits that are not part of the string. */
cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */
cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */
@@ -48,9 +52,6 @@ ENTRY (strlen)
cmpb r10,r12,r0
cmpdi cr7,r10,0
bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
/* Main loop to look for the end of the string. Since it's a
small loop (< 8 instructions), align it to 32-bytes. */
@@ -87,9 +88,15 @@ L(loop):
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the length. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
+#ifdef __LITTLE_ENDIAN__
+ addi r9, r10, -1 /* Form a mask from trailing zeros. */
+ andc r9, r9, r10
+ popcntd r0, r9 /* Count the bits in the mask. */
+#else
+ cntlzd r0,r10 /* Count leading zeros before the match. */
+#endif
subf r5,r3,r4
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
add r3,r5,r0 /* Compute final length. */
blr
END (strlen)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
index 77ecad5ab..e618b010b 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
@@ -27,7 +27,7 @@
EALIGN (strncmp,5,0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -40,6 +40,7 @@ EALIGN (strncmp,5,0)
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
nop
@@ -83,12 +84,57 @@ L(g1): add rTMP,rFEFE,rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
+ addi rNEG, rBITDIF, 1
+ orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
+ sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
+ andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
+ andc rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
+ addi rNEG, rBITDIF, 1
+ orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
+ sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
+ andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
+ andc rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP,r7F7F,rWORD1
beq cr1,L(equal)
add rTMP,rTMP,r7F7F
xor. rBITDIF,rWORD1,rWORD2
-
andc rNEG,rNEG,rTMP
blt L(highbit)
cntlzd rBITDIF,rBITDIF
@@ -97,7 +143,7 @@ L(endstring):
cmpd cr1,rNEG,rBITDIF
sub rRTN,rWORD1,rWORD2
blt cr1,L(equal)
- sradi rRTN,rRTN,63
+ sradi rRTN,rRTN,63 /* must return an int. */
ori rRTN,rRTN,1
blr
L(equal):
@@ -105,7 +151,7 @@ L(equal):
blr
L(different):
- ldu rWORD1,-8(rSTR1)
+ ld rWORD1,-8(rSTR1)
xor. rBITDIF,rWORD1,rWORD2
sub rRTN,rWORD1,rWORD2
blt L(highbit)
@@ -113,11 +159,10 @@ L(different):
ori rRTN,rRTN,1
blr
L(highbit):
- srdi rWORD2,rWORD2,56
- srdi rWORD1,rWORD1,56
- sub rRTN,rWORD1,rWORD2
+ sradi rRTN,rWORD2,63
+ ori rRTN,rRTN,1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
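
The little-endian L(endstring) above first builds a mask covering every byte up to and including the first NUL of WORD1 ((rTMP - 1) & ~rTMP reaches bit 6 of that byte and the rldimi extends it through bit 7), masks the gunk off both words, and only then locates the least significant differing byte. The zero-detect value can carry a spurious 0x80 into the byte just above a real NUL, but its lowest marker is always genuine, so the mask is safe. A hedged C rendering of that masking step; the assembly resolves the differing byte branch-free, the byte extraction below is only for clarity.

#include <stdint.h>
#include <stdio.h>

/* ZMASK is the zero-byte detect value with 0x80 set in each byte of W1
   that is zero (the "rTMP" register).  Little-endian word layout.  */
static int
endstring_le (uint64_t w1, uint64_t w2, uint64_t zmask)
{
  uint64_t m = (zmask - 1) & ~zmask;    /* bits below the first NUL marker */
  m |= m << 1;                          /* rldimi: extend through bit 7 */
  w1 &= m;                              /* mask off gunk beyond the NUL */
  w2 &= m;
  if (w1 == w2)
    return 0;
  uint64_t diff = w1 ^ w2;
  int byte = __builtin_ctzll (diff) / 8;        /* LS differing byte */
  unsigned a = (w1 >> (8 * byte)) & 0xff;
  unsigned b = (w2 >> (8 * byte)) & 0xff;
  return a < b ? -1 : 1;
}

int
main (void)
{
  /* "abc\0????" vs "abd\0????": the garbage beyond the NUL differs too. */
  uint64_t w1 = 0x11223344LL << 32 | 0x00636261;   /* 'a','b','c',0,... */
  uint64_t w2 = 0x55667788LL << 32 | 0x00646261;   /* 'a','b','d',0,... */
  uint64_t zmask = 0x80ULL << 24;                  /* NUL in byte 3 of w1 */
  printf ("%d\n", endstring_le (w1, w2, zmask));   /* -1: 'c' < 'd' */
  printf ("%d\n", endstring_le (w1, w1 ^ (0xffULL << 40), zmask)); /* 0 */
  return 0;
}

The same tail is reused, with only register-name differences, by the powerpc64 strcmp.S and strncmp.S changes later in this patch.
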
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S b/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
index 37c7dbfe8..51591069d 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
@@ -24,33 +24,29 @@
ENTRY (__strnlen)
CALL_MCOUNT 2
dcbt 0,r3
- clrrdi r8,r3,3
+ clrrdi r8,r3,3
add r7,r3,r4 /* Calculate the last acceptable address. */
cmpldi r4,32
li r0,0 /* Doubleword with null chars. */
+ addi r7,r7,-1
+
/* If we have less than 33 bytes to search, skip to a faster code. */
ble L(small_range)
- cmpld cr7,r3,r7 /* Is the address equal or less than r3? If
- it's equal or less, it means size is either 0
- or a negative number. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
rlwinm r6,r3,3,26,28 /* Calculate padding. */
ld r12,0(r8) /* Load doubleword from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ sld r10,r10,r6
+#else
sld r10,r10,r6
srd r10,r10,r6
+#endif
cmpldi cr7,r10,0 /* If r10 == 0, no null's have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(end_max)
-
+ clrrdi r7,r7,3 /* Address of last doubleword. */
mtcrf 0x01,r8
/* Are we now aligned to a quadword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
@@ -63,17 +59,18 @@ L(proceed):
cmpldi cr7,r10,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(end_max)
-
L(loop_setup):
- sub r5,r7,r9
+ /* The last dword we want to read in the loop below is the one
+ containing the last byte of the string, ie. the dword at
+ (s + size - 1) & ~7, or r7. The first dword read is at
+ r8 + 8, we read 2 * cnt dwords, so the last dword read will
+ be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives
+ cnt = (r7 - r8) / 16 */
+ sub r5,r7,r8
srdi r6,r5,4 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for the null byte backwards in the string. Since
+
+ /* Main loop to look for the null byte in the string. Since
it's a small loop (< 8 instructions), align it to 32-bytes. */
.p2align 5
L(loop):
@@ -89,15 +86,18 @@ L(loop):
cmpldi cr7,r5,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for null in the whole range. Just return
- the original size. */
- addi r9,r8,8
- cmpld cr6,r9,r7
- blt cr6,L(loop_small)
+
+ /* We may have one more dword to read. */
+ cmpld cr6,r8,r7
+ beq cr6,L(end_max)
+
+ ldu r12,8(r8)
+ cmpb r10,r12,r0
+ cmpldi cr6,r10,0
+ bne cr6,L(done)
L(end_max):
- sub r3,r7,r3
+ mr r3,r4
blr
/* OK, one (or both) of the doublewords contains a null byte. Check
@@ -119,52 +119,59 @@ L(found):
/* r10 has the output of the cmpb instruction, that is, it contains
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the length.
- We need to make sure the null char is *before* the start of the
- range (since we're going backwards). */
+ We need to make sure the null char is *before* the end of the
+ range. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
- add r9,r8,r0
- sub r6,r9,r3 /* Length until the match. */
- cmpld r9,r7
- bgt L(end_max)
- mr r3,r6
- blr
-
- .align 4
-L(zero):
- li r3,0
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r10,-1
+ andc r0,r0,r10
+ popcntd r0,r0
+#else
+ cntlzd r0,r10 /* Count leading zeros before the match. */
+#endif
+ sub r3,r8,r3
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r3,r0 /* Length until the match. */
+ cmpld r3,r4
+ blelr
+ mr r3,r4
blr
/* Deals with size <= 32. */
.align 4
L(small_range):
cmpldi r4,0
- beq L(zero)
+ beq L(end_max)
+
+ clrrdi r7,r7,3 /* Address of last doubleword. */
rlwinm r6,r3,3,26,28 /* Calculate padding. */
- ld r12,0(r8) /* Load word from memory. */
+ ld r12,0(r8) /* Load doubleword from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ sld r10,r10,r6
+#else
sld r10,r10,r6
srd r10,r10,r6
+#endif
cmpldi cr7,r10,0
bne cr7,L(done)
- addi r9,r8,8
- cmpld r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmpld r8,r7
+ beq L(end_max)
.p2align 5
L(loop_small):
ldu r12,8(r8)
cmpb r10,r12,r0
- addi r9,r8,8
cmpldi cr6,r10,0
bne cr6,L(done)
- cmpld r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmpld r8,r7
+ bne L(loop_small)
+ mr r3,r4
+ blr
+
END (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_builtin_def (strnlen)
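
Instead of tracking an explicit end pointer, the rewritten tail derives the result arithmetically: the offset of the doubleword holding the NUL, plus the byte index of the NUL taken from the cmpb mask, clamped to the maxlen argument in r4. A hedged sketch with invented names, in the little-endian byte-index form:

#include <stdint.h>
#include <stdio.h>

/* Length from the doubleword that holds the NUL, clamped to MAXLEN.
   NUL_MASK is the cmpb result for that doubleword.  */
static size_t
strnlen_done (const char *s, size_t maxlen,
              const char *dword_addr, uint64_t nul_mask)
{
  /* popcntd of (m - 1) & ~m is the byte index of the first NUL,
     the same trailing-zero trick used by rawmemchr above.  */
  size_t idx = __builtin_popcountll ((nul_mask - 1) & ~nul_mask) / 8;
  size_t len = (size_t) (dword_addr - s) + idx;  /* sub/srdi/add in L(done) */
  return len <= maxlen ? len : maxlen;   /* cmpld r3,r4; blelr; mr r3,r4 */
}

int
main (void)
{
  static const char buf[16] = "powerpc\0garbage";
  uint64_t nul_mask = 0xffULL << (8 * 7);        /* NUL at byte 7 of buf */
  printf ("%zu %zu\n",
          strnlen_done (buf, 100, buf, nul_mask),   /* 7 */
          strnlen_done (buf, 5, buf, nul_mask));    /* 5, clamped */
  return 0;
}
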
diff --git a/libc/sysdeps/powerpc/powerpc64/setjmp-common.S b/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
index 58ec61062..1829b9ab6 100644
--- a/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
@@ -95,7 +95,7 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
mfcr r0
std r16,((JB_GPRS+2)*8)(3)
stfd fp16,((JB_FPRS+2)*8)(3)
- std r0,(JB_CR*8)(3)
+ stw r0,((JB_CR*8)+4)(3) /* 32-bit CR. */
std r17,((JB_GPRS+3)*8)(3)
stfd fp17,((JB_FPRS+3)*8)(3)
std r18,((JB_GPRS+4)*8)(3)
@@ -139,50 +139,46 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
la r5,((JB_VRS)*8)(3)
andi. r6,r5,0xf
mfspr r0,VRSAVE
- stw r0,((JB_VRSAVE)*8)(3)
+ stw r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */
addi r6,r5,16
beq+ L(aligned_save_vmx)
- lvsr v0,0,r5
- vspltisb v1,-1 /* set v1 to all 1's */
- vspltisb v2,0 /* set v2 to all 0's */
- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes
- on left = misalignment */
+ lvsr v0,0,r5
+ lvsl v1,0,r5
+ addi r6,r5,-16
- /* Special case for v20 we need to preserve what is in save area
- below v20 before obliterating it */
- lvx v5,0,r5
- vperm v20,v20,v20,v0
- vsel v5,v5,v20,v3
- vsel v20,v20,v2,v3
- stvx v5,0,r5
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+ addi addgpr,addgpr,32; \
+ vperm tmpvr,prevvr,savevr,shiftvr; \
+ stvx tmpvr,0,savegpr
-# define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
- addi addgpr,addgpr,32; \
- vperm savevr,savevr,savevr,shiftvr; \
- vsel hivr,prev_savevr,savevr,maskvr; \
- stvx hivr,0,savegpr;
+ /*
+ * We have to be careful not to corrupt the data below v20 and
+ * above v31. To keep things simple we just rotate both ends in
+ * the opposite direction to our main permute so we can use
+ * the common macro.
+ */
- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
+ /* load and rotate data below v20 */
+ lvx v2,0,r5
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+ save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+ save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+ save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+ save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+ save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+ save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+ save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+ save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+ save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+ save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+ save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+ /* load and rotate data above v31 */
+ lvx v2,0,r6
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
- /* Special case for r31 we need to preserve what is in save area
- above v31 before obliterating it */
- addi r5,r5,32
- vperm v31,v31,v31,v0
- lvx v4,0,r5
- vsel v5,v30,v31,v3
- stvx v5,0,r6
- vsel v4,v31,v4,v3
- stvx v4,0,r5
b L(no_vmx)
L(aligned_save_vmx):
diff --git a/libc/sysdeps/powerpc/powerpc64/setjmp.S b/libc/sysdeps/powerpc/powerpc64/setjmp.S
index 667b9d12d..0a3b2fc02 100644
--- a/libc/sysdeps/powerpc/powerpc64/setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/setjmp.S
@@ -26,9 +26,9 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmxsetjmp, setjmp, GLIBC_2.3.4)
-default_symbol_version (__vmx_setjmp,_setjmp,GLIBC_2.3.4)
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4)
+versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define setjmp __vmxsetjmp
# define _setjmp __vmx_setjmp
# define __sigsetjmp __vmx__sigsetjmp
@@ -44,9 +44,9 @@ strong_alias (__vmx__sigsetjmp, __setjmp)
# undef __sigjmp_save
# undef JB_SIZE
# define __NO_VMX__
-symbol_version (__novmxsetjmp, setjmp, GLIBC_2.3)
-symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.3);
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.3)
+compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_3)
+compat_symbol (libc, __novmx_setjmp,_setjmp, GLIBC_2_3);
+compat_symbol (libc, __novmx__sigsetjmp,__sigsetjmp, GLIBC_2_3)
# define setjmp __novmxsetjmp
# define _setjmp __novmx_setjmp
# define __sigsetjmp __novmx__sigsetjmp
diff --git a/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h b/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h
index 9da879c61..e80a683e6 100644
--- a/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h
+++ b/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h
@@ -2,3 +2,13 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ld %0,-28688(13)" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("ld %0,%1(13)" \
+ : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) \
+ ); \
+ x; \
+ })
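
POINTER_CHK_GUARD reads the per-thread pointer guard out of the TCB via r13; the testsuite uses it to check that the guard is actually set, and pointer mangling XORs sensitive saved pointers with it before they are stored. A simplified, self-contained model follows; the fallback symbol, the demo value, and the plain XOR round-trip are all illustrative, not glibc's PTR_MANGLE or its real guard setup.

#include <stdint.h>
#include <stdio.h>

/* Stand-in so the sketch builds outside glibc; the real macro reads the
   guard from the thread control block as shown in the header above.  */
#ifndef POINTER_CHK_GUARD
static uintptr_t demo_pointer_guard = 0x5bd1e995cafef00dULL;  /* made up */
# define POINTER_CHK_GUARD demo_pointer_guard
#endif

int
main (void)
{
  int object;
  uintptr_t guard = POINTER_CHK_GUARD;
  uintptr_t mangled = (uintptr_t) &object ^ guard;   /* what gets stored */
  void *back = (void *) (mangled ^ guard);           /* demangled before use */
  printf ("guard nonzero: %d, round trip ok: %d\n",
          guard != 0, back == (void *) &object);
  return 0;
}
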
diff --git a/libc/sysdeps/powerpc/powerpc64/stpcpy.S b/libc/sysdeps/powerpc/powerpc64/stpcpy.S
index 070cd4662..c0b39729e 100644
--- a/libc/sysdeps/powerpc/powerpc64/stpcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/stpcpy.S
@@ -62,7 +62,22 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stbu rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stbu rTMP, 1(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stbu rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -73,6 +88,7 @@ L(g1): rlwinm. rTMP, rALT, 8, 24, 31
beqlr-
stbu rALT, 1(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
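
The added branch stores the bytes of the final word lowest address first, which on little-endian means starting from the least significant byte; the unchanged branch below it keeps the original most-significant-first order for big-endian. A C rendering of the little-endian tail, illustrative only and assuming a little-endian host in the demo:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Store the bytes of WORD lowest address first, stopping at (and
   including) the NUL, and return the address of the stored NUL as
   stpcpy does.  Assumes byte i of WORD sits at bits 8*i.  */
static char *
store_tail_le (char *dst, uint32_t word)
{
  for (int i = 0; i < 4; i++)
    {
      unsigned char b = (word >> (8 * i)) & 0xff;
      dst[i] = (char) b;
      if (b == 0)
        return dst + i;
    }
  return dst + 3;   /* not reached: the tail only runs when WORD has a NUL */
}

int
main (void)
{
  uint32_t word;
  memcpy (&word, "hi\0x", 4);   /* 'h','i',NUL,'x' (little-endian host) */
  char buf[16];
  memset (buf, '?', sizeof buf);
  char *end = store_tail_le (buf, word);
  printf ("copied \"%s\", NUL stored at offset %td\n", buf, end - buf);
  return 0;
}
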
diff --git a/libc/sysdeps/powerpc/powerpc64/strchr.S b/libc/sysdeps/powerpc/powerpc64/strchr.S
index d2d8cd361..da707ae58 100644
--- a/libc/sysdeps/powerpc/powerpc64/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/strchr.S
@@ -37,11 +37,13 @@ ENTRY (strchr)
#define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */
#define rTMP3 r12
+#define rTMP4 rIGN
+#define rTMP5 rMASK
dcbt 0,rRTN
- rlwimi rCHR, rCHR, 8, 16, 23
+ insrdi rCHR, rCHR, 8, 48
li rMASK, -1
- rlwimi rCHR, rCHR, 16, 0, 15
+ insrdi rCHR, rCHR, 16, 32
rlwinm rIGN, rRTN, 3, 26, 28
insrdi rCHR, rCHR, 32, 0
lis rFEFE, -0x101
@@ -54,64 +56,93 @@ ENTRY (strchr)
add rFEFE, rFEFE, rTMP1
/* Test the first (partial?) word. */
ld rWORD, 0(rSTR)
+#ifdef __LITTLE_ENDIAN__
+ sld rMASK, rMASK, rIGN
+#else
srd rMASK, rMASK, rIGN
+#endif
orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2
xor rTMP3, rCHR, rWORD
orc rTMP3, rTMP3, rMASK
b L(loopentry)
/* The loop. */
-L(loop):ldu rWORD, 8(rSTR)
- and. rTMP1, rTMP1, rTMP2
+L(loop):
+ ldu rWORD, 8(rSTR)
+ and. rTMP5, rTMP1, rTMP2
/* Test for 0. */
- add rTMP1, rFEFE, rWORD
- nor rTMP2, r7F7F, rWORD
+ add rTMP1, rFEFE, rWORD /* x - 0x01010101. */
+ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */
bne L(foundit)
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */
/* Start test for the bytes we're looking for. */
xor rTMP3, rCHR, rWORD
L(loopentry):
add rTMP1, rFEFE, rTMP3
nor rTMP2, r7F7F, rTMP3
beq L(loop)
+
/* There is a zero byte in the word, but may also be a matching byte (either
before or after the zero byte). In fact, we may be looking for a
- zero byte, in which case we return a match. We guess that this hasn't
- happened, though. */
-L(missed):
- and. rTMP1, rTMP1, rTMP2
+ zero byte, in which case we return a match. */
+ and. rTMP5, rTMP1, rTMP2
li rRTN, 0
beqlr
-/* It did happen. Decide which one was first...
- I'm not sure if this is actually faster than a sequence of
- rotates, compares, and branches (we use it anyway because it's shorter). */
+/* At this point:
+ rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+ But there may be false matches in the next most significant byte from
+ a true match due to carries. This means we need to recalculate the
+ matches using a longer method for big-endian. */
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzd rCLZB, rTMP1
+ addi rTMP2, rTMP4, -1
+ andc rTMP2, rTMP2, rTMP4
+ cmpld rTMP1, rTMP2
+ bgtlr
+ subfic rCLZB, rCLZB, 64-7
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+ results from the loop for reuse here. See strlen.S tail. Similarly
+ one instruction could be pruned from L(foundit). */
and rFEFE, r7F7F, rWORD
- or rMASK, r7F7F, rWORD
+ or rTMP5, r7F7F, rWORD
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rFEFE, rFEFE, r7F7F
add rTMP1, rTMP1, r7F7F
- nor rWORD, rMASK, rFEFE
- nor rTMP2, rIGN, rTMP1
+ nor rWORD, rTMP5, rFEFE
+ nor rTMP2, rTMP4, rTMP1
+ cntlzd rCLZB, rTMP2
cmpld rWORD, rTMP2
bgtlr
- cntlzd rCLZB, rTMP2
+#endif
srdi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
blr
L(foundit):
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzd rCLZB, rTMP1
+ subfic rCLZB, rCLZB, 64-7-64
+ sradi rCLZB, rCLZB, 3
+#else
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F
- nor rTMP2, rIGN, rTMP1
+ nor rTMP2, rTMP4, rTMP1
cntlzd rCLZB, rTMP2
subi rSTR, rSTR, 8
srdi rCLZB, rCLZB, 3
+#endif
add rRTN, rSTR, rCLZB
blr
END (strchr)
diff --git a/libc/sysdeps/powerpc/powerpc64/strcmp.S b/libc/sysdeps/powerpc/powerpc64/strcmp.S
index c9d6dac12..70854689d 100644
--- a/libc/sysdeps/powerpc/powerpc64/strcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/strcmp.S
@@ -25,7 +25,7 @@
EALIGN (strcmp, 4, 0)
CALL_MCOUNT 2
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,6 +35,7 @@ EALIGN (strcmp, 4, 0)
#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
+#define rTMP r11
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -58,19 +59,66 @@ L(g0): ldu rWORD1, 8(rSTR1)
ldu rWORD2, 8(rSTR2)
L(g1): add rTMP, rFEFE, rWORD1
nor rNEG, r7F7F, rWORD1
-
and. rTMP, rTMP, rNEG
cmpd cr1, rWORD1, rWORD2
beq+ L(g0)
-L(endstring):
+
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
+L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -79,7 +127,7 @@ L(endstring):
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -95,11 +143,10 @@ L(different):
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc64/strcpy.S b/libc/sysdeps/powerpc/powerpc64/strcpy.S
index 4c6fd3f9d..a7fd85bad 100644
--- a/libc/sysdeps/powerpc/powerpc64/strcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/strcpy.S
@@ -68,6 +68,32 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
L(g1):
+#ifdef __LITTLE_ENDIAN__
+ extrdi. rTMP, rALT, 8, 56
+ stb rALT, 8(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 48
+ stb rTMP, 9(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 40
+ stb rTMP, 10(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 32
+ stb rTMP, 11(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 24
+ stb rTMP, 12(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 16
+ stb rTMP, 13(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 8
+ stb rTMP, 14(rDEST)
+ beqlr-
+ extrdi rTMP, rALT, 8, 0
+ stb rTMP, 15(rDEST)
+ blr
+#else
extrdi. rTMP, rALT, 8, 0
stb rTMP, 8(rDEST)
beqlr-
@@ -91,6 +117,7 @@ L(g1):
beqlr-
stb rALT, 15(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc64/strlen.S b/libc/sysdeps/powerpc/powerpc64/strlen.S
index 0f9b5eea9..4ed1ba3ad 100644
--- a/libc/sysdeps/powerpc/powerpc64/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/strlen.S
@@ -29,7 +29,12 @@
1 is subtracted you get a value in the range 0x00-0x7f, none of which
have their high bit set. The expression here is
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
- there were no 0x00 bytes in the word.
+ there were no 0x00 bytes in the word. You get 0x80 in bytes that
+ match, but possibly false 0x80 matches in the next more significant
+ byte to a true match due to carries. For little-endian this is
+ of no consequence since the least significant match is the one
+ we're interested in, but big-endian needs method 2 to find which
+ byte matches.
2) Given a word 'x', we can test to see _which_ byte was zero by
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
@@ -62,7 +67,7 @@
Answer:
1) Added a Data Cache Block Touch early to prefetch the first 128
byte cache line. Adding dcbt instructions to the loop would not be
- effective since most strings will be shorter than the cache line.*/
+ effective since most strings will be shorter than the cache line. */
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
0 and 3 through 12 (so long as we don't call any procedures) without
@@ -78,7 +83,7 @@
ENTRY (strlen)
CALL_MCOUNT 1
-#define rTMP1 r0
+#define rTMP4 r0
#define rRTN r3 /* incoming STR arg, outgoing result */
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
@@ -88,9 +93,9 @@ ENTRY (strlen)
#define rWORD1 r8 /* current string doubleword */
#define rWORD2 r9 /* next string doubleword */
#define rMASK r9 /* mask for first string doubleword */
-#define rTMP2 r10
-#define rTMP3 r11
-#define rTMP4 r12
+#define rTMP1 r10
+#define rTMP2 r11
+#define rTMP3 r12
dcbt 0,rRTN
clrrdi rSTR, rRTN, 3
@@ -100,30 +105,36 @@ ENTRY (strlen)
addi r7F7F, r7F7F, 0x7f7f
li rMASK, -1
insrdi r7F7F, r7F7F, 32, 0
-/* That's the setup done, now do the first pair of doublewords.
- We make an exception and use method (2) on the first two doublewords,
- to reduce overhead. */
+/* We use method (2) on the first two doublewords, because rFEFE isn't
+ required which reduces setup overhead. Also gives a faster return
+ for small strings on big-endian due to needing to recalculate with
+ method (2) anyway. */
+#ifdef __LITTLE_ENDIAN__
+ sld rMASK, rMASK, rPADN
+#else
srd rMASK, rMASK, rPADN
+#endif
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
lis rFEFE, -0x101
add rTMP1, rTMP1, r7F7F
addi rFEFE, rFEFE, -0x101
- nor rTMP1, rTMP2, rTMP1
- and. rWORD1, rTMP1, rMASK
+ nor rTMP3, rTMP2, rTMP1
+ and. rTMP3, rTMP3, rMASK
mtcrf 0x01, rRTN
bne L(done0)
- sldi rTMP1, rFEFE, 32
- add rFEFE, rFEFE, rTMP1
+ sldi rTMP1, rFEFE, 32
+ add rFEFE, rFEFE, rTMP1
/* Are we now aligned to a doubleword boundary? */
bt 28, L(loop)
/* Handle second doubleword of pair. */
+/* Perhaps use method (1) here for little-endian, saving one instruction? */
ldu rWORD1, 8(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor. rWORD1, rTMP2, rTMP1
+ nor. rTMP3, rTMP2, rTMP1
bne L(done0)
/* The loop. */
@@ -137,28 +148,52 @@ L(loop):
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
- and. rTMP1, rTMP3, rTMP4
+ and. rTMP3, rTMP3, rTMP4
beq L(loop)
+#ifndef __LITTLE_ENDIAN__
and rTMP1, r7F7F, rWORD2
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP4, rTMP1
+ andc rTMP3, rTMP4, rTMP1
b L(done0)
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 8
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP2, rTMP1
+ andc rTMP3, rTMP2, rTMP1
/* When we get to here, rSTR points to the first doubleword in the string that
- contains a zero byte, and the most significant set bit in rWORD1 is in that
- byte. */
+ contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, and 0x00
+ otherwise. */
L(done0):
- cntlzd rTMP3, rWORD1
+ cntlzd rTMP3, rTMP3
subf rTMP1, rRTN, rSTR
srdi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
blr
+#else
+
+L(done0):
+ addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */
+ andc rTMP1, rTMP1, rTMP3
+ cntlzd rTMP1, rTMP1 /* Count bits not in the mask. */
+ subf rTMP3, rRTN, rSTR
+ subfic rTMP1, rTMP1, 64-7
+ srdi rTMP1, rTMP1, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+
+L(done1):
+ addi rTMP3, rTMP1, -1
+ andc rTMP3, rTMP3, rTMP1
+ cntlzd rTMP3, rTMP3
+ subf rTMP1, rRTN, rSTR
+ subfic rTMP3, rTMP3, 64-7-64
+ sradi rTMP3, rTMP3, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+#endif
+
END (strlen)
libc_hidden_builtin_def (strlen)
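
The comment added above is the heart of all the endian splits in these string patches: method (1) marks every zero byte with 0x80 but may also mark the byte just above a real zero because of the borrow, so little-endian can trust its lowest marker while big-endian recomputes with method (2). A self-check of both methods and of the carry caveat; a little-endian host is assumed for the byte numbering in the demo.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t
method1 (uint64_t x)    /* the add/nor pair used in the loop */
{
  return (x + 0xfefefefefefefeffULL) & ~(x | 0x7f7f7f7f7f7f7f7fULL);
}

static uint64_t
method2 (uint64_t x)    /* exact per-byte zero detect */
{
  return ~(((x & 0x7f7f7f7f7f7f7f7fULL) + 0x7f7f7f7f7f7f7f7fULL)
           | x | 0x7f7f7f7f7f7f7f7fULL);
}

int
main (void)
{
  uint64_t w;
  memcpy (&w, "ab\0\1cdef", 8);   /* zero at byte 2, 0x01 at byte 3 */
  uint64_t m1 = method1 (w), m2 = method2 (w);
  assert (m2 == 0x80ULL << 16);                 /* only the real zero */
  assert ((m1 & (0x80ULL << 16)) != 0);         /* method 1 sees it too... */
  assert ((m1 & (0x80ULL << 24)) != 0);         /* ...plus a false hit above */
  assert (__builtin_ctzll (m1) == __builtin_ctzll (m2)); /* lowest marker agrees */
  printf ("m1=%016llx m2=%016llx\n",
          (unsigned long long) m1, (unsigned long long) m2);
  return 0;
}
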
diff --git a/libc/sysdeps/powerpc/powerpc64/strncmp.S b/libc/sysdeps/powerpc/powerpc64/strncmp.S
index 779d9f7f6..8f842c4b2 100644
--- a/libc/sysdeps/powerpc/powerpc64/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/strncmp.S
@@ -25,7 +25,7 @@
EALIGN (strncmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -36,6 +36,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -77,12 +78,59 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -91,7 +139,7 @@ L(endstring):
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -99,7 +147,7 @@ L(equal):
blr
L(different):
- ldu rWORD1, -8(rSTR1)
+ ld rWORD1, -8(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
blt- L(highbit)
@@ -107,11 +155,10 @@ L(different):
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff --git a/libc/sysdeps/powerpc/preconfigure b/libc/sysdeps/powerpc/preconfigure
new file mode 100644
index 000000000..1741c251f
--- /dev/null
+++ b/libc/sysdeps/powerpc/preconfigure
@@ -0,0 +1,11 @@
+# Check for e500.
+
+case "$machine" in
+powerpc)
+ $CC $CFLAGS $CPPFLAGS -E -dM -xc /dev/null > conftest.i
+ if grep -q __NO_FPRS__ conftest.i && ! grep -q _SOFT_FLOAT conftest.i; then
+ base_machine=powerpc machine=powerpc/powerpc32/e500
+ fi
+ rm -f conftest.i
+ ;;
+esac
diff --git a/libc/sysdeps/powerpc/soft-fp/sfp-machine.h b/libc/sysdeps/powerpc/soft-fp/sfp-machine.h
new file mode 100644
index 000000000..041187807
--- /dev/null
+++ b/libc/sysdeps/powerpc/soft-fp/sfp-machine.h
@@ -0,0 +1,115 @@
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
+
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 0
+
+/* Someone please check this. */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+#if defined __NO_FPRS__ && !defined _SOFT_FLOAT
+
+/* Exception flags. We use the bit positions of the appropriate bits
+ in the FPEFSCR. */
+
+# include <fenv_libc.h>
+# include <sysdep.h>
+# include <sys/prctl.h>
+
+int __feraiseexcept_soft (int);
+libc_hidden_proto (__feraiseexcept_soft)
+
+# define FP_EX_INEXACT SPEFSCR_FINXS
+# define FP_EX_INVALID SPEFSCR_FINVS
+# define FP_EX_DIVZERO SPEFSCR_FDBZS
+# define FP_EX_UNDERFLOW SPEFSCR_FUNFS
+# define FP_EX_OVERFLOW SPEFSCR_FOVFS
+
+# define _FP_DECL_EX \
+ int _spefscr __attribute__ ((unused)), _ftrapex __attribute__ ((unused)) = 0
+# define FP_INIT_ROUNDMODE \
+ do \
+ { \
+ int _r; \
+ INTERNAL_SYSCALL_DECL (_err); \
+ \
+ _spefscr = fegetenv_register (); \
+ _r = INTERNAL_SYSCALL (prctl, _err, 2, PR_GET_FPEXC, &_ftrapex); \
+ if (INTERNAL_SYSCALL_ERROR_P (_r, _err)) \
+ _ftrapex = 0; \
+ } \
+ while (0)
+# define FP_INIT_EXCEPTIONS /* Empty. */
+
+# define FP_HANDLE_EXCEPTIONS __feraiseexcept_soft (_fex)
+# define FP_ROUNDMODE (_spefscr & 0x3)
+
+/* Not correct in general, but sufficient for the uses in soft-fp. */
+# define FP_TRAPPING_EXCEPTIONS (_ftrapex & PR_FP_EXC_UND \
+ ? FP_EX_UNDERFLOW \
+ : 0)
+
+#else
+
+/* Exception flags. We use the bit positions of the appropriate bits
+ in the FPSCR, which also correspond to the FE_* bits. This makes
+ everything easier ;-). */
+# define FP_EX_INVALID (1 << (31 - 2))
+# define FP_EX_OVERFLOW (1 << (31 - 3))
+# define FP_EX_UNDERFLOW (1 << (31 - 4))
+# define FP_EX_DIVZERO (1 << (31 - 5))
+# define FP_EX_INEXACT (1 << (31 - 6))
+
+# define FP_HANDLE_EXCEPTIONS __simulate_exceptions (_fex)
+# define FP_ROUNDMODE __sim_round_mode
+# define FP_TRAPPING_EXCEPTIONS (~__sim_disabled_exceptions & 0x3e000000)
+
+#endif
+
+/* FIXME: these variables should be thread specific (see bugzilla bug
+ 15483) and ideally preserved across signal handlers, like hardware
+ FP status words, but the latter is quite difficult to accomplish in
+ userland. */
+
+extern int __sim_exceptions;
+libc_hidden_proto (__sim_exceptions);
+extern int __sim_disabled_exceptions;
+libc_hidden_proto (__sim_disabled_exceptions);
+extern int __sim_round_mode;
+libc_hidden_proto (__sim_round_mode);
+
+extern void __simulate_exceptions (int x) attribute_hidden;
diff --git a/libc/sysdeps/powerpc/sysdep.h b/libc/sysdeps/powerpc/sysdep.h
index 1b5334ad3..bc2cb6681 100644
--- a/libc/sysdeps/powerpc/sysdep.h
+++ b/libc/sysdeps/powerpc/sysdep.h
@@ -144,6 +144,21 @@
#define VRSAVE 256
+/* The 32-bit words of a 64-bit dword are at these offsets in memory. */
+#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+# define LOWORD 0
+# define HIWORD 4
+#else
+# define LOWORD 4
+# define HIWORD 0
+#endif
+
+/* The high 16-bit word of a 64-bit dword is at this offset in memory. */
+#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+# define HISHORT 6
+#else
+# define HISHORT 0
+#endif
/* This seems to always be the case on PPC. */
#define ALIGNARG(log2) log2
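
LOWORD/HIWORD (and HISHORT) give the memory offsets of the 32-bit halves, and of the high 16 bits, of a stored 64-bit value, so the same assembly source can address them on either endianness. A quick C model of the same mapping; the __BYTE_ORDER__ fallback is only there so the sketch also builds on hosts without the PowerPC predefines.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  uint64_t v = 0x1122334455667788ULL;
  unsigned char bytes[8];
  uint32_t lo, hi;
  memcpy (bytes, &v, 8);

#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN \
    || (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  enum { LOWORD = 0, HIWORD = 4 };
#else
  enum { LOWORD = 4, HIWORD = 0 };
#endif

  memcpy (&lo, bytes + LOWORD, 4);
  memcpy (&hi, bytes + HIWORD, 4);
  assert (lo == (uint32_t) v);            /* 0x55667788 */
  assert (hi == (uint32_t) (v >> 32));    /* 0x11223344 */
  printf ("LOWORD=%d HIWORD=%d lo=%08x hi=%08x\n",
          LOWORD, HIWORD, (unsigned) lo, (unsigned) hi);
  return 0;
}
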
diff --git a/libc/sysdeps/s390/ffs.c b/libc/sysdeps/s390/ffs.c
index 807441da6..2dbb7430e 100644
--- a/libc/sysdeps/s390/ffs.c
+++ b/libc/sysdeps/s390/ffs.c
@@ -63,6 +63,7 @@ __ffs (x)
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
#if ULONG_MAX == UINT_MAX
#undef ffsl
diff --git a/libc/sysdeps/s390/s390-32/stackguard-macros.h b/libc/sysdeps/s390/s390-32/stackguard-macros.h
index b74c5799b..449e8d488 100644
--- a/libc/sysdeps/s390/s390-32/stackguard-macros.h
+++ b/libc/sysdeps/s390/s390-32/stackguard-macros.h
@@ -2,3 +2,14 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ear %0,%%a0; l %0,0x14(%0)" : "=a" (x)); x; })
+
+/* On s390/s390x there is no unique pointer guard, instead we use the
+ same value as the stack guard. */
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("ear %0,%%a0; l %0,%1(%0)" \
+ : "=a" (x) \
+ : "i" (offsetof (tcbhead_t, stack_guard))); \
+ x; \
+ })
diff --git a/libc/sysdeps/s390/s390-64/stackguard-macros.h b/libc/sysdeps/s390/s390-64/stackguard-macros.h
index 0cebb5f02..c8270fbe7 100644
--- a/libc/sysdeps/s390/s390-64/stackguard-macros.h
+++ b/libc/sysdeps/s390/s390-64/stackguard-macros.h
@@ -2,3 +2,17 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ear %0,%%a0; sllg %0,%0,32; ear %0,%%a1; lg %0,0x28(%0)" : "=a" (x)); x; })
+
+/* On s390/s390x there is no unique pointer guard, instead we use the
+ same value as the stack guard. */
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("ear %0,%%a0;" \
+ "sllg %0,%0,32;" \
+ "ear %0,%%a1;" \
+ "lg %0,%1(%0)" \
+ : "=a" (x) \
+ : "i" (offsetof (tcbhead_t, stack_guard))); \
+ x; \
+ })
diff --git a/libc/sysdeps/sh/stackguard-macros.h b/libc/sysdeps/sh/stackguard-macros.h
new file mode 100644
index 000000000..55a5771b6
--- /dev/null
+++ b/libc/sysdeps/sh/stackguard-macros.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+extern uintptr_t __stack_chk_guard;
+#define STACK_CHK_GUARD __stack_chk_guard
+
+#define POINTER_CHK_GUARD THREAD_GET_POINTER_GUARD()
diff --git a/libc/sysdeps/sparc/fpu/libm-test-ulps b/libc/sysdeps/sparc/fpu/libm-test-ulps
index cfc72a661..ecb871d6a 100644
--- a/libc/sysdeps/sparc/fpu/libm-test-ulps
+++ b/libc/sysdeps/sparc/fpu/libm-test-ulps
@@ -7505,6 +7505,34 @@ idouble: 1
Test "gamma (-0.5)":
ildouble: 1
ldouble: 1
+Test "gamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "gamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-25)":
+ildouble: 1
+ldouble: 1
+Test "gamma (-0x1p-30)":
+ildouble: 1
+ldouble: 1
+Test "gamma (-0x1p-40)":
+ildouble: 1
+ldouble: 1
+Test "gamma (-0x1p-5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "gamma (-0x1p-64)":
+ildouble: 1
+ldouble: 1
Test "gamma (0.7)":
double: 1
float: 1
@@ -7512,6 +7540,25 @@ idouble: 1
ifloat: 1
ildouble: 1
ldouble: 1
+Test "gamma (0x1p-10)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "gamma (0x1p-30)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "gamma (0x1p-5)":
+ildouble: 1
+ldouble: 1
+Test "gamma (0x1p-60)":
+ildouble: 1
+ldouble: 1
+Test "gamma (0x1p-70)":
+ildouble: 1
+ldouble: 1
Test "gamma (1.2)":
double: 1
float: 2
@@ -7725,6 +7772,17 @@ double: 2
float: 2
idouble: 2
ifloat: 2
+Test "jn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "jn (2, 0x1p127)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "jn (2, 0x1p16383)":
+ildouble: 2
+ldouble: 2
Test "jn (2, 2.4048255576957729)":
double: 2
float: 1
@@ -7802,6 +7860,34 @@ ldouble: 3
Test "lgamma (-0.5)":
ildouble: 1
ldouble: 1
+Test "lgamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "lgamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-25)":
+ildouble: 1
+ldouble: 1
+Test "lgamma (-0x1p-30)":
+ildouble: 1
+ldouble: 1
+Test "lgamma (-0x1p-40)":
+ildouble: 1
+ldouble: 1
+Test "lgamma (-0x1p-5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "lgamma (-0x1p-64)":
+ildouble: 1
+ldouble: 1
Test "lgamma (0.7)":
double: 1
float: 1
@@ -7809,6 +7895,25 @@ idouble: 1
ifloat: 1
ildouble: 1
ldouble: 1
+Test "lgamma (0x1p-10)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "lgamma (0x1p-30)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "lgamma (0x1p-5)":
+ildouble: 1
+ldouble: 1
+Test "lgamma (0x1p-60)":
+ildouble: 1
+ldouble: 1
+Test "lgamma (0x1p-70)":
+ildouble: 1
+ldouble: 1
Test "lgamma (1.2)":
double: 1
float: 2
@@ -9239,7 +9344,9 @@ ildouble: 5
ldouble: 5
Test "yn (10, 1.0)":
double: 1
+float: 2
idouble: 1
+ifloat: 2
ildouble: 1
ldouble: 1
Test "yn (10, 10.0)":
@@ -9251,7 +9358,25 @@ ildouble: 2
ldouble: 2
Test "yn (10, 2.0)":
double: 2
+float: 1
idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "yn (2, 0x1.ffff62p+99)":
+double: 1
+idouble: 1
+Test "yn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "yn (2, 0x1p127)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (2, 0x1p16383)":
ildouble: 2
ldouble: 2
Test "yn (3, 0.125)":
diff --git a/libc/sysdeps/sparc/sparc32/soft-fp/q_neg.c b/libc/sysdeps/sparc/sparc32/soft-fp/q_neg.c
index 551c40887..5e2449c91 100644
--- a/libc/sysdeps/sparc/sparc32/soft-fp/q_neg.c
+++ b/libc/sysdeps/sparc/sparc32/soft-fp/q_neg.c
@@ -24,7 +24,6 @@
long double _Q_neg(const long double a)
{
- FP_DECL_EX;
long double c = a;
#if (__BYTE_ORDER == __BIG_ENDIAN)
@@ -36,11 +35,9 @@ long double _Q_neg(const long double a)
#else
FP_DECL_Q(A); FP_DECL_Q(C);
- FP_UNPACK_Q(A, a);
+ FP_UNPACK_RAW_Q(A, a);
FP_NEG_Q(C, A);
- FP_PACK_Q(c, C);
+ FP_PACK_RAW_Q(c, C);
#endif
- FP_CLEAR_EXCEPTIONS;
- FP_HANDLE_EXCEPTIONS;
return c;
}
diff --git a/libc/sysdeps/sparc/sparc32/stackguard-macros.h b/libc/sysdeps/sparc/sparc32/stackguard-macros.h
index c0b02b0bb..1eef0f19f 100644
--- a/libc/sysdeps/sparc/sparc32/stackguard-macros.h
+++ b/libc/sysdeps/sparc/sparc32/stackguard-macros.h
@@ -2,3 +2,6 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ld [%%g7+0x14], %0" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ uintptr_t x; asm ("ld [%%g7+0x18], %0" : "=r" (x)); x; })
diff --git a/libc/sysdeps/sparc/sparc64/stackguard-macros.h b/libc/sysdeps/sparc/sparc64/stackguard-macros.h
index 80f063558..cc0c12c04 100644
--- a/libc/sysdeps/sparc/sparc64/stackguard-macros.h
+++ b/libc/sysdeps/sparc/sparc64/stackguard-macros.h
@@ -2,3 +2,6 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ldx [%%g7+0x28], %0" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ uintptr_t x; asm ("ldx [%%g7+0x30], %0" : "=r" (x)); x; })
diff --git a/libc/sysdeps/unix/Makefile b/libc/sysdeps/unix/Makefile
index 375561f0d..2b607a007 100644
--- a/libc/sysdeps/unix/Makefile
+++ b/libc/sysdeps/unix/Makefile
@@ -51,12 +51,14 @@ $(objpfx)stub-syscalls.c: $(common-objpfx)sysd-syscalls \
for call in $(unix-stub-syscalls); do \
case $$call in \
*@@*) \
- ver=$${call##*@}; call=$${call%%@*}; ver=$${ver//./_}; \
+ ver=$${call##*@}; call=$${call%%@*}; \
+ ver=`echo "$ver" | sed 's/\./_/g'`; \
echo "strong_alias (_no_syscall, __$${call}_$${ver})"; \
echo "versioned_symbol (libc, __$${call}_$${ver}, $$call, $$ver);"\
;; \
*@*) \
- ver=$${call##*@}; call=$${call%%@*}; ver=$${ver//./_}; \
+ ver=$${call##*@}; call=$${call%%@*}; \
+ ver=`echo "$ver" | sed 's/\./_/g'`; \
echo "strong_alias (_no_syscall, __$${call}_$${ver})"; \
echo "compat_symbol (libc, __$${call}_$${ver}, $$call, $$ver);" \
;; \
diff --git a/libc/sysdeps/unix/make-syscalls.sh b/libc/sysdeps/unix/make-syscalls.sh
index f04f2abb3..6eba62c94 100644
--- a/libc/sysdeps/unix/make-syscalls.sh
+++ b/libc/sysdeps/unix/make-syscalls.sh
@@ -275,7 +275,7 @@ while read file srcfile caller syscall args strong weak; do
# name in the vDSO and KERNEL_X.Y is its symbol version.
vdso_symbol="${vdso_syscall%@*}"
vdso_symver="${vdso_syscall#*@}"
- vdso_symver="${vdso_symver//./_}"
+ vdso_symver=`echo "$vdso_symver" | sed 's/\./_/g'`
echo "\
\$(foreach p,\$(sysd-rules-targets),\$(objpfx)\$(patsubst %,\$p,$file).os): \\
\$(..)sysdeps/unix/make-syscalls.sh\
diff --git a/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h b/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
index b5929bd29..9b0421ee0 100644
--- a/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
+++ b/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
@@ -96,6 +96,9 @@
#ifndef __O_DSYNC
# define __O_DSYNC 010000
#endif
+#ifndef __O_TMPFILE
+# define __O_TMPFILE 020200000
+#endif
#ifndef F_GETLK
# ifndef __USE_FILE_OFFSET64
@@ -128,6 +131,7 @@
# define O_DIRECT __O_DIRECT /* Direct disk access. */
# define O_NOATIME __O_NOATIME /* Do not set atime. */
# define O_PATH __O_PATH /* Resolve pathname but do not open file. */
+# define O_TMPFILE __O_TMPFILE /* Atomically create nameless file. */
#endif
/* For now, Linux has no separate synchronicity options for read
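
The new __O_TMPFILE/O_TMPFILE definitions above expose the Linux 3.11 flag for creating an unnamed regular file inside a directory; the descriptor can later be given a name with linkat if wanted. A usage sketch, assuming a kernel and headers that provide O_TMPFILE (the directory path and mode are placeholders):

/* Sketch of O_TMPFILE usage: the file has no name and vanishes when
   the descriptor is closed, unless it is linked into the filesystem
   first via /proc/self/fd.  */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main (void)
{
  int fd = open ("/tmp", O_TMPFILE | O_RDWR, 0600);
  if (fd < 0)
    {
      perror ("open");
      return 1;
    }
  if (write (fd, "scratch\n", 8) != 8)
    perror ("write");
  /* Optionally give it a name (requires /proc):
       char p[64];
       snprintf (p, sizeof p, "/proc/self/fd/%d", fd);
       linkat (AT_FDCWD, p, AT_FDCWD, "/tmp/now-visible",
               AT_SYMLINK_FOLLOW);  */
  close (fd);
  return 0;
}
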
diff --git a/libc/sysdeps/unix/sysv/linux/configure b/libc/sysdeps/unix/sysv/linux/configure
index 76cfea1dc..88fab5662 100644
--- a/libc/sysdeps/unix/sysv/linux/configure
+++ b/libc/sysdeps/unix/sysv/linux/configure
@@ -187,7 +187,7 @@ case "$machine" in
libc_cv_gcc_unwind_find_fde=yes
arch_minimum_kernel=2.6.16
;;
- powerpc/powerpc32)
+ powerpc/powerpc32*)
libc_cv_gcc_unwind_find_fde=yes
arch_minimum_kernel=2.6.16
;;
diff --git a/libc/sysdeps/unix/sysv/linux/configure.in b/libc/sysdeps/unix/sysv/linux/configure.in
index e55d9fdb0..5e5902d8e 100644
--- a/libc/sysdeps/unix/sysv/linux/configure.in
+++ b/libc/sysdeps/unix/sysv/linux/configure.in
@@ -43,7 +43,7 @@ case "$machine" in
libc_cv_gcc_unwind_find_fde=yes
arch_minimum_kernel=2.6.16
;;
- powerpc/powerpc32)
+ powerpc/powerpc32*)
libc_cv_gcc_unwind_find_fde=yes
arch_minimum_kernel=2.6.16
;;
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/bits/sigstack.h b/libc/sysdeps/unix/sysv/linux/powerpc/bits/sigstack.h
new file mode 100644
index 000000000..33be9e8db
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/bits/sigstack.h
@@ -0,0 +1,54 @@
+/* sigstack, sigaltstack definitions.
+ Copyright (C) 1998-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _SIGNAL_H
+# error "Never include this file directly. Use <signal.h> instead"
+#endif
+
+
+/* Structure describing a signal stack (obsolete). */
+struct sigstack
+ {
+ void *ss_sp; /* Signal stack pointer. */
+ int ss_onstack; /* Nonzero if executing on this stack. */
+ };
+
+
+/* Possible values for `ss_flags'. */
+enum
+{
+ SS_ONSTACK = 1,
+#define SS_ONSTACK SS_ONSTACK
+ SS_DISABLE
+#define SS_DISABLE SS_DISABLE
+};
+
+/* Minimum stack size for a signal handler. */
+#define MINSIGSTKSZ 4096
+
+/* System default stack size. */
+#define SIGSTKSZ 16384
+
+
+/* Alternate, preferred interface. */
+typedef struct sigaltstack
+ {
+ void *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+ } stack_t;
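
The stack_t, SIGSTKSZ and SS_* definitions in the new powerpc sigstack.h are consumed through sigaltstack together with SA_ONSTACK. A short usage sketch with deliberately minimal error handling (the message text is arbitrary):

/* Install an alternate signal stack and run one handler on it.  */
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void
handler (int sig)
{
  /* Runs on the alternate stack thanks to SA_ONSTACK.  */
  write (STDOUT_FILENO, "on altstack\n", 12);
}

int
main (void)
{
  stack_t ss;
  ss.ss_sp = malloc (SIGSTKSZ);
  ss.ss_size = SIGSTKSZ;
  ss.ss_flags = 0;
  if (ss.ss_sp == NULL || sigaltstack (&ss, NULL) != 0)
    return 1;

  struct sigaction sa;
  memset (&sa, 0, sizeof sa);
  sa.sa_handler = handler;
  sa.sa_flags = SA_ONSTACK;
  sigaction (SIGUSR1, &sa, NULL);

  raise (SIGUSR1);
  return 0;
}
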
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies
new file mode 100644
index 000000000..70c0d2eda
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/405/fpu
+powerpc/powerpc32/405
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies
new file mode 100644
index 000000000..c3e52c550
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/440/fpu
+powerpc/powerpc32/440
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies
new file mode 100644
index 000000000..2829f9cca
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/464/fpu
+powerpc/powerpc32/464
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies
new file mode 100644
index 000000000..80f917079
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/476/fpu
+powerpc/powerpc32/476
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500/nofpu/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500/nofpu/Implies
new file mode 100644
index 000000000..00365c1cf
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500/nofpu/Implies
@@ -0,0 +1,3 @@
+powerpc/powerpc32/e500/nofpu
+powerpc/nofpu
+powerpc/soft-fp
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
index 926f34207..082d302e8 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
@@ -151,15 +151,15 @@ ENTRY(__CONTEXT_FUNC_NAME)
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
# else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
# endif
andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
@@ -262,8 +262,8 @@ ENTRY(__CONTEXT_FUNC_NAME)
# endif
#endif
-#ifdef __GETCONTEXT_EXTRA
- __GETCONTEXT_EXTRA
+#ifdef __CONTEXT_ENABLE_E500
+ getcontext_e500
#endif
/* We need to set up parms and call sigprocmask which will clobber
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S
index 95902b13f..70e3c9762 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S
@@ -47,7 +47,9 @@ ENTRY(__makecontext)
#ifdef PIC
mflr r0
cfi_register(lr,r0)
- bl 1f
+ /* Use this conditional form of branch and link to avoid destroying
+ the cpu link stack used to predict blr return addresses. */
+ bcl 20,31,1f
1: mflr r6
addi r6,r6,L(exitcode)-1b
mtlr r0
@@ -136,7 +138,9 @@ ENTRY(__novec_makecontext)
#ifdef PIC
mflr r0
cfi_register(lr,r0)
- bl 1f
+ /* Use this conditional form of branch and link to avoid destroying
+ the cpu link stack used to predict blr return addresses. */
+ bcl 20,31,1f
1: mflr r6
addi r6,r6,L(novec_exitcode)-1b
mtlr r0
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/Implies
new file mode 100644
index 000000000..40836b6fb
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/Implies
@@ -0,0 +1,2 @@
+powerpc/nofpu
+powerpc/soft-fp
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/context-e500.h b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/context-e500.h
new file mode 100644
index 000000000..9eb1a9561
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/context-e500.h
@@ -0,0 +1,144 @@
+/* getcontext/setcontext/makecontext support for e500 high parts of registers.
+ Copyright (C) 2006-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _CONTEXT_E500_H
+#define _CONTEXT_E500_H 1
+
+#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT)
+
+# define __CONTEXT_ENABLE_E500 1
+
+/* We follow the kernel's layout, which saves the high parts of the
+ SPE registers in the vregs area, immediately followed by the ACC
+ value (call-clobbered, not handled here) and the SPEFSCR value. */
+
+.macro getcontext_e500
+ la r10,(_UC_VREGS)(r3)
+ evstwwe r0,(0*4)(r10)
+ evstwwe r1,(1*4)(r10)
+ evstwwe r2,(2*4)(r10)
+ evstwwe r3,(3*4)(r10)
+ evstwwe r4,(4*4)(r10)
+ evstwwe r5,(5*4)(r10)
+ evstwwe r6,(6*4)(r10)
+ evstwwe r7,(7*4)(r10)
+ evstwwe r8,(8*4)(r10)
+ evstwwe r9,(9*4)(r10)
+ evstwwe r10,(10*4)(r10)
+ evstwwe r11,(11*4)(r10)
+ evstwwe r12,(12*4)(r10)
+ evstwwe r13,(13*4)(r10)
+ evstwwe r14,(14*4)(r10)
+ evstwwe r15,(15*4)(r10)
+ evstwwe r16,(16*4)(r10)
+ evstwwe r17,(17*4)(r10)
+ evstwwe r18,(18*4)(r10)
+ evstwwe r19,(19*4)(r10)
+ evstwwe r20,(20*4)(r10)
+ evstwwe r21,(21*4)(r10)
+ evstwwe r22,(22*4)(r10)
+ evstwwe r23,(23*4)(r10)
+ evstwwe r24,(24*4)(r10)
+ evstwwe r25,(25*4)(r10)
+ evstwwe r26,(26*4)(r10)
+ evstwwe r27,(27*4)(r10)
+ evstwwe r28,(28*4)(r10)
+ evstwwe r29,(29*4)(r10)
+ evstwwe r30,(30*4)(r10)
+ evstwwe r31,(31*4)(r10)
+ mfspefscr r9
+ stw r9,(34*4)(r10)
+.endm
+
+.macro setcontext_e500
+ lwz r3,_UC_VREGS+(0*4)(r31)
+ evmergelo r0,r3,r0
+ lwz r3,_UC_VREGS+(1*4)(r31)
+ evmergelo r1,r3,r1
+ lwz r3,_UC_VREGS+(2*4)(r31)
+ evmergelo r2,r3,r2
+ lwz r3,_UC_VREGS+(1*4)(r31)
+ evmergelo r1,r3,r1
+ lwz r3,_UC_VREGS+(2*4)(r31)
+ evmergelo r2,r3,r2
+ lwz r3,_UC_VREGS+(3*4)(r31)
+ evmergelo r3,r3,r3
+ lwz r3,_UC_VREGS+(4*4)(r31)
+ evmergelo r4,r3,r4
+ lwz r3,_UC_VREGS+(5*4)(r31)
+ evmergelo r5,r3,r5
+ lwz r3,_UC_VREGS+(6*4)(r31)
+ evmergelo r6,r3,r6
+ lwz r3,_UC_VREGS+(7*4)(r31)
+ evmergelo r7,r3,r7
+ lwz r3,_UC_VREGS+(8*4)(r31)
+ evmergelo r8,r3,r8
+ lwz r3,_UC_VREGS+(9*4)(r31)
+ evmergelo r9,r3,r9
+ lwz r3,_UC_VREGS+(10*4)(r31)
+ evmergelo r10,r3,r10
+ lwz r3,_UC_VREGS+(11*4)(r31)
+ evmergelo r11,r3,r11
+ lwz r3,_UC_VREGS+(12*4)(r31)
+ evmergelo r12,r3,r12
+ lwz r3,_UC_VREGS+(13*4)(r31)
+ evmergelo r13,r3,r13
+ lwz r3,_UC_VREGS+(14*4)(r31)
+ evmergelo r14,r3,r14
+ lwz r3,_UC_VREGS+(15*4)(r31)
+ evmergelo r15,r3,r15
+ lwz r3,_UC_VREGS+(16*4)(r31)
+ evmergelo r16,r3,r16
+ lwz r3,_UC_VREGS+(17*4)(r31)
+ evmergelo r17,r3,r17
+ lwz r3,_UC_VREGS+(18*4)(r31)
+ evmergelo r18,r3,r18
+ lwz r3,_UC_VREGS+(19*4)(r31)
+ evmergelo r19,r3,r19
+ lwz r3,_UC_VREGS+(20*4)(r31)
+ evmergelo r20,r3,r20
+ lwz r3,_UC_VREGS+(21*4)(r31)
+ evmergelo r21,r3,r21
+ lwz r3,_UC_VREGS+(22*4)(r31)
+ evmergelo r22,r3,r22
+ lwz r3,_UC_VREGS+(23*4)(r31)
+ evmergelo r23,r3,r23
+ lwz r3,_UC_VREGS+(24*4)(r31)
+ evmergelo r24,r3,r24
+ lwz r3,_UC_VREGS+(25*4)(r31)
+ evmergelo r25,r3,r25
+ lwz r3,_UC_VREGS+(26*4)(r31)
+ evmergelo r26,r3,r26
+ lwz r3,_UC_VREGS+(27*4)(r31)
+ evmergelo r27,r3,r27
+ lwz r3,_UC_VREGS+(28*4)(r31)
+ evmergelo r28,r3,r28
+ lwz r3,_UC_VREGS+(29*4)(r31)
+ evmergelo r29,r3,r29
+ lwz r3,_UC_VREGS+(30*4)(r31)
+ evmergelo r30,r3,r30
+ lwz r3,_UC_VREGS+(31*4)(r31)
+ evmergelo r31,r3,r31
+ lwz r3,_UC_VREGS+(34*4)(r31)
+ mtspefscr r3
+.endm
+#else
+# undef __CONTEXT_ENABLE_E500
+#endif
+
+#endif /* context-e500.h */
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/getcontext.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/getcontext.S
new file mode 100644
index 000000000..8bc3c7a43
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/getcontext.S
@@ -0,0 +1,60 @@
+/* Save current context.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+#include <shlib-compat.h>
+
+#define __ASSEMBLY__
+#include <asm/ptrace.h>
+#include "ucontext_i.h"
+
+#include <context-e500.h>
+
+#define __CONTEXT_FUNC_NAME __getcontext
+#undef __CONTEXT_ENABLE_FPRS
+#undef __CONTEXT_ENABLE_VRS
+
+#include "getcontext-common.S"
+
+versioned_symbol (libc, __getcontext, getcontext, GLIBC_2_3_4)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
+
+/* For the nofpu case the old/new versions are the same function. */
+strong_alias (__getcontext, __novec_getcontext)
+
+compat_symbol (libc, __novec_getcontext, getcontext, GLIBC_2_3_3)
+
+#endif
+
+#if SHLIB_COMPAT (libc, GLIBC_2_1, GLIBC_2_3_3)
+
+#define _ERRNO_H 1
+#include <bits/errno.h>
+
+ compat_text_section
+ENTRY (__getcontext_stub)
+ li r3,ENOSYS
+ b __syscall_error@local
+END (__getcontext_stub)
+ .previous
+
+compat_symbol (libc, __getcontext_stub, getcontext, GLIBC_2_1)
+
+#endif
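
The nofpu getcontext.S above builds __getcontext from getcontext-common.S with FPR and VR handling disabled and wires up the versioned and compat symbols. What it ultimately has to support is the ordinary <ucontext.h> API; a small sketch of the getcontext/makecontext/swapcontext flow (the 64 KiB stack size is an arbitrary choice for the example):

/* Run a trivial coroutine and return to main via uc_link.  */
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

static ucontext_t main_ctx, coro_ctx;

static void
coro (void)
{
  puts ("in coroutine");
  /* Returning here continues in uc_link, i.e. back in main_ctx.  */
}

int
main (void)
{
  if (getcontext (&coro_ctx) != 0)
    return 1;
  coro_ctx.uc_stack.ss_sp = malloc (64 * 1024);
  coro_ctx.uc_stack.ss_size = 64 * 1024;
  coro_ctx.uc_link = &main_ctx;
  makecontext (&coro_ctx, coro, 0);

  swapcontext (&main_ctx, &coro_ctx);
  puts ("back in main");
  return 0;
}
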
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data
new file mode 100644
index 000000000..fde53bf33
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data
@@ -0,0 +1,67 @@
+blkcnt64_t:x
+blkcnt_t:l
+blksize_t:l
+caddr_t:Pc
+clockid_t:i
+clock_t:l
+daddr_t:i
+dev_t:y
+fd_mask:l
+fsblkcnt64_t:y
+fsblkcnt_t:m
+fsfilcnt64_t:y
+fsfilcnt_t:m
+fsid_t:8__fsid_t
+gid_t:j
+id_t:j
+ino64_t:y
+ino_t:m
+int16_t:s
+int32_t:i
+int64_t:x
+int8_t:a
+intptr_t:i
+key_t:i
+loff_t:x
+mode_t:j
+nlink_t:j
+off64_t:x
+off_t:l
+pid_t:i
+pthread_attr_t:14pthread_attr_t
+pthread_barrier_t:17pthread_barrier_t
+pthread_barrierattr_t:21pthread_barrierattr_t
+pthread_cond_t:14pthread_cond_t
+pthread_condattr_t:18pthread_condattr_t
+pthread_key_t:j
+pthread_mutex_t:15pthread_mutex_t
+pthread_mutexattr_t:19pthread_mutexattr_t
+pthread_once_t:i
+pthread_rwlock_t:16pthread_rwlock_t
+pthread_rwlockattr_t:20pthread_rwlockattr_t
+pthread_spinlock_t:i
+pthread_t:m
+quad_t:x
+register_t:i
+rlim64_t:y
+rlim_t:m
+sigset_t:10__sigset_t
+size_t:j
+socklen_t:j
+ssize_t:i
+suseconds_t:l
+time_t:l
+u_char:h
+uid_t:j
+uint:j
+u_int:j
+u_int16_t:t
+u_int32_t:j
+u_int64_t:y
+u_int8_t:h
+ulong:m
+u_long:m
+u_quad_t:y
+useconds_t:j
+ushort:t
+u_short:t
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/ld.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/ld.abilist
new file mode 100644
index 000000000..d71611f02
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/ld.abilist
@@ -0,0 +1,17 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ __libc_memalign F
+ _r_debug D 0x14
+ calloc F
+ free F
+ malloc F
+ realloc F
+GLIBC_2.1
+ GLIBC_2.1 A
+ __libc_stack_end D 0x4
+ _dl_mcount F
+GLIBC_2.3
+ GLIBC_2.3 A
+ __tls_get_addr F
+GLIBC_2.4
+ GLIBC_2.4 A
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libBrokenLocale.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libBrokenLocale.abilist
new file mode 100644
index 000000000..f4ca37f44
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libBrokenLocale.abilist
@@ -0,0 +1,3 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ __ctype_get_mb_cur_max F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libanl.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libanl.abilist
new file mode 100644
index 000000000..c9755d8a3
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libanl.abilist
@@ -0,0 +1,6 @@
+GLIBC_2.2.3
+ GLIBC_2.2.3 A
+ gai_cancel F
+ gai_error F
+ gai_suspend F
+ getaddrinfo_a F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist
new file mode 100644
index 000000000..9b6d66374
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist
@@ -0,0 +1,2523 @@
+GCC_3.0
+ GCC_3.0 A
+ _Unwind_Find_FDE F
+ __deregister_frame_info_bases F
+ __register_frame_info_bases F
+ __register_frame_info_table_bases F
+GLIBC_2.0
+ GLIBC_2.0 A
+ _IO_adjust_column F
+ _IO_default_doallocate F
+ _IO_default_finish F
+ _IO_default_pbackfail F
+ _IO_default_uflow F
+ _IO_default_xsgetn F
+ _IO_default_xsputn F
+ _IO_do_write F
+ _IO_doallocbuf F
+ _IO_fclose F
+ _IO_fdopen F
+ _IO_feof F
+ _IO_ferror F
+ _IO_fflush F
+ _IO_fgetpos F
+ _IO_fgets F
+ _IO_file_attach F
+ _IO_file_close F
+ _IO_file_close_it F
+ _IO_file_doallocate F
+ _IO_file_fopen F
+ _IO_file_init F
+ _IO_file_jumps D 0x54
+ _IO_file_open F
+ _IO_file_overflow F
+ _IO_file_read F
+ _IO_file_seek F
+ _IO_file_seekoff F
+ _IO_file_setbuf F
+ _IO_file_stat F
+ _IO_file_sync F
+ _IO_file_underflow F
+ _IO_file_write F
+ _IO_file_xsputn F
+ _IO_flockfile F
+ _IO_flush_all F
+ _IO_flush_all_linebuffered F
+ _IO_fopen F
+ _IO_fprintf F
+ _IO_fputs F
+ _IO_fread F
+ _IO_free_backup_area F
+ _IO_fsetpos F
+ _IO_ftell F
+ _IO_ftrylockfile F
+ _IO_funlockfile F
+ _IO_fwrite F
+ _IO_getc F
+ _IO_getline F
+ _IO_gets F
+ _IO_init F
+ _IO_init_marker F
+ _IO_link_in F
+ _IO_list_all D 0x4
+ _IO_marker_delta F
+ _IO_marker_difference F
+ _IO_padn F
+ _IO_peekc_locked F
+ _IO_popen F
+ _IO_printf F
+ _IO_proc_close F
+ _IO_proc_open F
+ _IO_putc F
+ _IO_puts F
+ _IO_remove_marker F
+ _IO_seekmark F
+ _IO_seekoff F
+ _IO_seekpos F
+ _IO_setb F
+ _IO_setbuffer F
+ _IO_setvbuf F
+ _IO_sgetn F
+ _IO_sprintf F
+ _IO_sputbackc F
+ _IO_sscanf F
+ _IO_stderr_ D 0x50
+ _IO_stdin_ D 0x50
+ _IO_stdout_ D 0x50
+ _IO_str_init_readonly F
+ _IO_str_init_static F
+ _IO_str_overflow F
+ _IO_str_pbackfail F
+ _IO_str_seekoff F
+ _IO_str_underflow F
+ _IO_sungetc F
+ _IO_switch_to_get_mode F
+ _IO_un_link F
+ _IO_ungetc F
+ _IO_unsave_markers F
+ _IO_vfprintf F
+ _IO_vfscanf F
+ _IO_vsprintf F
+ __adjtimex F
+ __after_morecore_hook D 0x4
+ __argz_count F
+ __argz_next F
+ __argz_stringify F
+ __ashldi3 F
+ __ashrdi3 F
+ __assert_fail F
+ __assert_perror_fail F
+ __bsd_getpgrp F
+ __bzero F
+ __check_rhosts_file D 0x4
+ __clone F
+ __close F
+ __cmpdi2 F
+ __cmsg_nxthdr F
+ __connect F
+ __ctype32_b D 0x4
+ __ctype_b D 0x4
+ __ctype_get_mb_cur_max F
+ __ctype_tolower D 0x4
+ __ctype_toupper D 0x4
+ __curbrk D 0x4
+ __daylight D 0x4
+ __dcgettext F
+ __default_morecore F
+ __deregister_frame F
+ __deregister_frame_info F
+ __dgettext F
+ __divdi3 F
+ __dup2 F
+ __environ D 0x4
+ __errno_location F
+ __fcntl F
+ __ffs F
+ __finite F
+ __finitef F
+ __finitel F
+ __fixdfdi F
+ __fixsfdi F
+ __fixunsdfdi F
+ __fixunssfdi F
+ __floatdidf F
+ __floatdisf F
+ __fork F
+ __fpu_control D 0x4
+ __frame_state_for F
+ __free_hook D 0x4
+ __fxstat F
+ __getdelim F
+ __getpagesize F
+ __getpgid F
+ __getpid F
+ __gettimeofday F
+ __gmtime_r F
+ __h_errno_location F
+ __isinf F
+ __isinff F
+ __isinfl F
+ __isnan F
+ __isnanf F
+ __isnanl F
+ __iswctype F
+ __ivaliduser F
+ __libc_calloc F
+ __libc_free F
+ __libc_init_first F
+ __libc_mallinfo F
+ __libc_malloc F
+ __libc_mallopt F
+ __libc_memalign F
+ __libc_pvalloc F
+ __libc_realloc F
+ __libc_start_main F
+ __libc_valloc F
+ __lseek F
+ __lshrdi3 F
+ __lxstat F
+ __malloc_hook D 0x4
+ __malloc_initialize_hook D 0x4
+ __mbrlen F
+ __mbrtowc F
+ __memalign_hook D 0x4
+ __mempcpy F
+ __moddi3 F
+ __monstartup F
+ __morecore D 0x4
+ __nss_configure_lookup F
+ __nss_database_lookup F
+ __nss_group_lookup F
+ __nss_hosts_lookup F
+ __nss_next F
+ __nss_passwd_lookup F
+ __open F
+ __overflow F
+ __pipe F
+ __printf_fp F
+ __profile_frequency F
+ __progname D 0x4
+ __progname_full D 0x4
+ __rcmd_errstr D 0x4
+ __read F
+ __realloc_hook D 0x4
+ __register_frame F
+ __register_frame_info F
+ __register_frame_info_table F
+ __register_frame_table F
+ __res_randomid F
+ __sbrk F
+ __sched_get_priority_max F
+ __sched_get_priority_min F
+ __sched_getparam F
+ __sched_getscheduler F
+ __sched_setscheduler F
+ __sched_yield F
+ __secure_getenv F
+ __select F
+ __send F
+ __setpgid F
+ __sigaction F
+ __sigaddset F
+ __sigdelset F
+ __sigismember F
+ __sigpause F
+ __sigsetjmp F
+ __stpcpy F
+ __stpncpy F
+ __strcasecmp F
+ __strdup F
+ __strerror_r F
+ __strtod_internal F
+ __strtof_internal F
+ __strtok_r F
+ __strtol_internal F
+ __strtold_internal F
+ __strtoll_internal F
+ __strtoq_internal F
+ __strtoul_internal F
+ __strtoull_internal F
+ __strtouq_internal F
+ __sysv_signal F
+ __timezone D 0x4
+ __tzname D 0x8
+ __ucmpdi2 F
+ __udivdi3 F
+ __uflow F
+ __umoddi3 F
+ __underflow F
+ __vfscanf F
+ __vsnprintf F
+ __vsscanf F
+ __wait F
+ __waitpid F
+ __wcstod_internal F
+ __wcstof_internal F
+ __wcstol_internal F
+ __wcstold_internal F
+ __wcstoll_internal F
+ __wcstoul_internal F
+ __wcstoull_internal F
+ __write F
+ __xmknod F
+ __xpg_basename F
+ __xstat F
+ _environ D 0x4
+ _exit F
+ _libc_intl_domainname D 0x5
+ _longjmp F
+ _mcleanup F
+ _mcount F
+ _nl_default_dirname D 0x12
+ _nl_domain_bindings D 0x4
+ _nl_msg_cat_cntr D 0x4
+ _null_auth D 0xc
+ _obstack D 0x4
+ _obstack_allocated_p F
+ _obstack_begin F
+ _obstack_begin_1 F
+ _obstack_free F
+ _obstack_memory_used F
+ _obstack_newchunk F
+ _res D 0x200
+ _rpc_dtablesize F
+ _seterr_reply F
+ _setjmp F
+ _sys_errlist D 0x1ec
+ _sys_nerr D 0x4
+ _sys_siglist D 0x80
+ _tolower F
+ _toupper F
+ a64l F
+ abort F
+ abs F
+ accept F
+ access F
+ acct F
+ addmntent F
+ adjtime F
+ adjtimex F
+ advance F
+ alarm F
+ alphasort F
+ argz_add F
+ argz_add_sep F
+ argz_append F
+ argz_count F
+ argz_create F
+ argz_create_sep F
+ argz_delete F
+ argz_extract F
+ argz_insert F
+ argz_next F
+ argz_replace F
+ argz_stringify F
+ asctime F
+ asctime_r F
+ asprintf F
+ atexit F
+ atof F
+ atoi F
+ atol F
+ atoll F
+ authnone_create F
+ authunix_create F
+ authunix_create_default F
+ basename F
+ bcmp F
+ bcopy F
+ bdflush F
+ bind F
+ bindresvport F
+ bindtextdomain F
+ brk F
+ bsd_signal F
+ bsearch F
+ btowc F
+ bzero F
+ calloc F
+ callrpc F
+ canonicalize_file_name F
+ catclose F
+ catgets F
+ catopen F
+ cfgetispeed F
+ cfgetospeed F
+ cfmakeraw F
+ cfree F
+ cfsetispeed F
+ cfsetospeed F
+ cfsetspeed F
+ chdir F
+ chflags F
+ chmod F
+ chown F
+ chroot F
+ clearenv F
+ clearerr F
+ clearerr_unlocked F
+ clnt_broadcast F
+ clnt_create F
+ clnt_pcreateerror F
+ clnt_perrno F
+ clnt_perror F
+ clnt_spcreateerror F
+ clnt_sperrno F
+ clnt_sperror F
+ clntraw_create F
+ clnttcp_create F
+ clntudp_bufcreate F
+ clntudp_create F
+ clock F
+ clone F
+ close F
+ closedir F
+ closelog F
+ confstr F
+ connect F
+ copysign F
+ copysignf F
+ copysignl F
+ creat F
+ create_module F
+ ctermid F
+ ctime F
+ ctime_r F
+ cuserid F
+ daemon F
+ daylight D 0x4
+ dcgettext F
+ delete_module F
+ dgettext F
+ difftime F
+ dirfd F
+ dirname F
+ div F
+ dprintf F
+ drand48 F
+ drand48_r F
+ dup F
+ dup2 F
+ dysize F
+ ecvt F
+ ecvt_r F
+ endaliasent F
+ endfsent F
+ endgrent F
+ endhostent F
+ endmntent F
+ endnetent F
+ endnetgrent F
+ endprotoent F
+ endpwent F
+ endrpcent F
+ endservent F
+ endspent F
+ endttyent F
+ endusershell F
+ endutent F
+ environ D 0x4
+ envz_add F
+ envz_entry F
+ envz_get F
+ envz_merge F
+ envz_remove F
+ envz_strip F
+ erand48 F
+ erand48_r F
+ err F
+ error F
+ error_at_line F
+ error_message_count D 0x4
+ error_one_per_line D 0x4
+ error_print_progname D 0x4
+ errx F
+ ether_aton F
+ ether_aton_r F
+ ether_hostton F
+ ether_line F
+ ether_ntoa F
+ ether_ntoa_r F
+ ether_ntohost F
+ euidaccess F
+ execl F
+ execle F
+ execlp F
+ execv F
+ execve F
+ execvp F
+ exit F
+ fchdir F
+ fchflags F
+ fchmod F
+ fchown F
+ fclose F
+ fcloseall F
+ fcntl F
+ fcvt F
+ fcvt_r F
+ fdatasync F
+ fdopen F
+ feof F
+ feof_unlocked F
+ ferror F
+ ferror_unlocked F
+ fexecve F
+ fflush F
+ fflush_unlocked F
+ ffs F
+ fgetc F
+ fgetgrent F
+ fgetgrent_r F
+ fgetpos F
+ fgetpwent F
+ fgetpwent_r F
+ fgets F
+ fgetspent F
+ fgetspent_r F
+ fileno F
+ fileno_unlocked F
+ finite F
+ finitef F
+ finitel F
+ flock F
+ flockfile F
+ fnmatch F
+ fopen F
+ fopencookie F
+ fork F
+ fpathconf F
+ fprintf F
+ fputc F
+ fputc_unlocked F
+ fputs F
+ fread F
+ free F
+ freeaddrinfo F
+ freopen F
+ frexp F
+ frexpf F
+ frexpl F
+ fscanf F
+ fseek F
+ fsetpos F
+ fstatfs F
+ fsync F
+ ftell F
+ ftime F
+ ftok F
+ ftruncate F
+ ftrylockfile F
+ fts_children F
+ fts_close F
+ fts_open F
+ fts_read F
+ fts_set F
+ ftw F
+ funlockfile F
+ fwrite F
+ gcvt F
+ get_avphys_pages F
+ get_current_dir_name F
+ get_kernel_syms F
+ get_myaddress F
+ get_nprocs F
+ get_nprocs_conf F
+ get_phys_pages F
+ getaddrinfo F
+ getaliasbyname F
+ getaliasbyname_r F
+ getaliasent F
+ getaliasent_r F
+ getc F
+ getc_unlocked F
+ getchar F
+ getchar_unlocked F
+ getcwd F
+ getdelim F
+ getdirentries F
+ getdomainname F
+ getdtablesize F
+ getegid F
+ getenv F
+ geteuid F
+ getfsent F
+ getfsfile F
+ getfsspec F
+ getgid F
+ getgrent F
+ getgrent_r F
+ getgrgid F
+ getgrgid_r F
+ getgrnam F
+ getgrnam_r F
+ getgroups F
+ gethostbyaddr F
+ gethostbyaddr_r F
+ gethostbyname F
+ gethostbyname2 F
+ gethostbyname2_r F
+ gethostbyname_r F
+ gethostent F
+ gethostent_r F
+ gethostid F
+ gethostname F
+ getitimer F
+ getline F
+ getlogin F
+ getlogin_r F
+ getmntent F
+ getmntent_r F
+ getnetbyaddr F
+ getnetbyaddr_r F
+ getnetbyname F
+ getnetbyname_r F
+ getnetent F
+ getnetent_r F
+ getnetgrent F
+ getnetgrent_r F
+ getopt F
+ getopt_long F
+ getopt_long_only F
+ getpagesize F
+ getpass F
+ getpeername F
+ getpgid F
+ getpgrp F
+ getpid F
+ getppid F
+ getpriority F
+ getprotobyname F
+ getprotobyname_r F
+ getprotobynumber F
+ getprotobynumber_r F
+ getprotoent F
+ getprotoent_r F
+ getpublickey F
+ getpw F
+ getpwent F
+ getpwent_r F
+ getpwnam F
+ getpwnam_r F
+ getpwuid F
+ getpwuid_r F
+ getresgid F
+ getresuid F
+ getrlimit F
+ getrpcbyname F
+ getrpcbyname_r F
+ getrpcbynumber F
+ getrpcbynumber_r F
+ getrpcent F
+ getrpcent_r F
+ getrpcport F
+ getrusage F
+ gets F
+ getsecretkey F
+ getservbyname F
+ getservbyname_r F
+ getservbyport F
+ getservbyport_r F
+ getservent F
+ getservent_r F
+ getsid F
+ getsockname F
+ getsockopt F
+ getspent F
+ getspent_r F
+ getspnam F
+ getspnam_r F
+ getsubopt F
+ gettext F
+ gettimeofday F
+ getttyent F
+ getttynam F
+ getuid F
+ getusershell F
+ getutent F
+ getutent_r F
+ getutid F
+ getutid_r F
+ getutline F
+ getutline_r F
+ getw F
+ getwd F
+ glob F
+ glob_pattern_p F
+ globfree F
+ gmtime F
+ gmtime_r F
+ group_member F
+ gsignal F
+ gtty F
+ h_errlist D 0x14
+ h_nerr D 0x4
+ hasmntopt F
+ hcreate F
+ hcreate_r F
+ hdestroy F
+ hdestroy_r F
+ herror F
+ hsearch F
+ hsearch_r F
+ hstrerror F
+ htonl F
+ htons F
+ index F
+ inet_addr F
+ inet_aton F
+ inet_lnaof F
+ inet_makeaddr F
+ inet_netof F
+ inet_network F
+ inet_nsap_addr F
+ inet_nsap_ntoa F
+ inet_ntoa F
+ inet_ntop F
+ inet_pton F
+ init_module F
+ initgroups F
+ initstate F
+ initstate_r F
+ innetgr F
+ insque F
+ ioctl F
+ iruserok F
+ isalnum F
+ isalpha F
+ isascii F
+ isatty F
+ isblank F
+ iscntrl F
+ isdigit F
+ isfdtype F
+ isgraph F
+ isinf F
+ isinff F
+ isinfl F
+ islower F
+ isnan F
+ isnanf F
+ isnanl F
+ isprint F
+ ispunct F
+ isspace F
+ isupper F
+ iswalnum F
+ iswalpha F
+ iswcntrl F
+ iswctype F
+ iswdigit F
+ iswgraph F
+ iswlower F
+ iswprint F
+ iswpunct F
+ iswspace F
+ iswupper F
+ iswxdigit F
+ isxdigit F
+ jrand48 F
+ jrand48_r F
+ kill F
+ killpg F
+ klogctl F
+ l64a F
+ labs F
+ lchown F
+ lckpwdf F
+ lcong48 F
+ lcong48_r F
+ ldexp F
+ ldexpf F
+ ldexpl F
+ ldiv F
+ lfind F
+ link F
+ listen F
+ llabs F
+ lldiv F
+ llseek F
+ loc1 D 0x4
+ loc2 D 0x4
+ localeconv F
+ localtime F
+ localtime_r F
+ lockf F
+ locs D 0x4
+ longjmp F
+ lrand48 F
+ lrand48_r F
+ lsearch F
+ lseek F
+ madvise F
+ mallinfo F
+ malloc F
+ malloc_get_state F
+ malloc_set_state F
+ malloc_stats F
+ malloc_trim F
+ malloc_usable_size F
+ mallopt F
+ mallwatch D 0x4
+ mblen F
+ mbrlen F
+ mbrtowc F
+ mbsinit F
+ mbsnrtowcs F
+ mbsrtowcs F
+ mbstowcs F
+ mbtowc F
+ mcheck F
+ memalign F
+ memccpy F
+ memchr F
+ memcmp F
+ memcpy F
+ memfrob F
+ memmem F
+ memmove F
+ memset F
+ mkdir F
+ mkfifo F
+ mkstemp F
+ mktemp F
+ mktime F
+ mlock F
+ mlockall F
+ mmap F
+ modf F
+ modff F
+ modfl F
+ monstartup F
+ mount F
+ mprobe F
+ mprotect F
+ mrand48 F
+ mrand48_r F
+ mremap F
+ msgctl F
+ msgget F
+ msgrcv F
+ msgsnd F
+ msync F
+ mtrace F
+ munlock F
+ munlockall F
+ munmap F
+ muntrace F
+ nanosleep F
+ nfsservctl F
+ nice F
+ nl_langinfo F
+ nrand48 F
+ nrand48_r F
+ ntohl F
+ ntohs F
+ obstack_alloc_failed_handler D 0x4
+ obstack_exit_failure D 0x4
+ obstack_free F
+ obstack_printf F
+ obstack_vprintf F
+ on_exit F
+ open F
+ open_memstream F
+ opendir F
+ openlog F
+ optarg D 0x4
+ opterr D 0x4
+ optind D 0x4
+ optopt D 0x4
+ parse_printf_format F
+ pathconf F
+ pause F
+ pclose F
+ perror F
+ personality F
+ pipe F
+ pmap_getmaps F
+ pmap_getport F
+ pmap_rmtcall F
+ pmap_set F
+ pmap_unset F
+ poll F
+ popen F
+ prctl F
+ printf F
+ profil F
+ program_invocation_name D 0x4
+ program_invocation_short_name D 0x4
+ pselect F
+ psignal F
+ pthread_attr_destroy F
+ pthread_attr_getdetachstate F
+ pthread_attr_getinheritsched F
+ pthread_attr_getschedparam F
+ pthread_attr_getschedpolicy F
+ pthread_attr_getscope F
+ pthread_attr_init F
+ pthread_attr_setdetachstate F
+ pthread_attr_setinheritsched F
+ pthread_attr_setschedparam F
+ pthread_attr_setschedpolicy F
+ pthread_attr_setscope F
+ pthread_cond_broadcast F
+ pthread_cond_destroy F
+ pthread_cond_init F
+ pthread_cond_signal F
+ pthread_cond_timedwait F
+ pthread_cond_wait F
+ pthread_condattr_destroy F
+ pthread_condattr_init F
+ pthread_equal F
+ pthread_exit F
+ pthread_getschedparam F
+ pthread_mutex_destroy F
+ pthread_mutex_init F
+ pthread_mutex_lock F
+ pthread_mutex_unlock F
+ pthread_self F
+ pthread_setcancelstate F
+ pthread_setcanceltype F
+ pthread_setschedparam F
+ ptrace F
+ putc F
+ putc_unlocked F
+ putchar F
+ putchar_unlocked F
+ putenv F
+ putpwent F
+ puts F
+ putspent F
+ pututline F
+ putw F
+ pvalloc F
+ qecvt F
+ qecvt_r F
+ qfcvt F
+ qfcvt_r F
+ qgcvt F
+ qsort F
+ query_module F
+ quotactl F
+ raise F
+ rand F
+ rand_r F
+ random F
+ random_r F
+ rcmd F
+ re_comp F
+ re_compile_fastmap F
+ re_compile_pattern F
+ re_exec F
+ re_match F
+ re_match_2 F
+ re_max_failures D 0x4
+ re_search F
+ re_search_2 F
+ re_set_registers F
+ re_set_syntax F
+ re_syntax_options D 0x4
+ read F
+ readdir F
+ readdir_r F
+ readlink F
+ readv F
+ realloc F
+ realpath F
+ reboot F
+ recv F
+ recvfrom F
+ recvmsg F
+ regcomp F
+ regerror F
+ regexec F
+ regfree F
+ register_printf_function F
+ registerrpc F
+ remove F
+ remque F
+ rename F
+ res_init F
+ revoke F
+ rewind F
+ rewinddir F
+ rexec F
+ rexecoptions D 0x4
+ rindex F
+ rmdir F
+ rpc_createerr D 0x10
+ rpmatch F
+ rresvport F
+ ruserok F
+ ruserpass F
+ sbrk F
+ scalbn F
+ scalbnf F
+ scalbnl F
+ scandir F
+ scanf F
+ sched_get_priority_max F
+ sched_get_priority_min F
+ sched_getparam F
+ sched_getscheduler F
+ sched_rr_get_interval F
+ sched_setparam F
+ sched_setscheduler F
+ sched_yield F
+ seed48 F
+ seed48_r F
+ seekdir F
+ select F
+ semctl F
+ semget F
+ semop F
+ send F
+ sendmsg F
+ sendto F
+ setaliasent F
+ setbuf F
+ setbuffer F
+ setcontext F
+ setdomainname F
+ setegid F
+ setenv F
+ seteuid F
+ setfsent F
+ setfsgid F
+ setfsuid F
+ setgid F
+ setgrent F
+ setgroups F
+ sethostent F
+ sethostid F
+ sethostname F
+ setitimer F
+ setjmp F
+ setlinebuf F
+ setlocale F
+ setlogin F
+ setlogmask F
+ setmntent F
+ setnetent F
+ setnetgrent F
+ setpgid F
+ setpgrp F
+ setpriority F
+ setprotoent F
+ setpwent F
+ setregid F
+ setresgid F
+ setresuid F
+ setreuid F
+ setrlimit F
+ setrpcent F
+ setservent F
+ setsid F
+ setsockopt F
+ setspent F
+ setstate F
+ setstate_r F
+ settimeofday F
+ setttyent F
+ setuid F
+ setusershell F
+ setutent F
+ setvbuf F
+ sgetspent F
+ sgetspent_r F
+ shmat F
+ shmctl F
+ shmdt F
+ shmget F
+ shutdown F
+ sigaction F
+ sigaddset F
+ sigaltstack F
+ sigandset F
+ sigblock F
+ sigdelset F
+ sigemptyset F
+ sigfillset F
+ siggetmask F
+ siginterrupt F
+ sigisemptyset F
+ sigismember F
+ siglongjmp F
+ signal F
+ sigorset F
+ sigpause F
+ sigpending F
+ sigprocmask F
+ sigreturn F
+ sigsetmask F
+ sigstack F
+ sigsuspend F
+ sigvec F
+ sigwait F
+ sleep F
+ snprintf F
+ socket F
+ socketpair F
+ sprintf F
+ srand F
+ srand48 F
+ srand48_r F
+ srandom F
+ srandom_r F
+ sscanf F
+ ssignal F
+ sstk F
+ statfs F
+ stderr D 0x4
+ stdin D 0x4
+ stdout D 0x4
+ step F
+ stime F
+ stpcpy F
+ stpncpy F
+ strcasecmp F
+ strcat F
+ strchr F
+ strcmp F
+ strcoll F
+ strcpy F
+ strcspn F
+ strdup F
+ strerror F
+ strerror_r F
+ strfmon F
+ strfry F
+ strftime F
+ strlen F
+ strncasecmp F
+ strncat F
+ strncmp F
+ strncpy F
+ strndup F
+ strnlen F
+ strpbrk F
+ strptime F
+ strrchr F
+ strsep F
+ strsignal F
+ strspn F
+ strstr F
+ strtod F
+ strtof F
+ strtok F
+ strtok_r F
+ strtol F
+ strtold F
+ strtoll F
+ strtoq F
+ strtoul F
+ strtoull F
+ strtouq F
+ strxfrm F
+ stty F
+ svc_exit F
+ svc_fdset D 0x80
+ svc_getreq F
+ svc_getreqset F
+ svc_register F
+ svc_run F
+ svc_sendreply F
+ svc_unregister F
+ svcauthdes_stats D 0xc
+ svcerr_auth F
+ svcerr_decode F
+ svcerr_noproc F
+ svcerr_noprog F
+ svcerr_progvers F
+ svcerr_systemerr F
+ svcerr_weakauth F
+ svcfd_create F
+ svcraw_create F
+ svctcp_create F
+ svcudp_bufcreate F
+ svcudp_create F
+ svcudp_enablecache F
+ swab F
+ swapoff F
+ swapon F
+ symlink F
+ sync F
+ sys_errlist D 0x1ec
+ sys_nerr D 0x4
+ sys_sigabbrev D 0x80
+ sys_siglist D 0x80
+ syscall F
+ sysconf F
+ sysctl F
+ sysinfo F
+ syslog F
+ system F
+ tcdrain F
+ tcflow F
+ tcflush F
+ tcgetattr F
+ tcgetpgrp F
+ tcsendbreak F
+ tcsetattr F
+ tcsetpgrp F
+ tdelete F
+ telldir F
+ tempnam F
+ textdomain F
+ tfind F
+ time F
+ timegm F
+ timelocal F
+ times F
+ timezone D 0x4
+ tmpfile F
+ tmpnam F
+ tmpnam_r F
+ toascii F
+ tolower F
+ toupper F
+ towctrans F
+ towlower F
+ towupper F
+ tr_break F
+ truncate F
+ tsearch F
+ ttyname F
+ ttyname_r F
+ ttyslot F
+ twalk F
+ tzname D 0x8
+ tzset F
+ ualarm F
+ ulckpwdf F
+ ulimit F
+ umask F
+ umount F
+ uname F
+ ungetc F
+ unlink F
+ unsetenv F
+ updwtmp F
+ uselib F
+ usleep F
+ ustat F
+ utime F
+ utimes F
+ utmpname F
+ valloc F
+ vasprintf F
+ vdprintf F
+ verr F
+ verrx F
+ vfork F
+ vfprintf F
+ vfscanf F
+ vhangup F
+ vlimit F
+ vprintf F
+ vscanf F
+ vsnprintf F
+ vsprintf F
+ vsscanf F
+ vsyslog F
+ vtimes F
+ vwarn F
+ vwarnx F
+ wait F
+ wait3 F
+ wait4 F
+ waitpid F
+ warn F
+ warnx F
+ wcpcpy F
+ wcpncpy F
+ wcrtomb F
+ wcscat F
+ wcschr F
+ wcscmp F
+ wcscoll F
+ wcscpy F
+ wcscspn F
+ wcsdup F
+ wcslen F
+ wcsncat F
+ wcsncmp F
+ wcsncpy F
+ wcsnrtombs F
+ wcspbrk F
+ wcsrchr F
+ wcsrtombs F
+ wcsspn F
+ wcsstr F
+ wcstod F
+ wcstof F
+ wcstok F
+ wcstol F
+ wcstold F
+ wcstombs F
+ wcstoq F
+ wcstoul F
+ wcstouq F
+ wcswidth F
+ wcsxfrm F
+ wctob F
+ wctomb F
+ wctrans F
+ wctype F
+ wcwidth F
+ wmemchr F
+ wmemcmp F
+ wmemcpy F
+ wmemmove F
+ wmemset F
+ write F
+ writev F
+ xdr_accepted_reply F
+ xdr_array F
+ xdr_authunix_parms F
+ xdr_bool F
+ xdr_bytes F
+ xdr_callhdr F
+ xdr_callmsg F
+ xdr_char F
+ xdr_cryptkeyarg F
+ xdr_cryptkeyarg2 F
+ xdr_cryptkeyres F
+ xdr_des_block F
+ xdr_double F
+ xdr_enum F
+ xdr_float F
+ xdr_free F
+ xdr_int F
+ xdr_key_netstarg F
+ xdr_key_netstres F
+ xdr_keybuf F
+ xdr_keystatus F
+ xdr_long F
+ xdr_netobj F
+ xdr_opaque F
+ xdr_opaque_auth F
+ xdr_pmap F
+ xdr_pmaplist F
+ xdr_pointer F
+ xdr_reference F
+ xdr_rejected_reply F
+ xdr_replymsg F
+ xdr_rmtcall_args F
+ xdr_rmtcallres F
+ xdr_short F
+ xdr_string F
+ xdr_u_char F
+ xdr_u_int F
+ xdr_u_long F
+ xdr_u_short F
+ xdr_union F
+ xdr_vector F
+ xdr_void F
+ xdr_wrapstring F
+ xdrmem_create F
+ xdrrec_create F
+ xdrrec_endofrecord F
+ xdrrec_eof F
+ xdrrec_skiprecord F
+ xdrstdio_create F
+ xencrypt F
+ xprt_register F
+ xprt_unregister F
+GLIBC_2.1
+ GLIBC_2.1 A
+ _IO_2_1_stderr_ D 0xa0
+ _IO_2_1_stdin_ D 0xa0
+ _IO_2_1_stdout_ D 0xa0
+ _IO_do_write F
+ _IO_fclose F
+ _IO_fdopen F
+ _IO_fgetpos64 F
+ _IO_file_attach F
+ _IO_file_close_it F
+ _IO_file_finish F
+ _IO_file_fopen F
+ _IO_file_init F
+ _IO_file_overflow F
+ _IO_file_seekoff F
+ _IO_file_setbuf F
+ _IO_file_sync F
+ _IO_file_underflow F
+ _IO_file_write F
+ _IO_file_xsputn F
+ _IO_fopen F
+ _IO_fsetpos64 F
+ _IO_getline_info F
+ _IO_popen F
+ _IO_proc_close F
+ _IO_proc_open F
+ __asprintf F
+ __backtrace F
+ __backtrace_symbols F
+ __backtrace_symbols_fd F
+ __duplocale F
+ __freelocale F
+ __fxstat64 F
+ __isalnum_l F
+ __isalpha_l F
+ __isascii_l F
+ __isblank_l F
+ __iscntrl_l F
+ __isdigit_l F
+ __isgraph_l F
+ __islower_l F
+ __isprint_l F
+ __ispunct_l F
+ __isspace_l F
+ __isupper_l F
+ __iswalnum_l F
+ __iswalpha_l F
+ __iswblank_l F
+ __iswcntrl_l F
+ __iswctype_l F
+ __iswdigit_l F
+ __iswgraph_l F
+ __iswlower_l F
+ __iswprint_l F
+ __iswpunct_l F
+ __iswspace_l F
+ __iswupper_l F
+ __iswxdigit_l F
+ __isxdigit_l F
+ __key_decryptsession_pk_LOCAL D 0x4
+ __key_encryptsession_pk_LOCAL D 0x4
+ __key_gendes_LOCAL D 0x4
+ __libc_allocate_rtsig F
+ __libc_current_sigrtmax F
+ __libc_current_sigrtmin F
+ __libc_freeres F
+ __libc_sa_len F
+ __lxstat64 F
+ __newlocale F
+ __poll F
+ __pread64 F
+ __pwrite64 F
+ __rawmemchr F
+ __signbit F
+ __signbitf F
+ __strcasecmp_l F
+ __strcasestr F
+ __strcoll_l F
+ __strfmon_l F
+ __strncasecmp_l F
+ __strtod_l F
+ __strtof_l F
+ __strtol_l F
+ __strtold_l F
+ __strtoll_l F
+ __strtoul_l F
+ __strtoull_l F
+ __strxfrm_l F
+ __toascii_l F
+ __tolower_l F
+ __toupper_l F
+ __towctrans F
+ __towctrans_l F
+ __towlower_l F
+ __towupper_l F
+ __wcscasecmp_l F
+ __wcscoll_l F
+ __wcsncasecmp_l F
+ __wcstod_l F
+ __wcstof_l F
+ __wcstol_l F
+ __wcstold_l F
+ __wcstoll_l F
+ __wcstoul_l F
+ __wcstoull_l F
+ __wcsxfrm_l F
+ __wctype_l F
+ __xstat64 F
+ _authenticate F
+ _dl_mcount_wrapper F
+ _dl_mcount_wrapper_check F
+ _sys_errlist D 0x1f4
+ _sys_nerr D 0x4
+ _sys_siglist D 0x100
+ addseverity F
+ alphasort64 F
+ argp_err_exit_status D 0x4
+ argp_error F
+ argp_failure F
+ argp_help F
+ argp_parse F
+ argp_program_bug_address D 0x4
+ argp_program_version D 0x4
+ argp_program_version_hook D 0x4
+ argp_state_help F
+ argp_usage F
+ authdes_create F
+ authdes_getucred F
+ authdes_pk_create F
+ backtrace F
+ backtrace_symbols F
+ backtrace_symbols_fd F
+ capget F
+ capset F
+ cbc_crypt F
+ chown F
+ clntunix_create F
+ creat64 F
+ des_setparity F
+ ecb_crypt F
+ endutxent F
+ fattach F
+ fclose F
+ fdetach F
+ fdopen F
+ ffsl F
+ ffsll F
+ fgetc_unlocked F
+ fgetpos64 F
+ fgets_unlocked F
+ fmtmsg F
+ fopen F
+ fopen64 F
+ fputs_unlocked F
+ fread_unlocked F
+ freopen64 F
+ fseeko F
+ fseeko64 F
+ fsetpos64 F
+ fstatfs64 F
+ fstatvfs F
+ fstatvfs64 F
+ ftello F
+ ftello64 F
+ ftruncate64 F
+ ftw64 F
+ fwrite_unlocked F
+ gai_strerror F
+ getcontext F
+ getdate F
+ getdate_err D 0x4
+ getdate_r F
+ getmsg F
+ getnameinfo F
+ getnetname F
+ getpmsg F
+ getpt F
+ getrlimit64 F
+ getutxent F
+ getutxid F
+ getutxline F
+ glob64 F
+ globfree64 F
+ gnu_get_libc_release F
+ gnu_get_libc_version F
+ grantpt F
+ host2netname F
+ iconv F
+ iconv_close F
+ iconv_open F
+ if_freenameindex F
+ if_indextoname F
+ if_nameindex F
+ if_nametoindex F
+ in6addr_any D 0x10
+ in6addr_loopback D 0x10
+ isastream F
+ iswblank F
+ key_decryptsession F
+ key_decryptsession_pk F
+ key_encryptsession F
+ key_encryptsession_pk F
+ key_gendes F
+ key_get_conv F
+ key_secretkey_is_set F
+ key_setnet F
+ key_setsecret F
+ lockf64 F
+ lseek64 F
+ makecontext F
+ mempcpy F
+ mmap64 F
+ netname2host F
+ netname2user F
+ nftw F
+ nftw64 F
+ ntp_adjtime F
+ ntp_gettime F
+ open64 F
+ passwd2des F
+ pclose F
+ popen F
+ pread F
+ pread64 F
+ printf_size F
+ printf_size_info F
+ pthread_attr_init F
+ ptsname F
+ ptsname_r F
+ putgrent F
+ putmsg F
+ putpmsg F
+ pututxline F
+ pwrite F
+ pwrite64 F
+ rawmemchr F
+ readdir64 F
+ readdir64_r F
+ rtime F
+ scandir64 F
+ sendfile F
+ setrlimit64 F
+ setutxent F
+ sighold F
+ sigignore F
+ sigqueue F
+ sigrelse F
+ sigset F
+ sigtimedwait F
+ sigwaitinfo F
+ statfs64 F
+ statvfs F
+ statvfs64 F
+ strcasestr F
+ strtoimax F
+ strtoumax F
+ strverscmp F
+ svcunix_create F
+ svcunixfd_create F
+ swapcontext F
+ sys_errlist D 0x1f4
+ sys_nerr D 0x4
+ sys_sigabbrev D 0x100
+ sys_siglist D 0x100
+ sysv_signal F
+ tcgetsid F
+ tdestroy F
+ tmpfile F
+ tmpfile64 F
+ truncate64 F
+ umount2 F
+ unlockpt F
+ updwtmpx F
+ user2netname F
+ utmpxname F
+ versionsort F
+ versionsort64 F
+ waitid F
+ wcscasecmp F
+ wcsncasecmp F
+ wcsnlen F
+ wcstoimax F
+ wcstoll F
+ wcstoull F
+ wcstoumax F
+ wcswcs F
+ wordexp F
+ wordfree F
+ xdecrypt F
+ xdr_authdes_cred F
+ xdr_authdes_verf F
+ xdr_getcredres F
+ xdr_int16_t F
+ xdr_int32_t F
+ xdr_int8_t F
+ xdr_netnamestr F
+ xdr_sizeof F
+ xdr_uint16_t F
+ xdr_uint32_t F
+ xdr_uint8_t F
+ xdr_unixcred F
+GLIBC_2.1.1
+ GLIBC_2.1.1 A
+ _Exit F
+ __mempcpy_small F
+ __stpcpy_small F
+ __strcpy_small F
+ __strcspn_c1 F
+ __strcspn_c2 F
+ __strcspn_c3 F
+ __strpbrk_c2 F
+ __strpbrk_c3 F
+ __strsep_1c F
+ __strsep_2c F
+ __strsep_3c F
+ __strsep_g F
+ __strspn_c1 F
+ __strspn_c2 F
+ __strspn_c3 F
+ __strtok_r_1c F
+ __strverscmp F
+ getutmp F
+ getutmpx F
+ imaxabs F
+ imaxdiv F
+ strchrnul F
+ xdr_hyper F
+ xdr_int64_t F
+ xdr_longlong_t F
+ xdr_u_hyper F
+ xdr_u_longlong_t F
+ xdr_uint64_t F
+GLIBC_2.1.2
+ GLIBC_2.1.2 A
+ __vfork F
+ getaliasbyname_r F
+ getaliasent_r F
+ getgrent_r F
+ getgrgid_r F
+ getgrnam_r F
+ gethostbyaddr_r F
+ gethostbyname2_r F
+ gethostbyname_r F
+ gethostent_r F
+ getnetbyaddr_r F
+ getnetbyname_r F
+ getnetent_r F
+ getprotobyname_r F
+ getprotobynumber_r F
+ getprotoent_r F
+ getpwent_r F
+ getpwnam_r F
+ getpwuid_r F
+ getrpcbyname_r F
+ getrpcbynumber_r F
+ getrpcent_r F
+ getservbyname_r F
+ getservbyport_r F
+ getservent_r F
+ getspent_r F
+ getspnam_r F
+GLIBC_2.1.3
+ GLIBC_2.1.3 A
+ __cxa_atexit F
+ __cxa_finalize F
+ __sigsuspend F
+GLIBC_2.10
+ GLIBC_2.10 A
+ __cxa_at_quick_exit F
+ __posix_getopt F
+ accept4 F
+ endsgent F
+ fallocate F
+ fgetsgent F
+ fgetsgent_r F
+ getsgent F
+ getsgent_r F
+ getsgnam F
+ getsgnam_r F
+ malloc_info F
+ preadv F
+ preadv64 F
+ psiginfo F
+ putsgent F
+ pwritev F
+ pwritev64 F
+ quick_exit F
+ register_printf_modifier F
+ register_printf_specifier F
+ register_printf_type F
+ setsgent F
+ sgetsgent F
+ sgetsgent_r F
+GLIBC_2.11
+ GLIBC_2.11 A
+ __longjmp_chk F
+ execvpe F
+ fallocate64 F
+ mkostemps F
+ mkostemps64 F
+ mkstemps F
+ mkstemps64 F
+GLIBC_2.12
+ GLIBC_2.12 A
+ _sys_errlist D 0x21c
+ _sys_nerr D 0x4
+ ntp_gettimex F
+ recvmmsg F
+ sys_errlist D 0x21c
+ sys_nerr D 0x4
+GLIBC_2.13
+ GLIBC_2.13 A
+ fanotify_init F
+ fanotify_mark F
+ prlimit F
+ prlimit64 F
+GLIBC_2.14
+ GLIBC_2.14 A
+ clock_adjtime F
+ name_to_handle_at F
+ open_by_handle_at F
+ sendmmsg F
+ setns F
+ syncfs F
+GLIBC_2.15
+ GLIBC_2.15 A
+ __fdelt_chk F
+ __fdelt_warn F
+ posix_spawn F
+ posix_spawnp F
+ process_vm_readv F
+ process_vm_writev F
+ scandirat F
+ scandirat64 F
+GLIBC_2.16
+ GLIBC_2.16 A
+ __getauxval F
+ __mcount_internal F
+ __poll_chk F
+ __ppoll_chk F
+ aligned_alloc F
+ c16rtomb F
+ c32rtomb F
+ getauxval F
+ mbrtoc16 F
+ mbrtoc32 F
+ timespec_get F
+GLIBC_2.17
+ GLIBC_2.17 A
+ __ppc_get_timebase_freq F
+ clock_getcpuclockid F
+ clock_getres F
+ clock_gettime F
+ clock_nanosleep F
+ clock_settime F
+ secure_getenv F
+GLIBC_2.18
+ GLIBC_2.18 A
+ __cxa_thread_atexit_impl F
+GLIBC_2.2
+ GLIBC_2.2 A
+ _IO_adjust_wcolumn F
+ _IO_fgetpos F
+ _IO_fgetpos64 F
+ _IO_free_wbackup_area F
+ _IO_fsetpos F
+ _IO_fsetpos64 F
+ _IO_init_wmarker F
+ _IO_iter_begin F
+ _IO_iter_end F
+ _IO_iter_file F
+ _IO_iter_next F
+ _IO_least_wmarker F
+ _IO_list_lock F
+ _IO_list_resetlock F
+ _IO_list_unlock F
+ _IO_seekwmark F
+ _IO_sputbackwc F
+ _IO_sungetwc F
+ _IO_switch_to_main_wget_area F
+ _IO_switch_to_wbackup_area F
+ _IO_switch_to_wget_mode F
+ _IO_unsave_wmarkers F
+ _IO_wdefault_doallocate F
+ _IO_wdefault_finish F
+ _IO_wdefault_pbackfail F
+ _IO_wdefault_uflow F
+ _IO_wdefault_xsgetn F
+ _IO_wdefault_xsputn F
+ _IO_wdo_write F
+ _IO_wdoallocbuf F
+ _IO_wfile_jumps D 0x54
+ _IO_wfile_overflow F
+ _IO_wfile_seekoff F
+ _IO_wfile_sync F
+ _IO_wfile_underflow F
+ _IO_wfile_xsputn F
+ _IO_wmarker_delta F
+ _IO_wsetb F
+ __assert F
+ __ctype32_tolower D 0x4
+ __ctype32_toupper D 0x4
+ __cyg_profile_func_enter F
+ __cyg_profile_func_exit F
+ __endmntent F
+ __fbufsize F
+ __flbf F
+ __fpending F
+ __fpurge F
+ __freadable F
+ __freading F
+ __fsetlocking F
+ __fwritable F
+ __fwriting F
+ __fxstat64 F
+ __getmntent_r F
+ __lxstat64 F
+ __nl_langinfo_l F
+ __open64 F
+ __res_init F
+ __res_nclose F
+ __res_ninit F
+ __res_state F
+ __setmntent F
+ __statfs F
+ __strndup F
+ __sysconf F
+ __sysctl F
+ __wctrans_l F
+ __woverflow F
+ __wuflow F
+ __wunderflow F
+ __xpg_sigpause F
+ __xstat64 F
+ _flushlbf F
+ _res_hconf D 0x30
+ bind_textdomain_codeset F
+ dcngettext F
+ dngettext F
+ fgetpos F
+ fgetpos64 F
+ fgetwc F
+ fgetwc_unlocked F
+ fgetws F
+ fgetws_unlocked F
+ fmemopen F
+ fopencookie F
+ fputwc F
+ fputwc_unlocked F
+ fputws F
+ fputws_unlocked F
+ fsetpos F
+ fsetpos64 F
+ fwide F
+ fwprintf F
+ fwscanf F
+ getdirentries64 F
+ getloadavg F
+ getrlimit F
+ getrlimit64 F
+ getwc F
+ getwc_unlocked F
+ getwchar F
+ getwchar_unlocked F
+ glob64 F
+ iruserok_af F
+ localeconv F
+ mcheck_check_all F
+ mcheck_pedantic F
+ memrchr F
+ mincore F
+ mkdtemp F
+ mkstemp64 F
+ moncontrol F
+ msgctl F
+ ngettext F
+ posix_fadvise F
+ posix_fadvise64 F
+ posix_fallocate F
+ posix_fallocate64 F
+ posix_madvise F
+ posix_memalign F
+ posix_spawn F
+ posix_spawn_file_actions_addclose F
+ posix_spawn_file_actions_adddup2 F
+ posix_spawn_file_actions_addopen F
+ posix_spawn_file_actions_destroy F
+ posix_spawn_file_actions_init F
+ posix_spawnattr_destroy F
+ posix_spawnattr_getflags F
+ posix_spawnattr_getpgroup F
+ posix_spawnattr_getschedparam F
+ posix_spawnattr_getschedpolicy F
+ posix_spawnattr_getsigdefault F
+ posix_spawnattr_getsigmask F
+ posix_spawnattr_init F
+ posix_spawnattr_setflags F
+ posix_spawnattr_setpgroup F
+ posix_spawnattr_setschedparam F
+ posix_spawnattr_setschedpolicy F
+ posix_spawnattr_setsigdefault F
+ posix_spawnattr_setsigmask F
+ posix_spawnp F
+ putwc F
+ putwc_unlocked F
+ putwchar F
+ putwchar_unlocked F
+ rcmd_af F
+ readdir64 F
+ readdir64_r F
+ rexec_af F
+ rresvport_af F
+ ruserok_af F
+ scandir64 F
+ semctl F
+ setrlimit F
+ shmctl F
+ svc_getreq_common F
+ svc_getreq_poll F
+ svc_max_pollfd D 0x4
+ svc_pollfd D 0x4
+ swprintf F
+ swscanf F
+ ungetwc F
+ vfwprintf F
+ vfwscanf F
+ vswprintf F
+ vswscanf F
+ vwprintf F
+ vwscanf F
+ wcschrnul F
+ wcsftime F
+ wmempcpy F
+ wprintf F
+ wscanf F
+GLIBC_2.2.1
+ GLIBC_2.2.1 A
+ pivot_root F
+ posix_openpt F
+GLIBC_2.2.2
+ GLIBC_2.2.2 A
+ __nss_hostname_digits_dots F
+GLIBC_2.2.3
+ GLIBC_2.2.3 A
+ __rpc_thread_createerr F
+ __rpc_thread_svc_fdset F
+ __rpc_thread_svc_max_pollfd F
+ __rpc_thread_svc_pollfd F
+ fnmatch F
+ sprofil F
+GLIBC_2.2.4
+ GLIBC_2.2.4 A
+ dl_iterate_phdr F
+ getgrouplist F
+ sockatmark F
+GLIBC_2.2.6
+ GLIBC_2.2.6 A
+ __nanosleep F
+GLIBC_2.3
+ GLIBC_2.3 A
+ __ctype_b_loc F
+ __ctype_tolower_loc F
+ __ctype_toupper_loc F
+ __isctype F
+ __strftime_l F
+ __uselocale F
+ __wcsftime_l F
+ _sys_errlist D 0x1f8
+ _sys_nerr D 0x4
+ duplocale F
+ fgetxattr F
+ flistxattr F
+ freeifaddrs F
+ freelocale F
+ fremovexattr F
+ fsetxattr F
+ futimes F
+ getifaddrs F
+ getxattr F
+ isalnum_l F
+ isalpha_l F
+ isblank_l F
+ iscntrl_l F
+ isctype F
+ isdigit_l F
+ isgraph_l F
+ islower_l F
+ isprint_l F
+ ispunct_l F
+ isspace_l F
+ isupper_l F
+ iswalnum_l F
+ iswalpha_l F
+ iswblank_l F
+ iswcntrl_l F
+ iswctype_l F
+ iswdigit_l F
+ iswgraph_l F
+ iswlower_l F
+ iswprint_l F
+ iswpunct_l F
+ iswspace_l F
+ iswupper_l F
+ iswxdigit_l F
+ isxdigit_l F
+ lgetxattr F
+ listxattr F
+ llistxattr F
+ lremovexattr F
+ lsetxattr F
+ lutimes F
+ newlocale F
+ nl_langinfo_l F
+ readahead F
+ realpath F
+ removexattr F
+ sendfile64 F
+ setxattr F
+ strcasecmp_l F
+ strcoll_l F
+ strfmon_l F
+ strftime_l F
+ strncasecmp_l F
+ strtod_l F
+ strtof_l F
+ strtol_l F
+ strtold_l F
+ strtoll_l F
+ strtoul_l F
+ strtoull_l F
+ strxfrm_l F
+ sys_errlist D 0x1f8
+ sys_nerr D 0x4
+ tolower_l F
+ toupper_l F
+ towctrans_l F
+ towlower_l F
+ towupper_l F
+ uselocale F
+ wcscasecmp_l F
+ wcscoll_l F
+ wcsftime_l F
+ wcsncasecmp_l F
+ wcstod_l F
+ wcstof_l F
+ wcstol_l F
+ wcstold_l F
+ wcstoll_l F
+ wcstoul_l F
+ wcstoull_l F
+ wcsxfrm_l F
+ wctrans_l F
+ wctype_l F
+GLIBC_2.3.2
+ GLIBC_2.3.2 A
+ __adddf3 F
+ __addsf3 F
+ __divdf3 F
+ __divsf3 F
+ __eqdf2 F
+ __eqsf2 F
+ __extendsfdf2 F
+ __fixdfsi F
+ __fixsfsi F
+ __fixunsdfsi F
+ __fixunssfsi F
+ __floatsidf F
+ __floatsisf F
+ __gedf2 F
+ __gesf2 F
+ __ledf2 F
+ __lesf2 F
+ __muldf3 F
+ __mulsf3 F
+ __negdf2 F
+ __negsf2 F
+ __register_atfork F
+ __sim_disabled_exceptions D 0x4
+ __sim_exceptions D 0x4
+ __sim_round_mode D 0x4
+ __sqrtdf2 F
+ __sqrtsf2 F
+ __subdf3 F
+ __subsf3 F
+ __truncdfsf2 F
+ epoll_create F
+ epoll_ctl F
+ epoll_wait F
+ lchmod F
+ pthread_cond_broadcast F
+ pthread_cond_destroy F
+ pthread_cond_init F
+ pthread_cond_signal F
+ pthread_cond_timedwait F
+ pthread_cond_wait F
+ strptime_l F
+GLIBC_2.3.3
+ GLIBC_2.3.3 A
+ _sys_siglist D 0x104
+ getcontext F
+ gnu_dev_major F
+ gnu_dev_makedev F
+ gnu_dev_minor F
+ inet6_option_alloc F
+ inet6_option_append F
+ inet6_option_find F
+ inet6_option_init F
+ inet6_option_next F
+ inet6_option_space F
+ makecontext F
+ nftw F
+ nftw64 F
+ posix_fadvise64 F
+ posix_fallocate64 F
+ remap_file_pages F
+ sched_getaffinity F
+ sched_setaffinity F
+ semtimedop F
+ setcontext F
+ swapcontext F
+ sys_sigabbrev D 0x104
+ sys_siglist D 0x104
+GLIBC_2.3.4
+ GLIBC_2.3.4 A
+ __chk_fail F
+ __fprintf_chk F
+ __gets_chk F
+ __memcpy_chk F
+ __memmove_chk F
+ __mempcpy_chk F
+ __memset_chk F
+ __printf_chk F
+ __sigsetjmp F
+ __snprintf_chk F
+ __sprintf_chk F
+ __stpcpy_chk F
+ __strcat_chk F
+ __strcpy_chk F
+ __strncat_chk F
+ __strncpy_chk F
+ __vfprintf_chk F
+ __vprintf_chk F
+ __vsnprintf_chk F
+ __vsprintf_chk F
+ __xpg_strerror_r F
+ _longjmp F
+ _setjmp F
+ getcontext F
+ getipv4sourcefilter F
+ getsourcefilter F
+ longjmp F
+ makecontext F
+ regexec F
+ sched_getaffinity F
+ sched_setaffinity F
+ setcontext F
+ setipv4sourcefilter F
+ setjmp F
+ setsourcefilter F
+ siglongjmp F
+ swapcontext F
+ xdr_quad_t F
+ xdr_u_quad_t F
+GLIBC_2.4
+ GLIBC_2.4 A
+ _IO_fprintf F
+ _IO_printf F
+ _IO_sprintf F
+ _IO_sscanf F
+ _IO_vfprintf F
+ _IO_vfscanf F
+ _IO_vsprintf F
+ __asprintf F
+ __confstr_chk F
+ __fgets_chk F
+ __fgets_unlocked_chk F
+ __fgetws_chk F
+ __fgetws_unlocked_chk F
+ __finitel F
+ __floatundidf F
+ __floatundisf F
+ __floatunsidf F
+ __floatunsisf F
+ __fprintf_chk F
+ __fwprintf_chk F
+ __fxstatat F
+ __fxstatat64 F
+ __getcwd_chk F
+ __getdomainname_chk F
+ __getgroups_chk F
+ __gethostname_chk F
+ __getlogin_r_chk F
+ __getwd_chk F
+ __gtdf2 F
+ __gtsf2 F
+ __isinfl F
+ __isnanl F
+ __ltdf2 F
+ __ltsf2 F
+ __mbsnrtowcs_chk F
+ __mbsrtowcs_chk F
+ __mbstowcs_chk F
+ __nedf2 F
+ __nesf2 F
+ __nldbl__IO_fprintf F
+ __nldbl__IO_printf F
+ __nldbl__IO_sprintf F
+ __nldbl__IO_sscanf F
+ __nldbl__IO_vfprintf F
+ __nldbl__IO_vfscanf F
+ __nldbl__IO_vsprintf F
+ __nldbl___asprintf F
+ __nldbl___fprintf_chk F
+ __nldbl___fwprintf_chk F
+ __nldbl___printf_chk F
+ __nldbl___printf_fp F
+ __nldbl___snprintf_chk F
+ __nldbl___sprintf_chk F
+ __nldbl___strfmon_l F
+ __nldbl___swprintf_chk F
+ __nldbl___syslog_chk F
+ __nldbl___vfprintf_chk F
+ __nldbl___vfscanf F
+ __nldbl___vfwprintf_chk F
+ __nldbl___vprintf_chk F
+ __nldbl___vsnprintf F
+ __nldbl___vsnprintf_chk F
+ __nldbl___vsprintf_chk F
+ __nldbl___vsscanf F
+ __nldbl___vstrfmon F
+ __nldbl___vstrfmon_l F
+ __nldbl___vswprintf_chk F
+ __nldbl___vsyslog_chk F
+ __nldbl___vwprintf_chk F
+ __nldbl___wprintf_chk F
+ __nldbl_asprintf F
+ __nldbl_dprintf F
+ __nldbl_fprintf F
+ __nldbl_fscanf F
+ __nldbl_fwprintf F
+ __nldbl_fwscanf F
+ __nldbl_obstack_printf F
+ __nldbl_obstack_vprintf F
+ __nldbl_printf F
+ __nldbl_printf_size F
+ __nldbl_scanf F
+ __nldbl_snprintf F
+ __nldbl_sprintf F
+ __nldbl_sscanf F
+ __nldbl_strfmon F
+ __nldbl_strfmon_l F
+ __nldbl_swprintf F
+ __nldbl_swscanf F
+ __nldbl_syslog F
+ __nldbl_vasprintf F
+ __nldbl_vdprintf F
+ __nldbl_vfprintf F
+ __nldbl_vfscanf F
+ __nldbl_vfwprintf F
+ __nldbl_vfwscanf F
+ __nldbl_vprintf F
+ __nldbl_vscanf F
+ __nldbl_vsnprintf F
+ __nldbl_vsprintf F
+ __nldbl_vsscanf F
+ __nldbl_vswprintf F
+ __nldbl_vswscanf F
+ __nldbl_vsyslog F
+ __nldbl_vwprintf F
+ __nldbl_vwscanf F
+ __nldbl_wprintf F
+ __nldbl_wscanf F
+ __pread64_chk F
+ __pread_chk F
+ __printf_chk F
+ __printf_fp F
+ __ptsname_r_chk F
+ __read_chk F
+ __readlink_chk F
+ __realpath_chk F
+ __recv_chk F
+ __recvfrom_chk F
+ __signbitl F
+ __snprintf_chk F
+ __sprintf_chk F
+ __stack_chk_fail F
+ __stpncpy_chk F
+ __strfmon_l F
+ __strtold_internal F
+ __strtold_l F
+ __swprintf_chk F
+ __syslog_chk F
+ __ttyname_r_chk F
+ __unorddf2 F
+ __unordsf2 F
+ __vfprintf_chk F
+ __vfscanf F
+ __vfwprintf_chk F
+ __vprintf_chk F
+ __vsnprintf F
+ __vsnprintf_chk F
+ __vsprintf_chk F
+ __vsscanf F
+ __vswprintf_chk F
+ __vsyslog_chk F
+ __vwprintf_chk F
+ __wcpcpy_chk F
+ __wcpncpy_chk F
+ __wcrtomb_chk F
+ __wcscat_chk F
+ __wcscpy_chk F
+ __wcsncat_chk F
+ __wcsncpy_chk F
+ __wcsnrtombs_chk F
+ __wcsrtombs_chk F
+ __wcstold_internal F
+ __wcstold_l F
+ __wcstombs_chk F
+ __wctomb_chk F
+ __wmemcpy_chk F
+ __wmemmove_chk F
+ __wmempcpy_chk F
+ __wmemset_chk F
+ __wprintf_chk F
+ __xmknodat F
+ _sys_errlist D 0x210
+ _sys_nerr D 0x4
+ asprintf F
+ copysignl F
+ dprintf F
+ eaccess F
+ faccessat F
+ fchmodat F
+ fchownat F
+ fdopendir F
+ finitel F
+ fprintf F
+ frexpl F
+ fscanf F
+ futimesat F
+ fwprintf F
+ fwscanf F
+ inotify_add_watch F
+ inotify_init F
+ inotify_rm_watch F
+ isinfl F
+ isnanl F
+ ldexpl F
+ linkat F
+ mkdirat F
+ mkfifoat F
+ modfl F
+ obstack_printf F
+ obstack_vprintf F
+ open_wmemstream F
+ openat F
+ openat64 F
+ ppoll F
+ printf F
+ printf_size F
+ qecvt F
+ qecvt_r F
+ qfcvt F
+ qfcvt_r F
+ qgcvt F
+ readlinkat F
+ renameat F
+ scalbnl F
+ scanf F
+ snprintf F
+ sprintf F
+ sscanf F
+ strfmon F
+ strfmon_l F
+ strtold F
+ strtold_l F
+ swprintf F
+ swscanf F
+ symlinkat F
+ sys_errlist D 0x210
+ sys_nerr D 0x4
+ syslog F
+ unlinkat F
+ unshare F
+ vasprintf F
+ vdprintf F
+ vfprintf F
+ vfscanf F
+ vfwprintf F
+ vfwscanf F
+ vprintf F
+ vscanf F
+ vsnprintf F
+ vsprintf F
+ vsscanf F
+ vswprintf F
+ vswscanf F
+ vsyslog F
+ vwprintf F
+ vwscanf F
+ wcstold F
+ wcstold_l F
+ wprintf F
+ wscanf F
+GLIBC_2.5
+ GLIBC_2.5 A
+ __readlinkat_chk F
+ inet6_opt_append F
+ inet6_opt_find F
+ inet6_opt_finish F
+ inet6_opt_get_val F
+ inet6_opt_init F
+ inet6_opt_next F
+ inet6_opt_set_val F
+ inet6_rth_add F
+ inet6_rth_getaddr F
+ inet6_rth_init F
+ inet6_rth_reverse F
+ inet6_rth_segments F
+ inet6_rth_space F
+ splice F
+ tee F
+ vmsplice F
+GLIBC_2.6
+ GLIBC_2.6 A
+ __sched_cpucount F
+ epoll_pwait F
+ futimens F
+ sched_getcpu F
+ strerror_l F
+ sync_file_range F
+ utimensat F
+GLIBC_2.7
+ GLIBC_2.7 A
+ __fread_chk F
+ __fread_unlocked_chk F
+ __isoc99_fscanf F
+ __isoc99_fwscanf F
+ __isoc99_scanf F
+ __isoc99_sscanf F
+ __isoc99_swscanf F
+ __isoc99_vfscanf F
+ __isoc99_vfwscanf F
+ __isoc99_vscanf F
+ __isoc99_vsscanf F
+ __isoc99_vswscanf F
+ __isoc99_vwscanf F
+ __isoc99_wscanf F
+ __nldbl___isoc99_fscanf F
+ __nldbl___isoc99_fwscanf F
+ __nldbl___isoc99_scanf F
+ __nldbl___isoc99_sscanf F
+ __nldbl___isoc99_swscanf F
+ __nldbl___isoc99_vfscanf F
+ __nldbl___isoc99_vfwscanf F
+ __nldbl___isoc99_vscanf F
+ __nldbl___isoc99_vsscanf F
+ __nldbl___isoc99_vswscanf F
+ __nldbl___isoc99_vwscanf F
+ __nldbl___isoc99_wscanf F
+ __open64_2 F
+ __open_2 F
+ __openat64_2 F
+ __openat_2 F
+ __sched_cpualloc F
+ __sched_cpufree F
+ eventfd F
+ eventfd_read F
+ eventfd_write F
+ mkostemp F
+ mkostemp64 F
+ signalfd F
+GLIBC_2.8
+ GLIBC_2.8 A
+ __asprintf_chk F
+ __dprintf_chk F
+ __nldbl___asprintf_chk F
+ __nldbl___dprintf_chk F
+ __nldbl___obstack_printf_chk F
+ __nldbl___obstack_vprintf_chk F
+ __nldbl___vasprintf_chk F
+ __nldbl___vdprintf_chk F
+ __obstack_printf_chk F
+ __obstack_vprintf_chk F
+ __vasprintf_chk F
+ __vdprintf_chk F
+ qsort_r F
+ timerfd_create F
+ timerfd_gettime F
+ timerfd_settime F
+GLIBC_2.9
+ GLIBC_2.9 A
+ dup3 F
+ epoll_create1 F
+ inotify_init1 F
+ pipe2 F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libcrypt.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libcrypt.abilist
new file mode 100644
index 000000000..1df145f26
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libcrypt.abilist
@@ -0,0 +1,9 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ crypt F
+ crypt_r F
+ encrypt F
+ encrypt_r F
+ fcrypt F
+ setkey F
+ setkey_r F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libdl.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libdl.abilist
new file mode 100644
index 000000000..62e6b41ed
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libdl.abilist
@@ -0,0 +1,18 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ dladdr F
+ dlclose F
+ dlerror F
+ dlopen F
+ dlsym F
+GLIBC_2.1
+ GLIBC_2.1 A
+ dlopen F
+ dlvsym F
+GLIBC_2.3.3
+ GLIBC_2.3.3 A
+ dladdr1 F
+ dlinfo F
+GLIBC_2.3.4
+ GLIBC_2.3.4 A
+ dlmopen F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libm.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libm.abilist
new file mode 100644
index 000000000..9bd593c0e
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libm.abilist
@@ -0,0 +1,519 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ _LIB_VERSION D 0x4
+ acos F
+ acosf F
+ acosh F
+ acoshf F
+ acoshl F
+ acosl F
+ asin F
+ asinf F
+ asinh F
+ asinhf F
+ asinhl F
+ asinl F
+ atan F
+ atan2 F
+ atan2f F
+ atan2l F
+ atanf F
+ atanh F
+ atanhf F
+ atanhl F
+ atanl F
+ cbrt F
+ cbrtf F
+ cbrtl F
+ ceil F
+ ceilf F
+ ceill F
+ copysign F
+ copysignf F
+ copysignl F
+ cos F
+ cosf F
+ cosh F
+ coshf F
+ coshl F
+ cosl F
+ drem F
+ dremf F
+ dreml F
+ erf F
+ erfc F
+ erfcf F
+ erfcl F
+ erff F
+ erfl F
+ exp F
+ expf F
+ expl F
+ expm1 F
+ expm1f F
+ expm1l F
+ fabs F
+ fabsf F
+ fabsl F
+ finite F
+ finitef F
+ finitel F
+ floor F
+ floorf F
+ floorl F
+ fmod F
+ fmodf F
+ fmodl F
+ frexp F
+ frexpf F
+ frexpl F
+ gamma F
+ gammaf F
+ gammal F
+ hypot F
+ hypotf F
+ hypotl F
+ ilogb F
+ ilogbf F
+ ilogbl F
+ j0 F
+ j0f F
+ j0l F
+ j1 F
+ j1f F
+ j1l F
+ jn F
+ jnf F
+ jnl F
+ ldexp F
+ ldexpf F
+ ldexpl F
+ lgamma F
+ lgamma_r F
+ lgammaf F
+ lgammaf_r F
+ lgammal F
+ lgammal_r F
+ log F
+ log10 F
+ log10f F
+ log10l F
+ log1p F
+ log1pf F
+ log1pl F
+ logb F
+ logbf F
+ logbl F
+ logf F
+ logl F
+ matherr F
+ modf F
+ modff F
+ modfl F
+ nextafter F
+ nextafterf F
+ nextafterl F
+ pow F
+ powf F
+ powl F
+ remainder F
+ remainderf F
+ remainderl F
+ rint F
+ rintf F
+ rintl F
+ scalb F
+ scalbf F
+ scalbl F
+ scalbn F
+ scalbnf F
+ scalbnl F
+ signgam D 0x4
+ significand F
+ significandf F
+ significandl F
+ sin F
+ sinf F
+ sinh F
+ sinhf F
+ sinhl F
+ sinl F
+ sqrt F
+ sqrtf F
+ sqrtl F
+ tan F
+ tanf F
+ tanh F
+ tanhf F
+ tanhl F
+ tanl F
+ y0 F
+ y0f F
+ y0l F
+ y1 F
+ y1f F
+ y1l F
+ yn F
+ ynf F
+ ynl F
+GLIBC_2.1
+ GLIBC_2.1 A
+ __clog10 F
+ __clog10f F
+ __clog10l F
+ __fe_dfl_env D 0x8
+ __fe_enabled_env D 0x8
+ __fe_nonieee_env D 0x8
+ __finite F
+ __finitef F
+ __finitel F
+ __fpclassify F
+ __fpclassifyf F
+ __signbit F
+ __signbitf F
+ cabs F
+ cabsf F
+ cabsl F
+ cacos F
+ cacosf F
+ cacosh F
+ cacoshf F
+ cacoshl F
+ cacosl F
+ carg F
+ cargf F
+ cargl F
+ casin F
+ casinf F
+ casinh F
+ casinhf F
+ casinhl F
+ casinl F
+ catan F
+ catanf F
+ catanh F
+ catanhf F
+ catanhl F
+ catanl F
+ ccos F
+ ccosf F
+ ccosh F
+ ccoshf F
+ ccoshl F
+ ccosl F
+ cexp F
+ cexpf F
+ cexpl F
+ cimag F
+ cimagf F
+ cimagl F
+ clog F
+ clog10 F
+ clog10f F
+ clog10l F
+ clogf F
+ clogl F
+ conj F
+ conjf F
+ conjl F
+ cpow F
+ cpowf F
+ cpowl F
+ cproj F
+ cprojf F
+ cprojl F
+ creal F
+ crealf F
+ creall F
+ csin F
+ csinf F
+ csinh F
+ csinhf F
+ csinhl F
+ csinl F
+ csqrt F
+ csqrtf F
+ csqrtl F
+ ctan F
+ ctanf F
+ ctanh F
+ ctanhf F
+ ctanhl F
+ ctanl F
+ exp10 F
+ exp10f F
+ exp10l F
+ exp2 F
+ exp2f F
+ fdim F
+ fdimf F
+ fdiml F
+ feclearexcept F
+ fegetenv F
+ fegetexceptflag F
+ fegetround F
+ feholdexcept F
+ feraiseexcept F
+ fesetenv F
+ fesetexceptflag F
+ fesetround F
+ fetestexcept F
+ feupdateenv F
+ fma F
+ fmaf F
+ fmal F
+ fmax F
+ fmaxf F
+ fmaxl F
+ fmin F
+ fminf F
+ fminl F
+ llrint F
+ llrintf F
+ llrintl F
+ llround F
+ llroundf F
+ llroundl F
+ log2 F
+ log2f F
+ log2l F
+ lrint F
+ lrintf F
+ lrintl F
+ lround F
+ lroundf F
+ lroundl F
+ nan F
+ nanf F
+ nanl F
+ nearbyint F
+ nearbyintf F
+ nearbyintl F
+ nexttoward F
+ nexttowardf F
+ nexttowardl F
+ pow10 F
+ pow10f F
+ pow10l F
+ remquo F
+ remquof F
+ remquol F
+ round F
+ roundf F
+ roundl F
+ scalbln F
+ scalblnf F
+ scalblnl F
+ sincos F
+ sincosf F
+ sincosl F
+ tgamma F
+ tgammaf F
+ tgammal F
+ trunc F
+ truncf F
+ truncl F
+GLIBC_2.15
+ GLIBC_2.15 A
+ __acos_finite F
+ __acosf_finite F
+ __acosh_finite F
+ __acoshf_finite F
+ __acoshl_finite F
+ __acosl_finite F
+ __asin_finite F
+ __asinf_finite F
+ __asinl_finite F
+ __atan2_finite F
+ __atan2f_finite F
+ __atan2l_finite F
+ __atanh_finite F
+ __atanhf_finite F
+ __atanhl_finite F
+ __cosh_finite F
+ __coshf_finite F
+ __coshl_finite F
+ __exp10_finite F
+ __exp10f_finite F
+ __exp10l_finite F
+ __exp2_finite F
+ __exp2f_finite F
+ __exp2l_finite F
+ __exp_finite F
+ __expf_finite F
+ __expl_finite F
+ __fmod_finite F
+ __fmodf_finite F
+ __fmodl_finite F
+ __gamma_r_finite F
+ __gammaf_r_finite F
+ __gammal_r_finite F
+ __hypot_finite F
+ __hypotf_finite F
+ __hypotl_finite F
+ __j0_finite F
+ __j0f_finite F
+ __j0l_finite F
+ __j1_finite F
+ __j1f_finite F
+ __j1l_finite F
+ __jn_finite F
+ __jnf_finite F
+ __jnl_finite F
+ __lgamma_r_finite F
+ __lgammaf_r_finite F
+ __lgammal_r_finite F
+ __log10_finite F
+ __log10f_finite F
+ __log10l_finite F
+ __log2_finite F
+ __log2f_finite F
+ __log2l_finite F
+ __log_finite F
+ __logf_finite F
+ __logl_finite F
+ __pow_finite F
+ __powf_finite F
+ __powl_finite F
+ __remainder_finite F
+ __remainderf_finite F
+ __remainderl_finite F
+ __scalb_finite F
+ __scalbf_finite F
+ __scalbl_finite F
+ __sinh_finite F
+ __sinhf_finite F
+ __sinhl_finite F
+ __sqrt_finite F
+ __sqrtf_finite F
+ __sqrtl_finite F
+ __y0_finite F
+ __y0f_finite F
+ __y0l_finite F
+ __y1_finite F
+ __y1f_finite F
+ __y1l_finite F
+ __yn_finite F
+ __ynf_finite F
+ __ynl_finite F
+GLIBC_2.18
+ GLIBC_2.18 A
+ __issignaling F
+ __issignalingf F
+ __issignalingl F
+GLIBC_2.2
+ GLIBC_2.2 A
+ feclearexcept F
+ fedisableexcept F
+ feenableexcept F
+ fegetenv F
+ fegetexcept F
+ fegetexceptflag F
+ feraiseexcept F
+ fesetenv F
+ fesetexceptflag F
+ feupdateenv F
+GLIBC_2.4
+ GLIBC_2.4 A
+ __clog10l F
+ __finitel F
+ __fpclassifyl F
+ __nldbl_nexttowardf F
+ __signbitl F
+ acoshl F
+ acosl F
+ asinhl F
+ asinl F
+ atan2l F
+ atanhl F
+ atanl F
+ cabsl F
+ cacoshl F
+ cacosl F
+ cargl F
+ casinhl F
+ casinl F
+ catanhl F
+ catanl F
+ cbrtl F
+ ccoshl F
+ ccosl F
+ ceill F
+ cexpl F
+ cimagl F
+ clog10l F
+ clogl F
+ conjl F
+ copysignl F
+ coshl F
+ cosl F
+ cpowl F
+ cprojl F
+ creall F
+ csinhl F
+ csinl F
+ csqrtl F
+ ctanhl F
+ ctanl F
+ dreml F
+ erfcl F
+ erfl F
+ exp10l F
+ exp2l F
+ expl F
+ expm1l F
+ fabsl F
+ fdiml F
+ finitel F
+ floorl F
+ fmal F
+ fmaxl F
+ fminl F
+ fmodl F
+ frexpl F
+ gammal F
+ hypotl F
+ ilogbl F
+ j0l F
+ j1l F
+ jnl F
+ ldexpl F
+ lgammal F
+ lgammal_r F
+ llrintl F
+ llroundl F
+ log10l F
+ log1pl F
+ log2l F
+ logbl F
+ logl F
+ lrintl F
+ lroundl F
+ modfl F
+ nanl F
+ nearbyintl F
+ nextafterl F
+ nexttoward F
+ nexttowardf F
+ nexttowardl F
+ pow10l F
+ powl F
+ remainderl F
+ remquol F
+ rintl F
+ roundl F
+ scalbl F
+ scalblnl F
+ scalbnl F
+ significandl F
+ sincosl F
+ sinhl F
+ sinl F
+ sqrtl F
+ tanhl F
+ tanl F
+ tgammal F
+ truncl F
+ y0l F
+ y1l F
+ ynl F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libnsl.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libnsl.abilist
new file mode 100644
index 000000000..4241e2d88
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libnsl.abilist
@@ -0,0 +1,127 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ __yp_check F
+ xdr_domainname F
+ xdr_keydat F
+ xdr_mapname F
+ xdr_peername F
+ xdr_valdat F
+ xdr_yp_buf F
+ xdr_ypbind_binding F
+ xdr_ypbind_resp F
+ xdr_ypbind_resptype F
+ xdr_ypbind_setdom F
+ xdr_ypdelete_args F
+ xdr_ypmap_parms F
+ xdr_ypmaplist F
+ xdr_yppush_status F
+ xdr_yppushresp_xfr F
+ xdr_ypreq_key F
+ xdr_ypreq_nokey F
+ xdr_ypreq_xfr F
+ xdr_ypresp_all F
+ xdr_ypresp_key_val F
+ xdr_ypresp_maplist F
+ xdr_ypresp_master F
+ xdr_ypresp_order F
+ xdr_ypresp_val F
+ xdr_ypresp_xfr F
+ xdr_ypstat F
+ xdr_ypupdate_args F
+ xdr_ypxfrstat F
+ yp_all F
+ yp_bind F
+ yp_first F
+ yp_get_default_domain F
+ yp_maplist F
+ yp_master F
+ yp_match F
+ yp_next F
+ yp_order F
+ yp_unbind F
+ yp_update F
+ ypbinderr_string F
+ yperr_string F
+ ypprot_err F
+GLIBC_2.1
+ GLIBC_2.1 A
+ __free_fdresult F
+ __nis_default_access F
+ __nis_default_group F
+ __nis_default_owner F
+ __nis_default_ttl F
+ __nis_finddirectory F
+ __nis_hash F
+ __nisbind_connect F
+ __nisbind_create F
+ __nisbind_destroy F
+ __nisbind_next F
+ nis_add F
+ nis_add_entry F
+ nis_addmember F
+ nis_checkpoint F
+ nis_clone_directory F
+ nis_clone_object F
+ nis_clone_result F
+ nis_creategroup F
+ nis_destroy_object F
+ nis_destroygroup F
+ nis_dir_cmp F
+ nis_domain_of F
+ nis_domain_of_r F
+ nis_first_entry F
+ nis_free_directory F
+ nis_free_object F
+ nis_free_request F
+ nis_freenames F
+ nis_freeresult F
+ nis_freeservlist F
+ nis_freetags F
+ nis_getnames F
+ nis_getservlist F
+ nis_ismember F
+ nis_leaf_of F
+ nis_leaf_of_r F
+ nis_lerror F
+ nis_list F
+ nis_local_directory F
+ nis_local_group F
+ nis_local_host F
+ nis_local_principal F
+ nis_lookup F
+ nis_mkdir F
+ nis_modify F
+ nis_modify_entry F
+ nis_name_of F
+ nis_name_of_r F
+ nis_next_entry F
+ nis_perror F
+ nis_ping F
+ nis_print_directory F
+ nis_print_entry F
+ nis_print_group F
+ nis_print_group_entry F
+ nis_print_link F
+ nis_print_object F
+ nis_print_result F
+ nis_print_rights F
+ nis_print_table F
+ nis_read_obj F
+ nis_remove F
+ nis_remove_entry F
+ nis_removemember F
+ nis_rmdir F
+ nis_servstate F
+ nis_sperrno F
+ nis_sperror F
+ nis_sperror_r F
+ nis_stats F
+ nis_verifygroup F
+ nis_write_obj F
+ readColdStartFile F
+ writeColdStartFile F
+ xdr_cback_data F
+ xdr_obj_p F
+GLIBC_2.2
+ GLIBC_2.2 A
+ xdr_ypall F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libpthread.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libpthread.abilist
new file mode 100644
index 000000000..c8a2a0471
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libpthread.abilist
@@ -0,0 +1,277 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ _IO_flockfile F
+ _IO_ftrylockfile F
+ _IO_funlockfile F
+ __close F
+ __connect F
+ __errno_location F
+ __fcntl F
+ __fork F
+ __h_errno_location F
+ __lseek F
+ __open F
+ __pthread_getspecific F
+ __pthread_key_create F
+ __pthread_mutex_destroy F
+ __pthread_mutex_init F
+ __pthread_mutex_lock F
+ __pthread_mutex_trylock F
+ __pthread_mutex_unlock F
+ __pthread_mutexattr_destroy F
+ __pthread_mutexattr_init F
+ __pthread_mutexattr_settype F
+ __pthread_once F
+ __pthread_setspecific F
+ __read F
+ __send F
+ __sigaction F
+ __wait F
+ __write F
+ _pthread_cleanup_pop F
+ _pthread_cleanup_pop_restore F
+ _pthread_cleanup_push F
+ _pthread_cleanup_push_defer F
+ accept F
+ close F
+ connect F
+ fcntl F
+ flockfile F
+ fork F
+ fsync F
+ ftrylockfile F
+ funlockfile F
+ longjmp F
+ lseek F
+ msync F
+ nanosleep F
+ open F
+ pause F
+ pthread_atfork F
+ pthread_attr_destroy F
+ pthread_attr_getdetachstate F
+ pthread_attr_getinheritsched F
+ pthread_attr_getschedparam F
+ pthread_attr_getschedpolicy F
+ pthread_attr_getscope F
+ pthread_attr_init F
+ pthread_attr_setdetachstate F
+ pthread_attr_setinheritsched F
+ pthread_attr_setschedparam F
+ pthread_attr_setschedpolicy F
+ pthread_attr_setscope F
+ pthread_cancel F
+ pthread_cond_broadcast F
+ pthread_cond_destroy F
+ pthread_cond_init F
+ pthread_cond_signal F
+ pthread_cond_timedwait F
+ pthread_cond_wait F
+ pthread_condattr_destroy F
+ pthread_condattr_init F
+ pthread_create F
+ pthread_detach F
+ pthread_equal F
+ pthread_exit F
+ pthread_getschedparam F
+ pthread_getspecific F
+ pthread_join F
+ pthread_key_create F
+ pthread_key_delete F
+ pthread_kill F
+ pthread_kill_other_threads_np F
+ pthread_mutex_destroy F
+ pthread_mutex_init F
+ pthread_mutex_lock F
+ pthread_mutex_trylock F
+ pthread_mutex_unlock F
+ pthread_mutexattr_destroy F
+ pthread_mutexattr_getkind_np F
+ pthread_mutexattr_init F
+ pthread_mutexattr_setkind_np F
+ pthread_once F
+ pthread_self F
+ pthread_setcancelstate F
+ pthread_setcanceltype F
+ pthread_setschedparam F
+ pthread_setspecific F
+ pthread_sigmask F
+ pthread_testcancel F
+ raise F
+ read F
+ recv F
+ recvfrom F
+ recvmsg F
+ sem_destroy F
+ sem_getvalue F
+ sem_init F
+ sem_post F
+ sem_trywait F
+ sem_wait F
+ send F
+ sendmsg F
+ sendto F
+ sigaction F
+ siglongjmp F
+ sigwait F
+ system F
+ tcdrain F
+ vfork F
+ wait F
+ waitpid F
+ write F
+GLIBC_2.1
+ GLIBC_2.1 A
+ __libc_allocate_rtsig F
+ __libc_current_sigrtmax F
+ __libc_current_sigrtmin F
+ pthread_attr_getguardsize F
+ pthread_attr_getstackaddr F
+ pthread_attr_getstacksize F
+ pthread_attr_init F
+ pthread_attr_setguardsize F
+ pthread_attr_setstackaddr F
+ pthread_attr_setstacksize F
+ pthread_create F
+ pthread_getconcurrency F
+ pthread_mutexattr_gettype F
+ pthread_mutexattr_settype F
+ pthread_rwlock_destroy F
+ pthread_rwlock_init F
+ pthread_rwlock_rdlock F
+ pthread_rwlock_tryrdlock F
+ pthread_rwlock_trywrlock F
+ pthread_rwlock_unlock F
+ pthread_rwlock_wrlock F
+ pthread_rwlockattr_destroy F
+ pthread_rwlockattr_getkind_np F
+ pthread_rwlockattr_getpshared F
+ pthread_rwlockattr_init F
+ pthread_rwlockattr_setkind_np F
+ pthread_rwlockattr_setpshared F
+ pthread_setconcurrency F
+ sem_destroy F
+ sem_getvalue F
+ sem_init F
+ sem_post F
+ sem_trywait F
+ sem_wait F
+GLIBC_2.1.1
+ GLIBC_2.1.1 A
+ sem_close F
+ sem_open F
+ sem_unlink F
+GLIBC_2.1.2
+ GLIBC_2.1.2 A
+ __vfork F
+GLIBC_2.11
+ GLIBC_2.11 A
+ pthread_sigqueue F
+GLIBC_2.12
+ GLIBC_2.12 A
+ pthread_getname_np F
+ pthread_mutex_consistent F
+ pthread_mutexattr_getrobust F
+ pthread_mutexattr_setrobust F
+ pthread_setname_np F
+GLIBC_2.18
+ GLIBC_2.18 A
+ pthread_getattr_default_np F
+ pthread_setattr_default_np F
+GLIBC_2.2
+ GLIBC_2.2 A
+ __open64 F
+ __pread64 F
+ __pthread_rwlock_destroy F
+ __pthread_rwlock_init F
+ __pthread_rwlock_rdlock F
+ __pthread_rwlock_tryrdlock F
+ __pthread_rwlock_trywrlock F
+ __pthread_rwlock_unlock F
+ __pthread_rwlock_wrlock F
+ __pwrite64 F
+ __res_state F
+ lseek64 F
+ open64 F
+ pread F
+ pread64 F
+ pthread_attr_getstack F
+ pthread_attr_setstack F
+ pthread_barrier_destroy F
+ pthread_barrier_init F
+ pthread_barrier_wait F
+ pthread_barrierattr_destroy F
+ pthread_barrierattr_init F
+ pthread_barrierattr_setpshared F
+ pthread_condattr_getpshared F
+ pthread_condattr_setpshared F
+ pthread_getcpuclockid F
+ pthread_mutex_timedlock F
+ pthread_mutexattr_getpshared F
+ pthread_mutexattr_setpshared F
+ pthread_rwlock_timedrdlock F
+ pthread_rwlock_timedwrlock F
+ pthread_spin_destroy F
+ pthread_spin_init F
+ pthread_spin_lock F
+ pthread_spin_trylock F
+ pthread_spin_unlock F
+ pthread_yield F
+ pwrite F
+ pwrite64 F
+ sem_timedwait F
+GLIBC_2.2.3
+ GLIBC_2.2.3 A
+ pthread_getattr_np F
+GLIBC_2.2.6
+ GLIBC_2.2.6 A
+ __nanosleep F
+GLIBC_2.3.2
+ GLIBC_2.3.2 A
+ pthread_cond_broadcast F
+ pthread_cond_destroy F
+ pthread_cond_init F
+ pthread_cond_signal F
+ pthread_cond_timedwait F
+ pthread_cond_wait F
+GLIBC_2.3.3
+ GLIBC_2.3.3 A
+ __pthread_cleanup_routine F
+ __pthread_register_cancel F
+ __pthread_register_cancel_defer F
+ __pthread_unregister_cancel F
+ __pthread_unregister_cancel_restore F
+ __pthread_unwind_next F
+ pthread_attr_getaffinity_np F
+ pthread_attr_setaffinity_np F
+ pthread_barrierattr_getpshared F
+ pthread_condattr_getclock F
+ pthread_condattr_setclock F
+ pthread_getaffinity_np F
+ pthread_setaffinity_np F
+ pthread_timedjoin_np F
+ pthread_tryjoin_np F
+GLIBC_2.3.4
+ GLIBC_2.3.4 A
+ longjmp F
+ pthread_attr_getaffinity_np F
+ pthread_attr_setaffinity_np F
+ pthread_getaffinity_np F
+ pthread_setaffinity_np F
+ pthread_setschedprio F
+ siglongjmp F
+GLIBC_2.4
+ GLIBC_2.4 A
+ pthread_mutex_consistent_np F
+ pthread_mutex_getprioceiling F
+ pthread_mutex_setprioceiling F
+ pthread_mutexattr_getprioceiling F
+ pthread_mutexattr_getprotocol F
+ pthread_mutexattr_getrobust_np F
+ pthread_mutexattr_setprioceiling F
+ pthread_mutexattr_setprotocol F
+ pthread_mutexattr_setrobust_np F
+GLIBC_2.6
+ GLIBC_2.6 A
+ pthread_attr_setstack F
+ pthread_attr_setstacksize F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libresolv.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libresolv.abilist
new file mode 100644
index 000000000..f68333d4a
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libresolv.abilist
@@ -0,0 +1,104 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ __b64_ntop F
+ __b64_pton F
+ __dn_comp F
+ __dn_count_labels F
+ __dn_skipname F
+ __fp_nquery F
+ __fp_query F
+ __fp_resstat F
+ __hostalias F
+ __loc_aton F
+ __loc_ntoa F
+ __p_cdname F
+ __p_cdnname F
+ __p_class F
+ __p_class_syms D 0x54
+ __p_fqname F
+ __p_fqnname F
+ __p_option F
+ __p_query F
+ __p_secstodate F
+ __p_time F
+ __p_type F
+ __p_type_syms D 0x228
+ __putlong F
+ __putshort F
+ __res_close F
+ __res_dnok F
+ __res_hnok F
+ __res_isourserver F
+ __res_mailok F
+ __res_nameinquery F
+ __res_ownok F
+ __res_queriesmatch F
+ __res_send F
+ __sym_ntop F
+ __sym_ntos F
+ __sym_ston F
+ _gethtbyaddr F
+ _gethtbyname F
+ _gethtbyname2 F
+ _gethtent F
+ _getlong F
+ _getshort F
+ _res_opcodes D 0x40
+ _sethtent F
+ dn_expand F
+ inet_net_ntop F
+ inet_net_pton F
+ inet_neta F
+ res_gethostbyaddr F
+ res_gethostbyname F
+ res_gethostbyname2 F
+ res_mkquery F
+ res_query F
+ res_querydomain F
+ res_search F
+ res_send_setqhook F
+ res_send_setrhook F
+GLIBC_2.2
+ GLIBC_2.2 A
+ __dn_expand F
+ __res_hostalias F
+ __res_mkquery F
+ __res_nmkquery F
+ __res_nquery F
+ __res_nquerydomain F
+ __res_nsearch F
+ __res_nsend F
+ __res_query F
+ __res_querydomain F
+ __res_search F
+GLIBC_2.3.2
+ GLIBC_2.3.2 A
+ __p_rcode F
+GLIBC_2.9
+ GLIBC_2.9 A
+ ns_datetosecs F
+ ns_format_ttl F
+ ns_get16 F
+ ns_get32 F
+ ns_initparse F
+ ns_makecanon F
+ ns_msg_getflag F
+ ns_name_compress F
+ ns_name_ntol F
+ ns_name_ntop F
+ ns_name_pack F
+ ns_name_pton F
+ ns_name_rollback F
+ ns_name_skip F
+ ns_name_uncompress F
+ ns_name_unpack F
+ ns_parse_ttl F
+ ns_parserr F
+ ns_put16 F
+ ns_put32 F
+ ns_samedomain F
+ ns_samename F
+ ns_skiprr F
+ ns_sprintrr F
+ ns_sprintrrf F
+ ns_subdomain F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/librt.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/librt.abilist
new file mode 100644
index 000000000..af7df27cb
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/librt.abilist
@@ -0,0 +1,52 @@
+GLIBC_2.1
+ GLIBC_2.1 A
+ aio_cancel F
+ aio_cancel64 F
+ aio_error F
+ aio_error64 F
+ aio_fsync F
+ aio_fsync64 F
+ aio_init F
+ aio_read F
+ aio_read64 F
+ aio_return F
+ aio_return64 F
+ aio_suspend F
+ aio_suspend64 F
+ aio_write F
+ aio_write64 F
+ lio_listio F
+ lio_listio64 F
+GLIBC_2.2
+ GLIBC_2.2 A
+ clock_getcpuclockid F
+ clock_getres F
+ clock_gettime F
+ clock_nanosleep F
+ clock_settime F
+ shm_open F
+ shm_unlink F
+ timer_create F
+ timer_delete F
+ timer_getoverrun F
+ timer_gettime F
+ timer_settime F
+GLIBC_2.3.4
+ GLIBC_2.3.4 A
+ mq_close F
+ mq_getattr F
+ mq_notify F
+ mq_open F
+ mq_receive F
+ mq_send F
+ mq_setattr F
+ mq_timedreceive F
+ mq_timedsend F
+ mq_unlink F
+GLIBC_2.4
+ GLIBC_2.4 A
+ lio_listio F
+ lio_listio64 F
+GLIBC_2.7
+ GLIBC_2.7 A
+ __mq_open_2 F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libthread_db.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libthread_db.abilist
new file mode 100644
index 000000000..f33138067
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libthread_db.abilist
@@ -0,0 +1,48 @@
+GLIBC_2.1.3
+ GLIBC_2.1.3 A
+ td_init F
+ td_log F
+ td_ta_clear_event F
+ td_ta_delete F
+ td_ta_enable_stats F
+ td_ta_event_addr F
+ td_ta_event_getmsg F
+ td_ta_get_nthreads F
+ td_ta_get_ph F
+ td_ta_get_stats F
+ td_ta_map_id2thr F
+ td_ta_map_lwp2thr F
+ td_ta_new F
+ td_ta_reset_stats F
+ td_ta_set_event F
+ td_ta_setconcurrency F
+ td_ta_thr_iter F
+ td_ta_tsd_iter F
+ td_thr_clear_event F
+ td_thr_dbresume F
+ td_thr_dbsuspend F
+ td_thr_event_enable F
+ td_thr_event_getmsg F
+ td_thr_get_info F
+ td_thr_getfpregs F
+ td_thr_getgregs F
+ td_thr_getxregs F
+ td_thr_getxregsize F
+ td_thr_set_event F
+ td_thr_setfpregs F
+ td_thr_setgregs F
+ td_thr_setprio F
+ td_thr_setsigpending F
+ td_thr_setxregs F
+ td_thr_sigsetmask F
+ td_thr_tsd F
+ td_thr_validate F
+GLIBC_2.2.3
+ GLIBC_2.2.3 A
+ td_symbol_list F
+GLIBC_2.3
+ GLIBC_2.3 A
+ td_thr_tls_get_addr F
+GLIBC_2.3.3
+ GLIBC_2.3.3 A
+ td_thr_tlsbase F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libutil.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libutil.abilist
new file mode 100644
index 000000000..7422687e3
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libutil.abilist
@@ -0,0 +1,8 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ forkpty F
+ login F
+ login_tty F
+ logout F
+ logwtmp F
+ openpty F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/localplt.data b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/localplt.data
new file mode 100644
index 000000000..b87936cb3
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/localplt.data
@@ -0,0 +1,41 @@
+libc.so: _Unwind_Find_FDE
+libc.so: __adddf3 ?
+libc.so: __addsf3 ?
+libc.so: __divdf3 ?
+libc.so: __divsf3 ?
+libc.so: __eqdf2 ?
+libc.so: __eqsf2 ?
+libc.so: __extendsfdf2 ?
+libc.so: __fixdfsi ?
+libc.so: __fixsfsi ?
+libc.so: __fixunsdfsi ?
+libc.so: __floatsidf ?
+libc.so: __floatsisf ?
+libc.so: __floatunsidf ?
+libc.so: __floatunsisf ?
+libc.so: __gedf2 ?
+libc.so: __gtdf2 ?
+libc.so: __ledf2 ?
+libc.so: __ltdf2 ?
+libc.so: __muldf3 ?
+libc.so: __mulsf3 ?
+libc.so: __nedf2 ?
+libc.so: __signbit
+libc.so: __signbitl
+libc.so: __subdf3 ?
+libc.so: __subsf3 ?
+libc.so: __truncdfsf2 ?
+libc.so: __unorddf2 ?
+libc.so: abort ?
+libc.so: calloc
+libc.so: free
+libc.so: malloc
+libc.so: memalign
+libc.so: realloc
+libm.so: __signbit
+libm.so: __signbitf
+libm.so: __signbitl
+libm.so: copysignl ?
+libm.so: fabsl
+libm.so: fegetround
+libm.so: matherr
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/setcontext.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/setcontext.S
new file mode 100644
index 000000000..5f8653ffb
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/setcontext.S
@@ -0,0 +1,60 @@
+/* Jump to a new context.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+#include <shlib-compat.h>
+
+#define __ASSEMBLY__
+#include <asm/ptrace.h>
+#include "ucontext_i.h"
+
+#include <context-e500.h>
+
+#define __CONTEXT_FUNC_NAME __setcontext
+#undef __CONTEXT_ENABLE_FPRS
+#undef __CONTEXT_ENABLE_VRS
+
+#include "setcontext-common.S"
+
+versioned_symbol (libc, __setcontext, setcontext, GLIBC_2_3_4)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
+
+/* For the nofpu case the old/new versions are the same function. */
+strong_alias (__setcontext, __novec_setcontext)
+
+compat_symbol (libc, __novec_setcontext, setcontext, GLIBC_2_3_3)
+
+#endif
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_3)
+
+#define _ERRNO_H 1
+#include <bits/errno.h>
+
+ compat_text_section
+ENTRY (__setcontext_stub)
+ li r3,ENOSYS
+ b __syscall_error@local
+END (__setcontext_stub)
+ .previous
+
+compat_symbol (libc, __setcontext_stub, setcontext, GLIBC_2_0)
+
+#endif
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/swapcontext.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/swapcontext.S
new file mode 100644
index 000000000..de6d56f96
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/swapcontext.S
@@ -0,0 +1,60 @@
+/* Save current context and jump to a new context.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+#include <shlib-compat.h>
+
+#define __ASSEMBLY__
+#include <asm/ptrace.h>
+#include "ucontext_i.h"
+
+#include <context-e500.h>
+
+#define __CONTEXT_FUNC_NAME __swapcontext
+#undef __CONTEXT_ENABLE_FPRS
+#undef __CONTEXT_ENABLE_VRS
+
+# include "swapcontext-common.S"
+
+versioned_symbol (libc, __swapcontext, swapcontext, GLIBC_2_3_4)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
+
+/* For the nofpu case the old/new versions are the same function. */
+strong_alias (__swapcontext, __novec_swapcontext)
+
+compat_symbol (libc, __novec_swapcontext, swapcontext, GLIBC_2_3_3)
+
+#endif
+
+#if SHLIB_COMPAT (libc, GLIBC_2_1, GLIBC_2_3_3)
+
+#define _ERRNO_H 1
+#include <bits/errno.h>
+
+ compat_text_section
+ENTRY (__swapcontext_stub)
+ li r3,ENOSYS
+ b __syscall_error@local
+END (__swapcontext_stub)
+ .previous
+
+compat_symbol (libc, __swapcontext_stub, swapcontext, GLIBC_2_1)
+
+#endif
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
index fb7744839..6525cf742 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
@@ -79,15 +79,15 @@ ENTRY(__CONTEXT_FUNC_NAME)
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
#else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
#endif
#ifdef __CONTEXT_ENABLE_FPRS
@@ -243,8 +243,8 @@ ENTRY(__CONTEXT_FUNC_NAME)
lfd fp31,_UC_FREGS+(31*8)(r31)
#endif /* __CONTEXT_ENABLE_FPRS */
-#ifdef __SETCONTEXT_EXTRA
- __SETCONTEXT_EXTRA
+#ifdef __CONTEXT_ENABLE_E500
+ setcontext_e500
#endif
/* Restore LR and CCR, and set CTR to the NIP value */
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
index 7e0612595..caa5b8932 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
@@ -152,15 +152,15 @@ ENTRY(__CONTEXT_FUNC_NAME)
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
# else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
# endif
# ifdef __CONTEXT_ENABLE_VRS
@@ -265,8 +265,8 @@ ENTRY(__CONTEXT_FUNC_NAME)
# endif /* __CONTEXT_ENABLE_VRS */
#endif /* __CONTEXT_ENABLE_FPRS */
-#ifdef __GETCONTEXT_EXTRA
- __GETCONTEXT_EXTRA
+#ifdef __CONTEXT_ENABLE_E500
+ getcontext_e500
#endif
/* Restore ucontext (parm1) from stack. */
@@ -312,14 +312,14 @@ ENTRY(__CONTEXT_FUNC_NAME)
mtlr r8
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
# else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
# endif
andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
la r10,(_UC_VREGS)(r31)
@@ -472,8 +472,8 @@ ENTRY(__CONTEXT_FUNC_NAME)
lfd fp31,_UC_FREGS+(31*8)(r31)
#endif /* __CONTEXT_ENABLE_FPRS */
-#ifdef __SETCONTEXT_EXTRA
- __SETCONTEXT_EXTRA
+#ifdef __CONTEXT_ENABLE_E500
+ setcontext_e500
#endif
/* Restore LR and CCR, and set CTR to the NIP value */
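
The setcontext-common.S and swapcontext-common.S hunks above replace a hard-coded +4 with a LOWORD offset when loading the low 32 bits of _dl_hwcap. The LOWORD definition itself is not part of this diff; the presumed point is endianness: on a 32-bit target the low word of a 64-bit object sits at byte offset 4 only when the target is big-endian. A minimal stand-alone C sketch of that layout difference (the hwcap value and variable names are illustrative only, not from the patch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  uint64_t hwcap = 0x0000000100000002ULL;   /* high word 1, low word 2 */
  uint32_t at0, at4;

  /* Probe both 32-bit halves of the 64-bit object by byte offset.  */
  memcpy (&at0, (const char *) &hwcap, 4);
  memcpy (&at4, (const char *) &hwcap + 4, 4);

  /* A big-endian target sees the low word (2) at offset 4, a
     little-endian one sees it at offset 0.  */
  printf ("offset 0: %#x, offset 4: %#x\n",
	  (unsigned int) at0, (unsigned int) at4);
  return 0;
}
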
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S
index 4a1666938..32fc47c3f 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S
@@ -124,8 +124,10 @@ L(noparms):
/* If the target function returns we need to do some cleanup. We use a
code trick to get the address of our cleanup function into the link
- register. Do not add any code between here and L(exitcode). */
- bl L(gotexitcodeaddr);
+ register. Do not add any code between here and L(exitcode).
+ Use this conditional form of branch and link to avoid destroying
+ the cpu link stack used to predict blr return addresses. */
+ bcl 20,31,L(gotexitcodeaddr);
/* This is the helper code which gets called if a function which
is registered with 'makecontext' returns. In this case we
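
The makecontext.S hunk above swaps a plain bl for bcl 20,31,... so that the branch used only to capture an address does not push an entry onto the hardware link stack that predicts blr return addresses. A minimal C sketch of the same get-the-current-address idiom, assuming GCC extended inline asm on a 32-bit PowerPC target (the function name "here" is illustrative, not from the patch):

#include <stdint.h>
#include <stdio.h>

static uintptr_t
here (void)
{
  uintptr_t addr;
  __asm__ ("bcl 20,31,1f\n\t"	/* Branch always and link: per the patch
				   comment, this form does not disturb the
				   link stack used to predict blr targets.  */
	   "1: mflr %0"		/* LR now holds the address of label 1.  */
	   : "=r" (addr) : : "lr");
  return addr;
}

int
main (void)
{
  printf ("running near address %#lx\n", (unsigned long) here ());
  return 0;
}
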
diff --git a/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h b/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
index 01084bb71..f384bc7fd 100644
--- a/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
+++ b/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
@@ -39,6 +39,7 @@
#define __O_DIRECT 0x100000 /* direct disk access hint */
#define __O_NOATIME 0x200000 /* Do not set atime. */
#define __O_PATH 0x1000000 /* Resolve pathname but do not open file. */
+#define __O_TMPFILE 0x2010000 /* Atomically create nameless file. */
#if __WORDSIZE == 64
# define __O_LARGEFILE 0
diff --git a/libc/sysdeps/unix/sysv/linux/tst-fanotify.c b/libc/sysdeps/unix/sysv/linux/tst-fanotify.c
index b21e160ca..ad9836b58 100644
--- a/libc/sysdeps/unix/sysv/linux/tst-fanotify.c
+++ b/libc/sysdeps/unix/sysv/linux/tst-fanotify.c
@@ -29,14 +29,15 @@ do_test (void)
fd = fanotify_init (0, 0);
if (fd < 0)
{
- switch (errno) {
- case ENOSYS:
- puts ("SKIP: missing support for fanotify (check CONFIG_FANOTIFY=y)");
- return 0;
- case EPERM:
- puts ("SKIP: missing proper permissions for runtime test");
- return 0;
- }
+ switch (errno)
+ {
+ case ENOSYS:
+ puts ("SKIP: missing support for fanotify (check CONFIG_FANOTIFY=y)");
+ return 0;
+ case EPERM:
+ puts ("SKIP: missing proper permissions for runtime test");
+ return 0;
+ }
perror ("fanotify_init (0, 0) failed");
return 1;
diff --git a/libc/sysdeps/x86_64/ffs.c b/libc/sysdeps/x86_64/ffs.c
index 27013d6ae..07ee7dd4a 100644
--- a/libc/sysdeps/x86_64/ffs.c
+++ b/libc/sysdeps/x86_64/ffs.c
@@ -35,4 +35,5 @@ __ffs (int x)
return cnt + 1;
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
diff --git a/libc/sysdeps/x86_64/fpu/printf_fphex.c b/libc/sysdeps/x86_64/fpu/printf_fphex.c
index c85d1f79f..be55f9cf6 100644
--- a/libc/sysdeps/x86_64/fpu/printf_fphex.c
+++ b/libc/sysdeps/x86_64/fpu/printf_fphex.c
@@ -25,10 +25,11 @@ do { \
/* The "strange" 80 bit format on ix86 and m68k has an explicit \
leading digit in the 64 bit mantissa. */ \
unsigned long long int num; \
+ union ieee854_long_double u; \
+ u.d = fpnum.ldbl; \
\
- \
- num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \
- | fpnum.ldbl.ieee.mantissa1); \
+ num = (((unsigned long long int) u.ieee.mantissa0) << 32 \
+ | u.ieee.mantissa1); \
\
zero_mantissa = num == 0; \
\
@@ -61,7 +62,7 @@ do { \
\
/* We have 3 bits from the mantissa in the leading nibble. \
Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. */ \
- exponent = fpnum.ldbl.ieee.exponent; \
+ exponent = u.ieee.exponent; \
\
if (exponent == 0) \
{ \
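
The printf_fphex.c hunk above stops reading bit fields through fpnum.ldbl.ieee and instead copies the value into a local union ieee854_long_double first. A stand-alone C sketch of the same extraction, assuming glibc's installed <ieee754.h> and the x86 80-bit long double format (the value 1.5L and the variable names are illustrative):

#include <ieee754.h>
#include <stdio.h>

int
main (void)
{
  union ieee854_long_double u;
  unsigned long long int num;

  u.d = 1.5L;

  /* Reassemble the 64-bit mantissa; the 80-bit format keeps an explicit
     leading integer bit, so it appears in mantissa0 as well.  */
  num = (((unsigned long long int) u.ieee.mantissa0) << 32
	 | u.ieee.mantissa1);

  /* For 1.5 this prints an unbiased exponent of 0 and a mantissa of
     0xc000000000000000: the integer bit plus one fraction bit.  */
  printf ("exponent = %d, mantissa = %#llx\n",
	  u.ieee.exponent - IEEE854_LONG_DOUBLE_BIAS, num);
  return 0;
}
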
diff --git a/libc/sysdeps/x86_64/memset.S b/libc/sysdeps/x86_64/memset.S
index 6c69f4b44..9b1de89d9 100644
--- a/libc/sysdeps/x86_64/memset.S
+++ b/libc/sysdeps/x86_64/memset.S
@@ -19,10 +19,6 @@
#include <sysdep.h>
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
.text
#if !defined NOT_IN_libc
ENTRY(__bzero)
@@ -71,12 +67,12 @@ L(entry_from_bzero):
L(return):
rep
ret
- ALIGN (4)
+ .p2align 4
L(between_32_64_bytes):
movdqu %xmm8, 16(%rdi)
movdqu %xmm8, -32(%rdi,%rdx)
ret
- ALIGN (4)
+ .p2align 4
L(loop_start):
leaq 64(%rdi), %rcx
movdqu %xmm8, (%rdi)
@@ -92,7 +88,7 @@ L(loop_start):
andq $-64, %rdx
cmpq %rdx, %rcx
je L(return)
- ALIGN (4)
+ .p2align 4
L(loop):
movdqa %xmm8, (%rcx)
movdqa %xmm8, 16(%rcx)
diff --git a/libc/sysdeps/x86_64/multiarch/Makefile b/libc/sysdeps/x86_64/multiarch/Makefile
index 5ab950a53..9fd0fd64c 100644
--- a/libc/sysdeps/x86_64/multiarch/Makefile
+++ b/libc/sysdeps/x86_64/multiarch/Makefile
@@ -8,7 +8,7 @@ ifeq ($(subdir),string)
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strcmp-sse2-unaligned strncmp-ssse3 \
- strend-sse4 memcmp-sse4 memcpy-ssse3 \
+ memcmp-sse4 memcpy-ssse3 \
memcpy-sse2-unaligned mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
@@ -17,7 +17,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strcpy-sse2-unaligned strncpy-sse2-unaligned \
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
strcat-sse2-unaligned strncat-sse2-unaligned \
- strrchr-sse2-no-bsf strchr-sse2-no-bsf memcmp-ssse3
+ strchr-sse2-no-bsf memcmp-ssse3
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
CFLAGS-varshift.c += -msse4
diff --git a/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index f8756d7af..71beab82e 100644
--- a/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -110,7 +110,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strchr.S. */
IFUNC_IMPL (i, name, strchr,
- IFUNC_IMPL_ADD (array, i, strchr, HAS_SSE4_2, __strchr_sse42)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
@@ -177,12 +176,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strpbrk_sse42)
IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
- /* Support sysdeps/x86_64/multiarch/strrchr.S. */
- IFUNC_IMPL (i, name, strrchr,
- IFUNC_IMPL_ADD (array, i, strrchr, HAS_SSE4_2,
- __strrchr_sse42)
- IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2_no_bsf)
- IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
/* Support sysdeps/x86_64/multiarch/strspn.S. */
IFUNC_IMPL (i, name, strspn,
diff --git a/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S b/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 1ed4200f4..d7b147e5c 100644
--- a/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -25,10 +25,6 @@
# define MEMCMP __memcmp_sse4_1
# endif
-# ifndef ALIGN
-# define ALIGN(n) .p2align n
-# endif
-
# define JMPTBL(I, B) (I - B)
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
@@ -60,7 +56,7 @@ ENTRY (MEMCMP)
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(firstbyte):
movzbl (%rdi), %eax
movzbl (%rsi), %ecx
@@ -68,7 +64,7 @@ L(firstbyte):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(79bytesormore):
movdqu (%rsi), %xmm1
movdqu (%rdi), %xmm2
@@ -316,7 +312,7 @@ L(less32bytesin256):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(512bytesormore):
# ifdef DATA_CACHE_SIZE_HALF
mov $DATA_CACHE_SIZE_HALF, %R8_LP
@@ -329,7 +325,7 @@ L(512bytesormore):
cmp %r8, %rdx
ja L(L2_L3_cache_unaglined)
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(64bytesormore_loop):
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
@@ -361,7 +357,7 @@ L(64bytesormore_loop):
L(L2_L3_cache_unaglined):
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(L2_L3_unaligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
@@ -396,7 +392,7 @@ L(L2_L3_unaligned_128bytes_loop):
/*
* This case is for machines which are sensitive for unaligned instructions.
*/
- ALIGN (4)
+ .p2align 4
L(2aligned):
cmp $128, %rdx
ja L(128bytesormorein2aligned)
@@ -444,7 +440,7 @@ L(less32bytesin64in2alinged):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(128bytesormorein2aligned):
cmp $512, %rdx
ja L(512bytesormorein2aligned)
@@ -519,7 +515,7 @@ L(less32bytesin128in2aligned):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(256bytesormorein2aligned):
sub $256, %rdx
@@ -632,7 +628,7 @@ L(less32bytesin256in2alinged):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(512bytesormorein2aligned):
# ifdef DATA_CACHE_SIZE_HALF
mov $DATA_CACHE_SIZE_HALF, %R8_LP
@@ -646,7 +642,7 @@ L(512bytesormorein2aligned):
ja L(L2_L3_cache_aglined)
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(64bytesormore_loopin2aligned):
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
@@ -678,7 +674,7 @@ L(64bytesormore_loopin2aligned):
L(L2_L3_cache_aglined):
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(L2_L3_aligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
@@ -711,7 +707,7 @@ L(L2_L3_aligned_128bytes_loop):
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(64bytesormore_loop_end):
add $16, %rdi
add $16, %rsi
@@ -806,7 +802,7 @@ L(8bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(12bytes):
mov -12(%rdi), %rax
mov -12(%rsi), %rcx
@@ -827,7 +823,7 @@ L(0bytes):
# ifndef USE_AS_WMEMCMP
/* unreal case for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(65bytes):
movdqu -65(%rdi), %xmm1
movdqu -65(%rsi), %xmm2
@@ -864,7 +860,7 @@ L(9bytes):
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(13bytes):
mov -13(%rdi), %rax
mov -13(%rsi), %rcx
@@ -877,7 +873,7 @@ L(13bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(5bytes):
mov -5(%rdi), %eax
mov -5(%rsi), %ecx
@@ -888,7 +884,7 @@ L(5bytes):
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(66bytes):
movdqu -66(%rdi), %xmm1
movdqu -66(%rsi), %xmm2
@@ -929,7 +925,7 @@ L(10bytes):
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(14bytes):
mov -14(%rdi), %rax
mov -14(%rsi), %rcx
@@ -942,7 +938,7 @@ L(14bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(6bytes):
mov -6(%rdi), %eax
mov -6(%rsi), %ecx
@@ -958,7 +954,7 @@ L(2bytes):
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(67bytes):
movdqu -67(%rdi), %xmm2
movdqu -67(%rsi), %xmm1
@@ -997,7 +993,7 @@ L(11bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(15bytes):
mov -15(%rdi), %rax
mov -15(%rsi), %rcx
@@ -1010,7 +1006,7 @@ L(15bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(7bytes):
mov -7(%rdi), %eax
mov -7(%rsi), %ecx
@@ -1023,7 +1019,7 @@ L(7bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(3bytes):
movzwl -3(%rdi), %eax
movzwl -3(%rsi), %ecx
@@ -1036,7 +1032,7 @@ L(1bytes):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(68bytes):
movdqu -68(%rdi), %xmm2
movdqu -68(%rsi), %xmm1
@@ -1079,7 +1075,7 @@ L(20bytes):
# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(69bytes):
movdqu -69(%rsi), %xmm1
movdqu -69(%rdi), %xmm2
@@ -1115,7 +1111,7 @@ L(21bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(70bytes):
movdqu -70(%rsi), %xmm1
movdqu -70(%rdi), %xmm2
@@ -1151,7 +1147,7 @@ L(22bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(71bytes):
movdqu -71(%rsi), %xmm1
movdqu -71(%rdi), %xmm2
@@ -1188,7 +1184,7 @@ L(23bytes):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(72bytes):
movdqu -72(%rsi), %xmm1
movdqu -72(%rdi), %xmm2
@@ -1227,7 +1223,7 @@ L(24bytes):
# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(73bytes):
movdqu -73(%rsi), %xmm1
movdqu -73(%rdi), %xmm2
@@ -1265,7 +1261,7 @@ L(25bytes):
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(74bytes):
movdqu -74(%rsi), %xmm1
movdqu -74(%rdi), %xmm2
@@ -1302,7 +1298,7 @@ L(26bytes):
movzwl -2(%rsi), %ecx
jmp L(diffin2bytes)
- ALIGN (4)
+ .p2align 4
L(75bytes):
movdqu -75(%rsi), %xmm1
movdqu -75(%rdi), %xmm2
@@ -1342,7 +1338,7 @@ L(27bytes):
xor %eax, %eax
ret
# endif
- ALIGN (4)
+ .p2align 4
L(76bytes):
movdqu -76(%rsi), %xmm1
movdqu -76(%rdi), %xmm2
@@ -1388,7 +1384,7 @@ L(28bytes):
# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(77bytes):
movdqu -77(%rsi), %xmm1
movdqu -77(%rdi), %xmm2
@@ -1430,7 +1426,7 @@ L(29bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(78bytes):
movdqu -78(%rsi), %xmm1
movdqu -78(%rdi), %xmm2
@@ -1470,7 +1466,7 @@ L(30bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(79bytes):
movdqu -79(%rsi), %xmm1
movdqu -79(%rdi), %xmm2
@@ -1510,7 +1506,7 @@ L(31bytes):
xor %eax, %eax
ret
# endif
- ALIGN (4)
+ .p2align 4
L(64bytes):
movdqu -64(%rdi), %xmm2
movdqu -64(%rsi), %xmm1
@@ -1548,7 +1544,7 @@ L(32bytes):
/*
* Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block.
*/
- ALIGN (3)
+ .p2align 3
L(less16bytes):
movsbq %dl, %rdx
mov (%rsi, %rdx), %rcx
@@ -1585,7 +1581,7 @@ L(diffin2bytes):
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(end):
and $0xff, %eax
and $0xff, %ecx
@@ -1599,7 +1595,7 @@ L(end):
neg %eax
ret
- ALIGN (4)
+ .p2align 4
L(nequal_bigger):
ret
@@ -1611,7 +1607,7 @@ L(unreal_case):
END (MEMCMP)
.section .rodata.sse4.1,"a",@progbits
- ALIGN (3)
+ .p2align 3
# ifndef USE_AS_WMEMCMP
L(table_64bytes):
.int JMPTBL (L(0bytes), L(table_64bytes))
diff --git a/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S
index e319df926..e04f918df 100644
--- a/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S
+++ b/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S
@@ -25,10 +25,6 @@
# define MEMCMP __memcmp_ssse3
# endif
-# ifndef ALIGN
-# define ALIGN(n) .p2align n
-# endif
-
/* Warning!
wmemcmp has to use SIGNED comparison for elements.
memcmp has to use UNSIGNED comparison for elemnts.
@@ -50,7 +46,7 @@ ENTRY (MEMCMP)
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
/* ECX >= 32. */
L(48bytesormore):
movdqu (%rdi), %xmm3
@@ -90,7 +86,7 @@ L(48bytesormore):
je L(shr_6)
jmp L(shr_7)
- ALIGN (2)
+ .p2align 2
L(next_unaligned_table):
cmp $8, %edx
je L(shr_8)
@@ -117,7 +113,7 @@ L(next_unaligned_table):
jmp L(shr_12)
# endif
- ALIGN (4)
+ .p2align 4
L(shr_0):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -137,7 +133,7 @@ L(shr_0):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_0_gobble):
movdqa (%rsi), %xmm0
xor %eax, %eax
@@ -180,7 +176,7 @@ L(next):
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_1):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -207,7 +203,7 @@ L(shr_1):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_1_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -258,7 +254,7 @@ L(shr_1_gobble_next):
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_2):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -285,7 +281,7 @@ L(shr_2):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_2_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -335,7 +331,7 @@ L(shr_2_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_3):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -362,7 +358,7 @@ L(shr_3):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_3_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -414,7 +410,7 @@ L(shr_3_gobble_next):
# endif
- ALIGN (4)
+ .p2align 4
L(shr_4):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -441,7 +437,7 @@ L(shr_4):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_4_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -493,7 +489,7 @@ L(shr_4_gobble_next):
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_5):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -520,7 +516,7 @@ L(shr_5):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_5_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -570,7 +566,7 @@ L(shr_5_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_6):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -597,7 +593,7 @@ L(shr_6):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_6_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -647,7 +643,7 @@ L(shr_6_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_7):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -674,7 +670,7 @@ L(shr_7):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_7_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -726,7 +722,7 @@ L(shr_7_gobble_next):
# endif
- ALIGN (4)
+ .p2align 4
L(shr_8):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -753,7 +749,7 @@ L(shr_8):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_8_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -805,7 +801,7 @@ L(shr_8_gobble_next):
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_9):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -832,7 +828,7 @@ L(shr_9):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_9_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -882,7 +878,7 @@ L(shr_9_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_10):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -909,7 +905,7 @@ L(shr_10):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_10_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -959,7 +955,7 @@ L(shr_10_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_11):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -986,7 +982,7 @@ L(shr_11):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_11_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1038,7 +1034,7 @@ L(shr_11_gobble_next):
# endif
- ALIGN (4)
+ .p2align 4
L(shr_12):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1065,7 +1061,7 @@ L(shr_12):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_12_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1117,7 +1113,7 @@ L(shr_12_gobble_next):
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_13):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1144,7 +1140,7 @@ L(shr_13):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_13_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1194,7 +1190,7 @@ L(shr_13_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_14):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1221,7 +1217,7 @@ L(shr_14):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_14_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1271,7 +1267,7 @@ L(shr_14_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_15):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1298,7 +1294,7 @@ L(shr_15):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_15_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1348,7 +1344,7 @@ L(shr_15_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(exit):
pmovmskb %xmm1, %r8d
sub $0xffff, %r8d
@@ -1389,56 +1385,56 @@ L(less16bytes):
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte16):
movzbl -16(%rdi), %eax
movzbl -16(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte17):
movzbl -15(%rdi), %eax
movzbl -15(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte18):
movzbl -14(%rdi), %eax
movzbl -14(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte19):
movzbl -13(%rdi), %eax
movzbl -13(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte20):
movzbl -12(%rdi), %eax
movzbl -12(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte21):
movzbl -11(%rdi), %eax
movzbl -11(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte22):
movzbl -10(%rdi), %eax
movzbl -10(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(next_24_bytes):
lea 8(%rdi), %rdi
lea 8(%rsi), %rsi
@@ -1479,14 +1475,14 @@ L(next_24_bytes):
jne L(find_diff)
ret
- ALIGN (4)
+ .p2align 4
L(second_double_word):
mov -12(%rdi), %eax
cmp -12(%rsi), %eax
jne L(find_diff)
ret
- ALIGN (4)
+ .p2align 4
L(next_two_double_words):
and $15, %dh
jz L(fourth_double_word)
@@ -1495,7 +1491,7 @@ L(next_two_double_words):
jne L(find_diff)
ret
- ALIGN (4)
+ .p2align 4
L(fourth_double_word):
mov -4(%rdi), %eax
cmp -4(%rsi), %eax
@@ -1503,7 +1499,7 @@ L(fourth_double_word):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(less48bytes):
cmp $8, %ecx
jae L(more8bytes)
@@ -1527,7 +1523,7 @@ L(less48bytes):
jmp L(4bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more8bytes):
cmp $16, %ecx
jae L(more16bytes)
@@ -1551,7 +1547,7 @@ L(more8bytes):
jmp L(12bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more16bytes):
cmp $24, %ecx
jae L(more24bytes)
@@ -1575,7 +1571,7 @@ L(more16bytes):
jmp L(20bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more24bytes):
cmp $32, %ecx
jae L(more32bytes)
@@ -1599,7 +1595,7 @@ L(more24bytes):
jmp L(28bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more32bytes):
cmp $40, %ecx
jae L(more40bytes)
@@ -1623,7 +1619,7 @@ L(more32bytes):
jmp L(36bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more40bytes):
cmp $40, %ecx
je L(40bytes)
@@ -1642,7 +1638,7 @@ L(more40bytes):
je L(46bytes)
jmp L(47bytes)
- ALIGN (4)
+ .p2align 4
L(44bytes):
movl -44(%rdi), %eax
movl -44(%rsi), %ecx
@@ -1702,7 +1698,7 @@ L(0bytes):
xor %eax, %eax
ret
# else
- ALIGN (4)
+ .p2align 4
L(44bytes):
movl -44(%rdi), %eax
cmp -44(%rsi), %eax
@@ -1753,7 +1749,7 @@ L(0bytes):
# endif
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(45bytes):
movl -45(%rdi), %eax
movl -45(%rsi), %ecx
@@ -1816,7 +1812,7 @@ L(1bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(46bytes):
movl -46(%rdi), %eax
movl -46(%rsi), %ecx
@@ -1882,7 +1878,7 @@ L(2bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(47bytes):
movl -47(%rdi), %eax
movl -47(%rsi), %ecx
@@ -1951,7 +1947,7 @@ L(3bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(find_diff):
cmpb %cl, %al
jne L(set)
@@ -1973,19 +1969,19 @@ L(set):
# else
/* for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(find_diff):
mov $1, %eax
jg L(find_diff_bigger)
neg %eax
ret
- ALIGN (4)
+ .p2align 4
L(find_diff_bigger):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(equal):
xor %eax, %eax
ret
diff --git a/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
index efdfea238..df6578ebc 100644
--- a/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ b/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -20,10 +20,6 @@
#include "asm-syntax.h"
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
ENTRY(__memcpy_sse2_unaligned)
movq %rsi, %rax
@@ -44,7 +40,7 @@ L(return):
movq %rdi, %rax
ret
.p2align 4,,10
- ALIGN(4)
+ .p2align 4
.L31:
movdqu 16(%rsi), %xmm8
cmpq $64, %rdx
@@ -77,7 +73,7 @@ L(return):
leaq 32(%r10), %r8
leaq 48(%r10), %rax
.p2align 4,,10
- ALIGN(4)
+ .p2align 4
L(loop):
movdqu (%rcx,%r10), %xmm8
movdqa %xmm8, (%rcx)
@@ -151,7 +147,7 @@ L(less_16):
.L3:
leaq -1(%rdx), %rax
.p2align 4,,10
- ALIGN(4)
+ .p2align 4
.L11:
movzbl (%rsi,%rax), %edx
movb %dl, (%rdi,%rax)
diff --git a/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index fc9fcef27..0eb7d9b75 100644
--- a/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -31,10 +31,6 @@
# define MEMCPY_CHK __memcpy_chk_ssse3_back
#endif
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
#define JMPTBL(I, B) I - B
/* Branch to an entry in a jump table. TABLE is a jump table with
@@ -87,7 +83,7 @@ L(bk_write):
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
#endif
- ALIGN (4)
+ .p2align 4
L(144bytesormore):
#ifndef USE_AS_MEMMOVE
@@ -119,7 +115,7 @@ L(144bytesormore):
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(copy_backward):
#ifdef DATA_CACHE_SIZE
mov $DATA_CACHE_SIZE, %RCX_LP
@@ -149,7 +145,7 @@ L(copy_backward):
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(shl_0):
mov %rdx, %r9
@@ -162,7 +158,7 @@ L(shl_0):
#endif
jae L(gobble_mem_fwd)
sub $0x80, %rdx
- ALIGN (4)
+ .p2align 4
L(shl_0_loop):
movdqa (%rsi), %xmm1
movdqa %xmm1, (%rdi)
@@ -190,7 +186,7 @@ L(shl_0_loop):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_bwd):
sub $0x80, %rdx
L(copy_backward_loop):
@@ -221,7 +217,7 @@ L(copy_backward_loop):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1):
sub $0x80, %rdx
movaps -0x01(%rsi), %xmm1
@@ -258,7 +254,7 @@ L(shl_1):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1_bwd):
movaps -0x01(%rsi), %xmm1
@@ -304,7 +300,7 @@ L(shl_1_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2):
sub $0x80, %rdx
movaps -0x02(%rsi), %xmm1
@@ -341,7 +337,7 @@ L(shl_2):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2_bwd):
movaps -0x02(%rsi), %xmm1
@@ -387,7 +383,7 @@ L(shl_2_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3):
sub $0x80, %rdx
movaps -0x03(%rsi), %xmm1
@@ -424,7 +420,7 @@ L(shl_3):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3_bwd):
movaps -0x03(%rsi), %xmm1
@@ -470,7 +466,7 @@ L(shl_3_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4):
sub $0x80, %rdx
movaps -0x04(%rsi), %xmm1
@@ -507,7 +503,7 @@ L(shl_4):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4_bwd):
movaps -0x04(%rsi), %xmm1
@@ -553,7 +549,7 @@ L(shl_4_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5):
sub $0x80, %rdx
movaps -0x05(%rsi), %xmm1
@@ -590,7 +586,7 @@ L(shl_5):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5_bwd):
movaps -0x05(%rsi), %xmm1
@@ -636,7 +632,7 @@ L(shl_5_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6):
sub $0x80, %rdx
movaps -0x06(%rsi), %xmm1
@@ -673,7 +669,7 @@ L(shl_6):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6_bwd):
movaps -0x06(%rsi), %xmm1
@@ -719,7 +715,7 @@ L(shl_6_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7):
sub $0x80, %rdx
movaps -0x07(%rsi), %xmm1
@@ -756,7 +752,7 @@ L(shl_7):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7_bwd):
movaps -0x07(%rsi), %xmm1
@@ -802,7 +798,7 @@ L(shl_7_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8):
sub $0x80, %rdx
movaps -0x08(%rsi), %xmm1
@@ -839,7 +835,7 @@ L(shl_8):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8_bwd):
movaps -0x08(%rsi), %xmm1
@@ -886,7 +882,7 @@ L(shl_8_end_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9):
sub $0x80, %rdx
movaps -0x09(%rsi), %xmm1
@@ -923,7 +919,7 @@ L(shl_9):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9_bwd):
movaps -0x09(%rsi), %xmm1
@@ -969,7 +965,7 @@ L(shl_9_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10):
sub $0x80, %rdx
movaps -0x0a(%rsi), %xmm1
@@ -1006,7 +1002,7 @@ L(shl_10):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10_bwd):
movaps -0x0a(%rsi), %xmm1
@@ -1052,7 +1048,7 @@ L(shl_10_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11):
sub $0x80, %rdx
movaps -0x0b(%rsi), %xmm1
@@ -1089,7 +1085,7 @@ L(shl_11):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11_bwd):
movaps -0x0b(%rsi), %xmm1
@@ -1135,7 +1131,7 @@ L(shl_11_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12):
sub $0x80, %rdx
movdqa -0x0c(%rsi), %xmm1
@@ -1173,7 +1169,7 @@ L(shl_12):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12_bwd):
movaps -0x0c(%rsi), %xmm1
@@ -1219,7 +1215,7 @@ L(shl_12_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13):
sub $0x80, %rdx
movaps -0x0d(%rsi), %xmm1
@@ -1256,7 +1252,7 @@ L(shl_13):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13_bwd):
movaps -0x0d(%rsi), %xmm1
@@ -1302,7 +1298,7 @@ L(shl_13_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14):
sub $0x80, %rdx
movaps -0x0e(%rsi), %xmm1
@@ -1339,7 +1335,7 @@ L(shl_14):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14_bwd):
movaps -0x0e(%rsi), %xmm1
@@ -1385,7 +1381,7 @@ L(shl_14_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15):
sub $0x80, %rdx
movaps -0x0f(%rsi), %xmm1
@@ -1422,7 +1418,7 @@ L(shl_15):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15_bwd):
movaps -0x0f(%rsi), %xmm1
@@ -1468,7 +1464,7 @@ L(shl_15_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(gobble_mem_fwd):
movdqu (%rsi), %xmm1
movdqu %xmm0, (%r8)
@@ -1570,7 +1566,7 @@ L(gobble_mem_fwd_end):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(gobble_mem_bwd):
add %rdx, %rsi
add %rdx, %rdi
@@ -2833,7 +2829,7 @@ L(bwd_write_1bytes):
END (MEMCPY)
.section .rodata.ssse3,"a",@progbits
- ALIGN (3)
+ .p2align 3
L(table_144_bytes_bwd):
.int JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
.int JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
@@ -2980,7 +2976,7 @@ L(table_144_bytes_bwd):
.int JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
.int JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
- ALIGN (3)
+ .p2align 3
L(table_144_bytes_fwd):
.int JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
.int JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
@@ -3127,7 +3123,7 @@ L(table_144_bytes_fwd):
.int JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
.int JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
- ALIGN (3)
+ .p2align 3
L(shl_table_fwd):
.int JMPTBL (L(shl_0), L(shl_table_fwd))
.int JMPTBL (L(shl_1), L(shl_table_fwd))
@@ -3146,7 +3142,7 @@ L(shl_table_fwd):
.int JMPTBL (L(shl_14), L(shl_table_fwd))
.int JMPTBL (L(shl_15), L(shl_table_fwd))
- ALIGN (3)
+ .p2align 3
L(shl_table_bwd):
.int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
.int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
diff --git a/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 9642ceecd..0cedab244 100644
--- a/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -31,10 +31,6 @@
# define MEMCPY_CHK __memcpy_chk_ssse3
#endif
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
#define JMPTBL(I, B) I - B
/* Branch to an entry in a jump table. TABLE is a jump table with
@@ -80,7 +76,7 @@ L(copy_forward):
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(80bytesormore):
#ifndef USE_AS_MEMMOVE
cmp %dil, %sil
@@ -113,7 +109,7 @@ L(80bytesormore):
#endif
BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %r9, 4)
- ALIGN (4)
+ .p2align 4
L(copy_backward):
movdqu -16(%rsi, %rdx), %xmm0
add %rdx, %rsi
@@ -144,7 +140,7 @@ L(copy_backward):
#endif
BRANCH_TO_JMPTBL_ENTRY (L(shl_table_bwd), %r9, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0):
sub $16, %rdx
movdqa (%rsi), %xmm1
@@ -172,7 +168,7 @@ L(shl_0_less_64bytes):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble):
#ifdef DATA_CACHE_SIZE_HALF
cmp $DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -228,7 +224,7 @@ L(shl_0_cache_less_64bytes):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble_mem_loop):
prefetcht0 0x1c0(%rsi)
prefetcht0 0x280(%rsi)
@@ -287,7 +283,7 @@ L(shl_0_mem_less_32bytes):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_bwd):
sub $16, %rdx
movdqa -0x10(%rsi), %xmm1
@@ -313,7 +309,7 @@ L(shl_0_bwd):
L(shl_0_less_64bytes_bwd):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble_bwd):
#ifdef DATA_CACHE_SIZE_HALF
cmp $DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -367,7 +363,7 @@ L(shl_0_gobble_bwd_loop):
L(shl_0_gobble_bwd_less_64bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble_mem_bwd_loop):
prefetcht0 -0x1c0(%rsi)
prefetcht0 -0x280(%rsi)
@@ -423,7 +419,7 @@ L(shl_0_mem_bwd_less_64bytes):
L(shl_0_mem_bwd_less_32bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1):
lea (L(shl_1_loop_L1)-L(shl_1))(%r9), %r9
cmp %rcx, %rdx
@@ -466,7 +462,7 @@ L(shl_1_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1_bwd):
lea (L(shl_1_bwd_loop_L1)-L(shl_1_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -508,7 +504,7 @@ L(shl_1_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2):
lea (L(shl_2_loop_L1)-L(shl_2))(%r9), %r9
cmp %rcx, %rdx
@@ -551,7 +547,7 @@ L(shl_2_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2_bwd):
lea (L(shl_2_bwd_loop_L1)-L(shl_2_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -593,7 +589,7 @@ L(shl_2_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3):
lea (L(shl_3_loop_L1)-L(shl_3))(%r9), %r9
cmp %rcx, %rdx
@@ -636,7 +632,7 @@ L(shl_3_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3_bwd):
lea (L(shl_3_bwd_loop_L1)-L(shl_3_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -678,7 +674,7 @@ L(shl_3_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4):
lea (L(shl_4_loop_L1)-L(shl_4))(%r9), %r9
cmp %rcx, %rdx
@@ -721,7 +717,7 @@ L(shl_4_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4_bwd):
lea (L(shl_4_bwd_loop_L1)-L(shl_4_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -763,7 +759,7 @@ L(shl_4_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5):
lea (L(shl_5_loop_L1)-L(shl_5))(%r9), %r9
cmp %rcx, %rdx
@@ -806,7 +802,7 @@ L(shl_5_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5_bwd):
lea (L(shl_5_bwd_loop_L1)-L(shl_5_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -848,7 +844,7 @@ L(shl_5_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6):
lea (L(shl_6_loop_L1)-L(shl_6))(%r9), %r9
cmp %rcx, %rdx
@@ -891,7 +887,7 @@ L(shl_6_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6_bwd):
lea (L(shl_6_bwd_loop_L1)-L(shl_6_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -933,7 +929,7 @@ L(shl_6_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7):
lea (L(shl_7_loop_L1)-L(shl_7))(%r9), %r9
cmp %rcx, %rdx
@@ -976,7 +972,7 @@ L(shl_7_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7_bwd):
lea (L(shl_7_bwd_loop_L1)-L(shl_7_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1018,7 +1014,7 @@ L(shl_7_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8):
lea (L(shl_8_loop_L1)-L(shl_8))(%r9), %r9
cmp %rcx, %rdx
@@ -1051,7 +1047,7 @@ L(shl_8_loop_L1):
movaps %xmm5, -0x10(%rdi)
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(shl_8_end):
lea 64(%rdx), %rdx
movaps %xmm4, -0x20(%rdi)
@@ -1061,7 +1057,7 @@ L(shl_8_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8_bwd):
lea (L(shl_8_bwd_loop_L1)-L(shl_8_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1103,7 +1099,7 @@ L(shl_8_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9):
lea (L(shl_9_loop_L1)-L(shl_9))(%r9), %r9
cmp %rcx, %rdx
@@ -1146,7 +1142,7 @@ L(shl_9_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9_bwd):
lea (L(shl_9_bwd_loop_L1)-L(shl_9_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1188,7 +1184,7 @@ L(shl_9_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10):
lea (L(shl_10_loop_L1)-L(shl_10))(%r9), %r9
cmp %rcx, %rdx
@@ -1231,7 +1227,7 @@ L(shl_10_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10_bwd):
lea (L(shl_10_bwd_loop_L1)-L(shl_10_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1273,7 +1269,7 @@ L(shl_10_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11):
lea (L(shl_11_loop_L1)-L(shl_11))(%r9), %r9
cmp %rcx, %rdx
@@ -1316,7 +1312,7 @@ L(shl_11_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11_bwd):
lea (L(shl_11_bwd_loop_L1)-L(shl_11_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1358,7 +1354,7 @@ L(shl_11_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12):
lea (L(shl_12_loop_L1)-L(shl_12))(%r9), %r9
cmp %rcx, %rdx
@@ -1401,7 +1397,7 @@ L(shl_12_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12_bwd):
lea (L(shl_12_bwd_loop_L1)-L(shl_12_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1443,7 +1439,7 @@ L(shl_12_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13):
lea (L(shl_13_loop_L1)-L(shl_13))(%r9), %r9
cmp %rcx, %rdx
@@ -1486,7 +1482,7 @@ L(shl_13_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13_bwd):
lea (L(shl_13_bwd_loop_L1)-L(shl_13_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1528,7 +1524,7 @@ L(shl_13_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14):
lea (L(shl_14_loop_L1)-L(shl_14))(%r9), %r9
cmp %rcx, %rdx
@@ -1571,7 +1567,7 @@ L(shl_14_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14_bwd):
lea (L(shl_14_bwd_loop_L1)-L(shl_14_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1613,7 +1609,7 @@ L(shl_14_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15):
lea (L(shl_15_loop_L1)-L(shl_15))(%r9), %r9
cmp %rcx, %rdx
@@ -1656,7 +1652,7 @@ L(shl_15_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15_bwd):
lea (L(shl_15_bwd_loop_L1)-L(shl_15_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1698,7 +1694,7 @@ L(shl_15_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(write_72bytes):
movdqu -72(%rsi), %xmm0
movdqu -56(%rsi), %xmm1
@@ -1716,7 +1712,7 @@ L(write_72bytes):
mov %rcx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_64bytes):
movdqu -64(%rsi), %xmm0
mov -48(%rsi), %rcx
@@ -1734,7 +1730,7 @@ L(write_64bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_56bytes):
movdqu -56(%rsi), %xmm0
mov -40(%rsi), %r8
@@ -1750,7 +1746,7 @@ L(write_56bytes):
mov %rcx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_48bytes):
mov -48(%rsi), %rcx
mov -40(%rsi), %r8
@@ -1766,7 +1762,7 @@ L(write_48bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_40bytes):
mov -40(%rsi), %r8
mov -32(%rsi), %r9
@@ -1780,7 +1776,7 @@ L(write_40bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_32bytes):
mov -32(%rsi), %r9
mov -24(%rsi), %r10
@@ -1792,7 +1788,7 @@ L(write_32bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_24bytes):
mov -24(%rsi), %r10
mov -16(%rsi), %r11
@@ -1802,7 +1798,7 @@ L(write_24bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_16bytes):
mov -16(%rsi), %r11
mov -8(%rsi), %rdx
@@ -1810,14 +1806,14 @@ L(write_16bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_8bytes):
mov -8(%rsi), %rdx
mov %rdx, -8(%rdi)
L(write_0bytes):
ret
- ALIGN (4)
+ .p2align 4
L(write_73bytes):
movdqu -73(%rsi), %xmm0
movdqu -57(%rsi), %xmm1
@@ -1837,7 +1833,7 @@ L(write_73bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_65bytes):
movdqu -65(%rsi), %xmm0
movdqu -49(%rsi), %xmm1
@@ -1855,7 +1851,7 @@ L(write_65bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_57bytes):
movdqu -57(%rsi), %xmm0
mov -41(%rsi), %r8
@@ -1873,7 +1869,7 @@ L(write_57bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_49bytes):
movdqu -49(%rsi), %xmm0
mov -33(%rsi), %r9
@@ -1889,7 +1885,7 @@ L(write_49bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_41bytes):
mov -41(%rsi), %r8
mov -33(%rsi), %r9
@@ -1905,7 +1901,7 @@ L(write_41bytes):
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_33bytes):
mov -33(%rsi), %r9
mov -25(%rsi), %r10
@@ -1919,7 +1915,7 @@ L(write_33bytes):
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_25bytes):
mov -25(%rsi), %r10
mov -17(%rsi), %r11
@@ -1931,7 +1927,7 @@ L(write_25bytes):
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_17bytes):
mov -17(%rsi), %r11
mov -9(%rsi), %rcx
@@ -1941,7 +1937,7 @@ L(write_17bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_9bytes):
mov -9(%rsi), %rcx
mov -4(%rsi), %edx
@@ -1949,13 +1945,13 @@ L(write_9bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_1bytes):
mov -1(%rsi), %dl
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_74bytes):
movdqu -74(%rsi), %xmm0
movdqu -58(%rsi), %xmm1
@@ -1975,7 +1971,7 @@ L(write_74bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_66bytes):
movdqu -66(%rsi), %xmm0
movdqu -50(%rsi), %xmm1
@@ -1995,7 +1991,7 @@ L(write_66bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_58bytes):
movdqu -58(%rsi), %xmm1
mov -42(%rsi), %r8
@@ -2013,7 +2009,7 @@ L(write_58bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_50bytes):
movdqu -50(%rsi), %xmm0
mov -34(%rsi), %r9
@@ -2029,7 +2025,7 @@ L(write_50bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_42bytes):
mov -42(%rsi), %r8
mov -34(%rsi), %r9
@@ -2045,7 +2041,7 @@ L(write_42bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_34bytes):
mov -34(%rsi), %r9
mov -26(%rsi), %r10
@@ -2059,7 +2055,7 @@ L(write_34bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_26bytes):
mov -26(%rsi), %r10
mov -18(%rsi), %r11
@@ -2071,7 +2067,7 @@ L(write_26bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_18bytes):
mov -18(%rsi), %r11
mov -10(%rsi), %rcx
@@ -2081,7 +2077,7 @@ L(write_18bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_10bytes):
mov -10(%rsi), %rcx
mov -4(%rsi), %edx
@@ -2089,13 +2085,13 @@ L(write_10bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_2bytes):
mov -2(%rsi), %dx
mov %dx, -2(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_75bytes):
movdqu -75(%rsi), %xmm0
movdqu -59(%rsi), %xmm1
@@ -2115,7 +2111,7 @@ L(write_75bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_67bytes):
movdqu -67(%rsi), %xmm0
movdqu -59(%rsi), %xmm1
@@ -2135,7 +2131,7 @@ L(write_67bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_59bytes):
movdqu -59(%rsi), %xmm0
mov -43(%rsi), %r8
@@ -2153,7 +2149,7 @@ L(write_59bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_51bytes):
movdqu -51(%rsi), %xmm0
mov -35(%rsi), %r9
@@ -2169,7 +2165,7 @@ L(write_51bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_43bytes):
mov -43(%rsi), %r8
mov -35(%rsi), %r9
@@ -2185,7 +2181,7 @@ L(write_43bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_35bytes):
mov -35(%rsi), %r9
mov -27(%rsi), %r10
@@ -2199,7 +2195,7 @@ L(write_35bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_27bytes):
mov -27(%rsi), %r10
mov -19(%rsi), %r11
@@ -2211,7 +2207,7 @@ L(write_27bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_19bytes):
mov -19(%rsi), %r11
mov -11(%rsi), %rcx
@@ -2221,7 +2217,7 @@ L(write_19bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_11bytes):
mov -11(%rsi), %rcx
mov -4(%rsi), %edx
@@ -2229,7 +2225,7 @@ L(write_11bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_3bytes):
mov -3(%rsi), %dx
mov -2(%rsi), %cx
@@ -2237,7 +2233,7 @@ L(write_3bytes):
mov %cx, -2(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_76bytes):
movdqu -76(%rsi), %xmm0
movdqu -60(%rsi), %xmm1
@@ -2257,7 +2253,7 @@ L(write_76bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_68bytes):
movdqu -68(%rsi), %xmm0
movdqu -52(%rsi), %xmm1
@@ -2275,7 +2271,7 @@ L(write_68bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_60bytes):
movdqu -60(%rsi), %xmm0
mov -44(%rsi), %r8
@@ -2293,7 +2289,7 @@ L(write_60bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_52bytes):
movdqu -52(%rsi), %xmm0
mov -36(%rsi), %r9
@@ -2309,7 +2305,7 @@ L(write_52bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_44bytes):
mov -44(%rsi), %r8
mov -36(%rsi), %r9
@@ -2325,7 +2321,7 @@ L(write_44bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_36bytes):
mov -36(%rsi), %r9
mov -28(%rsi), %r10
@@ -2339,7 +2335,7 @@ L(write_36bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_28bytes):
mov -28(%rsi), %r10
mov -20(%rsi), %r11
@@ -2351,7 +2347,7 @@ L(write_28bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_20bytes):
mov -20(%rsi), %r11
mov -12(%rsi), %rcx
@@ -2361,7 +2357,7 @@ L(write_20bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_12bytes):
mov -12(%rsi), %rcx
mov -4(%rsi), %edx
@@ -2369,13 +2365,13 @@ L(write_12bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_4bytes):
mov -4(%rsi), %edx
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_77bytes):
movdqu -77(%rsi), %xmm0
movdqu -61(%rsi), %xmm1
@@ -2395,7 +2391,7 @@ L(write_77bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_69bytes):
movdqu -69(%rsi), %xmm0
movdqu -53(%rsi), %xmm1
@@ -2413,7 +2409,7 @@ L(write_69bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_61bytes):
movdqu -61(%rsi), %xmm0
mov -45(%rsi), %r8
@@ -2431,7 +2427,7 @@ L(write_61bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_53bytes):
movdqu -53(%rsi), %xmm0
mov -45(%rsi), %r8
@@ -2448,7 +2444,7 @@ L(write_53bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_45bytes):
mov -45(%rsi), %r8
mov -37(%rsi), %r9
@@ -2464,7 +2460,7 @@ L(write_45bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_37bytes):
mov -37(%rsi), %r9
mov -29(%rsi), %r10
@@ -2478,7 +2474,7 @@ L(write_37bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_29bytes):
mov -29(%rsi), %r10
mov -21(%rsi), %r11
@@ -2490,7 +2486,7 @@ L(write_29bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_21bytes):
mov -21(%rsi), %r11
mov -13(%rsi), %rcx
@@ -2500,7 +2496,7 @@ L(write_21bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_13bytes):
mov -13(%rsi), %rcx
mov -8(%rsi), %rdx
@@ -2508,7 +2504,7 @@ L(write_13bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_5bytes):
mov -5(%rsi), %edx
mov -4(%rsi), %ecx
@@ -2516,7 +2512,7 @@ L(write_5bytes):
mov %ecx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_78bytes):
movdqu -78(%rsi), %xmm0
movdqu -62(%rsi), %xmm1
@@ -2536,7 +2532,7 @@ L(write_78bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_70bytes):
movdqu -70(%rsi), %xmm0
movdqu -54(%rsi), %xmm1
@@ -2554,7 +2550,7 @@ L(write_70bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_62bytes):
movdqu -62(%rsi), %xmm0
mov -46(%rsi), %r8
@@ -2572,7 +2568,7 @@ L(write_62bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_54bytes):
movdqu -54(%rsi), %xmm0
mov -38(%rsi), %r9
@@ -2588,7 +2584,7 @@ L(write_54bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_46bytes):
mov -46(%rsi), %r8
mov -38(%rsi), %r9
@@ -2604,7 +2600,7 @@ L(write_46bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_38bytes):
mov -38(%rsi), %r9
mov -30(%rsi), %r10
@@ -2618,7 +2614,7 @@ L(write_38bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_30bytes):
mov -30(%rsi), %r10
mov -22(%rsi), %r11
@@ -2630,7 +2626,7 @@ L(write_30bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_22bytes):
mov -22(%rsi), %r11
mov -14(%rsi), %rcx
@@ -2640,7 +2636,7 @@ L(write_22bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_14bytes):
mov -14(%rsi), %rcx
mov -8(%rsi), %rdx
@@ -2648,7 +2644,7 @@ L(write_14bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_6bytes):
mov -6(%rsi), %edx
mov -4(%rsi), %ecx
@@ -2656,7 +2652,7 @@ L(write_6bytes):
mov %ecx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_79bytes):
movdqu -79(%rsi), %xmm0
movdqu -63(%rsi), %xmm1
@@ -2676,7 +2672,7 @@ L(write_79bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_71bytes):
movdqu -71(%rsi), %xmm0
movdqu -55(%rsi), %xmm1
@@ -2694,7 +2690,7 @@ L(write_71bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_63bytes):
movdqu -63(%rsi), %xmm0
mov -47(%rsi), %r8
@@ -2712,7 +2708,7 @@ L(write_63bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_55bytes):
movdqu -55(%rsi), %xmm0
mov -39(%rsi), %r9
@@ -2728,7 +2724,7 @@ L(write_55bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_47bytes):
mov -47(%rsi), %r8
mov -39(%rsi), %r9
@@ -2744,7 +2740,7 @@ L(write_47bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_39bytes):
mov -39(%rsi), %r9
mov -31(%rsi), %r10
@@ -2758,7 +2754,7 @@ L(write_39bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_31bytes):
mov -31(%rsi), %r10
mov -23(%rsi), %r11
@@ -2770,7 +2766,7 @@ L(write_31bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_23bytes):
mov -23(%rsi), %r11
mov -15(%rsi), %rcx
@@ -2780,7 +2776,7 @@ L(write_23bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_15bytes):
mov -15(%rsi), %rcx
mov -8(%rsi), %rdx
@@ -2788,7 +2784,7 @@ L(write_15bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_7bytes):
mov -7(%rsi), %edx
mov -4(%rsi), %ecx
@@ -2796,7 +2792,7 @@ L(write_7bytes):
mov %ecx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(large_page_fwd):
movdqu (%rsi), %xmm1
lea 16(%rsi), %rsi
@@ -2859,7 +2855,7 @@ L(large_page_less_64bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
#ifdef USE_AS_MEMMOVE
- ALIGN (4)
+ .p2align 4
L(ll_cache_copy_fwd_start):
prefetcht0 0x1c0(%rsi)
prefetcht0 0x200(%rsi)
@@ -2906,7 +2902,7 @@ L(large_page_ll_less_fwd_64bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
#endif
- ALIGN (4)
+ .p2align 4
L(large_page_bwd):
movdqu -0x10(%rsi), %xmm1
lea -16(%rsi), %rsi
@@ -2966,7 +2962,7 @@ L(large_page_less_bwd_64bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
#ifdef USE_AS_MEMMOVE
- ALIGN (4)
+ .p2align 4
L(ll_cache_copy_bwd_start):
prefetcht0 -0x1c0(%rsi)
prefetcht0 -0x200(%rsi)
@@ -3014,7 +3010,7 @@ L(large_page_ll_less_bwd_64bytes):
END (MEMCPY)
.section .rodata.ssse3,"a",@progbits
- ALIGN (3)
+ .p2align 3
L(table_less_80bytes):
.int JMPTBL (L(write_0bytes), L(table_less_80bytes))
.int JMPTBL (L(write_1bytes), L(table_less_80bytes))
@@ -3097,7 +3093,7 @@ L(table_less_80bytes):
.int JMPTBL (L(write_78bytes), L(table_less_80bytes))
.int JMPTBL (L(write_79bytes), L(table_less_80bytes))
- ALIGN (3)
+ .p2align 3
L(shl_table):
.int JMPTBL (L(shl_0), L(shl_table))
.int JMPTBL (L(shl_1), L(shl_table))
@@ -3116,7 +3112,7 @@ L(shl_table):
.int JMPTBL (L(shl_14), L(shl_table))
.int JMPTBL (L(shl_15), L(shl_table))
- ALIGN (3)
+ .p2align 3
L(shl_table_bwd):
.int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
.int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
diff --git a/libc/sysdeps/x86_64/multiarch/strchr.S b/libc/sysdeps/x86_64/multiarch/strchr.S
index f170238b5..3f0b2c5f5 100644
--- a/libc/sysdeps/x86_64/multiarch/strchr.S
+++ b/libc/sysdeps/x86_64/multiarch/strchr.S
@@ -29,12 +29,6 @@ ENTRY(strchr)
jne 1f
call __init_cpu_features
1: leaq __strchr_sse2(%rip), %rax
- testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
- jnz 2f
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
- jz 2f
- leaq __strchr_sse42(%rip), %rax
- ret
2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
jz 3f
leaq __strchr_sse2_no_bsf(%rip), %rax
@@ -42,127 +36,6 @@ ENTRY(strchr)
END(strchr)
-/*
- This implementation uses SSE4 instructions to compare up to 16 bytes
- at a time looking for the first occurrence of the character c in the
- string s:
-
- char *strchr (const char *s, int c);
-
- We use 0xa:
- _SIDD_SBYTE_OPS
- | _SIDD_CMP_EQUAL_EACH
- | _SIDD_LEAST_SIGNIFICANT
- on pcmpistri to compare xmm/mem128
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- X X X X X X X X X X X X X X X X
-
- against xmm
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- C C C C C C C C C C C C C C C C
-
- to find out if the first 16byte data element has a byte C and the
- offset of the first byte. There are 3 cases:
-
- 1. The first 16byte data element has the byte C at the offset X.
- 2. The first 16byte data element has EOS and doesn't have the byte C.
- 3. The first 16byte data element is valid and doesn't have the byte C.
-
- Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
-
- case ECX CFlag ZFlag SFlag
- 1 X 1 0/1 0
- 2 16 0 1 0
- 3 16 0 0 0
-
- We exit from the loop for cases 1 and 2 with jbe which branches
- when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
- X for case 1. */
-
- .section .text.sse4.2,"ax",@progbits
- .align 16
- .type __strchr_sse42, @function
- .globl __strchr_sse42
- .hidden __strchr_sse42
-__strchr_sse42:
- cfi_startproc
- CALL_MCOUNT
- testb %sil, %sil
- je __strend_sse4
- pxor %xmm2, %xmm2
- movd %esi, %xmm1
- movl %edi, %ecx
- pshufb %xmm2, %xmm1
- andl $15, %ecx
- movq %rdi, %r8
- je L(aligned_start)
-
-/* Handle unaligned string. */
- andq $-16, %r8
- movdqa (%r8), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm2, %edx
- /* Check if there is a match. */
- pmovmskb %xmm0, %esi
- /* Remove the leading bytes. */
- sarl %cl, %edx
- sarl %cl, %esi
- testl %esi, %esi
- je L(unaligned_no_match)
- /* Check which byte is a match. */
- bsfl %esi, %eax
- /* Is there a NULL? */
- testl %edx, %edx
- je L(unaligned_match)
- bsfl %edx, %esi
- cmpl %esi, %eax
- /* Return NULL if NULL comes first. */
- ja L(return_null)
-L(unaligned_match):
- addq %rdi, %rax
- ret
-
- .p2align 4
-L(unaligned_no_match):
- testl %edx, %edx
- jne L(return_null)
-
-/* Loop start on aligned string. */
-L(loop):
- addq $16, %r8
-L(aligned_start):
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- addq $16, %r8
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- addq $16, %r8
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- addq $16, %r8
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- jmp L(loop)
-L(wrap):
- jc L(loop_exit)
-
-/* Return NULL. */
-L(return_null):
- xorl %eax, %eax
- ret
-
-/* Loop exit. */
- .p2align 4
-L(loop_exit):
- leaq (%r8,%rcx), %rax
- ret
- cfi_endproc
- .size __strchr_sse42, .-__strchr_sse42
-
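The comment block deleted above documents how __strchr_sse42 drove pcmpistri, including the ECX/CFlag/ZFlag table for its three cases. A minimal C sketch of that per-chunk step, written with the SSE4.2 intrinsics rather than glibc's assembly (an illustration only, using the stated mode 0x0a; the strrchr variant removed later in this patch is the same scheme with _SIDD_MOST_SIGNIFICANT, 0x4a, so it reports the last match instead of the first):

/* A minimal sketch, not glibc code: one 16-byte pcmpistri comparison as the
   deleted comment describes it (compile with -msse4.2).  C is assumed
   non-zero here, matching the "je __strend_sse4" bail-out above.  */
#include <nmmintrin.h>

#define STRCHR_MODE \
  (_SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_LEAST_SIGNIFICANT) /* 0x0a */

/* Scan one aligned 16-byte chunk for byte C.  Returns the match offset
   (case 1), -1 when the terminating NUL comes first (case 2), or 16 when
   the chunk holds neither (case 3), mirroring the ECX/CFlag/ZFlag table.  */
static int
scan_chunk_for_char (const char *chunk16, int c)
{
  __m128i needle = _mm_set1_epi8 ((char) c);            /* xmm of C bytes */
  __m128i chunk = _mm_load_si128 ((const __m128i *) chunk16);
  int idx = _mm_cmpistri (needle, chunk, STRCHR_MODE);  /* ECX */
  int cf = _mm_cmpistrc (needle, chunk, STRCHR_MODE);   /* CFlag: C found */
  int zf = _mm_cmpistrz (needle, chunk, STRCHR_MODE);   /* ZFlag: NUL seen */
  if (cf)
    return idx;
  return zf ? -1 : 16;
}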
# undef ENTRY
# define ENTRY(name) \
diff --git a/libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
index eed843297..4a8e57a24 100644
--- a/libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
+++ b/libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
@@ -17,7 +17,6 @@
<http://www.gnu.org/licenses/>. */
#include "sysdep.h"
-#define ALIGN(x) .p2align x
ENTRY ( __strcmp_sse2_unaligned)
movl %edi, %eax
@@ -43,7 +42,7 @@ L(return):
subl %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(next_48_bytes):
movdqu 16(%rdi), %xmm6
movdqu 16(%rsi), %xmm3
@@ -85,7 +84,7 @@ L(main_loop_header):
movq %rcx, %rsi
jmp L(loop_start)
- ALIGN (4)
+ .p2align 4
L(loop):
addq $64, %rax
addq $64, %rdx
@@ -141,7 +140,7 @@ L(back_to_loop):
subl %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(loop_cross_page):
xor %r10, %r10
movq %rdx, %r9
@@ -191,7 +190,7 @@ L(loop_cross_page):
subl %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(cross_page_loop):
cmpb %cl, %al
jne L(different)
diff --git a/libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S b/libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S
deleted file mode 100644
index fcef610db..000000000
--- a/libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S
+++ /dev/null
@@ -1,555 +0,0 @@
-/* strrchr with SSE2 without bsf and bsr
- Copyright (C) 2011-2013 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if defined SHARED && !defined NOT_IN_libc
-
-# include <sysdep.h>
-# include "asm-syntax.h"
-
- atom_text_section
-ENTRY (__strrchr_sse2_no_bsf)
-
- movd %rsi, %xmm1
- pxor %xmm2, %xmm2
- mov %rdi, %rcx
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
- /* ECX has OFFSET. */
- and $63, %rcx
- cmp $48, %rcx
- pshufd $0, %xmm1, %xmm1
- ja L(crosscache)
-
-/* unaligned string. */
- movdqu (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm2, %rcx
- /* Check if there is a match. */
- pmovmskb %xmm0, %rax
- add $16, %rdi
-
- test %rax, %rax
- jnz L(unaligned_match1)
-
- test %rcx, %rcx
- jnz L(return_null)
-
- and $-16, %rdi
- xor %r8, %r8
- jmp L(loop)
-
- .p2align 4
-L(unaligned_match1):
- test %rcx, %rcx
- jnz L(prolog_find_zero_1)
-
- mov %rax, %r8
- mov %rdi, %rsi
- and $-16, %rdi
- jmp L(loop)
-
- .p2align 4
-L(crosscache):
-/* Hancle unaligned string. */
- and $15, %rcx
- and $-16, %rdi
- pxor %xmm3, %xmm3
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm3, %rdx
- /* Check if there is a match. */
- pmovmskb %xmm0, %rax
- /* Remove the leading bytes. */
- shr %cl, %rdx
- shr %cl, %rax
- add $16, %rdi
-
- test %rax, %rax
- jnz L(unaligned_match)
-
- test %rdx, %rdx
- jnz L(return_null)
-
- xor %r8, %r8
- jmp L(loop)
-
- .p2align 4
-L(unaligned_match):
- test %rdx, %rdx
- jnz L(prolog_find_zero)
-
- mov %rax, %r8
- lea (%rdi, %rcx), %rsi
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %rdi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %rdi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %rdi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %rdi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jz L(loop)
-
-L(matches):
- test %rax, %rax
- jnz L(match)
-L(return_value):
- test %r8, %r8
- jz L(return_null)
- mov %r8, %rax
- mov %rsi, %rdi
- jmp L(match_exit)
-
- .p2align 4
-L(match):
- pmovmskb %xmm2, %rcx
- test %rcx, %rcx
- jnz L(find_zero)
- mov %rax, %r8
- mov %rdi, %rsi
- jmp L(loop)
-
- .p2align 4
-L(find_zero):
- test %cl, %cl
- jz L(find_zero_high)
- mov %cl, %dl
- and $15, %dl
- jz L(find_zero_8)
- test $0x01, %cl
- jnz L(FindZeroExit1)
- test $0x02, %cl
- jnz L(FindZeroExit2)
- test $0x04, %cl
- jnz L(FindZeroExit3)
- and $1 << 4 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(find_zero_8):
- test $0x10, %cl
- jnz L(FindZeroExit5)
- test $0x20, %cl
- jnz L(FindZeroExit6)
- test $0x40, %cl
- jnz L(FindZeroExit7)
- and $1 << 8 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(find_zero_high):
- mov %ch, %dh
- and $15, %dh
- jz L(find_zero_high_8)
- test $0x01, %ch
- jnz L(FindZeroExit9)
- test $0x02, %ch
- jnz L(FindZeroExit10)
- test $0x04, %ch
- jnz L(FindZeroExit11)
- and $1 << 12 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(find_zero_high_8):
- test $0x10, %ch
- jnz L(FindZeroExit13)
- test $0x20, %ch
- jnz L(FindZeroExit14)
- test $0x40, %ch
- jnz L(FindZeroExit15)
- and $1 << 16 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit1):
- and $1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit2):
- and $1 << 2 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit3):
- and $1 << 3 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit5):
- and $1 << 5 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit6):
- and $1 << 6 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit7):
- and $1 << 7 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit9):
- and $1 << 9 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit10):
- and $1 << 10 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit11):
- and $1 << 11 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit13):
- and $1 << 13 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit14):
- and $1 << 14 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit15):
- and $1 << 15 - 1, %rax
- jz L(return_value)
-
- .p2align 4
-L(match_exit):
- test %ah, %ah
- jnz L(match_exit_high)
- mov %al, %dl
- and $15 << 4, %dl
- jnz L(match_exit_8)
- test $0x08, %al
- jnz L(Exit4)
- test $0x04, %al
- jnz L(Exit3)
- test $0x02, %al
- jnz L(Exit2)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(match_exit_8):
- test $0x80, %al
- jnz L(Exit8)
- test $0x40, %al
- jnz L(Exit7)
- test $0x20, %al
- jnz L(Exit6)
- lea -12(%rdi), %rax
- ret
-
- .p2align 4
-L(match_exit_high):
- mov %ah, %dh
- and $15 << 4, %dh
- jnz L(match_exit_high_8)
- test $0x08, %ah
- jnz L(Exit12)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x02, %ah
- jnz L(Exit10)
- lea -8(%rdi), %rax
- ret
-
- .p2align 4
-L(match_exit_high_8):
- test $0x80, %ah
- jnz L(Exit16)
- test $0x40, %ah
- jnz L(Exit15)
- test $0x20, %ah
- jnz L(Exit14)
- lea -4(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit2):
- lea -15(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit3):
- lea -14(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit4):
- lea -13(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit6):
- lea -11(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit7):
- lea -10(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit8):
- lea -9(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit10):
- lea -7(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit11):
- lea -6(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit12):
- lea -5(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit14):
- lea -3(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit15):
- lea -2(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit16):
- lea -1(%rdi), %rax
- ret
-
-/* Return NULL. */
- .p2align 4
-L(return_null):
- xor %rax, %rax
- ret
-
- .p2align 4
-L(prolog_find_zero):
- add %rcx, %rdi
- mov %rdx, %rcx
-L(prolog_find_zero_1):
- test %cl, %cl
- jz L(prolog_find_zero_high)
- mov %cl, %dl
- and $15, %dl
- jz L(prolog_find_zero_8)
- test $0x01, %cl
- jnz L(PrologFindZeroExit1)
- test $0x02, %cl
- jnz L(PrologFindZeroExit2)
- test $0x04, %cl
- jnz L(PrologFindZeroExit3)
- and $1 << 4 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_8):
- test $0x10, %cl
- jnz L(PrologFindZeroExit5)
- test $0x20, %cl
- jnz L(PrologFindZeroExit6)
- test $0x40, %cl
- jnz L(PrologFindZeroExit7)
- and $1 << 8 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_high):
- mov %ch, %dh
- and $15, %dh
- jz L(prolog_find_zero_high_8)
- test $0x01, %ch
- jnz L(PrologFindZeroExit9)
- test $0x02, %ch
- jnz L(PrologFindZeroExit10)
- test $0x04, %ch
- jnz L(PrologFindZeroExit11)
- and $1 << 12 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_high_8):
- test $0x10, %ch
- jnz L(PrologFindZeroExit13)
- test $0x20, %ch
- jnz L(PrologFindZeroExit14)
- test $0x40, %ch
- jnz L(PrologFindZeroExit15)
- and $1 << 16 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit1):
- and $1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit2):
- and $1 << 2 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit3):
- and $1 << 3 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit5):
- and $1 << 5 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit6):
- and $1 << 6 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit7):
- and $1 << 7 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit9):
- and $1 << 9 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit10):
- and $1 << 10 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit11):
- and $1 << 11 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit13):
- and $1 << 13 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit14):
- and $1 << 14 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit15):
- and $1 << 15 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
-END (__strrchr_sse2_no_bsf)
-#endif
diff --git a/libc/sysdeps/x86_64/multiarch/strrchr.S b/libc/sysdeps/x86_64/multiarch/strrchr.S
deleted file mode 100644
index 3f92a41ef..000000000
--- a/libc/sysdeps/x86_64/multiarch/strrchr.S
+++ /dev/null
@@ -1,288 +0,0 @@
-/* Multiple versions of strrchr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2013 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc and for
- the DSO. In static binaries we need strrchr before the initialization
- happened. */
-#if defined SHARED && !defined NOT_IN_libc
- .text
-ENTRY(strrchr)
- .type strrchr, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __strrchr_sse2(%rip), %rax
- testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
- jnz 2f
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
- jz 2f
- leaq __strrchr_sse42(%rip), %rax
- ret
-2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
- jz 3f
- leaq __strrchr_sse2_no_bsf(%rip), %rax
-3: ret
-END(strrchr)
-
-/*
- This implementation uses SSE4 instructions to compare up to 16 bytes
- at a time looking for the last occurrence of the character c in the
- string s:
-
- char *strrchr (const char *s, int c);
-
- We use 0x4a:
- _SIDD_SBYTE_OPS
- | _SIDD_CMP_EQUAL_EACH
- | _SIDD_MOST_SIGNIFICANT
- on pcmpistri to compare xmm/mem128
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- X X X X X X X X X X X X X X X X
-
- against xmm
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- C C C C C C C C C C C C C C C C
-
- to find out if the first 16byte data element has a byte C and the
- last offset. There are 4 cases:
-
- 1. The first 16byte data element has EOS and has the byte C at the
- last offset X.
- 2. The first 16byte data element is valid and has the byte C at the
- last offset X.
- 3. The first 16byte data element has EOS and doesn't have the byte C.
- 4. The first 16byte data element is valid and doesn't have the byte C.
-
- Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
-
- case ECX CFlag ZFlag SFlag
- 1 X 1 1 0
- 2 X 1 0 0
- 3 16 0 1 0
- 4 16 0 0 0
-
- We exit from the loop for cases 1 and 3 with jz which branches
- when ZFlag is 1. If CFlag == 1, ECX has the offset X for case 1. */
-
-
- .section .text.sse4.2,"ax",@progbits
- .align 16
- .type __strrchr_sse42, @function
- .globl __strrchr_sse42
- .hidden __strrchr_sse42
-__strrchr_sse42:
- cfi_startproc
- CALL_MCOUNT
- testb %sil, %sil
- je __strend_sse4
- xor %eax,%eax /* RAX has the last occurrence of s. */
- movd %esi, %xmm1
- punpcklbw %xmm1, %xmm1
- movl %edi, %esi
- punpcklbw %xmm1, %xmm1
- andl $15, %esi
- pshufd $0, %xmm1, %xmm1
- movq %rdi, %r8
- je L(loop)
-
-/* Handle unaligned string using psrldq. */
- leaq L(psrldq_table)(%rip), %rdx
- andq $-16, %r8
- movslq (%rdx,%rsi,4),%r9
- movdqa (%r8), %xmm0
- addq %rdx, %r9
- jmp *%r9
-
-/* Handle unaligned string with offset 1 using psrldq. */
- .p2align 4
-L(psrldq_1):
- psrldq $1, %xmm0
-
- .p2align 4
-L(unaligned_pcmpistri):
- pcmpistri $0x4a, %xmm1, %xmm0
- jnc L(unaligned_no_byte)
- leaq (%rdi,%rcx), %rax
-L(unaligned_no_byte):
- /* Find the length of the unaligned string. */
- pcmpistri $0x3a, %xmm0, %xmm0
- movl $16, %edx
- subl %esi, %edx
- cmpl %ecx, %edx
- /* Return RAX if the unaligned fragment to next 16B already
- contain the NULL terminator. */
- jg L(exit)
- addq $16, %r8
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- pcmpistri $0x4a, (%r8), %xmm1
- jbe L(match_or_eos)
- addq $16, %r8
- jmp L(loop)
- .p2align 4
-L(match_or_eos):
- je L(had_eos)
-L(match_no_eos):
- leaq (%r8,%rcx), %rax
- addq $16, %r8
- jmp L(loop)
- .p2align 4
-L(had_eos):
- jnc L(exit)
- leaq (%r8,%rcx), %rax
- .p2align 4
-L(exit):
- ret
-
-/* Handle unaligned string with offset 15 using psrldq. */
- .p2align 4
-L(psrldq_15):
- psrldq $15, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 14 using psrldq. */
- .p2align 4
-L(psrldq_14):
- psrldq $14, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 13 using psrldq. */
- .p2align 4
-L(psrldq_13):
- psrldq $13, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 12 using psrldq. */
- .p2align 4
-L(psrldq_12):
- psrldq $12, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 11 using psrldq. */
- .p2align 4
-L(psrldq_11):
- psrldq $11, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 10 using psrldq. */
- .p2align 4
-L(psrldq_10):
- psrldq $10, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 9 using psrldq. */
- .p2align 4
-L(psrldq_9):
- psrldq $9, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 8 using psrldq. */
- .p2align 4
-L(psrldq_8):
- psrldq $8, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 7 using psrldq. */
- .p2align 4
-L(psrldq_7):
- psrldq $7, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 6 using psrldq. */
- .p2align 4
-L(psrldq_6):
- psrldq $6, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 5 using psrldq. */
- .p2align 4
-L(psrldq_5):
- psrldq $5, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 4 using psrldq. */
- .p2align 4
-L(psrldq_4):
- psrldq $4, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 3 using psrldq. */
- .p2align 4
-L(psrldq_3):
- psrldq $3, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 2 using psrldq. */
- .p2align 4
-L(psrldq_2):
- psrldq $2, %xmm0
- jmp L(unaligned_pcmpistri)
-
- cfi_endproc
- .size __strrchr_sse42, .-__strrchr_sse42
-
- .section .rodata.sse4.2,"a",@progbits
- .p2align 4
-L(psrldq_table):
- .int L(loop) - L(psrldq_table)
- .int L(psrldq_1) - L(psrldq_table)
- .int L(psrldq_2) - L(psrldq_table)
- .int L(psrldq_3) - L(psrldq_table)
- .int L(psrldq_4) - L(psrldq_table)
- .int L(psrldq_5) - L(psrldq_table)
- .int L(psrldq_6) - L(psrldq_table)
- .int L(psrldq_7) - L(psrldq_table)
- .int L(psrldq_8) - L(psrldq_table)
- .int L(psrldq_9) - L(psrldq_table)
- .int L(psrldq_10) - L(psrldq_table)
- .int L(psrldq_11) - L(psrldq_table)
- .int L(psrldq_12) - L(psrldq_table)
- .int L(psrldq_13) - L(psrldq_table)
- .int L(psrldq_14) - L(psrldq_table)
- .int L(psrldq_15) - L(psrldq_table)
-
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __strrchr_sse2, @function; \
- .align 16; \
- .globl __strrchr_sse2; \
- .hidden __strrchr_sse2; \
- __strrchr_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __strrchr_sse2, .-__strrchr_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strrchr calls through a PLT.
- The speedup we get from using SSE4.2 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_strrchr; __GI_strrchr = __strrchr_sse2
-#endif
-
-#include "../strrchr.S"
diff --git a/libc/sysdeps/x86_64/stackguard-macros.h b/libc/sysdeps/x86_64/stackguard-macros.h
index d7fedb373..1948800cd 100644
--- a/libc/sysdeps/x86_64/stackguard-macros.h
+++ b/libc/sysdeps/x86_64/stackguard-macros.h
@@ -4,3 +4,8 @@
({ uintptr_t x; \
asm ("mov %%fs:%c1, %0" : "=r" (x) \
: "i" (offsetof (tcbhead_t, stack_guard))); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ uintptr_t x; \
+ asm ("mov %%fs:%c1, %0" : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard))); x; })
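The hunk above adds POINTER_CHK_GUARD alongside the existing STACK_CHK_GUARD, reading the TCB's pointer_guard slot through %fs. A standalone sketch of the same two reads (assumptions: x86_64 glibc with the usual TCB layout, stack_guard at %fs:0x28 and pointer_guard at %fs:0x30; the macro itself computes the offsets with offsetof, so the constants here are only illustrative):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uintptr_t stack_guard, pointer_guard;
  /* Same loads as the macros, with the offsetof results written out.  */
  asm ("mov %%fs:0x28, %0" : "=r" (stack_guard));
  asm ("mov %%fs:0x30, %0" : "=r" (pointer_guard));
  printf ("stack guard:   %#lx\n", (unsigned long) stack_guard);
  printf ("pointer guard: %#lx\n", (unsigned long) pointer_guard);
  return 0;
}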
diff --git a/libc/sysdeps/x86_64/strchr.S b/libc/sysdeps/x86_64/strchr.S
index d89f1eba8..7440500a6 100644
--- a/libc/sysdeps/x86_64/strchr.S
+++ b/libc/sysdeps/x86_64/strchr.S
@@ -19,51 +19,169 @@
#include <sysdep.h>
-
.text
ENTRY (strchr)
movd %esi, %xmm1
- movq %rdi, %rcx
- punpcklbw %xmm1, %xmm1
- andq $~15, %rdi
- pxor %xmm2, %xmm2
+ movl %edi, %eax
+ andl $4095, %eax
punpcklbw %xmm1, %xmm1
- orl $0xffffffff, %esi
- movdqa (%rdi), %xmm0
+ cmpl $4032, %eax
+ punpcklwd %xmm1, %xmm1
pshufd $0, %xmm1, %xmm1
- subq %rdi, %rcx
- movdqa %xmm0, %xmm3
- leaq 16(%rdi), %rdi
+ jg L(cross_page)
+ movdqu (%rdi), %xmm0
+ pxor %xmm3, %xmm3
+ movdqa %xmm0, %xmm4
pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- shl %cl, %esi
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- andl %esi, %edx
- andl %esi, %ecx
- orl %edx, %ecx
- jnz 1f
+ pcmpeqb %xmm3, %xmm4
+ por %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ je L(next_48_bytes)
+ bsf %eax, %eax
+#ifdef AS_STRCHRNUL
+ leaq (%rdi,%rax), %rax
+#else
+ movl $0, %edx
+ leaq (%rdi,%rax), %rax
+ cmpb %sil, (%rax)
+ cmovne %rdx, %rax
+#endif
+ ret
-2: movdqa (%rdi), %xmm0
- leaq 16(%rdi), %rdi
- movdqa %xmm0, %xmm3
+ .p2align 3
+ L(next_48_bytes):
+ movdqu 16(%rdi), %xmm0
+ movdqa %xmm0, %xmm4
pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- orl %edx, %ecx
- jz 2b
+ pcmpeqb %xmm3, %xmm4
+ por %xmm4, %xmm0
+ pmovmskb %xmm0, %ecx
+ movdqu 32(%rdi), %xmm0
+ movdqa %xmm0, %xmm4
+ pcmpeqb %xmm1, %xmm0
+ salq $16, %rcx
+ pcmpeqb %xmm3, %xmm4
+ por %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ movdqu 48(%rdi), %xmm0
+ pcmpeqb %xmm0, %xmm3
+ salq $32, %rax
+ pcmpeqb %xmm1, %xmm0
+ orq %rcx, %rax
+ por %xmm3, %xmm0
+ pmovmskb %xmm0, %ecx
+ salq $48, %rcx
+ orq %rcx, %rax
+ testq %rax, %rax
+ jne L(return)
+L(loop_start):
+	/* We use this alignment to force the loop to be aligned to 8 but
+	   not 16 bytes. This gives better scheduling on AMD processors. */
+ .p2align 4
+ pxor %xmm6, %xmm6
+ andq $-64, %rdi
+ .p2align 3
+L(loop64):
+ addq $64, %rdi
+ movdqa (%rdi), %xmm5
+ movdqa 16(%rdi), %xmm2
+ movdqa 32(%rdi), %xmm3
+ pxor %xmm1, %xmm5
+ movdqa 48(%rdi), %xmm4
+ pxor %xmm1, %xmm2
+ pxor %xmm1, %xmm3
+ pminub (%rdi), %xmm5
+ pxor %xmm1, %xmm4
+ pminub 16(%rdi), %xmm2
+ pminub 32(%rdi), %xmm3
+ pminub %xmm2, %xmm5
+ pminub 48(%rdi), %xmm4
+ pminub %xmm3, %xmm5
+ pminub %xmm4, %xmm5
+ pcmpeqb %xmm6, %xmm5
+ pmovmskb %xmm5, %eax
+
+ testl %eax, %eax
+ je L(loop64)
+
+ movdqa (%rdi), %xmm5
+ movdqa %xmm5, %xmm0
+ pcmpeqb %xmm1, %xmm5
+ pcmpeqb %xmm6, %xmm0
+ por %xmm0, %xmm5
+ pcmpeqb %xmm6, %xmm2
+ pcmpeqb %xmm6, %xmm3
+ pcmpeqb %xmm6, %xmm4
+
+ pmovmskb %xmm5, %ecx
+ pmovmskb %xmm2, %eax
+ salq $16, %rax
+ pmovmskb %xmm3, %r8d
+ pmovmskb %xmm4, %edx
+ salq $32, %r8
+ orq %r8, %rax
+ orq %rcx, %rax
+ salq $48, %rdx
+ orq %rdx, %rax
+ .p2align 3
+L(return):
+ bsfq %rax, %rax
+#ifdef AS_STRCHRNUL
+ leaq (%rdi,%rax), %rax
+#else
+ movl $0, %edx
+ leaq (%rdi,%rax), %rax
+ cmpb %sil, (%rax)
+ cmovne %rdx, %rax
+#endif
+ ret
+ .p2align 4
+
+L(cross_page):
+ movq %rdi, %rdx
+ pxor %xmm2, %xmm2
+ andq $-64, %rdx
+ movdqa %xmm1, %xmm0
+ movdqa (%rdx), %xmm3
+ movdqa %xmm3, %xmm4
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm2, %xmm4
+ por %xmm4, %xmm3
+ pmovmskb %xmm3, %r8d
+ movdqa 16(%rdx), %xmm3
+ movdqa %xmm3, %xmm4
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm2, %xmm4
+ por %xmm4, %xmm3
+ pmovmskb %xmm3, %eax
+ movdqa 32(%rdx), %xmm3
+ movdqa %xmm3, %xmm4
+ pcmpeqb %xmm1, %xmm3
+ salq $16, %rax
+ pcmpeqb %xmm2, %xmm4
+ por %xmm4, %xmm3
+ pmovmskb %xmm3, %r9d
+ movdqa 48(%rdx), %xmm3
+ pcmpeqb %xmm3, %xmm2
+ salq $32, %r9
+ pcmpeqb %xmm3, %xmm0
+ orq %r9, %rax
+ orq %r8, %rax
+ por %xmm2, %xmm0
+ pmovmskb %xmm0, %ecx
+ salq $48, %rcx
+ orq %rcx, %rax
+ movl %edi, %ecx
+ subb %dl, %cl
+ shrq %cl, %rax
+ testq %rax, %rax
+ jne L(return)
+ jmp L(loop_start)
-1: bsfl %edx, %edx
- jz 4f
- bsfl %ecx, %ecx
- leaq -16(%rdi,%rdx), %rax
- cmpl %edx, %ecx
- je 5f
-4: xorl %eax, %eax
-5: ret
END (strchr)
+#ifndef AS_STRCHRNUL
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)
-
+#endif
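The rewritten strchr above guards its unaligned 64-byte fast path with a page-offset check (andl $4095 / cmpl $4032 / jg L(cross_page)), and the new strrchr further down uses the same test. A one-line C restatement of that predicate (a sketch assuming the 4096-byte pages the constants imply):

#include <stdint.h>

/* True when an unaligned 64-byte read starting at S could run past the end
   of the current 4096-byte page (page offset greater than 4096 - 64).  */
static inline int
may_cross_page (const char *s)
{
  return ((uintptr_t) s & 4095) > 4032;
}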
diff --git a/libc/sysdeps/x86_64/strchrnul.S b/libc/sysdeps/x86_64/strchrnul.S
index d8c345ba7..bceeb6187 100644
--- a/libc/sysdeps/x86_64/strchrnul.S
+++ b/libc/sysdeps/x86_64/strchrnul.S
@@ -20,43 +20,8 @@
#include <sysdep.h>
-
- .text
-ENTRY (__strchrnul)
- movd %esi, %xmm1
- movq %rdi, %rcx
- punpcklbw %xmm1, %xmm1
- andq $~15, %rdi
- pxor %xmm2, %xmm2
- punpcklbw %xmm1, %xmm1
- orl $0xffffffff, %esi
- movdqa (%rdi), %xmm0
- pshufd $0, %xmm1, %xmm1
- subq %rdi, %rcx
- movdqa %xmm0, %xmm3
- leaq 16(%rdi), %rdi
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- shl %cl, %esi
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- orl %edx, %ecx
- andl %esi, %ecx
- jnz 1f
-
-2: movdqa (%rdi), %xmm0
- leaq 16(%rdi), %rdi
- movdqa %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- orl %edx, %ecx
- jz 2b
-
-1: bsfl %ecx, %edx
- leaq -16(%rdi,%rdx), %rax
- ret
-END (__strchrnul)
+#define strchr __strchrnul
+#define AS_STRCHRNUL
+#include "strchr.S"
weak_alias (__strchrnul, strchrnul)
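
With this change strchrnul.S carries no loop of its own: it renames the entry point to __strchrnul, defines AS_STRCHRNUL and includes strchr.S, which tests that macro around its return sequence and around the weak_alias/libc_hidden_builtin_def lines. A sketch of the same single-source, two-entry-points pattern in C, using hypothetical file and function names:

    /* chr_generic.c -- hypothetical shared implementation.  */
    #include <stddef.h>

    #ifndef CHR_NAME
    # define CHR_NAME my_strchr
    #endif

    char *
    CHR_NAME (const char *s, int c)
    {
      while (*s != (char) c && *s != '\0')
        s++;
    #ifdef AS_STRCHRNUL
      return (char *) s;                          /* strchrnul flavour.  */
    #else
      return *s == (char) c ? (char *) s : NULL;  /* plain strchr flavour.  */
    #endif
    }

A second file, say my_strchrnul.c, would then contain only '#define CHR_NAME my_strchrnul', '#define AS_STRCHRNUL' and '#include "chr_generic.c"', mirroring the three lines that now make up strchrnul.S.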
diff --git a/libc/sysdeps/x86_64/strrchr.S b/libc/sysdeps/x86_64/strrchr.S
index e413b0743..2a07ff75a 100644
--- a/libc/sysdeps/x86_64/strrchr.S
+++ b/libc/sysdeps/x86_64/strrchr.S
@@ -1,6 +1,5 @@
/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
- For AMD x86-64.
- Copyright (C) 2009-2013 Free Software Foundation, Inc.
+ Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,63 +16,212 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
+#include <sysdep.h>
.text
ENTRY (strrchr)
movd %esi, %xmm1
- movq %rdi, %rcx
- punpcklbw %xmm1, %xmm1
- andq $~15, %rdi
- pxor %xmm2, %xmm2
- punpcklbw %xmm1, %xmm1
- orl $0xffffffff, %esi
- movdqa (%rdi), %xmm0
+ movq %rdi, %rax
+ andl $4095, %eax
+ punpcklbw %xmm1, %xmm1
+ cmpq $4032, %rax
+ punpcklwd %xmm1, %xmm1
pshufd $0, %xmm1, %xmm1
- subq %rdi, %rcx
+ ja L(cross_page)
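+	/* Take the careful path when the 64 unaligned bytes starting at
+	   %rdi would cross a page boundary, i.e. when the offset within
+	   the page is above 4096 - 64.  */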
+ movdqu (%rdi), %xmm0
+ pxor %xmm2, %xmm2
movdqa %xmm0, %xmm3
- leaq 16(%rdi), %rdi
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm3
- shl %cl, %esi
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- andl %esi, %edx
- andl %esi, %ecx
- xorl %eax, %eax
- movl %edx, %esi
- orl %ecx, %esi
- jnz 1f
+ pmovmskb %xmm0, %ecx
+ pmovmskb %xmm3, %edx
+ testq %rdx, %rdx
+ je L(next_48_bytes)
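+	/* %rcx holds the C matches and %rdx the NUL matches of the first
+	   16 bytes.  (%rdx - 1) ^ %rdx keeps only the bits at or below
+	   the first NUL, so the andq below selects C matches that occur
+	   before the string ends.  */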
+ leaq -1(%rdx), %rax
+ xorq %rdx, %rax
+ andq %rcx, %rax
+ je L(exit)
+ bsrq %rax, %rax
+ addq %rdi, %rax
+ ret
-2: movdqa (%rdi), %xmm0
- leaq 16(%rdi), %rdi
- movdqa %xmm0, %xmm3
+ .p2align 4
+L(next_48_bytes):
+ movdqu 16(%rdi), %xmm4
+ movdqa %xmm4, %xmm5
+ movdqu 32(%rdi), %xmm3
+ pcmpeqb %xmm1, %xmm4
+ pcmpeqb %xmm2, %xmm5
+ movdqu 48(%rdi), %xmm0
+ pmovmskb %xmm5, %edx
+ movdqa %xmm3, %xmm5
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm2, %xmm5
+ pcmpeqb %xmm0, %xmm2
+ salq $16, %rdx
+ pmovmskb %xmm3, %r8d
+ pmovmskb %xmm5, %eax
+ pmovmskb %xmm2, %esi
+ salq $32, %r8
+ salq $32, %rax
pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- movl %edx, %esi
- orl %ecx, %esi
- jz 2b
+ orq %rdx, %rax
+ movq %rsi, %rdx
+ pmovmskb %xmm4, %esi
+ salq $48, %rdx
+ salq $16, %rsi
+ orq %r8, %rsi
+ orq %rcx, %rsi
+ pmovmskb %xmm0, %ecx
+ salq $48, %rcx
+ orq %rcx, %rsi
+ orq %rdx, %rax
+ je L(loop_header2)
+ leaq -1(%rax), %rcx
+ xorq %rax, %rcx
+ andq %rcx, %rsi
+ je L(exit)
+ bsrq %rsi, %rsi
+ leaq (%rdi,%rsi), %rax
+ ret
-1: bsfl %ecx, %r9d
- movl $0xffffffff, %r8d
- movl $31, %ecx
- jnz 5f
+ .p2align 4
+L(loop_header2):
+ testq %rsi, %rsi
+ movq %rdi, %rcx
+ je L(no_c_found)
+L(loop_header):
+ addq $64, %rdi
+ pxor %xmm7, %xmm7
+ andq $-64, %rdi
+ jmp L(loop_entry)
+
+ .p2align 4
+L(loop64):
+ testq %rdx, %rdx
+ cmovne %rdx, %rsi
+ cmovne %rdi, %rcx
+ addq $64, %rdi
+L(loop_entry):
+ movdqa 32(%rdi), %xmm3
+ pxor %xmm6, %xmm6
+ movdqa 48(%rdi), %xmm2
+ movdqa %xmm3, %xmm0
+ movdqa 16(%rdi), %xmm4
+ pminub %xmm2, %xmm0
+ movdqa (%rdi), %xmm5
+ pminub %xmm4, %xmm0
+ pminub %xmm5, %xmm0
+ pcmpeqb %xmm7, %xmm0
+ pmovmskb %xmm0, %eax
+ movdqa %xmm5, %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %r9d
+ movdqa %xmm4, %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqa %xmm3, %xmm0
+ pcmpeqb %xmm1, %xmm0
+ salq $16, %rdx
+ pmovmskb %xmm0, %r10d
+ movdqa %xmm2, %xmm0
+ pcmpeqb %xmm1, %xmm0
+ salq $32, %r10
+ orq %r10, %rdx
+ pmovmskb %xmm0, %r8d
+ orq %r9, %rdx
+ salq $48, %r8
+ orq %r8, %rdx
+ testl %eax, %eax
+ je L(loop64)
+ pcmpeqb %xmm6, %xmm4
+ pcmpeqb %xmm6, %xmm3
+ pcmpeqb %xmm6, %xmm5
+ pmovmskb %xmm4, %eax
+ pmovmskb %xmm3, %r10d
+ pcmpeqb %xmm6, %xmm2
+ pmovmskb %xmm5, %r9d
+ salq $32, %r10
+ salq $16, %rax
+ pmovmskb %xmm2, %r8d
+ orq %r10, %rax
+ orq %r9, %rax
+ salq $48, %r8
+ orq %r8, %rax
+ leaq -1(%rax), %r8
+ xorq %rax, %r8
+ andq %r8, %rdx
+ cmovne %rdi, %rcx
+ cmovne %rdx, %rsi
+ bsrq %rsi, %rsi
+ leaq (%rcx,%rsi), %rax
+ ret
- bsrl %edx, %edx
- jz 2b
- leaq -16(%rdi,%rdx), %rax
- jmp 2b
+ .p2align 4
+L(no_c_found):
+ movl $1, %esi
+ xorl %ecx, %ecx
+ jmp L(loop_header)
+
+ .p2align 4
+L(exit):
+ xorl %eax, %eax
+ ret
-5: subl %r9d, %ecx
- shrl %cl, %r8d
- andl %r8d, %edx
- bsrl %edx, %edx
- jz 4f
- leaq -16(%rdi,%rdx), %rax
-4: ret
+ .p2align 4
+L(cross_page):
+ movq %rdi, %rax
+ pxor %xmm0, %xmm0
+ andq $-64, %rax
+ movdqu (%rax), %xmm5
+ movdqa %xmm5, %xmm6
+ movdqu 16(%rax), %xmm4
+ pcmpeqb %xmm1, %xmm5
+ pcmpeqb %xmm0, %xmm6
+ movdqu 32(%rax), %xmm3
+ pmovmskb %xmm6, %esi
+ movdqa %xmm4, %xmm6
+ movdqu 48(%rax), %xmm2
+ pcmpeqb %xmm1, %xmm4
+ pcmpeqb %xmm0, %xmm6
+ pmovmskb %xmm6, %edx
+ movdqa %xmm3, %xmm6
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm0, %xmm6
+ pcmpeqb %xmm2, %xmm0
+ salq $16, %rdx
+ pmovmskb %xmm3, %r9d
+ pmovmskb %xmm6, %r8d
+ pmovmskb %xmm0, %ecx
+ salq $32, %r9
+ salq $32, %r8
+ pcmpeqb %xmm1, %xmm2
+ orq %r8, %rdx
+ salq $48, %rcx
+ pmovmskb %xmm5, %r8d
+ orq %rsi, %rdx
+ pmovmskb %xmm4, %esi
+ orq %rcx, %rdx
+ pmovmskb %xmm2, %ecx
+ salq $16, %rsi
+ salq $48, %rcx
+ orq %r9, %rsi
+ orq %r8, %rsi
+ orq %rcx, %rsi
+ movl %edi, %ecx
+ subl %eax, %ecx
+ shrq %cl, %rdx
+ shrq %cl, %rsi
+ testq %rdx, %rdx
+ je L(loop_header2)
+ leaq -1(%rdx), %rax
+ xorq %rdx, %rax
+ andq %rax, %rsi
+ je L(exit)
+ bsrq %rsi, %rax
+ addq %rdi, %rax
+ ret
END (strrchr)
weak_alias (strrchr, rindex)
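
The new strrchr likewise scans forward in 64-byte blocks: L(loop64) uses cmovne to remember the most recent block, and the match mask inside it, in which C was seen, and only once a NUL turns up does it mask off matches past the terminator ((mask - 1) ^ mask again) and pick the highest remaining bit with bsrq. A scalar C sketch of that forward scan that remembers the last match, with a hypothetical name:

    #include <stddef.h>

    static char *
    strrchr_sketch (const char *s, int c_in)
    {
      unsigned char c = (unsigned char) c_in;
      const char *last = NULL;                /* plays the role of %rcx/%rsi  */

      for (;; s++)
        {
          if (*(const unsigned char *) s == c)
            last = s;                         /* cmovne: remember latest match  */
          if (*s == '\0')
            return (char *) last;             /* bsrq on the saved mask, or L(exit)  */
        }
    }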