author     | ro <ro@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-02 15:03:19 +0000
committer  | ro <ro@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-02 15:03:19 +0000
commit     | 9213d2eb44a8b9bcc432b57e246d9b52d5bdc949 (patch)
tree       | bfbde9a54f663fb7556b9dacd07709ef97c1961c /gcc
parent     | 237490bf10db39b859bd28598ff64f1bd2c84421 (diff)
download   | gcc-9213d2eb44a8b9bcc432b57e246d9b52d5bdc949.tar.gz
Move libgcc1 to toplevel libgcc
gcc:
* Makefile.in (LIB1ASMSRC): Don't export.
(libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC.
* config/arm/arm.c: Update lib1funcs.asm filename.
* config/arm/linux-eabi.h: Likewise.
* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to
../libgcc/config/arm.
* config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S.
* config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/arm/t-arm-elf (LIB1ASMFUNCS): Remove.
* config/arm/t-bpabi: Likewise.
* config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove.
* config/arm/t-strongarm-elf: Likewise.
* config/arm/t-symbian: Likewise.
* config/arm/t-vxworks: Likewise.
* config/arm/t-wince-pe: Likewise.
* config/avr/libgcc.S: Move to ../libgcc/config/avr.
* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/bfin/lib1funcs.asm: Move to
../libgcc/config/bfin/lib1funcs.S.
* config/bfin/t-bfin: Remove.
* config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/bfin/t-bfin-linux: Likewise.
* config/bfin/t-bfin-uclinux: Likewise.
* config/c6x/lib1funcs.asm: Move to
../libgcc/config/c6x/lib1funcs.S.
* config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/fr30/lib1funcs.asm: Move to
../libgcc/config/fr30/lib1funcs.S.
* config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/frv/lib1funcs.asm: Move to
../libgcc/config/frv/lib1funcs.S.
* config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/h8300/fixunssfsi.c: Update lib1funcs.asm filename.
* config/h8300/lib1funcs.asm: Move to
../libgcc/config/h8300/lib1funcs.S.
* config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S.
* config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/i386/t-interix: Likewise.
* config/ia64/lib1funcs.asm: Move to
../libgcc/config/ia64/lib1funcs.S.
* config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove.
* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove.
* config/m32c/m32c.c: Update m32c-lib1.S filename.
* config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S.
* config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove.
* config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S.
* config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): New file.
* config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S.
* config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S.
* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/mips/mips16.S: Move to ../libgcc/config/mips.
* config/mips/t-libgcc-mips16: Remove.
* config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove.
* config/pa/milli64.S: Move to ../libgcc/config/pa.
* config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
* config/pa/t-linux64: Likewise.
* config/picochip/libgccExtras/fake_libgcc.asm: Move to
../libgcc/config/picochip/lib1funcs.S.
* config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
* config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S.
* config/sh/lib1funcs.h: Move to ../libgcc/config/sh.
* config/sh/sh.h: Update lib1funcs.asm filename.
* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove.
* config/sh/t-netbsd: Likewise.
* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE):
Remove.
* config/sh/t-sh64 (LIB1ASMFUNCS): Remove.
* config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S.
* config/sparc/lb1spl.asm: Remove.
* config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/sparc/t-leon: Likewise.
* config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove.
* config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S.
* config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S.
* config/vax/t-linux: Remove.
* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to
../libgcc/config/xtensa.
* config/xtensa/lib1funcs.asm: Move to
../libgcc/config/xtensa/lib1funcs.S.
* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file.
(bfin*-*): Likewise.
(mips64*-*-linux*, mipsisa64*-*-linux*): Remove
mips/t-libgcc-mips16 from tmake_file.
(mips*-*-linux*): Likewise.
(mips*-sde-elf*): Likewise.
(mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*)
(mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*)
(mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise.
(mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise.
(mips-*-elf*, mipsel-*-elf*): Likewise.
(mips64-*-elf*, mips64el-*-elf*): Likewise.
(mips64orion-*-elf*, mips64orionel-*-elf*): Likewise.
(mips*-*-rtems*): Likewise.
(mipstx39-*-elf*, mipstx39el-*-elf*): Likewise.
(vax-*-linux*): Remove vax/t-linux from tmake_file.
libgcc:
* Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use
$(srcdir) to refer to $(LIB1ASMSRC).
Use $<.
* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
config/arm/ieee754-df.S, config/arm/ieee754-sf.S,
config/arm/lib1funcs.S: New files.
* config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S.
* config/arm/t-arm: New file.
* config/arm/t-bpabi (LIB1ASMFUNCS): Set.
* config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi,
config/arm/t-strongarm-elf: New files.
* config/arm/t-symbian (LIB1ASMFUNCS): Set.
* config/arm/t-vxworks, config/arm/t-wince-pe: New files.
* config/avr/lib1funcs.S: New file.
* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/bfin/lib1funcs.S, config/bfin/t-bfin: New files.
* config/c6x/lib1funcs.S: New file.
* config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/fr30/lib1funcs.S, config/fr30/t-fr30: New files.
* config/frv/lib1funcs.S: New file.
* config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/h8300/lib1funcs.S, config/h8300/t-h8300: New files.
* config/i386/cygwin.S, config/i386/t-chkstk: New files.
* config/ia64/__divxf3.asm: Rename to ...
* config/ia64/__divxf3.S: ... this.
Adapt lib1funcs.asm filename.
* config/ia64/_fixtfdi.asm: Rename to ...
* config/ia64/_fixtfdi.S: ... this.
Adapt lib1funcs.asm filename.
* config/ia64/_fixunstfdi.asm: Rename to ...
* config/ia64/_fixunstfdi.S: ... this.
Adapt lib1funcs.asm filename.
* config/ia64/_floatditf.asm: Rename to ...
* config/ia64/_floatditf.S: ... this.
Adapt lib1funcs.asm filename.
* config/ia64/lib1funcs.S: New file.
* config/ia64/t-hpux (LIB1ASMFUNCS): Set.
* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix.
* config/m32c/lib1funcs.S, config/m32c/t-m32c: New files.
* config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files.
* config/mcore/lib1funcs.S, config/mcore/t-mcore: New files.
* config/mep/lib1funcs.S: New file.
* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/mips/mips16.S: New file.
* config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/pa/milli64.S: New file.
* config/pa/t-linux, config/pa/t-linux64: New files.
* config/picochip/lib1funcs.S: New file.
* config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files.
* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set.
* config/sh/t-netbsd: New file.
* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Set.
Use $(srcdir) to refer to lib1funcs.S, adapt filename.
* config/sh/t-sh64: New file.
* config/sparc/lb1spc.S: New file.
* config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm
filename.
* config/v850/lib1funcs.S, config/v850/t-v850: New files.
* config/vax/lib1funcs.S, config/vax/t-linux: New files.
* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S,
config/xtensa/lib1funcs.S: New files.
* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to
tmake_file.
(arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file.
(arm*-*-netbsdelf*): Add arm/t-arm to tmake_file.
(arm*-*-linux*): Likewise.
Add arm/t-elf, arm/t-bpabi, arm/t-linux-eabi to tmake_file for
arm*-*-linux-*eabi, add arm/t-linux otherwise.
(arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file.
(arm*-*-ecos-elf): Likewise.
(arm*-*-eabi*, arm*-*-symbianelf*): Likewise.
(arm*-*-rtems*): Likewise.
(arm*-*-elf): Likewise.
(arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file.
(avr-*-rtems*): Add to tmake_file, add avr/t-avr.
(bfin*-elf*): Add bfin/t-bfin to tmake_file.
(bfin*-uclinux*): Likewise.
(bfin*-linux-uclibc*): Likewise.
(bfin*-rtems*): Likewise.
(bfin*-*): Likewise.
(fido-*-elf): Merge into m68k-*-elf*.
(fr30-*-elf): Add fr30/t-fr30 to tmake_file.
(frv-*-*linux*): Add frv/t-frv to tmake_file.
(h8300-*-rtems*): Add h8300/t-h8300 to tmake_file.
(h8300-*-elf*): Likewise.
(hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file.
(hppa*-*-linux*): Add pa/t-linux to tmake_file.
(i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file.
(i[34567]86-*-mingw*): Likewise.
(x86_64-*-mingw*): Likewise.
(i[34567]86-*-interix3*): Likewise.
(ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file.
(ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file.
(m68k-*-elf*): Also handle fido-*-elf.
Add m68k/t-floatlib to tmake_file.
(m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file.
(m68k-*-linux*): Likewise.
(m68k-*-rtems*): Likewise.
(mcore-*-elf): Add mcore/t-mcore to tmake_file.
(sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for
sh64*-*-*.
(sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file.
Add sh/t-sh64 to tmake_file for sh64*-*-linux*.
(sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*)
(sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh,
sh/t-netbsd to tmake_file.
Add sh/t-sh64 to tmake_file for sh5*-*-netbsd*, sh64*-netbsd*.
(sh-*-rtems*): Add sh/t-sh to tmake_file.
(sh-wrs-vxworks): Likewise.
(sparc-*-linux*): Add sparc/t-softmul to tmake_file except for
*-leon[3-9]*.
(v850*-*-*): Add v850/t-v850 to tmake_file.
(vax-*-linux*): Add vax/t-linux to tmake_file.
(m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@180773 138bc75d-0d04-0410-961f-82ee72b054a4
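The mechanics behind these entries, in rough outline: before this commit, gcc/Makefile.in exported LIB1ASMSRC and wrote LIB1ASMSRC/LIB1ASMFUNCS into libgcc.mvars for the libgcc build to pick up; afterwards each target declares them directly in a libgcc/config/<cpu>/t-* fragment that libgcc/config.host adds to tmake_file, and libgcc/Makefile.in reads the assembler source from its own $(srcdir). The fragment below is a minimal illustrative sketch of that arrangement, not copied from the tree: the <cpu> paths, the _example_* member names, and the compile command are placeholders, $(gcc_compile) and $(objext) are assumed to come from the surrounding libgcc Makefile, and only the use of $(srcdir) and $< is taken from the ChangeLog entries above.

    # Hypothetical libgcc/config/<cpu>/t-<cpu> fragment: the assembler source and
    # the list of functions to extract from it now live on the libgcc side.
    LIB1ASMSRC   = <cpu>/lib1funcs.S
    LIB1ASMFUNCS = _example_one _example_two    # placeholder member names

    # Sketch of a consuming rule in the style of libgcc/Makefile.in: each member
    # is built by reassembling $(srcdir)/$(LIB1ASMSRC) with -DL<member> defined,
    # so only the matching "#ifdef L<member>" block of lib1funcs.S is emitted.
    # $(srcdir) and $< are the two points the Makefile.in change above touches.
    lib1asmfuncs-o = $(patsubst %,%$(objext),$(LIB1ASMFUNCS))
    $(lib1asmfuncs-o): %$(objext): $(srcdir)/config/$(LIB1ASMSRC)
    	$(gcc_compile) -DL$* -xassembler-with-cpp -c $<

With something of this shape in place, the libgcc/config.host changes listed above only need to add the appropriate t-* fragments to tmake_file for each target triplet, which is why most of the libgcc entries in this commit are one-line tmake_file additions.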
Diffstat (limited to 'gcc')
80 files changed, 124 insertions, 29698 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5c3a91da561..071cce6c29c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,111 @@ 2011-11-02 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> + * Makefile.in (LIB1ASMSRC): Don't export. + (libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC. + * config/arm/arm.c: Update lib1funcs.asm filename. + * config/arm/linux-eabi.h: Likewise. + * config/arm/bpabi-v6m.S, config/arm/bpabi.S, + config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to + ../libgcc/config/arm. + * config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S. + * config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/arm/t-arm-elf (LIB1ASMFUNCS): Remove. + * config/arm/t-bpabi: Likewise. + * config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove. + * config/arm/t-strongarm-elf: Likewise. + * config/arm/t-symbian: Likewise. + * config/arm/t-vxworks: Likewise. + * config/arm/t-wince-pe: Likewise. + * config/avr/libgcc.S: Move to ../libgcc/config/avr. + * config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/bfin/lib1funcs.asm: Move to + ../libgcc/config/bfin/lib1funcs.S. + * config/bfin/t-bfin: Remove. + * config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/bfin/t-bfin-linux: Likewise. + * config/bfin/t-bfin-uclinux: Likewise. + * config/c6x/lib1funcs.asm: Move to + ../libgcc/config/c6x/lib1funcs.S. + * config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/fr30/lib1funcs.asm: Move to + ../libgcc/config/fr30/lib1funcs.S. + * config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/frv/lib1funcs.asm: Move to + ../libgcc/config/frv/lib1funcs.S. + * config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/h8300/fixunssfsi.c: Update lib1funcs.asm filename. + * config/h8300/lib1funcs.asm: Move to + ../libgcc/config/h8300/lib1funcs.S. + * config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S. + * config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/i386/t-interix: Likewise. + * config/ia64/lib1funcs.asm: Move to + ../libgcc/config/ia64/lib1funcs.S. + * config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove. + * config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove. + * config/m32c/m32c.c: Update m32c-lib1.S filename. + * config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S. + * config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove. + * config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S. + * config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): New file. + * config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S. + * config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S. + * config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/mips/mips16.S: Move to ../libgcc/config/mips. + * config/mips/t-libgcc-mips16: Remove. + * config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove. + * config/pa/milli64.S: Move to ../libgcc/config/pa. + * config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove. + * config/pa/t-linux64: Likewise. + * config/picochip/libgccExtras/fake_libgcc.asm: Move to + ../libgcc/config/picochip/lib1funcs.S. + * config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove. 
+ * config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S. + * config/sh/lib1funcs.h: Move to ../libgcc/config/sh. + * config/sh/sh.h: Update lib1funcs.asm filename. + * config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove. + * config/sh/t-netbsd: Likewise. + * config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): + Remove. + * config/sh/t-sh64 (LIB1ASMFUNCS): Remove. + * config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S. + * config/sparc/lb1spl.asm: Remove. + * config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config/sparc/t-leon: Likewise. + * config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove. + * config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S. + * config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove + * config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S. + * config/vax/t-linux: Remove. + * config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to + ../libgcc/config/xtensa. + * config/xtensa/lib1funcs.asm: Move to + ../libgcc/config/xtensa/lib1funcs.S. + * config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove. + * config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file. + (bfin*-*): Likewise. + (mips64*-*-linux*, mipsisa64*-*-linux*): Remove + mips/t-libgcc-mips16 from tmake_file. + (mips*-*-linux*): Likewise. + (mips*-sde-elf*): Likewise. + (mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*) + (mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*) + (mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise. + (mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise. + (mips-*-elf*, mipsel-*-elf*): Likewise. + (mips64-*-elf*, mips64el-*-elf*): Likewise. + (mips64orion-*-elf*, mips64orionel-*-elf*): Likewise. + (mips*-*-rtems*): Likewise. + (mipstx39-*-elf*, mipstx39el-*-elf*): Likewise. + (vax-*-linux*): Remove vax/t-linux from tmake_file. + +2011-11-02 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> + * config.gcc (extra_parts): Remove. (*-*-freebsd*): Remove extra_parts. (*-*-linux*, frv-*-*linux*, *-*-kfreebsd*-gnu, *-*-knetbsd*-gnu, diff --git a/gcc/Makefile.in b/gcc/Makefile.in index b6951dc1486..38449d7c30e 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1110,7 +1110,6 @@ export DESTDIR export GCC_FOR_TARGET export INCLUDES export INSTALL_DATA -export LIB1ASMSRC export LIBGCC2_CFLAGS export LIPO_FOR_TARGET export MACHMODE_H @@ -1878,8 +1877,6 @@ libgcc-support: libgcc.mvars stmp-int-hdrs $(TCONFIG_H) \ libgcc.mvars: config.status Makefile $(LIB2ADD) $(LIB2ADD_ST) specs \ xgcc$(exeext) : > tmp-libgcc.mvars - echo LIB1ASMFUNCS = '$(LIB1ASMFUNCS)' >> tmp-libgcc.mvars - echo LIB1ASMSRC = '$(LIB1ASMSRC)' >> tmp-libgcc.mvars echo LIB2FUNCS_ST = '$(LIB2FUNCS_ST)' >> tmp-libgcc.mvars echo LIB2FUNCS_EXCLUDE = '$(LIB2FUNCS_EXCLUDE)' >> tmp-libgcc.mvars echo LIB2ADD = '$(call srcdirify,$(LIB2ADD))' >> tmp-libgcc.mvars diff --git a/gcc/config.gcc b/gcc/config.gcc index 6bbec7db39e..79230a6935a 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -950,11 +950,10 @@ bfin*-linux-uclibc*) ;; bfin*-rtems*) tm_file="${tm_file} dbxelf.h elfos.h bfin/elf.h bfin/rtems.h rtems.h newlib-stdint.h" - tmake_file="bfin/t-bfin t-rtems bfin/t-rtems" + tmake_file="t-rtems bfin/t-rtems" ;; bfin*-*) tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h bfin/elf.h" - tmake_file=bfin/t-bfin use_collect2=no use_gcc_stdint=wrap ;; @@ -1737,7 +1736,7 @@ mips*-*-netbsd*) # NetBSD/mips, either endian. 
;; mips64*-*-linux* | mipsisa64*-*-linux*) tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} mips/gnu-user.h mips/gnu-user64.h mips/linux64.h" - tmake_file="${tmake_file} mips/t-linux64 mips/t-libgcc-mips16" + tmake_file="${tmake_file} mips/t-linux64" tm_defines="${tm_defines} MIPS_ABI_DEFAULT=ABI_N32" case ${target} in mips64el-st-linux-gnu) @@ -1758,7 +1757,6 @@ mips64*-*-linux* | mipsisa64*-*-linux*) ;; mips*-*-linux*) # Linux MIPS, either endian. tm_file="dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} mips/gnu-user.h mips/linux.h" - tmake_file="${tmake_file} mips/t-libgcc-mips16" if test x$enable_targets = xall; then tm_file="${tm_file} mips/gnu-user64.h mips/linux64.h" tmake_file="${tmake_file} mips/t-linux64" @@ -1785,7 +1783,7 @@ mips*-*-openbsd*) ;; mips*-sde-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/sde.h" - tmake_file="mips/t-sde mips/t-libgcc-mips16" + tmake_file="mips/t-sde" extra_options="${extra_options} mips/sde.opt" case "${with_newlib}" in yes) @@ -1822,7 +1820,7 @@ mipsisa32r2-*-elf* | mipsisa32r2el-*-elf* | \ mipsisa64-*-elf* | mipsisa64el-*-elf* | \ mipsisa64r2-*-elf* | mipsisa64r2el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-isa3264 mips/t-libgcc-mips16" + tmake_file="mips/t-isa3264" case ${target} in mipsisa32r2*) tm_defines="${tm_defines} MIPS_ISA_DEFAULT=33" @@ -1859,17 +1857,17 @@ mipsisa64sr71k-*-elf*) ;; mipsisa64sb1-*-elf* | mipsisa64sb1el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16 mips/t-sb1" + tmake_file="mips/t-elf mips/t-sb1" target_cpu_default="MASK_64BIT|MASK_FLOAT64" tm_defines="${tm_defines} MIPS_ISA_DEFAULT=64 MIPS_CPU_STRING_DEFAULT=\\\"sb1\\\" MIPS_ABI_DEFAULT=ABI_O64" ;; mips-*-elf* | mipsel-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16" + tmake_file="mips/t-elf" ;; mips64-*-elf* | mips64el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16" + tmake_file="mips/t-elf" target_cpu_default="MASK_64BIT|MASK_FLOAT64" tm_defines="${tm_defines} MIPS_ISA_DEFAULT=3 MIPS_ABI_DEFAULT=ABI_O64" ;; @@ -1880,13 +1878,13 @@ mips64vr-*-elf* | mips64vrel-*-elf*) ;; mips64orion-*-elf* | mips64orionel-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elforion.h mips/elf.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16" + tmake_file="mips/t-elf" target_cpu_default="MASK_64BIT|MASK_FLOAT64" tm_defines="${tm_defines} MIPS_ISA_DEFAULT=3 MIPS_ABI_DEFAULT=ABI_O64" ;; mips*-*-rtems*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/elf.h mips/rtems.h rtems.h" - tmake_file="mips/t-elf mips/t-libgcc-mips16 t-rtems mips/t-rtems" + tmake_file="mips/t-elf t-rtems mips/t-rtems" ;; mips-wrs-vxworks) tm_file="elfos.h ${tm_file} mips/elf.h vx-common.h vxworks.h mips/vxworks.h" @@ -1894,7 +1892,7 @@ mips-wrs-vxworks) ;; mipstx39-*-elf* | mipstx39el-*-elf*) tm_file="elfos.h newlib-stdint.h ${tm_file} mips/r3900.h mips/elf.h" - tmake_file="mips/t-r3900 mips/t-libgcc-mips16" + tmake_file="mips/t-r3900" ;; mmix-knuth-mmixware) tm_file="${tm_file} newlib-stdint.h" @@ -2511,7 +2509,6 @@ v850*-*-*) vax-*-linux*) tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h vax/elf.h vax/linux.h" extra_options="${extra_options} vax/elf.opt" - tmake_file="${tmake_file} vax/t-linux" ;; vax-*-netbsdelf*) tm_file="${tm_file} elfos.h netbsd.h netbsd-elf.h vax/elf.h vax/netbsd-elf.h" diff --git 
a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index e07c8c328c6..5f0d5629462 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -23495,7 +23495,7 @@ arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal ARM insns and therefore guarantee that the shift count is modulo 256. - DImode shifts (those implemented by lib1funcs.asm or by optabs.c) + DImode shifts (those implemented by lib1funcs.S or by optabs.c) guarantee no particular behavior for out-of-range counts. */ static unsigned HOST_WIDE_INT diff --git a/gcc/config/arm/bpabi-v6m.S b/gcc/config/arm/bpabi-v6m.S deleted file mode 100644 index 4ecea6da5a6..00000000000 --- a/gcc/config/arm/bpabi-v6m.S +++ /dev/null @@ -1,318 +0,0 @@ -/* Miscellaneous BPABI functions. ARMv6M implementation - - Copyright (C) 2006, 2008, 2009, 2010 Free Software Foundation, Inc. - Contributed by CodeSourcery. - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifdef __ARM_EABI__ -/* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte - alignment from the caller. */ - /* .eabi_attribute 24, 0 -- default setting. */ - /* Tag_ABI_align_preserved: This code preserves 8-byte - alignment in any callee. */ - .eabi_attribute 25, 1 -#endif /* __ARM_EABI__ */ - -#ifdef L_aeabi_lcmp - -FUNC_START aeabi_lcmp - cmp xxh, yyh - beq 1f - bgt 2f - mov r0, #1 - neg r0, r0 - RET -2: - mov r0, #1 - RET -1: - sub r0, xxl, yyl - beq 1f - bhi 2f - mov r0, #1 - neg r0, r0 - RET -2: - mov r0, #1 -1: - RET - FUNC_END aeabi_lcmp - -#endif /* L_aeabi_lcmp */ - -#ifdef L_aeabi_ulcmp - -FUNC_START aeabi_ulcmp - cmp xxh, yyh - bne 1f - sub r0, xxl, yyl - beq 2f -1: - bcs 1f - mov r0, #1 - neg r0, r0 - RET -1: - mov r0, #1 -2: - RET - FUNC_END aeabi_ulcmp - -#endif /* L_aeabi_ulcmp */ - -.macro test_div_by_zero signed - cmp yyh, #0 - bne 7f - cmp yyl, #0 - bne 7f - cmp xxh, #0 - bne 2f - cmp xxl, #0 -2: - .ifc \signed, unsigned - beq 3f - mov xxh, #0 - mvn xxh, xxh @ 0xffffffff - mov xxl, xxh -3: - .else - beq 5f - blt 6f - mov xxl, #0 - mvn xxl, xxl @ 0xffffffff - lsr xxh, xxl, #1 @ 0x7fffffff - b 5f -6: mov xxh, #0x80 - lsl xxh, xxh, #24 @ 0x80000000 - mov xxl, #0 -5: - .endif - @ tailcalls are tricky on v6-m. - push {r0, r1, r2} - ldr r0, 1f - adr r1, 1f - add r0, r1 - str r0, [sp, #8] - @ We know we are not on armv4t, so pop pc is safe. 
- pop {r0, r1, pc} - .align 2 -1: - .word __aeabi_ldiv0 - 1b -7: -.endm - -#ifdef L_aeabi_ldivmod - -FUNC_START aeabi_ldivmod - test_div_by_zero signed - - push {r0, r1} - mov r0, sp - push {r0, lr} - ldr r0, [sp, #8] - bl SYM(__gnu_ldivmod_helper) - ldr r3, [sp, #4] - mov lr, r3 - add sp, sp, #8 - pop {r2, r3} - RET - FUNC_END aeabi_ldivmod - -#endif /* L_aeabi_ldivmod */ - -#ifdef L_aeabi_uldivmod - -FUNC_START aeabi_uldivmod - test_div_by_zero unsigned - - push {r0, r1} - mov r0, sp - push {r0, lr} - ldr r0, [sp, #8] - bl SYM(__gnu_uldivmod_helper) - ldr r3, [sp, #4] - mov lr, r3 - add sp, sp, #8 - pop {r2, r3} - RET - FUNC_END aeabi_uldivmod - -#endif /* L_aeabi_uldivmod */ - -#ifdef L_arm_addsubsf3 - -FUNC_START aeabi_frsub - - push {r4, lr} - mov r4, #1 - lsl r4, #31 - eor r0, r0, r4 - bl __aeabi_fadd - pop {r4, pc} - - FUNC_END aeabi_frsub - -#endif /* L_arm_addsubsf3 */ - -#ifdef L_arm_cmpsf2 - -FUNC_START aeabi_cfrcmple - - mov ip, r0 - mov r0, r1 - mov r1, ip - b 6f - -FUNC_START aeabi_cfcmpeq -FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: push {r0, r1, r2, r3, r4, lr} - bl __lesf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. - bmi 1f - mov r1, #0 - cmn r0, r1 -1: - pop {r0, r1, r2, r3, r4, pc} - - FUNC_END aeabi_cfcmple - FUNC_END aeabi_cfcmpeq - FUNC_END aeabi_cfrcmple - -FUNC_START aeabi_fcmpeq - - push {r4, lr} - bl __eqsf2 - neg r0, r0 - add r0, r0, #1 - pop {r4, pc} - - FUNC_END aeabi_fcmpeq - -.macro COMPARISON cond, helper, mode=sf2 -FUNC_START aeabi_fcmp\cond - - push {r4, lr} - bl __\helper\mode - cmp r0, #0 - b\cond 1f - mov r0, #0 - pop {r4, pc} -1: - mov r0, #1 - pop {r4, pc} - - FUNC_END aeabi_fcmp\cond -.endm - -COMPARISON lt, le -COMPARISON le, le -COMPARISON gt, ge -COMPARISON ge, ge - -#endif /* L_arm_cmpsf2 */ - -#ifdef L_arm_addsubdf3 - -FUNC_START aeabi_drsub - - push {r4, lr} - mov r4, #1 - lsl r4, #31 - eor xxh, xxh, r4 - bl __aeabi_dadd - pop {r4, pc} - - FUNC_END aeabi_drsub - -#endif /* L_arm_addsubdf3 */ - -#ifdef L_arm_cmpdf2 - -FUNC_START aeabi_cdrcmple - - mov ip, r0 - mov r0, r2 - mov r2, ip - mov ip, r1 - mov r1, r3 - mov r3, ip - b 6f - -FUNC_START aeabi_cdcmpeq -FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: push {r0, r1, r2, r3, r4, lr} - bl __ledf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. 
- bmi 1f - mov r1, #0 - cmn r0, r1 -1: - pop {r0, r1, r2, r3, r4, pc} - - FUNC_END aeabi_cdcmple - FUNC_END aeabi_cdcmpeq - FUNC_END aeabi_cdrcmple - -FUNC_START aeabi_dcmpeq - - push {r4, lr} - bl __eqdf2 - neg r0, r0 - add r0, r0, #1 - pop {r4, pc} - - FUNC_END aeabi_dcmpeq - -.macro COMPARISON cond, helper, mode=df2 -FUNC_START aeabi_dcmp\cond - - push {r4, lr} - bl __\helper\mode - cmp r0, #0 - b\cond 1f - mov r0, #0 - pop {r4, pc} -1: - mov r0, #1 - pop {r4, pc} - - FUNC_END aeabi_dcmp\cond -.endm - -COMPARISON lt, le -COMPARISON le, le -COMPARISON gt, ge -COMPARISON ge, ge - -#endif /* L_arm_cmpdf2 */ diff --git a/gcc/config/arm/bpabi.S b/gcc/config/arm/bpabi.S deleted file mode 100644 index 2ff338927fa..00000000000 --- a/gcc/config/arm/bpabi.S +++ /dev/null @@ -1,163 +0,0 @@ -/* Miscellaneous BPABI functions. - - Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010 - Free Software Foundation, Inc. - Contributed by CodeSourcery, LLC. - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifdef __ARM_EABI__ -/* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte - alignment from the caller. */ - /* .eabi_attribute 24, 0 -- default setting. */ - /* Tag_ABI_align_preserved: This code preserves 8-byte - alignment in any callee. */ - .eabi_attribute 25, 1 -#endif /* __ARM_EABI__ */ - -#ifdef L_aeabi_lcmp - -ARM_FUNC_START aeabi_lcmp - cmp xxh, yyh - do_it lt - movlt r0, #-1 - do_it gt - movgt r0, #1 - do_it ne - RETc(ne) - subs r0, xxl, yyl - do_it lo - movlo r0, #-1 - do_it hi - movhi r0, #1 - RET - FUNC_END aeabi_lcmp - -#endif /* L_aeabi_lcmp */ - -#ifdef L_aeabi_ulcmp - -ARM_FUNC_START aeabi_ulcmp - cmp xxh, yyh - do_it lo - movlo r0, #-1 - do_it hi - movhi r0, #1 - do_it ne - RETc(ne) - cmp xxl, yyl - do_it lo - movlo r0, #-1 - do_it hi - movhi r0, #1 - do_it eq - moveq r0, #0 - RET - FUNC_END aeabi_ulcmp - -#endif /* L_aeabi_ulcmp */ - -.macro test_div_by_zero signed -/* Tail-call to divide-by-zero handlers which may be overridden by the user, - so unwinding works properly. */ -#if defined(__thumb2__) - cbnz yyh, 1f - cbnz yyl, 1f - cmp xxh, #0 - do_it eq - cmpeq xxl, #0 - .ifc \signed, unsigned - beq 2f - mov xxh, #0xffffffff - mov xxl, xxh -2: - .else - do_it lt, t - movlt xxl, #0 - movlt xxh, #0x80000000 - do_it gt, t - movgt xxh, #0x7fffffff - movgt xxl, #0xffffffff - .endif - b SYM (__aeabi_ldiv0) __PLT__ -1: -#else - /* Note: Thumb-1 code calls via an ARM shim on processors which - support ARM mode. 
*/ - cmp yyh, #0 - cmpeq yyl, #0 - bne 2f - cmp xxh, #0 - cmpeq xxl, #0 - .ifc \signed, unsigned - movne xxh, #0xffffffff - movne xxl, #0xffffffff - .else - movlt xxh, #0x80000000 - movlt xxl, #0 - movgt xxh, #0x7fffffff - movgt xxl, #0xffffffff - .endif - b SYM (__aeabi_ldiv0) __PLT__ -2: -#endif -.endm - -#ifdef L_aeabi_ldivmod - -ARM_FUNC_START aeabi_ldivmod - test_div_by_zero signed - - sub sp, sp, #8 -#if defined(__thumb2__) - mov ip, sp - push {ip, lr} -#else - do_push {sp, lr} -#endif - bl SYM(__gnu_ldivmod_helper) __PLT__ - ldr lr, [sp, #4] - add sp, sp, #8 - do_pop {r2, r3} - RET - -#endif /* L_aeabi_ldivmod */ - -#ifdef L_aeabi_uldivmod - -ARM_FUNC_START aeabi_uldivmod - test_div_by_zero unsigned - - sub sp, sp, #8 -#if defined(__thumb2__) - mov ip, sp - push {ip, lr} -#else - do_push {sp, lr} -#endif - bl SYM(__gnu_uldivmod_helper) __PLT__ - ldr lr, [sp, #4] - add sp, sp, #8 - do_pop {r2, r3} - RET - -#endif /* L_aeabi_divmod */ - diff --git a/gcc/config/arm/ieee754-df.S b/gcc/config/arm/ieee754-df.S deleted file mode 100644 index eb0c38632d0..00000000000 --- a/gcc/config/arm/ieee754-df.S +++ /dev/null @@ -1,1447 +0,0 @@ -/* ieee754-df.S double-precision floating point support for ARM - - Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. - Contributed by Nicolas Pitre (nico@cam.org) - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -/* - * Notes: - * - * The goal of this code is to be as fast as possible. This is - * not meant to be easy to understand for the casual reader. - * For slightly simpler code please see the single precision version - * of this file. - * - * Only the default rounding mode is intended for best performances. - * Exceptions aren't supported yet, but that can be added quite easily - * if necessary without impacting performances. - */ - - -@ For FPA, float words are always big-endian. -@ For VFP, floats words follow the memory system mode. 
-#if defined(__VFP_FP__) && !defined(__ARMEB__) -#define xl r0 -#define xh r1 -#define yl r2 -#define yh r3 -#else -#define xh r0 -#define xl r1 -#define yh r2 -#define yl r3 -#endif - - -#ifdef L_arm_negdf2 - -ARM_FUNC_START negdf2 -ARM_FUNC_ALIAS aeabi_dneg negdf2 - - @ flip sign bit - eor xh, xh, #0x80000000 - RET - - FUNC_END aeabi_dneg - FUNC_END negdf2 - -#endif - -#ifdef L_arm_addsubdf3 - -ARM_FUNC_START aeabi_drsub - - eor xh, xh, #0x80000000 @ flip sign bit of first arg - b 1f - -ARM_FUNC_START subdf3 -ARM_FUNC_ALIAS aeabi_dsub subdf3 - - eor yh, yh, #0x80000000 @ flip sign bit of second arg -#if defined(__INTERWORKING_STUBS__) - b 1f @ Skip Thumb-code prologue -#endif - -ARM_FUNC_START adddf3 -ARM_FUNC_ALIAS aeabi_dadd adddf3 - -1: do_push {r4, r5, lr} - - @ Look for zeroes, equal values, INF, or NAN. - shift1 lsl, r4, xh, #1 - shift1 lsl, r5, yh, #1 - teq r4, r5 - do_it eq - teqeq xl, yl - do_it ne, ttt - COND(orr,s,ne) ip, r4, xl - COND(orr,s,ne) ip, r5, yl - COND(mvn,s,ne) ip, r4, asr #21 - COND(mvn,s,ne) ip, r5, asr #21 - beq LSYM(Lad_s) - - @ Compute exponent difference. Make largest exponent in r4, - @ corresponding arg in xh-xl, and positive exponent difference in r5. - shift1 lsr, r4, r4, #21 - rsbs r5, r4, r5, lsr #21 - do_it lt - rsblt r5, r5, #0 - ble 1f - add r4, r4, r5 - eor yl, xl, yl - eor yh, xh, yh - eor xl, yl, xl - eor xh, yh, xh - eor yl, xl, yl - eor yh, xh, yh -1: - @ If exponent difference is too large, return largest argument - @ already in xh-xl. We need up to 54 bit to handle proper rounding - @ of 0x1p54 - 1.1. - cmp r5, #54 - do_it hi - RETLDM "r4, r5" hi - - @ Convert mantissa to signed integer. - tst xh, #0x80000000 - mov xh, xh, lsl #12 - mov ip, #0x00100000 - orr xh, ip, xh, lsr #12 - beq 1f -#if defined(__thumb2__) - negs xl, xl - sbc xh, xh, xh, lsl #1 -#else - rsbs xl, xl, #0 - rsc xh, xh, #0 -#endif -1: - tst yh, #0x80000000 - mov yh, yh, lsl #12 - orr yh, ip, yh, lsr #12 - beq 1f -#if defined(__thumb2__) - negs yl, yl - sbc yh, yh, yh, lsl #1 -#else - rsbs yl, yl, #0 - rsc yh, yh, #0 -#endif -1: - @ If exponent == difference, one or both args were denormalized. - @ Since this is not common case, rescale them off line. - teq r4, r5 - beq LSYM(Lad_d) -LSYM(Lad_x): - - @ Compensate for the exponent overlapping the mantissa MSB added later - sub r4, r4, #1 - - @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip. - rsbs lr, r5, #32 - blt 1f - shift1 lsl, ip, yl, lr - shiftop adds xl xl yl lsr r5 yl - adc xh, xh, #0 - shiftop adds xl xl yh lsl lr yl - shiftop adcs xh xh yh asr r5 yh - b 2f -1: sub r5, r5, #32 - add lr, lr, #32 - cmp yl, #1 - shift1 lsl,ip, yh, lr - do_it cs - orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later - shiftop adds xl xl yh asr r5 yh - adcs xh, xh, yh, asr #31 -2: - @ We now have a result in xh-xl-ip. - @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above) - and r5, xh, #0x80000000 - bpl LSYM(Lad_p) -#if defined(__thumb2__) - mov lr, #0 - negs ip, ip - sbcs xl, lr, xl - sbc xh, lr, xh -#else - rsbs ip, ip, #0 - rscs xl, xl, #0 - rsc xh, xh, #0 -#endif - - @ Determine how to normalize the result. -LSYM(Lad_p): - cmp xh, #0x00100000 - bcc LSYM(Lad_a) - cmp xh, #0x00200000 - bcc LSYM(Lad_e) - - @ Result needs to be shifted right. - movs xh, xh, lsr #1 - movs xl, xl, rrx - mov ip, ip, rrx - add r4, r4, #1 - - @ Make sure we did not bust our exponent. - mov r2, r4, lsl #21 - cmn r2, #(2 << 21) - bcs LSYM(Lad_o) - - @ Our result is now properly aligned into xh-xl, remaining bits in ip. 
- @ Round with MSB of ip. If halfway between two numbers, round towards - @ LSB of xl = 0. - @ Pack final result together. -LSYM(Lad_e): - cmp ip, #0x80000000 - do_it eq - COND(mov,s,eq) ip, xl, lsr #1 - adcs xl, xl, #0 - adc xh, xh, r4, lsl #20 - orr xh, xh, r5 - RETLDM "r4, r5" - - @ Result must be shifted left and exponent adjusted. -LSYM(Lad_a): - movs ip, ip, lsl #1 - adcs xl, xl, xl - adc xh, xh, xh - tst xh, #0x00100000 - sub r4, r4, #1 - bne LSYM(Lad_e) - - @ No rounding necessary since ip will always be 0 at this point. -LSYM(Lad_l): - -#if __ARM_ARCH__ < 5 - - teq xh, #0 - movne r3, #20 - moveq r3, #52 - moveq xh, xl - moveq xl, #0 - mov r2, xh - cmp r2, #(1 << 16) - movhs r2, r2, lsr #16 - subhs r3, r3, #16 - cmp r2, #(1 << 8) - movhs r2, r2, lsr #8 - subhs r3, r3, #8 - cmp r2, #(1 << 4) - movhs r2, r2, lsr #4 - subhs r3, r3, #4 - cmp r2, #(1 << 2) - subhs r3, r3, #2 - sublo r3, r3, r2, lsr #1 - sub r3, r3, r2, lsr #3 - -#else - - teq xh, #0 - do_it eq, t - moveq xh, xl - moveq xl, #0 - clz r3, xh - do_it eq - addeq r3, r3, #32 - sub r3, r3, #11 - -#endif - - @ determine how to shift the value. - subs r2, r3, #32 - bge 2f - adds r2, r2, #12 - ble 1f - - @ shift value left 21 to 31 bits, or actually right 11 to 1 bits - @ since a register switch happened above. - add ip, r2, #20 - rsb r2, r2, #12 - shift1 lsl, xl, xh, ip - shift1 lsr, xh, xh, r2 - b 3f - - @ actually shift value left 1 to 20 bits, which might also represent - @ 32 to 52 bits if counting the register switch that happened earlier. -1: add r2, r2, #20 -2: do_it le - rsble ip, r2, #32 - shift1 lsl, xh, xh, r2 -#if defined(__thumb2__) - lsr ip, xl, ip - itt le - orrle xh, xh, ip - lslle xl, xl, r2 -#else - orrle xh, xh, xl, lsr ip - movle xl, xl, lsl r2 -#endif - - @ adjust exponent accordingly. -3: subs r4, r4, r3 - do_it ge, tt - addge xh, xh, r4, lsl #20 - orrge xh, xh, r5 - RETLDM "r4, r5" ge - - @ Exponent too small, denormalize result. - @ Find out proper shift value. - mvn r4, r4 - subs r4, r4, #31 - bge 2f - adds r4, r4, #12 - bgt 1f - - @ shift result right of 1 to 20 bits, sign is in r5. - add r4, r4, #20 - rsb r2, r4, #32 - shift1 lsr, xl, xl, r4 - shiftop orr xl xl xh lsl r2 yh - shiftop orr xh r5 xh lsr r4 yh - RETLDM "r4, r5" - - @ shift result right of 21 to 31 bits, or left 11 to 1 bits after - @ a register switch from xh to xl. -1: rsb r4, r4, #12 - rsb r2, r4, #32 - shift1 lsr, xl, xl, r2 - shiftop orr xl xl xh lsl r4 yh - mov xh, r5 - RETLDM "r4, r5" - - @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch - @ from xh to xl. -2: shift1 lsr, xl, xh, r4 - mov xh, r5 - RETLDM "r4, r5" - - @ Adjust exponents for denormalized arguments. - @ Note that r4 must not remain equal to 0. -LSYM(Lad_d): - teq r4, #0 - eor yh, yh, #0x00100000 - do_it eq, te - eoreq xh, xh, #0x00100000 - addeq r4, r4, #1 - subne r5, r5, #1 - b LSYM(Lad_x) - - -LSYM(Lad_s): - mvns ip, r4, asr #21 - do_it ne - COND(mvn,s,ne) ip, r5, asr #21 - beq LSYM(Lad_i) - - teq r4, r5 - do_it eq - teqeq xl, yl - beq 1f - - @ Result is x + 0.0 = x or 0.0 + y = y. - orrs ip, r4, xl - do_it eq, t - moveq xh, yh - moveq xl, yl - RETLDM "r4, r5" - -1: teq xh, yh - - @ Result is x - x = 0. - do_it ne, tt - movne xh, #0 - movne xl, #0 - RETLDM "r4, r5" ne - - @ Result is x + x = 2x. 
- movs ip, r4, lsr #21 - bne 2f - movs xl, xl, lsl #1 - adcs xh, xh, xh - do_it cs - orrcs xh, xh, #0x80000000 - RETLDM "r4, r5" -2: adds r4, r4, #(2 << 21) - do_it cc, t - addcc xh, xh, #(1 << 20) - RETLDM "r4, r5" cc - and r5, xh, #0x80000000 - - @ Overflow: return INF. -LSYM(Lad_o): - orr xh, r5, #0x7f000000 - orr xh, xh, #0x00f00000 - mov xl, #0 - RETLDM "r4, r5" - - @ At least one of x or y is INF/NAN. - @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) - @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) - @ if either is NAN: return NAN - @ if opposite sign: return NAN - @ otherwise return xh-xl (which is INF or -INF) -LSYM(Lad_i): - mvns ip, r4, asr #21 - do_it ne, te - movne xh, yh - movne xl, yl - COND(mvn,s,eq) ip, r5, asr #21 - do_it ne, t - movne yh, xh - movne yl, xl - orrs r4, xl, xh, lsl #12 - do_it eq, te - COND(orr,s,eq) r5, yl, yh, lsl #12 - teqeq xh, yh - orrne xh, xh, #0x00080000 @ quiet NAN - RETLDM "r4, r5" - - FUNC_END aeabi_dsub - FUNC_END subdf3 - FUNC_END aeabi_dadd - FUNC_END adddf3 - -ARM_FUNC_START floatunsidf -ARM_FUNC_ALIAS aeabi_ui2d floatunsidf - - teq r0, #0 - do_it eq, t - moveq r1, #0 - RETc(eq) - do_push {r4, r5, lr} - mov r4, #0x400 @ initial exponent - add r4, r4, #(52-1 - 1) - mov r5, #0 @ sign bit is 0 - .ifnc xl, r0 - mov xl, r0 - .endif - mov xh, #0 - b LSYM(Lad_l) - - FUNC_END aeabi_ui2d - FUNC_END floatunsidf - -ARM_FUNC_START floatsidf -ARM_FUNC_ALIAS aeabi_i2d floatsidf - - teq r0, #0 - do_it eq, t - moveq r1, #0 - RETc(eq) - do_push {r4, r5, lr} - mov r4, #0x400 @ initial exponent - add r4, r4, #(52-1 - 1) - ands r5, r0, #0x80000000 @ sign bit in r5 - do_it mi - rsbmi r0, r0, #0 @ absolute value - .ifnc xl, r0 - mov xl, r0 - .endif - mov xh, #0 - b LSYM(Lad_l) - - FUNC_END aeabi_i2d - FUNC_END floatsidf - -ARM_FUNC_START extendsfdf2 -ARM_FUNC_ALIAS aeabi_f2d extendsfdf2 - - movs r2, r0, lsl #1 @ toss sign bit - mov xh, r2, asr #3 @ stretch exponent - mov xh, xh, rrx @ retrieve sign bit - mov xl, r2, lsl #28 @ retrieve remaining bits - do_it ne, ttt - COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent - teqne r3, #0xff000000 @ if not 0, check if INF or NAN - eorne xh, xh, #0x38000000 @ fixup exponent otherwise. - RETc(ne) @ and return it. - - teq r2, #0 @ if actually 0 - do_it ne, e - teqne r3, #0xff000000 @ or INF or NAN - RETc(eq) @ we are done already. - - @ value was denormalized. We can normalize it now. - do_push {r4, r5, lr} - mov r4, #0x380 @ setup corresponding exponent - and r5, xh, #0x80000000 @ move sign bit in r5 - bic xh, xh, #0x80000000 - b LSYM(Lad_l) - - FUNC_END aeabi_f2d - FUNC_END extendsfdf2 - -ARM_FUNC_START floatundidf -ARM_FUNC_ALIAS aeabi_ul2d floatundidf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqd f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - @ For hard FPA code we want to return via the tail below so that - @ we can return the result in f0 as well as in r0/r1 for backwards - @ compatibility. - adr ip, LSYM(f0_ret) - @ Push pc as well so that RETLDM works correctly. 
- do_push {r4, r5, ip, lr, pc} -#else - do_push {r4, r5, lr} -#endif - - mov r5, #0 - b 2f - -ARM_FUNC_START floatdidf -ARM_FUNC_ALIAS aeabi_l2d floatdidf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqd f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - @ For hard FPA code we want to return via the tail below so that - @ we can return the result in f0 as well as in r0/r1 for backwards - @ compatibility. - adr ip, LSYM(f0_ret) - @ Push pc as well so that RETLDM works correctly. - do_push {r4, r5, ip, lr, pc} -#else - do_push {r4, r5, lr} -#endif - - ands r5, ah, #0x80000000 @ sign bit in r5 - bpl 2f -#if defined(__thumb2__) - negs al, al - sbc ah, ah, ah, lsl #1 -#else - rsbs al, al, #0 - rsc ah, ah, #0 -#endif -2: - mov r4, #0x400 @ initial exponent - add r4, r4, #(52-1 - 1) - - @ FPA little-endian: must swap the word order. - .ifnc xh, ah - mov ip, al - mov xh, ah - mov xl, ip - .endif - - movs ip, xh, lsr #22 - beq LSYM(Lad_p) - - @ The value is too big. Scale it down a bit... - mov r2, #3 - movs ip, ip, lsr #3 - do_it ne - addne r2, r2, #3 - movs ip, ip, lsr #3 - do_it ne - addne r2, r2, #3 - add r2, r2, ip, lsr #3 - - rsb r3, r2, #32 - shift1 lsl, ip, xl, r3 - shift1 lsr, xl, xl, r2 - shiftop orr xl xl xh lsl r3 lr - shift1 lsr, xh, xh, r2 - add r4, r4, r2 - b LSYM(Lad_p) - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - - @ Legacy code expects the result to be returned in f0. Copy it - @ there as well. -LSYM(f0_ret): - do_push {r0, r1} - ldfd f0, [sp], #8 - RETLDM - -#endif - - FUNC_END floatdidf - FUNC_END aeabi_l2d - FUNC_END floatundidf - FUNC_END aeabi_ul2d - -#endif /* L_addsubdf3 */ - -#ifdef L_arm_muldivdf3 - -ARM_FUNC_START muldf3 -ARM_FUNC_ALIAS aeabi_dmul muldf3 - do_push {r4, r5, r6, lr} - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - orr ip, ip, #0x700 - ands r4, ip, xh, lsr #20 - do_it ne, tte - COND(and,s,ne) r5, ip, yh, lsr #20 - teqne r4, ip - teqne r5, ip - bleq LSYM(Lml_s) - - @ Add exponents together - add r4, r4, r5 - - @ Determine final sign. - eor r6, xh, yh - - @ Convert mantissa to unsigned integer. - @ If power of two, branch to a separate path. - bic xh, xh, ip, lsl #21 - bic yh, yh, ip, lsl #21 - orrs r5, xl, xh, lsl #12 - do_it ne - COND(orr,s,ne) r5, yl, yh, lsl #12 - orr xh, xh, #0x00100000 - orr yh, yh, #0x00100000 - beq LSYM(Lml_1) - -#if __ARM_ARCH__ < 4 - - @ Put sign bit in r6, which will be restored in yl later. - and r6, r6, #0x80000000 - - @ Well, no way to make it shorter without the umull instruction. 
- stmfd sp!, {r6, r7, r8, r9, sl, fp} - mov r7, xl, lsr #16 - mov r8, yl, lsr #16 - mov r9, xh, lsr #16 - mov sl, yh, lsr #16 - bic xl, xl, r7, lsl #16 - bic yl, yl, r8, lsl #16 - bic xh, xh, r9, lsl #16 - bic yh, yh, sl, lsl #16 - mul ip, xl, yl - mul fp, xl, r8 - mov lr, #0 - adds ip, ip, fp, lsl #16 - adc lr, lr, fp, lsr #16 - mul fp, r7, yl - adds ip, ip, fp, lsl #16 - adc lr, lr, fp, lsr #16 - mul fp, xl, sl - mov r5, #0 - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, r7, yh - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, xh, r8 - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, r9, yl - adds lr, lr, fp, lsl #16 - adc r5, r5, fp, lsr #16 - mul fp, xh, sl - mul r6, r9, sl - adds r5, r5, fp, lsl #16 - adc r6, r6, fp, lsr #16 - mul fp, r9, yh - adds r5, r5, fp, lsl #16 - adc r6, r6, fp, lsr #16 - mul fp, xl, yh - adds lr, lr, fp - mul fp, r7, sl - adcs r5, r5, fp - mul fp, xh, yl - adc r6, r6, #0 - adds lr, lr, fp - mul fp, r9, r8 - adcs r5, r5, fp - mul fp, r7, r8 - adc r6, r6, #0 - adds lr, lr, fp - mul fp, xh, yh - adcs r5, r5, fp - adc r6, r6, #0 - ldmfd sp!, {yl, r7, r8, r9, sl, fp} - -#else - - @ Here is the actual multiplication. - umull ip, lr, xl, yl - mov r5, #0 - umlal lr, r5, xh, yl - and yl, r6, #0x80000000 - umlal lr, r5, xl, yh - mov r6, #0 - umlal r5, r6, xh, yh - -#endif - - @ The LSBs in ip are only significant for the final rounding. - @ Fold them into lr. - teq ip, #0 - do_it ne - orrne lr, lr, #1 - - @ Adjust result upon the MSB position. - sub r4, r4, #0xff - cmp r6, #(1 << (20-11)) - sbc r4, r4, #0x300 - bcs 1f - movs lr, lr, lsl #1 - adcs r5, r5, r5 - adc r6, r6, r6 -1: - @ Shift to final position, add sign to result. - orr xh, yl, r6, lsl #11 - orr xh, xh, r5, lsr #21 - mov xl, r5, lsl #11 - orr xl, xl, lr, lsr #21 - mov lr, lr, lsl #11 - - @ Check exponent range for under/overflow. - subs ip, r4, #(254 - 1) - do_it hi - cmphi ip, #0x700 - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - cmp lr, #0x80000000 - do_it eq - COND(mov,s,eq) lr, xl, lsr #1 - adcs xl, xl, #0 - adc xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" - - @ Multiplication by 0x1p*: let''s shortcut a lot of code. -LSYM(Lml_1): - and r6, r6, #0x80000000 - orr xh, r6, xh - orr xl, xl, yl - eor xh, xh, yh - subs r4, r4, ip, lsr #1 - do_it gt, tt - COND(rsb,s,gt) r5, r4, ip - orrgt xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" gt - - @ Under/overflow: fix things up for the code below. - orr xh, xh, #0x00100000 - mov lr, #0 - subs r4, r4, #1 - -LSYM(Lml_u): - @ Overflow? - bgt LSYM(Lml_o) - - @ Check if denormalized result is possible, otherwise return signed 0. - cmn r4, #(53 + 1) - do_it le, tt - movle xl, #0 - bicle xh, xh, #0x7fffffff - RETLDM "r4, r5, r6" le - - @ Find out proper shift value. - rsb r4, r4, #0 - subs r4, r4, #32 - bge 2f - adds r4, r4, #12 - bgt 1f - - @ shift result right of 1 to 20 bits, preserve sign bit, round, etc. - add r4, r4, #20 - rsb r5, r4, #32 - shift1 lsl, r3, xl, r5 - shift1 lsr, xl, xl, r4 - shiftop orr xl xl xh lsl r5 r2 - and r2, xh, #0x80000000 - bic xh, xh, #0x80000000 - adds xl, xl, r3, lsr #31 - shiftop adc xh r2 xh lsr r4 r6 - orrs lr, lr, r3, lsl #1 - do_it eq - biceq xl, xl, r3, lsr #31 - RETLDM "r4, r5, r6" - - @ shift result right of 21 to 31 bits, or left 11 to 1 bits after - @ a register switch from xh to xl. Then round. 
-1: rsb r4, r4, #12 - rsb r5, r4, #32 - shift1 lsl, r3, xl, r4 - shift1 lsr, xl, xl, r5 - shiftop orr xl xl xh lsl r4 r2 - bic xh, xh, #0x7fffffff - adds xl, xl, r3, lsr #31 - adc xh, xh, #0 - orrs lr, lr, r3, lsl #1 - do_it eq - biceq xl, xl, r3, lsr #31 - RETLDM "r4, r5, r6" - - @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch - @ from xh to xl. Leftover bits are in r3-r6-lr for rounding. -2: rsb r5, r4, #32 - shiftop orr lr lr xl lsl r5 r2 - shift1 lsr, r3, xl, r4 - shiftop orr r3 r3 xh lsl r5 r2 - shift1 lsr, xl, xh, r4 - bic xh, xh, #0x7fffffff - shiftop bic xl xl xh lsr r4 r2 - add xl, xl, r3, lsr #31 - orrs lr, lr, r3, lsl #1 - do_it eq - biceq xl, xl, r3, lsr #31 - RETLDM "r4, r5, r6" - - @ One or both arguments are denormalized. - @ Scale them leftwards and preserve sign bit. -LSYM(Lml_d): - teq r4, #0 - bne 2f - and r6, xh, #0x80000000 -1: movs xl, xl, lsl #1 - adc xh, xh, xh - tst xh, #0x00100000 - do_it eq - subeq r4, r4, #1 - beq 1b - orr xh, xh, r6 - teq r5, #0 - do_it ne - RETc(ne) -2: and r6, yh, #0x80000000 -3: movs yl, yl, lsl #1 - adc yh, yh, yh - tst yh, #0x00100000 - do_it eq - subeq r5, r5, #1 - beq 3b - orr yh, yh, r6 - RET - -LSYM(Lml_s): - @ Isolate the INF and NAN cases away - teq r4, ip - and r5, ip, yh, lsr #20 - do_it ne - teqne r5, ip - beq 1f - - @ Here, one or more arguments are either denormalized or zero. - orrs r6, xl, xh, lsl #1 - do_it ne - COND(orr,s,ne) r6, yl, yh, lsl #1 - bne LSYM(Lml_d) - - @ Result is 0, but determine sign anyway. -LSYM(Lml_z): - eor xh, xh, yh - and xh, xh, #0x80000000 - mov xl, #0 - RETLDM "r4, r5, r6" - -1: @ One or both args are INF or NAN. - orrs r6, xl, xh, lsl #1 - do_it eq, te - moveq xl, yl - moveq xh, yh - COND(orr,s,ne) r6, yl, yh, lsl #1 - beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN - teq r4, ip - bne 1f - orrs r6, xl, xh, lsl #12 - bne LSYM(Lml_n) @ NAN * <anything> -> NAN -1: teq r5, ip - bne LSYM(Lml_i) - orrs r6, yl, yh, lsl #12 - do_it ne, t - movne xl, yl - movne xh, yh - bne LSYM(Lml_n) @ <anything> * NAN -> NAN - - @ Result is INF, but we need to determine its sign. -LSYM(Lml_i): - eor xh, xh, yh - - @ Overflow: return INF (sign already in xh). -LSYM(Lml_o): - and xh, xh, #0x80000000 - orr xh, xh, #0x7f000000 - orr xh, xh, #0x00f00000 - mov xl, #0 - RETLDM "r4, r5, r6" - - @ Return a quiet NAN. -LSYM(Lml_n): - orr xh, xh, #0x7f000000 - orr xh, xh, #0x00f80000 - RETLDM "r4, r5, r6" - - FUNC_END aeabi_dmul - FUNC_END muldf3 - -ARM_FUNC_START divdf3 -ARM_FUNC_ALIAS aeabi_ddiv divdf3 - - do_push {r4, r5, r6, lr} - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - orr ip, ip, #0x700 - ands r4, ip, xh, lsr #20 - do_it ne, tte - COND(and,s,ne) r5, ip, yh, lsr #20 - teqne r4, ip - teqne r5, ip - bleq LSYM(Ldv_s) - - @ Substract divisor exponent from dividend''s. - sub r4, r4, r5 - - @ Preserve final sign into lr. - eor lr, xh, yh - - @ Convert mantissa to unsigned integer. - @ Dividend -> r5-r6, divisor -> yh-yl. - orrs r5, yl, yh, lsl #12 - mov xh, xh, lsl #12 - beq LSYM(Ldv_1) - mov yh, yh, lsl #12 - mov r5, #0x10000000 - orr yh, r5, yh, lsr #4 - orr yh, yh, yl, lsr #24 - mov yl, yl, lsl #8 - orr r5, r5, xh, lsr #4 - orr r5, r5, xl, lsr #24 - mov r6, xl, lsl #8 - - @ Initialize xh with final sign bit. - and xh, lr, #0x80000000 - - @ Ensure result will land to known bit position. - @ Apply exponent bias accordingly. 
- cmp r5, yh - do_it eq - cmpeq r6, yl - adc r4, r4, #(255 - 2) - add r4, r4, #0x300 - bcs 1f - movs yh, yh, lsr #1 - mov yl, yl, rrx -1: - @ Perform first substraction to align result to a nibble. - subs r6, r6, yl - sbc r5, r5, yh - movs yh, yh, lsr #1 - mov yl, yl, rrx - mov xl, #0x00100000 - mov ip, #0x00080000 - - @ The actual division loop. -1: subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip - movs yh, yh, lsr #1 - mov yl, yl, rrx - subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip, lsr #1 - movs yh, yh, lsr #1 - mov yl, yl, rrx - subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip, lsr #2 - movs yh, yh, lsr #1 - mov yl, yl, rrx - subs lr, r6, yl - sbcs lr, r5, yh - do_it cs, tt - subcs r6, r6, yl - movcs r5, lr - orrcs xl, xl, ip, lsr #3 - - orrs lr, r5, r6 - beq 2f - mov r5, r5, lsl #4 - orr r5, r5, r6, lsr #28 - mov r6, r6, lsl #4 - mov yh, yh, lsl #3 - orr yh, yh, yl, lsr #29 - mov yl, yl, lsl #3 - movs ip, ip, lsr #4 - bne 1b - - @ We are done with a word of the result. - @ Loop again for the low word if this pass was for the high word. - tst xh, #0x00100000 - bne 3f - orr xh, xh, xl - mov xl, #0 - mov ip, #0x80000000 - b 1b -2: - @ Be sure result starts in the high word. - tst xh, #0x00100000 - do_it eq, t - orreq xh, xh, xl - moveq xl, #0 -3: - @ Check exponent range for under/overflow. - subs ip, r4, #(254 - 1) - do_it hi - cmphi ip, #0x700 - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - subs ip, r5, yh - do_it eq, t - COND(sub,s,eq) ip, r6, yl - COND(mov,s,eq) ip, xl, lsr #1 - adcs xl, xl, #0 - adc xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" - - @ Division by 0x1p*: shortcut a lot of code. -LSYM(Ldv_1): - and lr, lr, #0x80000000 - orr xh, lr, xh, lsr #12 - adds r4, r4, ip, lsr #1 - do_it gt, tt - COND(rsb,s,gt) r5, r4, ip - orrgt xh, xh, r4, lsl #20 - RETLDM "r4, r5, r6" gt - - orr xh, xh, #0x00100000 - mov lr, #0 - subs r4, r4, #1 - b LSYM(Lml_u) - - @ Result mightt need to be denormalized: put remainder bits - @ in lr for rounding considerations. -LSYM(Ldv_u): - orr lr, r5, r6 - b LSYM(Lml_u) - - @ One or both arguments is either INF, NAN or zero. -LSYM(Ldv_s): - and r5, ip, yh, lsr #20 - teq r4, ip - do_it eq - teqeq r5, ip - beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN - teq r4, ip - bne 1f - orrs r4, xl, xh, lsl #12 - bne LSYM(Lml_n) @ NAN / <anything> -> NAN - teq r5, ip - bne LSYM(Lml_i) @ INF / <anything> -> INF - mov xl, yl - mov xh, yh - b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN -1: teq r5, ip - bne 2f - orrs r5, yl, yh, lsl #12 - beq LSYM(Lml_z) @ <anything> / INF -> 0 - mov xl, yl - mov xh, yh - b LSYM(Lml_n) @ <anything> / NAN -> NAN -2: @ If both are nonzero, we need to normalize and resume above. - orrs r6, xl, xh, lsl #1 - do_it ne - COND(orr,s,ne) r6, yl, yh, lsl #1 - bne LSYM(Lml_d) - @ One or both arguments are 0. - orrs r4, xl, xh, lsl #1 - bne LSYM(Lml_i) @ <non_zero> / 0 -> INF - orrs r5, yl, yh, lsl #1 - bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 - b LSYM(Lml_n) @ 0 / 0 -> NAN - - FUNC_END aeabi_ddiv - FUNC_END divdf3 - -#endif /* L_muldivdf3 */ - -#ifdef L_arm_cmpdf2 - -@ Note: only r0 (return value) and ip are clobbered here. 
- -ARM_FUNC_START gtdf2 -ARM_FUNC_ALIAS gedf2 gtdf2 - mov ip, #-1 - b 1f - -ARM_FUNC_START ltdf2 -ARM_FUNC_ALIAS ledf2 ltdf2 - mov ip, #1 - b 1f - -ARM_FUNC_START cmpdf2 -ARM_FUNC_ALIAS nedf2 cmpdf2 -ARM_FUNC_ALIAS eqdf2 cmpdf2 - mov ip, #1 @ how should we specify unordered here? - -1: str ip, [sp, #-4]! - - @ Trap any INF/NAN first. - mov ip, xh, lsl #1 - mvns ip, ip, asr #21 - mov ip, yh, lsl #1 - do_it ne - COND(mvn,s,ne) ip, ip, asr #21 - beq 3f - - @ Test for equality. - @ Note that 0.0 is equal to -0.0. -2: add sp, sp, #4 - orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 - do_it eq, e - COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 - teqne xh, yh @ or xh == yh - do_it eq, tt - teqeq xl, yl @ and xl == yl - moveq r0, #0 @ then equal. - RETc(eq) - - @ Clear C flag - cmn r0, #0 - - @ Compare sign, - teq xh, yh - - @ Compare values if same sign - do_it pl - cmppl xh, yh - do_it eq - cmpeq xl, yl - - @ Result: - do_it cs, e - movcs r0, yh, asr #31 - mvncc r0, yh, asr #31 - orr r0, r0, #1 - RET - - @ Look for a NAN. -3: mov ip, xh, lsl #1 - mvns ip, ip, asr #21 - bne 4f - orrs ip, xl, xh, lsl #12 - bne 5f @ x is NAN -4: mov ip, yh, lsl #1 - mvns ip, ip, asr #21 - bne 2b - orrs ip, yl, yh, lsl #12 - beq 2b @ y is not NAN -5: ldr r0, [sp], #4 @ unordered return code - RET - - FUNC_END gedf2 - FUNC_END gtdf2 - FUNC_END ledf2 - FUNC_END ltdf2 - FUNC_END nedf2 - FUNC_END eqdf2 - FUNC_END cmpdf2 - -ARM_FUNC_START aeabi_cdrcmple - - mov ip, r0 - mov r0, r2 - mov r2, ip - mov ip, r1 - mov r1, r3 - mov r3, ip - b 6f - -ARM_FUNC_START aeabi_cdcmpeq -ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: do_push {r0, lr} - ARM_CALL cmpdf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. - do_it mi - cmnmi r0, #0 - RETLDM "r0" - - FUNC_END aeabi_cdcmple - FUNC_END aeabi_cdcmpeq - FUNC_END aeabi_cdrcmple - -ARM_FUNC_START aeabi_dcmpeq - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdcmple - do_it eq, e - moveq r0, #1 @ Equal to. - movne r0, #0 @ Less than, greater than, or unordered. - RETLDM - - FUNC_END aeabi_dcmpeq - -ARM_FUNC_START aeabi_dcmplt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdcmple - do_it cc, e - movcc r0, #1 @ Less than. - movcs r0, #0 @ Equal to, greater than, or unordered. - RETLDM - - FUNC_END aeabi_dcmplt - -ARM_FUNC_START aeabi_dcmple - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdcmple - do_it ls, e - movls r0, #1 @ Less than or equal to. - movhi r0, #0 @ Greater than or unordered. - RETLDM - - FUNC_END aeabi_dcmple - -ARM_FUNC_START aeabi_dcmpge - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdrcmple - do_it ls, e - movls r0, #1 @ Operand 2 is less than or equal to operand 1. - movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. - RETLDM - - FUNC_END aeabi_dcmpge - -ARM_FUNC_START aeabi_dcmpgt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cdrcmple - do_it cc, e - movcc r0, #1 @ Operand 2 is less than operand 1. - movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, - @ or they are unordered. 
- RETLDM - - FUNC_END aeabi_dcmpgt - -#endif /* L_cmpdf2 */ - -#ifdef L_arm_unorddf2 - -ARM_FUNC_START unorddf2 -ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 - - mov ip, xh, lsl #1 - mvns ip, ip, asr #21 - bne 1f - orrs ip, xl, xh, lsl #12 - bne 3f @ x is NAN -1: mov ip, yh, lsl #1 - mvns ip, ip, asr #21 - bne 2f - orrs ip, yl, yh, lsl #12 - bne 3f @ y is NAN -2: mov r0, #0 @ arguments are ordered. - RET - -3: mov r0, #1 @ arguments are unordered. - RET - - FUNC_END aeabi_dcmpun - FUNC_END unorddf2 - -#endif /* L_unorddf2 */ - -#ifdef L_arm_fixdfsi - -ARM_FUNC_START fixdfsi -ARM_FUNC_ALIAS aeabi_d2iz fixdfsi - - @ check exponent range. - mov r2, xh, lsl #1 - adds r2, r2, #(1 << 21) - bcs 2f @ value is INF or NAN - bpl 1f @ value is too small - mov r3, #(0xfffffc00 + 31) - subs r2, r3, r2, asr #21 - bls 3f @ value is too large - - @ scale value - mov r3, xh, lsl #11 - orr r3, r3, #0x80000000 - orr r3, r3, xl, lsr #21 - tst xh, #0x80000000 @ the sign bit - shift1 lsr, r0, r3, r2 - do_it ne - rsbne r0, r0, #0 - RET - -1: mov r0, #0 - RET - -2: orrs xl, xl, xh, lsl #12 - bne 4f @ x is NAN. -3: ands r0, xh, #0x80000000 @ the sign bit - do_it eq - moveq r0, #0x7fffffff @ maximum signed positive si - RET - -4: mov r0, #0 @ How should we convert NAN? - RET - - FUNC_END aeabi_d2iz - FUNC_END fixdfsi - -#endif /* L_fixdfsi */ - -#ifdef L_arm_fixunsdfsi - -ARM_FUNC_START fixunsdfsi -ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi - - @ check exponent range. - movs r2, xh, lsl #1 - bcs 1f @ value is negative - adds r2, r2, #(1 << 21) - bcs 2f @ value is INF or NAN - bpl 1f @ value is too small - mov r3, #(0xfffffc00 + 31) - subs r2, r3, r2, asr #21 - bmi 3f @ value is too large - - @ scale value - mov r3, xh, lsl #11 - orr r3, r3, #0x80000000 - orr r3, r3, xl, lsr #21 - shift1 lsr, r0, r3, r2 - RET - -1: mov r0, #0 - RET - -2: orrs xl, xl, xh, lsl #12 - bne 4f @ value is NAN. -3: mov r0, #0xffffffff @ maximum unsigned si - RET - -4: mov r0, #0 @ How should we convert NAN? - RET - - FUNC_END aeabi_d2uiz - FUNC_END fixunsdfsi - -#endif /* L_fixunsdfsi */ - -#ifdef L_arm_truncdfsf2 - -ARM_FUNC_START truncdfsf2 -ARM_FUNC_ALIAS aeabi_d2f truncdfsf2 - - @ check exponent range. - mov r2, xh, lsl #1 - subs r3, r2, #((1023 - 127) << 21) - do_it cs, t - COND(sub,s,cs) ip, r3, #(1 << 21) - COND(rsb,s,cs) ip, ip, #(254 << 21) - bls 2f @ value is out of range - -1: @ shift and round mantissa - and ip, xh, #0x80000000 - mov r2, xl, lsl #3 - orr xl, ip, xl, lsr #29 - cmp r2, #0x80000000 - adc r0, xl, r3, lsl #2 - do_it eq - biceq r0, r0, #1 - RET - -2: @ either overflow or underflow - tst xh, #0x40000000 - bne 3f @ overflow - - @ check if denormalized value is possible - adds r2, r3, #(23 << 21) - do_it lt, t - andlt r0, xh, #0x80000000 @ too small, return signed 0. - RETc(lt) - - @ denormalize value so we can resume with the code above afterwards. - orr xh, xh, #0x00100000 - mov r2, r2, lsr #21 - rsb r2, r2, #24 - rsb ip, r2, #32 -#if defined(__thumb2__) - lsls r3, xl, ip -#else - movs r3, xl, lsl ip -#endif - shift1 lsr, xl, xl, r2 - do_it ne - orrne xl, xl, #1 @ fold r3 for rounding considerations. 
- mov r3, xh, lsl #11 - mov r3, r3, lsr #11 - shiftop orr xl xl r3 lsl ip ip - shift1 lsr, r3, r3, r2 - mov r3, r3, lsl #1 - b 1b - -3: @ chech for NAN - mvns r3, r2, asr #21 - bne 5f @ simple overflow - orrs r3, xl, xh, lsl #12 - do_it ne, tt - movne r0, #0x7f000000 - orrne r0, r0, #0x00c00000 - RETc(ne) @ return NAN - -5: @ return INF with sign - and r0, xh, #0x80000000 - orr r0, r0, #0x7f000000 - orr r0, r0, #0x00800000 - RET - - FUNC_END aeabi_d2f - FUNC_END truncdfsf2 - -#endif /* L_truncdfsf2 */ diff --git a/gcc/config/arm/ieee754-sf.S b/gcc/config/arm/ieee754-sf.S deleted file mode 100644 index c93f66d8ff8..00000000000 --- a/gcc/config/arm/ieee754-sf.S +++ /dev/null @@ -1,1060 +0,0 @@ -/* ieee754-sf.S single-precision floating point support for ARM - - Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. - Contributed by Nicolas Pitre (nico@cam.org) - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -/* - * Notes: - * - * The goal of this code is to be as fast as possible. This is - * not meant to be easy to understand for the casual reader. - * - * Only the default rounding mode is intended for best performances. - * Exceptions aren't supported yet, but that can be added quite easily - * if necessary without impacting performances. - */ - -#ifdef L_arm_negsf2 - -ARM_FUNC_START negsf2 -ARM_FUNC_ALIAS aeabi_fneg negsf2 - - eor r0, r0, #0x80000000 @ flip sign bit - RET - - FUNC_END aeabi_fneg - FUNC_END negsf2 - -#endif - -#ifdef L_arm_addsubsf3 - -ARM_FUNC_START aeabi_frsub - - eor r0, r0, #0x80000000 @ flip sign bit of first arg - b 1f - -ARM_FUNC_START subsf3 -ARM_FUNC_ALIAS aeabi_fsub subsf3 - - eor r1, r1, #0x80000000 @ flip sign bit of second arg -#if defined(__INTERWORKING_STUBS__) - b 1f @ Skip Thumb-code prologue -#endif - -ARM_FUNC_START addsf3 -ARM_FUNC_ALIAS aeabi_fadd addsf3 - -1: @ Look for zeroes, equal values, INF, or NAN. - movs r2, r0, lsl #1 - do_it ne, ttt - COND(mov,s,ne) r3, r1, lsl #1 - teqne r2, r3 - COND(mvn,s,ne) ip, r2, asr #24 - COND(mvn,s,ne) ip, r3, asr #24 - beq LSYM(Lad_s) - - @ Compute exponent difference. Make largest exponent in r2, - @ corresponding arg in r0, and positive exponent difference in r3. - mov r2, r2, lsr #24 - rsbs r3, r2, r3, lsr #24 - do_it gt, ttt - addgt r2, r2, r3 - eorgt r1, r0, r1 - eorgt r0, r1, r0 - eorgt r1, r0, r1 - do_it lt - rsblt r3, r3, #0 - - @ If exponent difference is too large, return largest argument - @ already in r0. We need up to 25 bit to handle proper rounding - @ of 0x1p25 - 1.1. - cmp r3, #25 - do_it hi - RETc(hi) - - @ Convert mantissa to signed integer. 
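The step below restores the implicit leading 1 above the 23 stored mantissa bits and then negates the value when the sign bit is set, so that the single add which follows covers both same-sign addition and opposite-sign subtraction. Roughly, in C (a sketch only; `bits` stands for the raw IEEE-754 encoding of one operand):

    int32_t m = (bits & 0x007fffff) | 0x00800000;   /* restore hidden bit */
    if (bits & 0x80000000)
        m = -m;                                     /* fold in the sign   */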
- tst r0, #0x80000000 - orr r0, r0, #0x00800000 - bic r0, r0, #0xff000000 - do_it ne - rsbne r0, r0, #0 - tst r1, #0x80000000 - orr r1, r1, #0x00800000 - bic r1, r1, #0xff000000 - do_it ne - rsbne r1, r1, #0 - - @ If exponent == difference, one or both args were denormalized. - @ Since this is not common case, rescale them off line. - teq r2, r3 - beq LSYM(Lad_d) -LSYM(Lad_x): - - @ Compensate for the exponent overlapping the mantissa MSB added later - sub r2, r2, #1 - - @ Shift and add second arg to first arg in r0. - @ Keep leftover bits into r1. - shiftop adds r0 r0 r1 asr r3 ip - rsb r3, r3, #32 - shift1 lsl, r1, r1, r3 - - @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above) - and r3, r0, #0x80000000 - bpl LSYM(Lad_p) -#if defined(__thumb2__) - negs r1, r1 - sbc r0, r0, r0, lsl #1 -#else - rsbs r1, r1, #0 - rsc r0, r0, #0 -#endif - - @ Determine how to normalize the result. -LSYM(Lad_p): - cmp r0, #0x00800000 - bcc LSYM(Lad_a) - cmp r0, #0x01000000 - bcc LSYM(Lad_e) - - @ Result needs to be shifted right. - movs r0, r0, lsr #1 - mov r1, r1, rrx - add r2, r2, #1 - - @ Make sure we did not bust our exponent. - cmp r2, #254 - bhs LSYM(Lad_o) - - @ Our result is now properly aligned into r0, remaining bits in r1. - @ Pack final result together. - @ Round with MSB of r1. If halfway between two numbers, round towards - @ LSB of r0 = 0. -LSYM(Lad_e): - cmp r1, #0x80000000 - adc r0, r0, r2, lsl #23 - do_it eq - biceq r0, r0, #1 - orr r0, r0, r3 - RET - - @ Result must be shifted left and exponent adjusted. -LSYM(Lad_a): - movs r1, r1, lsl #1 - adc r0, r0, r0 - tst r0, #0x00800000 - sub r2, r2, #1 - bne LSYM(Lad_e) - - @ No rounding necessary since r1 will always be 0 at this point. -LSYM(Lad_l): - -#if __ARM_ARCH__ < 5 - - movs ip, r0, lsr #12 - moveq r0, r0, lsl #12 - subeq r2, r2, #12 - tst r0, #0x00ff0000 - moveq r0, r0, lsl #8 - subeq r2, r2, #8 - tst r0, #0x00f00000 - moveq r0, r0, lsl #4 - subeq r2, r2, #4 - tst r0, #0x00c00000 - moveq r0, r0, lsl #2 - subeq r2, r2, #2 - cmp r0, #0x00800000 - movcc r0, r0, lsl #1 - sbcs r2, r2, #0 - -#else - - clz ip, r0 - sub ip, ip, #8 - subs r2, r2, ip - shift1 lsl, r0, r0, ip - -#endif - - @ Final result with sign - @ If exponent negative, denormalize result. - do_it ge, et - addge r0, r0, r2, lsl #23 - rsblt r2, r2, #0 - orrge r0, r0, r3 -#if defined(__thumb2__) - do_it lt, t - lsrlt r0, r0, r2 - orrlt r0, r3, r0 -#else - orrlt r0, r3, r0, lsr r2 -#endif - RET - - @ Fixup and adjust bit position for denormalized arguments. - @ Note that r2 must not remain equal to 0. -LSYM(Lad_d): - teq r2, #0 - eor r1, r1, #0x00800000 - do_it eq, te - eoreq r0, r0, #0x00800000 - addeq r2, r2, #1 - subne r3, r3, #1 - b LSYM(Lad_x) - -LSYM(Lad_s): - mov r3, r1, lsl #1 - - mvns ip, r2, asr #24 - do_it ne - COND(mvn,s,ne) ip, r3, asr #24 - beq LSYM(Lad_i) - - teq r2, r3 - beq 1f - - @ Result is x + 0.0 = x or 0.0 + y = y. - teq r2, #0 - do_it eq - moveq r0, r1 - RET - -1: teq r0, r1 - - @ Result is x - x = 0. - do_it ne, t - movne r0, #0 - RETc(ne) - - @ Result is x + x = 2x. - tst r2, #0xff000000 - bne 2f - movs r0, r0, lsl #1 - do_it cs - orrcs r0, r0, #0x80000000 - RET -2: adds r2, r2, #(2 << 24) - do_it cc, t - addcc r0, r0, #(1 << 23) - RETc(cc) - and r3, r0, #0x80000000 - - @ Overflow: return INF. -LSYM(Lad_o): - orr r0, r3, #0x7f000000 - orr r0, r0, #0x00800000 - RET - - @ At least one of r0/r1 is INF/NAN. 
- @ if r0 != INF/NAN: return r1 (which is INF/NAN) - @ if r1 != INF/NAN: return r0 (which is INF/NAN) - @ if r0 or r1 is NAN: return NAN - @ if opposite sign: return NAN - @ otherwise return r0 (which is INF or -INF) -LSYM(Lad_i): - mvns r2, r2, asr #24 - do_it ne, et - movne r0, r1 - COND(mvn,s,eq) r3, r3, asr #24 - movne r1, r0 - movs r2, r0, lsl #9 - do_it eq, te - COND(mov,s,eq) r3, r1, lsl #9 - teqeq r0, r1 - orrne r0, r0, #0x00400000 @ quiet NAN - RET - - FUNC_END aeabi_frsub - FUNC_END aeabi_fadd - FUNC_END addsf3 - FUNC_END aeabi_fsub - FUNC_END subsf3 - -ARM_FUNC_START floatunsisf -ARM_FUNC_ALIAS aeabi_ui2f floatunsisf - - mov r3, #0 - b 1f - -ARM_FUNC_START floatsisf -ARM_FUNC_ALIAS aeabi_i2f floatsisf - - ands r3, r0, #0x80000000 - do_it mi - rsbmi r0, r0, #0 - -1: movs ip, r0 - do_it eq - RETc(eq) - - @ Add initial exponent to sign - orr r3, r3, #((127 + 23) << 23) - - .ifnc ah, r0 - mov ah, r0 - .endif - mov al, #0 - b 2f - - FUNC_END aeabi_i2f - FUNC_END floatsisf - FUNC_END aeabi_ui2f - FUNC_END floatunsisf - -ARM_FUNC_START floatundisf -ARM_FUNC_ALIAS aeabi_ul2f floatundisf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqs f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - - mov r3, #0 - b 1f - -ARM_FUNC_START floatdisf -ARM_FUNC_ALIAS aeabi_l2f floatdisf - - orrs r2, r0, r1 -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - do_it eq, t - mvfeqs f0, #0.0 -#else - do_it eq -#endif - RETc(eq) - - ands r3, ah, #0x80000000 @ sign bit in r3 - bpl 1f -#if defined(__thumb2__) - negs al, al - sbc ah, ah, ah, lsl #1 -#else - rsbs al, al, #0 - rsc ah, ah, #0 -#endif -1: -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - @ For hard FPA code we want to return via the tail below so that - @ we can return the result in f0 as well as in r0 for backwards - @ compatibility. - str lr, [sp, #-8]! - adr lr, LSYM(f0_ret) -#endif - - movs ip, ah - do_it eq, tt - moveq ip, al - moveq ah, al - moveq al, #0 - - @ Add initial exponent to sign - orr r3, r3, #((127 + 23 + 32) << 23) - do_it eq - subeq r3, r3, #(32 << 23) -2: sub r3, r3, #(1 << 23) - -#if __ARM_ARCH__ < 5 - - mov r2, #23 - cmp ip, #(1 << 16) - do_it hs, t - movhs ip, ip, lsr #16 - subhs r2, r2, #16 - cmp ip, #(1 << 8) - do_it hs, t - movhs ip, ip, lsr #8 - subhs r2, r2, #8 - cmp ip, #(1 << 4) - do_it hs, t - movhs ip, ip, lsr #4 - subhs r2, r2, #4 - cmp ip, #(1 << 2) - do_it hs, e - subhs r2, r2, #2 - sublo r2, r2, ip, lsr #1 - subs r2, r2, ip, lsr #3 - -#else - - clz r2, ip - subs r2, r2, #8 - -#endif - - sub r3, r3, r2, lsl #23 - blt 3f - - shiftop add r3 r3 ah lsl r2 ip - shift1 lsl, ip, al, r2 - rsb r2, r2, #32 - cmp ip, #0x80000000 - shiftop adc r0 r3 al lsr r2 r2 - do_it eq - biceq r0, r0, #1 - RET - -3: add r2, r2, #32 - shift1 lsl, ip, ah, r2 - rsb r2, r2, #32 - orrs al, al, ip, lsl #1 - shiftop adc r0 r3 ah lsr r2 r2 - do_it eq - biceq r0, r0, ip, lsr #31 - RET - -#if !defined (__VFP_FP__) && !defined(__SOFTFP__) - -LSYM(f0_ret): - str r0, [sp, #-4]! - ldfs f0, [sp], #4 - RETLDM - -#endif - - FUNC_END floatdisf - FUNC_END aeabi_l2f - FUNC_END floatundisf - FUNC_END aeabi_ul2f - -#endif /* L_addsubsf3 */ - -#ifdef L_arm_muldivsf3 - -ARM_FUNC_START mulsf3 -ARM_FUNC_ALIAS aeabi_fmul mulsf3 - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - ands r2, ip, r0, lsr #23 - do_it ne, tt - COND(and,s,ne) r3, ip, r1, lsr #23 - teqne r2, ip - teqne r3, ip - beq LSYM(Lml_s) -LSYM(Lml_x): - - @ Add exponents together - add r2, r2, r3 - - @ Determine final sign. 
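For multiplication the sign of the result is simply the XOR of the two operand sign bits, and the biased result exponent is the sum of the two biased exponents minus the bias (the `sbc r2, r2, #127` further down, with a one-off adjustment depending on where the product's leading bit lands). In rough C terms (illustrative names only):

    sign_out = (a_bits ^ b_bits) & 0x80000000;   /* product sign = XOR of signs */
    exp_out  = exp_a + exp_b - 127;              /* remove the doubled bias     */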
- eor ip, r0, r1 - - @ Convert mantissa to unsigned integer. - @ If power of two, branch to a separate path. - @ Make up for final alignment. - movs r0, r0, lsl #9 - do_it ne - COND(mov,s,ne) r1, r1, lsl #9 - beq LSYM(Lml_1) - mov r3, #0x08000000 - orr r0, r3, r0, lsr #5 - orr r1, r3, r1, lsr #5 - -#if __ARM_ARCH__ < 4 - - @ Put sign bit in r3, which will be restored into r0 later. - and r3, ip, #0x80000000 - - @ Well, no way to make it shorter without the umull instruction. - do_push {r3, r4, r5} - mov r4, r0, lsr #16 - mov r5, r1, lsr #16 - bic r0, r0, r4, lsl #16 - bic r1, r1, r5, lsl #16 - mul ip, r4, r5 - mul r3, r0, r1 - mul r0, r5, r0 - mla r0, r4, r1, r0 - adds r3, r3, r0, lsl #16 - adc r1, ip, r0, lsr #16 - do_pop {r0, r4, r5} - -#else - - @ The actual multiplication. - umull r3, r1, r0, r1 - - @ Put final sign in r0. - and r0, ip, #0x80000000 - -#endif - - @ Adjust result upon the MSB position. - cmp r1, #(1 << 23) - do_it cc, tt - movcc r1, r1, lsl #1 - orrcc r1, r1, r3, lsr #31 - movcc r3, r3, lsl #1 - - @ Add sign to result. - orr r0, r0, r1 - - @ Apply exponent bias, check for under/overflow. - sbc r2, r2, #127 - cmp r2, #(254 - 1) - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - cmp r3, #0x80000000 - adc r0, r0, r2, lsl #23 - do_it eq - biceq r0, r0, #1 - RET - - @ Multiplication by 0x1p*: let''s shortcut a lot of code. -LSYM(Lml_1): - teq r0, #0 - and ip, ip, #0x80000000 - do_it eq - moveq r1, r1, lsl #9 - orr r0, ip, r0, lsr #9 - orr r0, r0, r1, lsr #9 - subs r2, r2, #127 - do_it gt, tt - COND(rsb,s,gt) r3, r2, #255 - orrgt r0, r0, r2, lsl #23 - RETc(gt) - - @ Under/overflow: fix things up for the code below. - orr r0, r0, #0x00800000 - mov r3, #0 - subs r2, r2, #1 - -LSYM(Lml_u): - @ Overflow? - bgt LSYM(Lml_o) - - @ Check if denormalized result is possible, otherwise return signed 0. - cmn r2, #(24 + 1) - do_it le, t - bicle r0, r0, #0x7fffffff - RETc(le) - - @ Shift value right, round, etc. - rsb r2, r2, #0 - movs r1, r0, lsl #1 - shift1 lsr, r1, r1, r2 - rsb r2, r2, #32 - shift1 lsl, ip, r0, r2 - movs r0, r1, rrx - adc r0, r0, #0 - orrs r3, r3, ip, lsl #1 - do_it eq - biceq r0, r0, ip, lsr #31 - RET - - @ One or both arguments are denormalized. - @ Scale them leftwards and preserve sign bit. -LSYM(Lml_d): - teq r2, #0 - and ip, r0, #0x80000000 -1: do_it eq, tt - moveq r0, r0, lsl #1 - tsteq r0, #0x00800000 - subeq r2, r2, #1 - beq 1b - orr r0, r0, ip - teq r3, #0 - and ip, r1, #0x80000000 -2: do_it eq, tt - moveq r1, r1, lsl #1 - tsteq r1, #0x00800000 - subeq r3, r3, #1 - beq 2b - orr r1, r1, ip - b LSYM(Lml_x) - -LSYM(Lml_s): - @ Isolate the INF and NAN cases away - and r3, ip, r1, lsr #23 - teq r2, ip - do_it ne - teqne r3, ip - beq 1f - - @ Here, one or more arguments are either denormalized or zero. - bics ip, r0, #0x80000000 - do_it ne - COND(bic,s,ne) ip, r1, #0x80000000 - bne LSYM(Lml_d) - - @ Result is 0, but determine sign anyway. -LSYM(Lml_z): - eor r0, r0, r1 - bic r0, r0, #0x7fffffff - RET - -1: @ One or both args are INF or NAN. - teq r0, #0x0 - do_it ne, ett - teqne r0, #0x80000000 - moveq r0, r1 - teqne r1, #0x0 - teqne r1, #0x80000000 - beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN - teq r2, ip - bne 1f - movs r2, r0, lsl #9 - bne LSYM(Lml_n) @ NAN * <anything> -> NAN -1: teq r3, ip - bne LSYM(Lml_i) - movs r3, r1, lsl #9 - do_it ne - movne r0, r1 - bne LSYM(Lml_n) @ <anything> * NAN -> NAN - - @ Result is INF, but we need to determine its sign. -LSYM(Lml_i): - eor r0, r0, r1 - - @ Overflow: return INF (sign already in r0). 
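The pre-ARMv4 path above has to synthesize the 64-bit mantissa product from 16-bit halves because umull is unavailable. Written out in C the idea is the following (a sketch only; the real sequence keeps everything in 32-bit registers and propagates the carries explicitly, and mul32x32 is just an illustrative name):

    uint64_t mul32x32(uint32_t a, uint32_t b)
    {
        uint32_t al = a & 0xffff, ah = a >> 16;
        uint32_t bl = b & 0xffff, bh = b >> 16;
        uint64_t cross = (uint64_t)ah * bl + (uint64_t)al * bh;
        return ((uint64_t)ah * bh << 32) + (cross << 16) + (uint64_t)al * bl;
    }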
-LSYM(Lml_o): - and r0, r0, #0x80000000 - orr r0, r0, #0x7f000000 - orr r0, r0, #0x00800000 - RET - - @ Return a quiet NAN. -LSYM(Lml_n): - orr r0, r0, #0x7f000000 - orr r0, r0, #0x00c00000 - RET - - FUNC_END aeabi_fmul - FUNC_END mulsf3 - -ARM_FUNC_START divsf3 -ARM_FUNC_ALIAS aeabi_fdiv divsf3 - - @ Mask out exponents, trap any zero/denormal/INF/NAN. - mov ip, #0xff - ands r2, ip, r0, lsr #23 - do_it ne, tt - COND(and,s,ne) r3, ip, r1, lsr #23 - teqne r2, ip - teqne r3, ip - beq LSYM(Ldv_s) -LSYM(Ldv_x): - - @ Substract divisor exponent from dividend''s - sub r2, r2, r3 - - @ Preserve final sign into ip. - eor ip, r0, r1 - - @ Convert mantissa to unsigned integer. - @ Dividend -> r3, divisor -> r1. - movs r1, r1, lsl #9 - mov r0, r0, lsl #9 - beq LSYM(Ldv_1) - mov r3, #0x10000000 - orr r1, r3, r1, lsr #4 - orr r3, r3, r0, lsr #4 - - @ Initialize r0 (result) with final sign bit. - and r0, ip, #0x80000000 - - @ Ensure result will land to known bit position. - @ Apply exponent bias accordingly. - cmp r3, r1 - do_it cc - movcc r3, r3, lsl #1 - adc r2, r2, #(127 - 2) - - @ The actual division loop. - mov ip, #0x00800000 -1: cmp r3, r1 - do_it cs, t - subcs r3, r3, r1 - orrcs r0, r0, ip - cmp r3, r1, lsr #1 - do_it cs, t - subcs r3, r3, r1, lsr #1 - orrcs r0, r0, ip, lsr #1 - cmp r3, r1, lsr #2 - do_it cs, t - subcs r3, r3, r1, lsr #2 - orrcs r0, r0, ip, lsr #2 - cmp r3, r1, lsr #3 - do_it cs, t - subcs r3, r3, r1, lsr #3 - orrcs r0, r0, ip, lsr #3 - movs r3, r3, lsl #4 - do_it ne - COND(mov,s,ne) ip, ip, lsr #4 - bne 1b - - @ Check exponent for under/overflow. - cmp r2, #(254 - 1) - bhi LSYM(Lml_u) - - @ Round the result, merge final exponent. - cmp r3, r1 - adc r0, r0, r2, lsl #23 - do_it eq - biceq r0, r0, #1 - RET - - @ Division by 0x1p*: let''s shortcut a lot of code. -LSYM(Ldv_1): - and ip, ip, #0x80000000 - orr r0, ip, r0, lsr #9 - adds r2, r2, #127 - do_it gt, tt - COND(rsb,s,gt) r3, r2, #255 - orrgt r0, r0, r2, lsl #23 - RETc(gt) - - orr r0, r0, #0x00800000 - mov r3, #0 - subs r2, r2, #1 - b LSYM(Lml_u) - - @ One or both arguments are denormalized. - @ Scale them leftwards and preserve sign bit. -LSYM(Ldv_d): - teq r2, #0 - and ip, r0, #0x80000000 -1: do_it eq, tt - moveq r0, r0, lsl #1 - tsteq r0, #0x00800000 - subeq r2, r2, #1 - beq 1b - orr r0, r0, ip - teq r3, #0 - and ip, r1, #0x80000000 -2: do_it eq, tt - moveq r1, r1, lsl #1 - tsteq r1, #0x00800000 - subeq r3, r3, #1 - beq 2b - orr r1, r1, ip - b LSYM(Ldv_x) - - @ One or both arguments are either INF, NAN, zero or denormalized. -LSYM(Ldv_s): - and r3, ip, r1, lsr #23 - teq r2, ip - bne 1f - movs r2, r0, lsl #9 - bne LSYM(Lml_n) @ NAN / <anything> -> NAN - teq r3, ip - bne LSYM(Lml_i) @ INF / <anything> -> INF - mov r0, r1 - b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN -1: teq r3, ip - bne 2f - movs r3, r1, lsl #9 - beq LSYM(Lml_z) @ <anything> / INF -> 0 - mov r0, r1 - b LSYM(Lml_n) @ <anything> / NAN -> NAN -2: @ If both are nonzero, we need to normalize and resume above. - bics ip, r0, #0x80000000 - do_it ne - COND(bic,s,ne) ip, r1, #0x80000000 - bne LSYM(Ldv_d) - @ One or both arguments are zero. 
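The quotient loop above is a classic restoring division on the aligned mantissas: for each result bit, compare the running dividend against the divisor, subtract and set the quotient bit when it fits, then move on to the next bit; the real loop simply handles four bits per iteration. Schematically (illustrative C, most-significant bit first; dividend and divisor hold the pre-aligned mantissas):

    uint32_t quot = 0;
    for (int i = 0; i < 24; i++) {      /* roughly one bit per result position */
        quot <<= 1;
        if (dividend >= divisor) {
            dividend -= divisor;
            quot |= 1;
        }
        dividend <<= 1;
    }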
- bics r2, r0, #0x80000000 - bne LSYM(Lml_i) @ <non_zero> / 0 -> INF - bics r3, r1, #0x80000000 - bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 - b LSYM(Lml_n) @ 0 / 0 -> NAN - - FUNC_END aeabi_fdiv - FUNC_END divsf3 - -#endif /* L_muldivsf3 */ - -#ifdef L_arm_cmpsf2 - - @ The return value in r0 is - @ - @ 0 if the operands are equal - @ 1 if the first operand is greater than the second, or - @ the operands are unordered and the operation is - @ CMP, LT, LE, NE, or EQ. - @ -1 if the first operand is less than the second, or - @ the operands are unordered and the operation is GT - @ or GE. - @ - @ The Z flag will be set iff the operands are equal. - @ - @ The following registers are clobbered by this function: - @ ip, r0, r1, r2, r3 - -ARM_FUNC_START gtsf2 -ARM_FUNC_ALIAS gesf2 gtsf2 - mov ip, #-1 - b 1f - -ARM_FUNC_START ltsf2 -ARM_FUNC_ALIAS lesf2 ltsf2 - mov ip, #1 - b 1f - -ARM_FUNC_START cmpsf2 -ARM_FUNC_ALIAS nesf2 cmpsf2 -ARM_FUNC_ALIAS eqsf2 cmpsf2 - mov ip, #1 @ how should we specify unordered here? - -1: str ip, [sp, #-4]! - - @ Trap any INF/NAN first. - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - mvns ip, r2, asr #24 - do_it ne - COND(mvn,s,ne) ip, r3, asr #24 - beq 3f - - @ Compare values. - @ Note that 0.0 is equal to -0.0. -2: add sp, sp, #4 - orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag - do_it ne - teqne r0, r1 @ if not 0 compare sign - do_it pl - COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0 - - @ Result: - do_it hi - movhi r0, r1, asr #31 - do_it lo - mvnlo r0, r1, asr #31 - do_it ne - orrne r0, r0, #1 - RET - - @ Look for a NAN. -3: mvns ip, r2, asr #24 - bne 4f - movs ip, r0, lsl #9 - bne 5f @ r0 is NAN -4: mvns ip, r3, asr #24 - bne 2b - movs ip, r1, lsl #9 - beq 2b @ r1 is not NAN -5: ldr r0, [sp], #4 @ return unordered code. - RET - - FUNC_END gesf2 - FUNC_END gtsf2 - FUNC_END lesf2 - FUNC_END ltsf2 - FUNC_END nesf2 - FUNC_END eqsf2 - FUNC_END cmpsf2 - -ARM_FUNC_START aeabi_cfrcmple - - mov ip, r0 - mov r0, r1 - mov r1, ip - b 6f - -ARM_FUNC_START aeabi_cfcmpeq -ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq - - @ The status-returning routines are required to preserve all - @ registers except ip, lr, and cpsr. -6: do_push {r0, r1, r2, r3, lr} - ARM_CALL cmpsf2 - @ Set the Z flag correctly, and the C flag unconditionally. - cmp r0, #0 - @ Clear the C flag if the return value was -1, indicating - @ that the first operand was smaller than the second. - do_it mi - cmnmi r0, #0 - RETLDM "r0, r1, r2, r3" - - FUNC_END aeabi_cfcmple - FUNC_END aeabi_cfcmpeq - FUNC_END aeabi_cfrcmple - -ARM_FUNC_START aeabi_fcmpeq - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfcmple - do_it eq, e - moveq r0, #1 @ Equal to. - movne r0, #0 @ Less than, greater than, or unordered. - RETLDM - - FUNC_END aeabi_fcmpeq - -ARM_FUNC_START aeabi_fcmplt - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfcmple - do_it cc, e - movcc r0, #1 @ Less than. - movcs r0, #0 @ Equal to, greater than, or unordered. - RETLDM - - FUNC_END aeabi_fcmplt - -ARM_FUNC_START aeabi_fcmple - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfcmple - do_it ls, e - movls r0, #1 @ Less than or equal to. - movhi r0, #0 @ Greater than or unordered. - RETLDM - - FUNC_END aeabi_fcmple - -ARM_FUNC_START aeabi_fcmpge - - str lr, [sp, #-8]! - ARM_CALL aeabi_cfrcmple - do_it ls, e - movls r0, #1 @ Operand 2 is less than or equal to operand 1. - movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. - RETLDM - - FUNC_END aeabi_fcmpge - -ARM_FUNC_START aeabi_fcmpgt - - str lr, [sp, #-8]! 
- ARM_CALL aeabi_cfrcmple - do_it cc, e - movcc r0, #1 @ Operand 2 is less than operand 1. - movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, - @ or they are unordered. - RETLDM - - FUNC_END aeabi_fcmpgt - -#endif /* L_cmpsf2 */ - -#ifdef L_arm_unordsf2 - -ARM_FUNC_START unordsf2 -ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 - - mov r2, r0, lsl #1 - mov r3, r1, lsl #1 - mvns ip, r2, asr #24 - bne 1f - movs ip, r0, lsl #9 - bne 3f @ r0 is NAN -1: mvns ip, r3, asr #24 - bne 2f - movs ip, r1, lsl #9 - bne 3f @ r1 is NAN -2: mov r0, #0 @ arguments are ordered. - RET -3: mov r0, #1 @ arguments are unordered. - RET - - FUNC_END aeabi_fcmpun - FUNC_END unordsf2 - -#endif /* L_unordsf2 */ - -#ifdef L_arm_fixsfsi - -ARM_FUNC_START fixsfsi -ARM_FUNC_ALIAS aeabi_f2iz fixsfsi - - @ check exponent range. - mov r2, r0, lsl #1 - cmp r2, #(127 << 24) - bcc 1f @ value is too small - mov r3, #(127 + 31) - subs r2, r3, r2, lsr #24 - bls 2f @ value is too large - - @ scale value - mov r3, r0, lsl #8 - orr r3, r3, #0x80000000 - tst r0, #0x80000000 @ the sign bit - shift1 lsr, r0, r3, r2 - do_it ne - rsbne r0, r0, #0 - RET - -1: mov r0, #0 - RET - -2: cmp r2, #(127 + 31 - 0xff) - bne 3f - movs r2, r0, lsl #9 - bne 4f @ r0 is NAN. -3: ands r0, r0, #0x80000000 @ the sign bit - do_it eq - moveq r0, #0x7fffffff @ the maximum signed positive si - RET - -4: mov r0, #0 @ What should we convert NAN to? - RET - - FUNC_END aeabi_f2iz - FUNC_END fixsfsi - -#endif /* L_fixsfsi */ - -#ifdef L_arm_fixunssfsi - -ARM_FUNC_START fixunssfsi -ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi - - @ check exponent range. - movs r2, r0, lsl #1 - bcs 1f @ value is negative - cmp r2, #(127 << 24) - bcc 1f @ value is too small - mov r3, #(127 + 31) - subs r2, r3, r2, lsr #24 - bmi 2f @ value is too large - - @ scale the value - mov r3, r0, lsl #8 - orr r3, r3, #0x80000000 - shift1 lsr, r0, r3, r2 - RET - -1: mov r0, #0 - RET - -2: cmp r2, #(127 + 31 - 0xff) - bne 3f - movs r2, r0, lsl #9 - bne 4f @ r0 is NAN. -3: mov r0, #0xffffffff @ maximum unsigned si - RET - -4: mov r0, #0 @ What should we convert NAN to? - RET - - FUNC_END aeabi_f2uiz - FUNC_END fixunssfsi - -#endif /* L_fixunssfsi */ diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm deleted file mode 100644 index 2e76c01df4b..00000000000 --- a/gcc/config/arm/lib1funcs.asm +++ /dev/null @@ -1,1829 +0,0 @@ -@ libgcc routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) - -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008, - 2009, 2010 Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. 
*/ - -/* An executable stack is *not* required for these functions. */ -#if defined(__ELF__) && defined(__linux__) -.section .note.GNU-stack,"",%progbits -.previous -#endif /* __ELF__ and __linux__ */ - -#ifdef __ARM_EABI__ -/* Some attributes that are common to all routines in this file. */ - /* Tag_ABI_align_needed: This code does not require 8-byte - alignment from the caller. */ - /* .eabi_attribute 24, 0 -- default setting. */ - /* Tag_ABI_align_preserved: This code preserves 8-byte - alignment in any callee. */ - .eabi_attribute 25, 1 -#endif /* __ARM_EABI__ */ -/* ------------------------------------------------------------------------ */ - -/* We need to know what prefix to add to function names. */ - -#ifndef __USER_LABEL_PREFIX__ -#error __USER_LABEL_PREFIX__ not defined -#endif - -/* ANSI concatenation macros. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ - -#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) - -#ifdef __ELF__ -#ifdef __thumb__ -#define __PLT__ /* Not supported in Thumb assembler (for now). */ -#elif defined __vxworks && !defined __PIC__ -#define __PLT__ /* Not supported by the kernel loader. */ -#else -#define __PLT__ (PLT) -#endif -#define TYPE(x) .type SYM(x),function -#define SIZE(x) .size SYM(x), . - SYM(x) -#define LSYM(x) .x -#else -#define __PLT__ -#define TYPE(x) -#define SIZE(x) -#define LSYM(x) x -#endif - -/* Function end macros. Variants for interworking. */ - -#if defined(__ARM_ARCH_2__) -# define __ARM_ARCH__ 2 -#endif - -#if defined(__ARM_ARCH_3__) -# define __ARM_ARCH__ 3 -#endif - -#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \ - || defined(__ARM_ARCH_4T__) -/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with - long multiply instructions. That includes v3M. */ -# define __ARM_ARCH__ 4 -#endif - -#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ - || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) -# define __ARM_ARCH__ 5 -#endif - -#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ - || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ - || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ - || defined(__ARM_ARCH_6M__) -# define __ARM_ARCH__ 6 -#endif - -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ - || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ - || defined(__ARM_ARCH_7EM__) -# define __ARM_ARCH__ 7 -#endif - -#ifndef __ARM_ARCH__ -#error Unable to determine architecture. -#endif - -/* There are times when we might prefer Thumb1 code even if ARM code is - permitted, for example, the code might be smaller, or there might be - interworking problems with switching to ARM state if interworking is - disabled. */ -#if (defined(__thumb__) \ - && !defined(__thumb2__) \ - && (!defined(__THUMB_INTERWORK__) \ - || defined (__OPTIMIZE_SIZE__) \ - || defined(__ARM_ARCH_6M__))) -# define __prefer_thumb__ -#endif - -/* How to return from a function call depends on the architecture variant. */ - -#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__) - -# define RET bx lr -# define RETc(x) bx##x lr - -/* Special precautions for interworking on armv4t. */ -# if (__ARM_ARCH__ == 4) - -/* Always use bx, not ldr pc. */ -# if (defined(__thumb__) || defined(__THUMB_INTERWORK__)) -# define __INTERWORKING__ -# endif /* __THUMB__ || __THUMB_INTERWORK__ */ - -/* Include thumb stub before arm mode code. 
*/ -# if defined(__thumb__) && !defined(__THUMB_INTERWORK__) -# define __INTERWORKING_STUBS__ -# endif /* __thumb__ && !__THUMB_INTERWORK__ */ - -#endif /* __ARM_ARCH == 4 */ - -#else - -# define RET mov pc, lr -# define RETc(x) mov##x pc, lr - -#endif - -.macro cfi_pop advance, reg, cfa_offset -#ifdef __ELF__ - .pushsection .debug_frame - .byte 0x4 /* DW_CFA_advance_loc4 */ - .4byte \advance - .byte (0xc0 | \reg) /* DW_CFA_restore */ - .byte 0xe /* DW_CFA_def_cfa_offset */ - .uleb128 \cfa_offset - .popsection -#endif -.endm -.macro cfi_push advance, reg, offset, cfa_offset -#ifdef __ELF__ - .pushsection .debug_frame - .byte 0x4 /* DW_CFA_advance_loc4 */ - .4byte \advance - .byte (0x80 | \reg) /* DW_CFA_offset */ - .uleb128 (\offset / -4) - .byte 0xe /* DW_CFA_def_cfa_offset */ - .uleb128 \cfa_offset - .popsection -#endif -.endm -.macro cfi_start start_label, end_label -#ifdef __ELF__ - .pushsection .debug_frame -LSYM(Lstart_frame): - .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE -LSYM(Lstart_cie): - .4byte 0xffffffff @ CIE Identifier Tag - .byte 0x1 @ CIE Version - .ascii "\0" @ CIE Augmentation - .uleb128 0x1 @ CIE Code Alignment Factor - .sleb128 -4 @ CIE Data Alignment Factor - .byte 0xe @ CIE RA Column - .byte 0xc @ DW_CFA_def_cfa - .uleb128 0xd - .uleb128 0x0 - - .align 2 -LSYM(Lend_cie): - .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length -LSYM(Lstart_fde): - .4byte LSYM(Lstart_frame) @ FDE CIE offset - .4byte \start_label @ FDE initial location - .4byte \end_label-\start_label @ FDE address range - .popsection -#endif -.endm -.macro cfi_end end_label -#ifdef __ELF__ - .pushsection .debug_frame - .align 2 -LSYM(Lend_fde): - .popsection -\end_label: -#endif -.endm - -/* Don't pass dirn, it's there just to get token pasting right. */ - -.macro RETLDM regs=, cond=, unwind=, dirn=ia -#if defined (__INTERWORKING__) - .ifc "\regs","" - ldr\cond lr, [sp], #8 - .else -# if defined(__thumb2__) - pop\cond {\regs, lr} -# else - ldm\cond\dirn sp!, {\regs, lr} -# endif - .endif - .ifnc "\unwind", "" - /* Mark LR as restored. */ -97: cfi_pop 97b - \unwind, 0xe, 0x0 - .endif - bx\cond lr -#else - /* Caller is responsible for providing IT instruction. */ - .ifc "\regs","" - ldr\cond pc, [sp], #8 - .else -# if defined(__thumb2__) - pop\cond {\regs, pc} -# else - ldm\cond\dirn sp!, {\regs, pc} -# endif - .endif -#endif -.endm - -/* The Unified assembly syntax allows the same code to be assembled for both - ARM and Thumb-2. However this is only supported by recent gas, so define - a set of macros to allow ARM code on older assemblers. */ -#if defined(__thumb2__) -.macro do_it cond, suffix="" - it\suffix \cond -.endm -.macro shift1 op, arg0, arg1, arg2 - \op \arg0, \arg1, \arg2 -.endm -#define do_push push -#define do_pop pop -#define COND(op1, op2, cond) op1 ## op2 ## cond -/* Perform an arithmetic operation with a variable shift operand. This - requires two instructions and a scratch register on Thumb-2. 
*/ -.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp - \shiftop \tmp, \src2, \shiftreg - \name \dest, \src1, \tmp -.endm -#else -.macro do_it cond, suffix="" -.endm -.macro shift1 op, arg0, arg1, arg2 - mov \arg0, \arg1, \op \arg2 -.endm -#define do_push stmfd sp!, -#define do_pop ldmfd sp!, -#define COND(op1, op2, cond) op1 ## cond ## op2 -.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp - \name \dest, \src1, \src2, \shiftop \shiftreg -.endm -#endif - -#ifdef __ARM_EABI__ -.macro ARM_LDIV0 name signed - cmp r0, #0 - .ifc \signed, unsigned - movne r0, #0xffffffff - .else - movgt r0, #0x7fffffff - movlt r0, #0x80000000 - .endif - b SYM (__aeabi_idiv0) __PLT__ -.endm -#else -.macro ARM_LDIV0 name signed - str lr, [sp, #-8]! -98: cfi_push 98b - __\name, 0xe, -0x8, 0x8 - bl SYM (__div0) __PLT__ - mov r0, #0 @ About as wrong as it could be. - RETLDM unwind=98b -.endm -#endif - - -#ifdef __ARM_EABI__ -.macro THUMB_LDIV0 name signed -#if defined(__ARM_ARCH_6M__) - .ifc \signed, unsigned - cmp r0, #0 - beq 1f - mov r0, #0 - mvn r0, r0 @ 0xffffffff -1: - .else - cmp r0, #0 - beq 2f - blt 3f - mov r0, #0 - mvn r0, r0 - lsr r0, r0, #1 @ 0x7fffffff - b 2f -3: mov r0, #0x80 - lsl r0, r0, #24 @ 0x80000000 -2: - .endif - push {r0, r1, r2} - ldr r0, 4f - adr r1, 4f - add r0, r1 - str r0, [sp, #8] - @ We know we are not on armv4t, so pop pc is safe. - pop {r0, r1, pc} - .align 2 -4: - .word __aeabi_idiv0 - 4b -#elif defined(__thumb2__) - .syntax unified - .ifc \signed, unsigned - cbz r0, 1f - mov r0, #0xffffffff -1: - .else - cmp r0, #0 - do_it gt - movgt r0, #0x7fffffff - do_it lt - movlt r0, #0x80000000 - .endif - b.w SYM(__aeabi_idiv0) __PLT__ -#else - .align 2 - bx pc - nop - .arm - cmp r0, #0 - .ifc \signed, unsigned - movne r0, #0xffffffff - .else - movgt r0, #0x7fffffff - movlt r0, #0x80000000 - .endif - b SYM(__aeabi_idiv0) __PLT__ - .thumb -#endif -.endm -#else -.macro THUMB_LDIV0 name signed - push { r1, lr } -98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 - bl SYM (__div0) - mov r0, #0 @ About as wrong as it could be. -#if defined (__INTERWORKING__) - pop { r1, r2 } - bx r2 -#else - pop { r1, pc } -#endif -.endm -#endif - -.macro FUNC_END name - SIZE (__\name) -.endm - -.macro DIV_FUNC_END name signed - cfi_start __\name, LSYM(Lend_div0) -LSYM(Ldiv0): -#ifdef __thumb__ - THUMB_LDIV0 \name \signed -#else - ARM_LDIV0 \name \signed -#endif - cfi_end LSYM(Lend_div0) - FUNC_END \name -.endm - -.macro THUMB_FUNC_START name - .globl SYM (\name) - TYPE (\name) - .thumb_func -SYM (\name): -.endm - -/* Function start macros. Variants for ARM and Thumb. */ - -#ifdef __thumb__ -#define THUMB_FUNC .thumb_func -#define THUMB_CODE .force_thumb -# if defined(__thumb2__) -#define THUMB_SYNTAX .syntax divided -# else -#define THUMB_SYNTAX -# endif -#else -#define THUMB_FUNC -#define THUMB_CODE -#define THUMB_SYNTAX -#endif - -.macro FUNC_START name - .text - .globl SYM (__\name) - TYPE (__\name) - .align 0 - THUMB_CODE - THUMB_FUNC - THUMB_SYNTAX -SYM (__\name): -.endm - -/* Special function that will always be coded in ARM assembly, even if - in Thumb-only compilation. */ - -#if defined(__thumb2__) - -/* For Thumb-2 we build everything in thumb mode. */ -.macro ARM_FUNC_START name - FUNC_START \name - .syntax unified -.endm -#define EQUIV .thumb_set -.macro ARM_CALL name - bl __\name -.endm - -#elif defined(__INTERWORKING_STUBS__) - -.macro ARM_FUNC_START name - FUNC_START \name - bx pc - nop - .arm -/* A hook to tell gdb that we've switched to ARM mode. 
Also used to call - directly from other local arm routines. */ -_L__\name: -.endm -#define EQUIV .thumb_set -/* Branch directly to a function declared with ARM_FUNC_START. - Must be called in arm mode. */ -.macro ARM_CALL name - bl _L__\name -.endm - -#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */ - -#ifdef __ARM_ARCH_6M__ -#define EQUIV .thumb_set -#else -.macro ARM_FUNC_START name - .text - .globl SYM (__\name) - TYPE (__\name) - .align 0 - .arm -SYM (__\name): -.endm -#define EQUIV .set -.macro ARM_CALL name - bl __\name -.endm -#endif - -#endif - -.macro FUNC_ALIAS new old - .globl SYM (__\new) -#if defined (__thumb__) - .thumb_set SYM (__\new), SYM (__\old) -#else - .set SYM (__\new), SYM (__\old) -#endif -.endm - -#ifndef __ARM_ARCH_6M__ -.macro ARM_FUNC_ALIAS new old - .globl SYM (__\new) - EQUIV SYM (__\new), SYM (__\old) -#if defined(__INTERWORKING_STUBS__) - .set SYM (_L__\new), SYM (_L__\old) -#endif -.endm -#endif - -#ifdef __ARMEB__ -#define xxh r0 -#define xxl r1 -#define yyh r2 -#define yyl r3 -#else -#define xxh r1 -#define xxl r0 -#define yyh r3 -#define yyl r2 -#endif - -#ifdef __ARM_EABI__ -.macro WEAK name - .weak SYM (__\name) -.endm -#endif - -#ifdef __thumb__ -/* Register aliases. */ - -work .req r4 @ XXXX is this safe ? -dividend .req r0 -divisor .req r1 -overdone .req r2 -result .req r2 -curbit .req r3 -#endif -#if 0 -ip .req r12 -sp .req r13 -lr .req r14 -pc .req r15 -#endif - -/* ------------------------------------------------------------------------ */ -/* Bodies of the division and modulo routines. */ -/* ------------------------------------------------------------------------ */ -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) - -#if defined (__thumb2__) - clz \curbit, \dividend - clz \result, \divisor - sub \curbit, \result, \curbit - rsb \curbit, \curbit, #31 - adr \result, 1f - add \curbit, \result, \curbit, lsl #4 - mov \result, #0 - mov pc, \curbit -.p2align 3 -1: - .set shift, 32 - .rept 32 - .set shift, shift - 1 - cmp.w \dividend, \divisor, lsl #shift - nop.n - adc.w \result, \result, \result - it cs - subcs.w \dividend, \dividend, \divisor, lsl #shift - .endr -#else - clz \curbit, \dividend - clz \result, \divisor - sub \curbit, \result, \curbit - rsbs \curbit, \curbit, #31 - addne \curbit, \curbit, \curbit, lsl #1 - mov \result, #0 - addne pc, pc, \curbit, lsl #2 - nop - .set shift, 32 - .rept 32 - .set shift, shift - 1 - cmp \dividend, \divisor, lsl #shift - adc \result, \result, \result - subcs \dividend, \dividend, \divisor, lsl #shift - .endr -#endif - -#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ -#if __ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else /* __ARM_ARCH__ < 5 */ - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4-bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. 
-1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif /* __ARM_ARCH__ < 5 */ - - @ Division loop -1: cmp \dividend, \divisor - do_it hs, t - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - do_it hs, t - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - do_it hs, t - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - do_it hs, t - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - do_it ne, t - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ - -.endm -/* ------------------------------------------------------------------------ */ -.macro ARM_DIV2_ORDER divisor, order - -#if __ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm -/* ------------------------------------------------------------------------ */ -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - rsbs \order, \order, #31 - addne pc, pc, \order, lsl #3 - nop - .set shift, 32 - .rept 32 - .set shift, shift - 1 - cmp \dividend, \divisor, lsl #shift - subcs \dividend, \dividend, \divisor, lsl #shift - .endr - -#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ -#if __ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else /* __ARM_ARCH__ < 5 */ - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif /* __ARM_ARCH__ < 5 */ - - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. 
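In other words: the divisor has already been shifted left by `order` bit positions so that it sits just above the dividend, and the code below walks that shift back down, subtracting wherever the shifted divisor still fits; what remains in the dividend register is the remainder. An un-unrolled C rendering of the same idea (a sketch only; the actual macro does four comparisons per pass and uses clz or a computed jump on newer cores):

    while (order >= 0) {
        if (dividend >= divisor)
            dividend -= divisor;    /* keep only the remainder */
        divisor >>= 1;
        order--;
    }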
- subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/substractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: - -#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ - -.endm -/* ------------------------------------------------------------------------ */ -.macro THUMB_DIV_MOD_BODY modulo - @ Load the constant 0x10000000 into our work register. - mov work, #1 - lsl work, #28 -LSYM(Loop1): - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, work - bhs LSYM(Lbignum) - cmp divisor, dividend - bhs LSYM(Lbignum) - lsl divisor, #4 - lsl curbit, #4 - b LSYM(Loop1) -LSYM(Lbignum): - @ Set work to 0x80000000 - lsl work, #3 -LSYM(Loop2): - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, work - bhs LSYM(Loop3) - cmp divisor, dividend - bhs LSYM(Loop3) - lsl divisor, #1 - lsl curbit, #1 - b LSYM(Loop2) -LSYM(Loop3): - @ Test for possible subtractions ... - .if \modulo - @ ... On the final pass, this may subtract too much from the dividend, - @ so keep track of which subtractions are done, we can fix them up - @ afterwards. - mov overdone, #0 - cmp dividend, divisor - blo LSYM(Lover1) - sub dividend, dividend, divisor -LSYM(Lover1): - lsr work, divisor, #1 - cmp dividend, work - blo LSYM(Lover2) - sub dividend, dividend, work - mov ip, curbit - mov work, #1 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover2): - lsr work, divisor, #2 - cmp dividend, work - blo LSYM(Lover3) - sub dividend, dividend, work - mov ip, curbit - mov work, #2 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover3): - lsr work, divisor, #3 - cmp dividend, work - blo LSYM(Lover4) - sub dividend, dividend, work - mov ip, curbit - mov work, #3 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover4): - mov ip, curbit - .else - @ ... and note which bits are done in the result. On the final pass, - @ this may subtract too much from the dividend, but the result will be ok, - @ since the "bit" will have been shifted out at the bottom. 
- cmp dividend, divisor - blo LSYM(Lover1) - sub dividend, dividend, divisor - orr result, result, curbit -LSYM(Lover1): - lsr work, divisor, #1 - cmp dividend, work - blo LSYM(Lover2) - sub dividend, dividend, work - lsr work, curbit, #1 - orr result, work -LSYM(Lover2): - lsr work, divisor, #2 - cmp dividend, work - blo LSYM(Lover3) - sub dividend, dividend, work - lsr work, curbit, #2 - orr result, work -LSYM(Lover3): - lsr work, divisor, #3 - cmp dividend, work - blo LSYM(Lover4) - sub dividend, dividend, work - lsr work, curbit, #3 - orr result, work -LSYM(Lover4): - .endif - - cmp dividend, #0 @ Early termination? - beq LSYM(Lover5) - lsr curbit, #4 @ No, any more bits to do? - beq LSYM(Lover5) - lsr divisor, #4 - b LSYM(Loop3) -LSYM(Lover5): - .if \modulo - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - mov work, #0xe - lsl work, #28 - and overdone, work - beq LSYM(Lgot_result) - - @ If we terminated early, because dividend became zero, then the - @ bit in ip will not be in the bottom nibble, and we should not - @ perform the additions below. We must test for this though - @ (rather relying upon the TSTs to prevent the additions) since - @ the bit in ip could be in the top two bits which might then match - @ with one of the smaller RORs. - mov curbit, ip - mov work, #0x7 - tst curbit, work - beq LSYM(Lgot_result) - - mov curbit, ip - mov work, #3 - ror curbit, work - tst overdone, curbit - beq LSYM(Lover6) - lsr work, divisor, #3 - add dividend, work -LSYM(Lover6): - mov curbit, ip - mov work, #2 - ror curbit, work - tst overdone, curbit - beq LSYM(Lover7) - lsr work, divisor, #2 - add dividend, work -LSYM(Lover7): - mov curbit, ip - mov work, #1 - ror curbit, work - tst overdone, curbit - beq LSYM(Lgot_result) - lsr work, divisor, #1 - add dividend, work - .endif -LSYM(Lgot_result): -.endm -/* ------------------------------------------------------------------------ */ -/* Start of the Real Functions */ -/* ------------------------------------------------------------------------ */ -#ifdef L_udivsi3 - -#if defined(__prefer_thumb__) - - FUNC_START udivsi3 - FUNC_ALIAS aeabi_uidiv udivsi3 - - cmp divisor, #0 - beq LSYM(Ldiv0) -LSYM(udivsi3_skip_div0_test): - mov curbit, #1 - mov result, #0 - - push { work } - cmp dividend, divisor - blo LSYM(Lgot_result) - - THUMB_DIV_MOD_BODY 0 - - mov r0, result - pop { work } - RET - -#else /* ARM version/Thumb-2. */ - - ARM_FUNC_START udivsi3 - ARM_FUNC_ALIAS aeabi_uidiv udivsi3 - - /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily - check for division-by-zero a second time. 
*/ -LSYM(udivsi3_skip_div0_test): - subs r2, r1, #1 - do_it eq - RETc(eq) - bcc LSYM(Ldiv0) - cmp r0, r1 - bls 11f - tst r1, r2 - beq 12f - - ARM_DIV_BODY r0, r1, r2, r3 - - mov r0, r2 - RET - -11: do_it eq, e - moveq r0, #1 - movne r0, #0 - RET - -12: ARM_DIV2_ORDER r1, r2 - - mov r0, r0, lsr r2 - RET - -#endif /* ARM version */ - - DIV_FUNC_END udivsi3 unsigned - -#if defined(__prefer_thumb__) -FUNC_START aeabi_uidivmod - cmp r1, #0 - beq LSYM(Ldiv0) - push {r0, r1, lr} - bl LSYM(udivsi3_skip_div0_test) - POP {r1, r2, r3} - mul r2, r0 - sub r1, r1, r2 - bx r3 -#else -ARM_FUNC_START aeabi_uidivmod - cmp r1, #0 - beq LSYM(Ldiv0) - stmfd sp!, { r0, r1, lr } - bl LSYM(udivsi3_skip_div0_test) - ldmfd sp!, { r1, r2, lr } - mul r3, r2, r0 - sub r1, r1, r3 - RET -#endif - FUNC_END aeabi_uidivmod - -#endif /* L_udivsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_umodsi3 - - FUNC_START umodsi3 - -#ifdef __thumb__ - - cmp divisor, #0 - beq LSYM(Ldiv0) - mov curbit, #1 - cmp dividend, divisor - bhs LSYM(Lover10) - RET - -LSYM(Lover10): - push { work } - - THUMB_DIV_MOD_BODY 1 - - pop { work } - RET - -#else /* ARM version. */ - - subs r2, r1, #1 @ compare divisor with 1 - bcc LSYM(Ldiv0) - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - RETc(ls) - - ARM_MOD_BODY r0, r1, r2, r3 - - RET - -#endif /* ARM version. */ - - DIV_FUNC_END umodsi3 unsigned - -#endif /* L_umodsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_divsi3 - -#if defined(__prefer_thumb__) - - FUNC_START divsi3 - FUNC_ALIAS aeabi_idiv divsi3 - - cmp divisor, #0 - beq LSYM(Ldiv0) -LSYM(divsi3_skip_div0_test): - push { work } - mov work, dividend - eor work, divisor @ Save the sign of the result. - mov ip, work - mov curbit, #1 - mov result, #0 - cmp divisor, #0 - bpl LSYM(Lover10) - neg divisor, divisor @ Loops below use unsigned. -LSYM(Lover10): - cmp dividend, #0 - bpl LSYM(Lover11) - neg dividend, dividend -LSYM(Lover11): - cmp dividend, divisor - blo LSYM(Lgot_result) - - THUMB_DIV_MOD_BODY 0 - - mov r0, result - mov work, ip - cmp work, #0 - bpl LSYM(Lover12) - neg r0, r0 -LSYM(Lover12): - pop { work } - RET - -#else /* ARM/Thumb-2 version. */ - - ARM_FUNC_START divsi3 - ARM_FUNC_ALIAS aeabi_idiv divsi3 - - cmp r1, #0 - beq LSYM(Ldiv0) -LSYM(divsi3_skip_div0_test): - eor ip, r0, r1 @ save the sign of the result. - do_it mi - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - do_it mi - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - do_it mi - rsbmi r0, r0, #0 - RET - -10: teq ip, r0 @ same sign ? 
- do_it mi - rsbmi r0, r0, #0 - RET - -11: do_it lo - movlo r0, #0 - do_it eq,t - moveq r0, ip, asr #31 - orreq r0, r0, #1 - RET - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - do_it mi - rsbmi r0, r0, #0 - RET - -#endif /* ARM version */ - - DIV_FUNC_END divsi3 signed - -#if defined(__prefer_thumb__) -FUNC_START aeabi_idivmod - cmp r1, #0 - beq LSYM(Ldiv0) - push {r0, r1, lr} - bl LSYM(divsi3_skip_div0_test) - POP {r1, r2, r3} - mul r2, r0 - sub r1, r1, r2 - bx r3 -#else -ARM_FUNC_START aeabi_idivmod - cmp r1, #0 - beq LSYM(Ldiv0) - stmfd sp!, { r0, r1, lr } - bl LSYM(divsi3_skip_div0_test) - ldmfd sp!, { r1, r2, lr } - mul r3, r2, r0 - sub r1, r1, r3 - RET -#endif - FUNC_END aeabi_idivmod - -#endif /* L_divsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_modsi3 - - FUNC_START modsi3 - -#ifdef __thumb__ - - mov curbit, #1 - cmp divisor, #0 - beq LSYM(Ldiv0) - bpl LSYM(Lover10) - neg divisor, divisor @ Loops below use unsigned. -LSYM(Lover10): - push { work } - @ Need to save the sign of the dividend, unfortunately, we need - @ work later on. Must do this after saving the original value of - @ the work register, because we will pop this value off first. - push { dividend } - cmp dividend, #0 - bpl LSYM(Lover11) - neg dividend, dividend -LSYM(Lover11): - cmp dividend, divisor - blo LSYM(Lgot_result) - - THUMB_DIV_MOD_BODY 1 - - pop { work } - cmp work, #0 - bpl LSYM(Lover12) - neg dividend, dividend -LSYM(Lover12): - pop { work } - RET - -#else /* ARM version. */ - - cmp r1, #0 - beq LSYM(Ldiv0) - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - RET - -#endif /* ARM version */ - - DIV_FUNC_END modsi3 signed - -#endif /* L_modsi3 */ -/* ------------------------------------------------------------------------ */ -#ifdef L_dvmd_tls - -#ifdef __ARM_EABI__ - WEAK aeabi_idiv0 - WEAK aeabi_ldiv0 - FUNC_START aeabi_idiv0 - FUNC_START aeabi_ldiv0 - RET - FUNC_END aeabi_ldiv0 - FUNC_END aeabi_idiv0 -#else - FUNC_START div0 - RET - FUNC_END div0 -#endif - -#endif /* L_divmodsi_tools */ -/* ------------------------------------------------------------------------ */ -#ifdef L_dvmd_lnx -@ GNU/Linux division-by zero handler. Used in place of L_dvmd_tls - -/* Constant taken from <asm/signal.h>. */ -#define SIGFPE 8 - -#ifdef __ARM_EABI__ - WEAK aeabi_idiv0 - WEAK aeabi_ldiv0 - ARM_FUNC_START aeabi_idiv0 - ARM_FUNC_START aeabi_ldiv0 -#else - ARM_FUNC_START div0 -#endif - - do_push {r1, lr} - mov r0, #SIGFPE - bl SYM(raise) __PLT__ - RETLDM r1 - -#ifdef __ARM_EABI__ - FUNC_END aeabi_ldiv0 - FUNC_END aeabi_idiv0 -#else - FUNC_END div0 -#endif - -#endif /* L_dvmd_lnx */ -#ifdef L_clear_cache -#if defined __ARM_EABI__ && defined __linux__ -@ EABI GNU/Linux call to cacheflush syscall. - ARM_FUNC_START clear_cache - do_push {r7} -#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__) - movw r7, #2 - movt r7, #0xf -#else - mov r7, #0xf0000 - add r7, r7, #2 -#endif - mov r2, #0 - swi 0 - do_pop {r7} - RET - FUNC_END clear_cache -#else -#error "This is only for ARM EABI GNU/Linux" -#endif -#endif /* L_clear_cache */ -/* ------------------------------------------------------------------------ */ -/* Dword shift operations. 
*/ -/* All the following Dword shift variants rely on the fact that - shft xxx, Reg - is in fact done as - shft xxx, (Reg & 255) - so for Reg value in (32...63) and (-1...-31) we will get zero (in the - case of logical shifts) or the sign (for asr). */ - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -/* Prevent __aeabi double-word shifts from being produced on SymbianOS. */ -#ifndef __symbian__ - -#ifdef L_lshrdi3 - - FUNC_START lshrdi3 - FUNC_ALIAS aeabi_llsr lshrdi3 - -#ifdef __thumb__ - lsr al, r2 - mov r3, ah - lsr ah, r2 - mov ip, r3 - sub r2, #32 - lsr r3, r2 - orr al, r3 - neg r2, r2 - mov r3, ip - lsl r3, r2 - orr al, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, lsr r3 - orrmi al, al, ah, lsl ip - mov ah, ah, lsr r2 - RET -#endif - FUNC_END aeabi_llsr - FUNC_END lshrdi3 - -#endif - -#ifdef L_ashrdi3 - - FUNC_START ashrdi3 - FUNC_ALIAS aeabi_lasr ashrdi3 - -#ifdef __thumb__ - lsr al, r2 - mov r3, ah - asr ah, r2 - sub r2, #32 - @ If r2 is negative at this point the following step would OR - @ the sign bit into all of AL. That's not what we want... - bmi 1f - mov ip, r3 - asr r3, r2 - orr al, r3 - mov r3, ip -1: - neg r2, r2 - lsl r3, r2 - orr al, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, asr r3 - orrmi al, al, ah, lsl ip - mov ah, ah, asr r2 - RET -#endif - - FUNC_END aeabi_lasr - FUNC_END ashrdi3 - -#endif - -#ifdef L_ashldi3 - - FUNC_START ashldi3 - FUNC_ALIAS aeabi_llsl ashldi3 - -#ifdef __thumb__ - lsl ah, r2 - mov r3, al - lsl al, r2 - mov ip, r3 - sub r2, #32 - lsl r3, r2 - orr ah, r3 - neg r2, r2 - mov r3, ip - lsr r3, r2 - orr ah, r3 - RET -#else - subs r3, r2, #32 - rsb ip, r2, #32 - movmi ah, ah, lsl r2 - movpl ah, al, lsl r3 - orrmi ah, ah, al, lsr ip - mov al, al, lsl r2 - RET -#endif - FUNC_END aeabi_llsl - FUNC_END ashldi3 - -#endif - -#endif /* __symbian__ */ - -#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \ - || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) -#define HAVE_ARM_CLZ 1 -#endif - -#ifdef L_clzsi2 -#if defined(__ARM_ARCH_6M__) -FUNC_START clzsi2 - mov r1, #28 - mov r3, #1 - lsl r3, r3, #16 - cmp r0, r3 /* 0x10000 */ - bcc 2f - lsr r0, r0, #16 - sub r1, r1, #16 -2: lsr r3, r3, #8 - cmp r0, r3 /* #0x100 */ - bcc 2f - lsr r0, r0, #8 - sub r1, r1, #8 -2: lsr r3, r3, #4 - cmp r0, r3 /* #0x10 */ - bcc 2f - lsr r0, r0, #4 - sub r1, r1, #4 -2: adr r2, 1f - ldrb r0, [r2, r0] - add r0, r0, r1 - bx lr -.align 2 -1: -.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 - FUNC_END clzsi2 -#else -ARM_FUNC_START clzsi2 -# if defined(HAVE_ARM_CLZ) - clz r0, r0 - RET -# else - mov r1, #28 - cmp r0, #0x10000 - do_it cs, t - movcs r0, r0, lsr #16 - subcs r1, r1, #16 - cmp r0, #0x100 - do_it cs, t - movcs r0, r0, lsr #8 - subcs r1, r1, #8 - cmp r0, #0x10 - do_it cs, t - movcs r0, r0, lsr #4 - subcs r1, r1, #4 - adr r2, 1f - ldrb r0, [r2, r0] - add r0, r0, r1 - RET -.align 2 -1: -.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 -# endif /* !HAVE_ARM_CLZ */ - FUNC_END clzsi2 -#endif -#endif /* L_clzsi2 */ - -#ifdef L_clzdi2 -#if !defined(HAVE_ARM_CLZ) - -# if defined(__ARM_ARCH_6M__) -FUNC_START clzdi2 - push {r4, lr} -# else -ARM_FUNC_START clzdi2 - do_push {r4, lr} -# endif - cmp xxh, #0 - bne 1f -# ifdef __ARMEB__ - mov r0, xxl - bl __clzsi2 - add r0, r0, #32 - b 2f -1: - bl __clzsi2 -# else - bl __clzsi2 - add r0, r0, #32 - b 2f -1: - mov r0, xxh - bl __clzsi2 -# endif -2: -# 
if defined(__ARM_ARCH_6M__) - pop {r4, pc} -# else - RETLDM r4 -# endif - FUNC_END clzdi2 - -#else /* HAVE_ARM_CLZ */ - -ARM_FUNC_START clzdi2 - cmp xxh, #0 - do_it eq, et - clzeq r0, xxl - clzne r0, xxh - addeq r0, r0, #32 - RET - FUNC_END clzdi2 - -#endif -#endif /* L_clzdi2 */ - -/* ------------------------------------------------------------------------ */ -/* These next two sections are here despite the fact that they contain Thumb - assembler because their presence allows interworked code to be linked even - when the GCC library is this one. */ - -/* Do not build the interworking functions when the target architecture does - not support Thumb instructions. (This can be a multilib option). */ -#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\ - || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \ - || __ARM_ARCH__ >= 6 - -#if defined L_call_via_rX - -/* These labels & instructions are used by the Arm/Thumb interworking code. - The address of function to be called is loaded into a register and then - one of these labels is called via a BL instruction. This puts the - return address into the link register with the bottom bit set, and the - code here switches to the correct mode before executing the function. */ - - .text - .align 0 - .force_thumb - -.macro call_via register - THUMB_FUNC_START _call_via_\register - - bx \register - nop - - SIZE (_call_via_\register) -.endm - - call_via r0 - call_via r1 - call_via r2 - call_via r3 - call_via r4 - call_via r5 - call_via r6 - call_via r7 - call_via r8 - call_via r9 - call_via sl - call_via fp - call_via ip - call_via sp - call_via lr - -#endif /* L_call_via_rX */ - -/* Don't bother with the old interworking routines for Thumb-2. */ -/* ??? Maybe only omit these on "m" variants. */ -#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__) - -#if defined L_interwork_call_via_rX - -/* These labels & instructions are used by the Arm/Thumb interworking code, - when the target address is in an unknown instruction set. The address - of function to be called is loaded into a register and then one of these - labels is called via a BL instruction. This puts the return address - into the link register with the bottom bit set, and the code here - switches to the correct mode before executing the function. Unfortunately - the target code cannot be relied upon to return via a BX instruction, so - instead we have to store the resturn address on the stack and allow the - called function to return here instead. Upon return we recover the real - return address and use a BX to get back to Thumb mode. - - There are three variations of this code. The first, - _interwork_call_via_rN(), will push the return address onto the - stack and pop it in _arm_return(). It should only be used if all - arguments are passed in registers. - - The second, _interwork_r7_call_via_rN(), instead stores the return - address at [r7, #-4]. It is the caller's responsibility to ensure - that this address is valid and contains no useful data. - - The third, _interwork_r11_call_via_rN(), works in the same way but - uses r11 instead of r7. It is useful if the caller does not really - need a frame pointer. */ - - .text - .align 0 - - .code 32 - .globl _arm_return -LSYM(Lstart_arm_return): - cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return) - cfi_push 0, 0xe, -0x8, 0x8 - nop @ This nop is for the benefit of debuggers, so that - @ backtraces will use the correct unwind information. 
-_arm_return: - RETLDM unwind=LSYM(Lstart_arm_return) - cfi_end LSYM(Lend_arm_return) - - .globl _arm_return_r7 -_arm_return_r7: - ldr lr, [r7, #-4] - bx lr - - .globl _arm_return_r11 -_arm_return_r11: - ldr lr, [r11, #-4] - bx lr - -.macro interwork_with_frame frame, register, name, return - .code 16 - - THUMB_FUNC_START \name - - bx pc - nop - - .code 32 - tst \register, #1 - streq lr, [\frame, #-4] - adreq lr, _arm_return_\frame - bx \register - - SIZE (\name) -.endm - -.macro interwork register - .code 16 - - THUMB_FUNC_START _interwork_call_via_\register - - bx pc - nop - - .code 32 - .globl LSYM(Lchange_\register) -LSYM(Lchange_\register): - tst \register, #1 - streq lr, [sp, #-8]! - adreq lr, _arm_return - bx \register - - SIZE (_interwork_call_via_\register) - - interwork_with_frame r7,\register,_interwork_r7_call_via_\register - interwork_with_frame r11,\register,_interwork_r11_call_via_\register -.endm - - interwork r0 - interwork r1 - interwork r2 - interwork r3 - interwork r4 - interwork r5 - interwork r6 - interwork r7 - interwork r8 - interwork r9 - interwork sl - interwork fp - interwork ip - interwork sp - - /* The LR case has to be handled a little differently... */ - .code 16 - - THUMB_FUNC_START _interwork_call_via_lr - - bx pc - nop - - .code 32 - .globl .Lchange_lr -.Lchange_lr: - tst lr, #1 - stmeqdb r13!, {lr, pc} - mov ip, lr - adreq lr, _arm_return - bx ip - - SIZE (_interwork_call_via_lr) - -#endif /* L_interwork_call_via_rX */ -#endif /* !__thumb2__ */ - -/* Functions to support compact pic switch tables in thumb1 state. - All these routines take an index into the table in r0. The - table is at LR & ~1 (but this must be rounded up in the case - of 32-bit entires). They are only permitted to clobber r12 - and r14 and r0 must be preserved on exit. */ -#ifdef L_thumb1_case_sqi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_sqi - push {r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r1, r1, #1 - ldrsb r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r1} - bx lr - SIZE (__gnu_thumb1_case_sqi) -#endif - -#ifdef L_thumb1_case_uqi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_uqi - push {r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r1, r1, #1 - ldrb r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r1} - bx lr - SIZE (__gnu_thumb1_case_uqi) -#endif - -#ifdef L_thumb1_case_shi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_shi - push {r0, r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r0, r0, #1 - lsls r1, r1, #1 - ldrsh r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r0, r1} - bx lr - SIZE (__gnu_thumb1_case_shi) -#endif - -#ifdef L_thumb1_case_uhi - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_uhi - push {r0, r1} - mov r1, lr - lsrs r1, r1, #1 - lsls r0, r0, #1 - lsls r1, r1, #1 - ldrh r1, [r1, r0] - lsls r1, r1, #1 - add lr, lr, r1 - pop {r0, r1} - bx lr - SIZE (__gnu_thumb1_case_uhi) -#endif - -#ifdef L_thumb1_case_si - - .text - .align 0 - .force_thumb - .syntax unified - THUMB_FUNC_START __gnu_thumb1_case_si - push {r0, r1} - mov r1, lr - adds.n r1, r1, #2 /* Align to word. */ - lsrs r1, r1, #2 - lsls r0, r0, #2 - lsls r1, r1, #2 - ldr r0, [r1, r0] - adds r0, r0, r1 - mov lr, r0 - pop {r0, r1} - mov pc, lr /* We know we were called from thumb code. */ - SIZE (__gnu_thumb1_case_si) -#endif - -#endif /* Arch supports thumb. 
*/ - -#ifndef __symbian__ -#ifndef __ARM_ARCH_6M__ -#include "ieee754-df.S" -#include "ieee754-sf.S" -#include "bpabi.S" -#else /* __ARM_ARCH_6M__ */ -#include "bpabi-v6m.S" -#endif /* __ARM_ARCH_6M__ */ -#endif /* !__symbian__ */ diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h index a3830955948..80bd8259375 100644 --- a/gcc/config/arm/linux-eabi.h +++ b/gcc/config/arm/linux-eabi.h @@ -97,7 +97,7 @@ #undef LIBGCC_SPEC /* Clear the instruction cache from `beg' to `end'. This is - implemented in lib1funcs.asm, so ensure an error if this definition + implemented in lib1funcs.S, so ensure an error if this definition is used. */ #undef CLEAR_INSN_CACHE #define CLEAR_INSN_CACHE(BEG, END) not_used diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index b970ec26a35..a9a174d473d 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -40,9 +40,6 @@ MD_INCLUDES= $(srcdir)/config/arm/arm-tune.md \ $(srcdir)/config/arm/thumb2.md \ $(srcdir)/config/arm/arm-fixed.md -LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \ - _thumb1_case_uhi _thumb1_case_si s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \ s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) diff --git a/gcc/config/arm/t-arm-elf b/gcc/config/arm/t-arm-elf index bfcf6ffd939..a605d26244f 100644 --- a/gcc/config/arm/t-arm-elf +++ b/gcc/config/arm/t-arm-elf @@ -17,20 +17,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -# For most CPUs we have an assembly soft-float implementations. -# However this is not true for ARMv6M. Here we want to use the soft-fp C -# implementation. The soft-fp code is only build for ARMv6M. This pulls -# in the asm implementation for other CPUs. -LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \ - _call_via_rX _interwork_call_via_rX \ - _lshrdi3 _ashrdi3 _ashldi3 \ - _arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \ - _arm_fixdfsi _arm_fixunsdfsi \ - _arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \ - _arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \ - _arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \ - _clzsi2 _clzdi2 - MULTILIB_OPTIONS = marm/mthumb MULTILIB_DIRNAMES = arm thumb MULTILIB_EXCEPTIONS = diff --git a/gcc/config/arm/t-bpabi b/gcc/config/arm/t-bpabi index 047525682fc..c9d5ed4d674 100644 --- a/gcc/config/arm/t-bpabi +++ b/gcc/config/arm/t-bpabi @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -# Add the bpabi.S functions. -LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod - # Add the BPABI C functions. LIB2FUNCS_EXTRA = $(srcdir)/config/arm/bpabi.c \ $(srcdir)/config/arm/unaligned-funcs.c diff --git a/gcc/config/arm/t-linux b/gcc/config/arm/t-linux index a0c5110f0c0..a204834014e 100644 --- a/gcc/config/arm/t-linux +++ b/gcc/config/arm/t-linux @@ -21,10 +21,6 @@ # difference. 
TARGET_LIBGCC2_CFLAGS = -fomit-frame-pointer -fPIC -LIB1ASMSRC = arm/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \ - _arm_addsubdf3 _arm_addsubsf3 - # MULTILIB_OPTIONS = mfloat-abi=hard/mfloat-abi=soft # MULTILIB_DIRNAMES = hard-float soft-float diff --git a/gcc/config/arm/t-linux-eabi b/gcc/config/arm/t-linux-eabi index fed979e980b..3030229fafa 100644 --- a/gcc/config/arm/t-linux-eabi +++ b/gcc/config/arm/t-linux-eabi @@ -28,8 +28,5 @@ MULTILIB_DIRNAMES = #MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te #MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te* -# Use a version of div0 which raises SIGFPE, and a special __clear_cache. -LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache - LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic.c LIB2FUNCS_STATIC_EXTRA += $(srcdir)/config/arm/linux-atomic-64bit.c diff --git a/gcc/config/arm/t-strongarm-elf b/gcc/config/arm/t-strongarm-elf index 95680031e54..4d51e660c8b 100644 --- a/gcc/config/arm/t-strongarm-elf +++ b/gcc/config/arm/t-strongarm-elf @@ -17,8 +17,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2 - MULTILIB_OPTIONS = mlittle-endian/mbig-endian mfloat-abi=hard/mfloat-abi=soft MULTILIB_DIRNAMES = le be fpu soft MULTILIB_EXCEPTIONS = diff --git a/gcc/config/arm/t-symbian b/gcc/config/arm/t-symbian index cf716147849..736a01d10f4 100644 --- a/gcc/config/arm/t-symbian +++ b/gcc/config/arm/t-symbian @@ -16,20 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 - -# These functions have __aeabi equivalents and will never be called by GCC. -# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being -# used -- and we make sure that definitions are not available in lib1funcs.asm, -# either, so they end up undefined. -LIB1ASMFUNCS += \ - _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \ - _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \ - _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \ - _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \ - _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \ - _fixsfsi _fixunssfsi - EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h # Include half-float helpers. LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/arm/fp16.c diff --git a/gcc/config/arm/t-vxworks b/gcc/config/arm/t-vxworks index 8ac0d9bcec5..0900ffe15ed 100644 --- a/gcc/config/arm/t-vxworks +++ b/gcc/config/arm/t-vxworks @@ -16,8 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 - MULTILIB_OPTIONS = \ mrtp fPIC \ t4/t4be/t4t/t4tbe/t5/t5be/t5t/t5tbe/tstrongarm/txscale/txscalebe diff --git a/gcc/config/arm/t-wince-pe b/gcc/config/arm/t-wince-pe index 9ce1f313140..8a8c65fd396 100644 --- a/gcc/config/arm/t-wince-pe +++ b/gcc/config/arm/t-wince-pe @@ -17,8 +17,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. 
-LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 - pe.o: $(srcdir)/config/arm/pe.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RTL_H) output.h flags.h $(TREE_H) expr.h $(TM_P_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S deleted file mode 100644 index 8c369c96a77..00000000000 --- a/gcc/config/avr/libgcc.S +++ /dev/null @@ -1,1533 +0,0 @@ -/* -*- Mode: Asm -*- */ -/* Copyright (C) 1998, 1999, 2000, 2007, 2008, 2009 - Free Software Foundation, Inc. - Contributed by Denis Chertykov <chertykov@gmail.com> - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -#define __zero_reg__ r1 -#define __tmp_reg__ r0 -#define __SREG__ 0x3f -#define __SP_H__ 0x3e -#define __SP_L__ 0x3d -#define __RAMPZ__ 0x3B -#define __EIND__ 0x3C - -/* Most of the functions here are called directly from avr.md - patterns, instead of using the standard libcall mechanisms. - This can make better code because GCC knows exactly which - of the call-used registers (not all of them) are clobbered. */ - -/* FIXME: At present, there is no SORT directive in the linker - script so that we must not assume that different modules - in the same input section like .libgcc.text.mul will be - located close together. Therefore, we cannot use - RCALL/RJMP to call a function like __udivmodhi4 from - __divmodhi4 and have to use lengthy XCALL/XJMP even - though they are in the same input section and all same - input sections together are small enough to reach every - location with a RCALL/RJMP instruction. */ - - .macro mov_l r_dest, r_src -#if defined (__AVR_HAVE_MOVW__) - movw \r_dest, \r_src -#else - mov \r_dest, \r_src -#endif - .endm - - .macro mov_h r_dest, r_src -#if defined (__AVR_HAVE_MOVW__) - ; empty -#else - mov \r_dest, \r_src -#endif - .endm - -#if defined (__AVR_HAVE_JMP_CALL__) -#define XCALL call -#define XJMP jmp -#else -#define XCALL rcall -#define XJMP rjmp -#endif - -.macro DEFUN name -.global \name -.func \name -\name: -.endm - -.macro ENDF name -.size \name, .-\name -.endfunc -.endm - - -.section .text.libgcc.mul, "ax", @progbits - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. 
*/ -#if !defined (__AVR_HAVE_MUL__) -/******************************************************* - Multiplication 8 x 8 without MUL -*******************************************************/ -#if defined (L_mulqi3) - -#define r_arg2 r22 /* multiplicand */ -#define r_arg1 r24 /* multiplier */ -#define r_res __tmp_reg__ /* result */ - -DEFUN __mulqi3 - clr r_res ; clear result -__mulqi3_loop: - sbrc r_arg1,0 - add r_res,r_arg2 - add r_arg2,r_arg2 ; shift multiplicand - breq __mulqi3_exit ; while multiplicand != 0 - lsr r_arg1 ; - brne __mulqi3_loop ; exit if multiplier = 0 -__mulqi3_exit: - mov r_arg1,r_res ; result to return register - ret -ENDF __mulqi3 - -#undef r_arg2 -#undef r_arg1 -#undef r_res - -#endif /* defined (L_mulqi3) */ - -#if defined (L_mulqihi3) -DEFUN __mulqihi3 - clr r25 - sbrc r24, 7 - dec r25 - clr r23 - sbrc r22, 7 - dec r22 - XJMP __mulhi3 -ENDF __mulqihi3: -#endif /* defined (L_mulqihi3) */ - -#if defined (L_umulqihi3) -DEFUN __umulqihi3 - clr r25 - clr r23 - XJMP __mulhi3 -ENDF __umulqihi3 -#endif /* defined (L_umulqihi3) */ - -/******************************************************* - Multiplication 16 x 16 without MUL -*******************************************************/ -#if defined (L_mulhi3) -#define r_arg1L r24 /* multiplier Low */ -#define r_arg1H r25 /* multiplier High */ -#define r_arg2L r22 /* multiplicand Low */ -#define r_arg2H r23 /* multiplicand High */ -#define r_resL __tmp_reg__ /* result Low */ -#define r_resH r21 /* result High */ - -DEFUN __mulhi3 - clr r_resH ; clear result - clr r_resL ; clear result -__mulhi3_loop: - sbrs r_arg1L,0 - rjmp __mulhi3_skip1 - add r_resL,r_arg2L ; result + multiplicand - adc r_resH,r_arg2H -__mulhi3_skip1: - add r_arg2L,r_arg2L ; shift multiplicand - adc r_arg2H,r_arg2H - - cp r_arg2L,__zero_reg__ - cpc r_arg2H,__zero_reg__ - breq __mulhi3_exit ; while multiplicand != 0 - - lsr r_arg1H ; gets LSB of multiplier - ror r_arg1L - sbiw r_arg1L,0 - brne __mulhi3_loop ; exit if multiplier = 0 -__mulhi3_exit: - mov r_arg1H,r_resH ; result to return register - mov r_arg1L,r_resL - ret -ENDF __mulhi3 - -#undef r_arg1L -#undef r_arg1H -#undef r_arg2L -#undef r_arg2H -#undef r_resL -#undef r_resH - -#endif /* defined (L_mulhi3) */ - -/******************************************************* - Widening Multiplication 32 = 16 x 16 without MUL -*******************************************************/ - -#if defined (L_mulhisi3) -DEFUN __mulhisi3 -;;; FIXME: This is dead code (noone calls it) - mov_l r18, r24 - mov_h r19, r25 - clr r24 - sbrc r23, 7 - dec r24 - mov r25, r24 - clr r20 - sbrc r19, 7 - dec r20 - mov r21, r20 - XJMP __mulsi3 -ENDF __mulhisi3 -#endif /* defined (L_mulhisi3) */ - -#if defined (L_umulhisi3) -DEFUN __umulhisi3 -;;; FIXME: This is dead code (noone calls it) - mov_l r18, r24 - mov_h r19, r25 - clr r24 - clr r25 - mov_l r20, r24 - mov_h r21, r25 - XJMP __mulsi3 -ENDF __umulhisi3 -#endif /* defined (L_umulhisi3) */ - -#if defined (L_mulsi3) -/******************************************************* - Multiplication 32 x 32 without MUL -*******************************************************/ -#define r_arg1L r22 /* multiplier Low */ -#define r_arg1H r23 -#define r_arg1HL r24 -#define r_arg1HH r25 /* multiplier High */ - -#define r_arg2L r18 /* multiplicand Low */ -#define r_arg2H r19 -#define r_arg2HL r20 -#define r_arg2HH r21 /* multiplicand High */ - -#define r_resL r26 /* result Low */ -#define r_resH r27 -#define r_resHL r30 -#define r_resHH r31 /* result High */ - -DEFUN __mulsi3 - clr r_resHH ; clear result 
- clr r_resHL ; clear result - clr r_resH ; clear result - clr r_resL ; clear result -__mulsi3_loop: - sbrs r_arg1L,0 - rjmp __mulsi3_skip1 - add r_resL,r_arg2L ; result + multiplicand - adc r_resH,r_arg2H - adc r_resHL,r_arg2HL - adc r_resHH,r_arg2HH -__mulsi3_skip1: - add r_arg2L,r_arg2L ; shift multiplicand - adc r_arg2H,r_arg2H - adc r_arg2HL,r_arg2HL - adc r_arg2HH,r_arg2HH - - lsr r_arg1HH ; gets LSB of multiplier - ror r_arg1HL - ror r_arg1H - ror r_arg1L - brne __mulsi3_loop - sbiw r_arg1HL,0 - cpc r_arg1H,r_arg1L - brne __mulsi3_loop ; exit if multiplier = 0 -__mulsi3_exit: - mov_h r_arg1HH,r_resHH ; result to return register - mov_l r_arg1HL,r_resHL - mov_h r_arg1H,r_resH - mov_l r_arg1L,r_resL - ret -ENDF __mulsi3 - -#undef r_arg1L -#undef r_arg1H -#undef r_arg1HL -#undef r_arg1HH - -#undef r_arg2L -#undef r_arg2H -#undef r_arg2HL -#undef r_arg2HH - -#undef r_resL -#undef r_resH -#undef r_resHL -#undef r_resHH - -#endif /* defined (L_mulsi3) */ - -#endif /* !defined (__AVR_HAVE_MUL__) */ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -#if defined (__AVR_HAVE_MUL__) -#define A0 26 -#define B0 18 -#define C0 22 - -#define A1 A0+1 - -#define B1 B0+1 -#define B2 B0+2 -#define B3 B0+3 - -#define C1 C0+1 -#define C2 C0+2 -#define C3 C0+3 - -/******************************************************* - Widening Multiplication 32 = 16 x 16 -*******************************************************/ - -#if defined (L_mulhisi3) -;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18 -;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __mulhisi3 - XCALL __umulhisi3 - ;; Sign-extend B - tst B1 - brpl 1f - sub C2, A0 - sbc C3, A1 -1: ;; Sign-extend A - XJMP __usmulhisi3_tail -ENDF __mulhisi3 -#endif /* L_mulhisi3 */ - -#if defined (L_usmulhisi3) -;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18 -;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __usmulhisi3 - XCALL __umulhisi3 - ;; FALLTHRU -ENDF __usmulhisi3 - -DEFUN __usmulhisi3_tail - ;; Sign-extend A - sbrs A1, 7 - ret - sub C2, B0 - sbc C3, B1 - ret -ENDF __usmulhisi3_tail -#endif /* L_usmulhisi3 */ - -#if defined (L_umulhisi3) -;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18 -;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __umulhisi3 - mul A0, B0 - movw C0, r0 - mul A1, B1 - movw C2, r0 - mul A0, B1 - rcall 1f - mul A1, B0 -1: add C1, r0 - adc C2, r1 - clr __zero_reg__ - adc C3, __zero_reg__ - ret -ENDF __umulhisi3 -#endif /* L_umulhisi3 */ - -/******************************************************* - Widening Multiplication 32 = 16 x 32 -*******************************************************/ - -#if defined (L_mulshisi3) -;;; R25:R22 = (signed long) R27:R26 * R21:R18 -;;; (C3:C0) = (signed long) A1:A0 * B3:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __mulshisi3 -#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ - ;; Some cores have problem skipping 2-word instruction - tst A1 - brmi __mulohisi3 -#else - sbrs A1, 7 -#endif /* __AVR_HAVE_JMP_CALL__ */ - XJMP __muluhisi3 - ;; FALLTHRU -ENDF __mulshisi3 - -;;; R25:R22 = (one-extended long) R27:R26 * R21:R18 -;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __mulohisi3 - XCALL __muluhisi3 - ;; One-extend R27:R26 (A1:A0) - sub C2, B0 - sbc C3, B1 - ret -ENDF __mulohisi3 -#endif /* L_mulshisi3 */ - -#if defined (L_muluhisi3) -;;; R25:R22 = (unsigned long) R27:R26 * 
R21:R18 -;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0 -;;; Clobbers: __tmp_reg__ -DEFUN __muluhisi3 - XCALL __umulhisi3 - mul A0, B3 - add C3, r0 - mul A1, B2 - add C3, r0 - mul A0, B2 - add C2, r0 - adc C3, r1 - clr __zero_reg__ - ret -ENDF __muluhisi3 -#endif /* L_muluhisi3 */ - -/******************************************************* - Multiplication 32 x 32 -*******************************************************/ - -#if defined (L_mulsi3) -;;; R25:R22 = R25:R22 * R21:R18 -;;; (C3:C0) = C3:C0 * B3:B0 -;;; Clobbers: R26, R27, __tmp_reg__ -DEFUN __mulsi3 - movw A0, C0 - push C2 - push C3 - XCALL __muluhisi3 - pop A1 - pop A0 - ;; A1:A0 now contains the high word of A - mul A0, B0 - add C2, r0 - adc C3, r1 - mul A0, B1 - add C3, r0 - mul A1, B0 - add C3, r0 - clr __zero_reg__ - ret -ENDF __mulsi3 -#endif /* L_mulsi3 */ - -#undef A0 -#undef A1 - -#undef B0 -#undef B1 -#undef B2 -#undef B3 - -#undef C0 -#undef C1 -#undef C2 -#undef C3 - -#endif /* __AVR_HAVE_MUL__ */ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - -.section .text.libgcc.div, "ax", @progbits - -/******************************************************* - Division 8 / 8 => (result + remainder) -*******************************************************/ -#define r_rem r25 /* remainder */ -#define r_arg1 r24 /* dividend, quotient */ -#define r_arg2 r22 /* divisor */ -#define r_cnt r23 /* loop count */ - -#if defined (L_udivmodqi4) -DEFUN __udivmodqi4 - sub r_rem,r_rem ; clear remainder and carry - ldi r_cnt,9 ; init loop counter - rjmp __udivmodqi4_ep ; jump to entry point -__udivmodqi4_loop: - rol r_rem ; shift dividend into remainder - cp r_rem,r_arg2 ; compare remainder & divisor - brcs __udivmodqi4_ep ; remainder <= divisor - sub r_rem,r_arg2 ; restore remainder -__udivmodqi4_ep: - rol r_arg1 ; shift dividend (with CARRY) - dec r_cnt ; decrement loop counter - brne __udivmodqi4_loop - com r_arg1 ; complement result - ; because C flag was complemented in loop - ret -ENDF __udivmodqi4 -#endif /* defined (L_udivmodqi4) */ - -#if defined (L_divmodqi4) -DEFUN __divmodqi4 - bst r_arg1,7 ; store sign of dividend - mov __tmp_reg__,r_arg1 - eor __tmp_reg__,r_arg2; r0.7 is sign of result - sbrc r_arg1,7 - neg r_arg1 ; dividend negative : negate - sbrc r_arg2,7 - neg r_arg2 ; divisor negative : negate - XCALL __udivmodqi4 ; do the unsigned div/mod - brtc __divmodqi4_1 - neg r_rem ; correct remainder sign -__divmodqi4_1: - sbrc __tmp_reg__,7 - neg r_arg1 ; correct result sign -__divmodqi4_exit: - ret -ENDF __divmodqi4 -#endif /* defined (L_divmodqi4) */ - -#undef r_rem -#undef r_arg1 -#undef r_arg2 -#undef r_cnt - - -/******************************************************* - Division 16 / 16 => (result + remainder) -*******************************************************/ -#define r_remL r26 /* remainder Low */ -#define r_remH r27 /* remainder High */ - -/* return: remainder */ -#define r_arg1L r24 /* dividend Low */ -#define r_arg1H r25 /* dividend High */ - -/* return: quotient */ -#define r_arg2L r22 /* divisor Low */ -#define r_arg2H r23 /* divisor High */ - -#define r_cnt r21 /* loop count */ - -#if defined (L_udivmodhi4) -DEFUN __udivmodhi4 - sub r_remL,r_remL - sub r_remH,r_remH ; clear remainder and carry - ldi r_cnt,17 ; init loop counter - rjmp __udivmodhi4_ep ; jump to entry point -__udivmodhi4_loop: - rol r_remL ; shift dividend into remainder - rol r_remH - cp r_remL,r_arg2L ; compare remainder & divisor - cpc r_remH,r_arg2H - brcs __udivmodhi4_ep ; remainder < divisor - sub r_remL,r_arg2L ; restore remainder - sbc 
r_remH,r_arg2H -__udivmodhi4_ep: - rol r_arg1L ; shift dividend (with CARRY) - rol r_arg1H - dec r_cnt ; decrement loop counter - brne __udivmodhi4_loop - com r_arg1L - com r_arg1H -; div/mod results to return registers, as for the div() function - mov_l r_arg2L, r_arg1L ; quotient - mov_h r_arg2H, r_arg1H - mov_l r_arg1L, r_remL ; remainder - mov_h r_arg1H, r_remH - ret -ENDF __udivmodhi4 -#endif /* defined (L_udivmodhi4) */ - -#if defined (L_divmodhi4) -DEFUN __divmodhi4 - .global _div -_div: - bst r_arg1H,7 ; store sign of dividend - mov __tmp_reg__,r_arg1H - eor __tmp_reg__,r_arg2H ; r0.7 is sign of result - rcall __divmodhi4_neg1 ; dividend negative : negate - sbrc r_arg2H,7 - rcall __divmodhi4_neg2 ; divisor negative : negate - XCALL __udivmodhi4 ; do the unsigned div/mod - rcall __divmodhi4_neg1 ; correct remainder sign - tst __tmp_reg__ - brpl __divmodhi4_exit -__divmodhi4_neg2: - com r_arg2H - neg r_arg2L ; correct divisor/result sign - sbci r_arg2H,0xff -__divmodhi4_exit: - ret -__divmodhi4_neg1: - brtc __divmodhi4_exit - com r_arg1H - neg r_arg1L ; correct dividend/remainder sign - sbci r_arg1H,0xff - ret -ENDF __divmodhi4 -#endif /* defined (L_divmodhi4) */ - -#undef r_remH -#undef r_remL - -#undef r_arg1H -#undef r_arg1L - -#undef r_arg2H -#undef r_arg2L - -#undef r_cnt - -/******************************************************* - Division 32 / 32 => (result + remainder) -*******************************************************/ -#define r_remHH r31 /* remainder High */ -#define r_remHL r30 -#define r_remH r27 -#define r_remL r26 /* remainder Low */ - -/* return: remainder */ -#define r_arg1HH r25 /* dividend High */ -#define r_arg1HL r24 -#define r_arg1H r23 -#define r_arg1L r22 /* dividend Low */ - -/* return: quotient */ -#define r_arg2HH r21 /* divisor High */ -#define r_arg2HL r20 -#define r_arg2H r19 -#define r_arg2L r18 /* divisor Low */ - -#define r_cnt __zero_reg__ /* loop count (0 after the loop!) 
*/ - -#if defined (L_udivmodsi4) -DEFUN __udivmodsi4 - ldi r_remL, 33 ; init loop counter - mov r_cnt, r_remL - sub r_remL,r_remL - sub r_remH,r_remH ; clear remainder and carry - mov_l r_remHL, r_remL - mov_h r_remHH, r_remH - rjmp __udivmodsi4_ep ; jump to entry point -__udivmodsi4_loop: - rol r_remL ; shift dividend into remainder - rol r_remH - rol r_remHL - rol r_remHH - cp r_remL,r_arg2L ; compare remainder & divisor - cpc r_remH,r_arg2H - cpc r_remHL,r_arg2HL - cpc r_remHH,r_arg2HH - brcs __udivmodsi4_ep ; remainder <= divisor - sub r_remL,r_arg2L ; restore remainder - sbc r_remH,r_arg2H - sbc r_remHL,r_arg2HL - sbc r_remHH,r_arg2HH -__udivmodsi4_ep: - rol r_arg1L ; shift dividend (with CARRY) - rol r_arg1H - rol r_arg1HL - rol r_arg1HH - dec r_cnt ; decrement loop counter - brne __udivmodsi4_loop - ; __zero_reg__ now restored (r_cnt == 0) - com r_arg1L - com r_arg1H - com r_arg1HL - com r_arg1HH -; div/mod results to return registers, as for the ldiv() function - mov_l r_arg2L, r_arg1L ; quotient - mov_h r_arg2H, r_arg1H - mov_l r_arg2HL, r_arg1HL - mov_h r_arg2HH, r_arg1HH - mov_l r_arg1L, r_remL ; remainder - mov_h r_arg1H, r_remH - mov_l r_arg1HL, r_remHL - mov_h r_arg1HH, r_remHH - ret -ENDF __udivmodsi4 -#endif /* defined (L_udivmodsi4) */ - -#if defined (L_divmodsi4) -DEFUN __divmodsi4 - bst r_arg1HH,7 ; store sign of dividend - mov __tmp_reg__,r_arg1HH - eor __tmp_reg__,r_arg2HH ; r0.7 is sign of result - rcall __divmodsi4_neg1 ; dividend negative : negate - sbrc r_arg2HH,7 - rcall __divmodsi4_neg2 ; divisor negative : negate - XCALL __udivmodsi4 ; do the unsigned div/mod - rcall __divmodsi4_neg1 ; correct remainder sign - rol __tmp_reg__ - brcc __divmodsi4_exit -__divmodsi4_neg2: - com r_arg2HH - com r_arg2HL - com r_arg2H - neg r_arg2L ; correct divisor/quotient sign - sbci r_arg2H,0xff - sbci r_arg2HL,0xff - sbci r_arg2HH,0xff -__divmodsi4_exit: - ret -__divmodsi4_neg1: - brtc __divmodsi4_exit - com r_arg1HH - com r_arg1HL - com r_arg1H - neg r_arg1L ; correct dividend/remainder sign - sbci r_arg1H, 0xff - sbci r_arg1HL,0xff - sbci r_arg1HH,0xff - ret -ENDF __divmodsi4 -#endif /* defined (L_divmodsi4) */ - - -.section .text.libgcc.prologue, "ax", @progbits - -/********************************** - * This is a prologue subroutine - **********************************/ -#if defined (L_prologue) - -DEFUN __prologue_saves__ - push r2 - push r3 - push r4 - push r5 - push r6 - push r7 - push r8 - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push r16 - push r17 - push r28 - push r29 - in r28,__SP_L__ - in r29,__SP_H__ - sub r28,r26 - sbc r29,r27 - in __tmp_reg__,__SREG__ - cli - out __SP_H__,r29 - out __SREG__,__tmp_reg__ - out __SP_L__,r28 -#if defined (__AVR_HAVE_EIJMP_EICALL__) - eijmp -#else - ijmp -#endif - -ENDF __prologue_saves__ -#endif /* defined (L_prologue) */ - -/* - * This is an epilogue subroutine - */ -#if defined (L_epilogue) - -DEFUN __epilogue_restores__ - ldd r2,Y+18 - ldd r3,Y+17 - ldd r4,Y+16 - ldd r5,Y+15 - ldd r6,Y+14 - ldd r7,Y+13 - ldd r8,Y+12 - ldd r9,Y+11 - ldd r10,Y+10 - ldd r11,Y+9 - ldd r12,Y+8 - ldd r13,Y+7 - ldd r14,Y+6 - ldd r15,Y+5 - ldd r16,Y+4 - ldd r17,Y+3 - ldd r26,Y+2 - ldd r27,Y+1 - add r28,r30 - adc r29,__zero_reg__ - in __tmp_reg__,__SREG__ - cli - out __SP_H__,r29 - out __SREG__,__tmp_reg__ - out __SP_L__,r28 - mov_l r28, r26 - mov_h r29, r27 - ret -ENDF __epilogue_restores__ -#endif /* defined (L_epilogue) */ - -#ifdef L_exit - .section .fini9,"ax",@progbits -DEFUN _exit - .weak exit -exit: -ENDF _exit - - 
/* Code from .fini8 ... .fini1 sections inserted by ld script. */ - - .section .fini0,"ax",@progbits - cli -__stop_program: - rjmp __stop_program -#endif /* defined (L_exit) */ - -#ifdef L_cleanup - .weak _cleanup - .func _cleanup -_cleanup: - ret -.endfunc -#endif /* defined (L_cleanup) */ - - -.section .text.libgcc, "ax", @progbits - -#ifdef L_tablejump -DEFUN __tablejump2__ - lsl r30 - rol r31 - ;; FALLTHRU -ENDF __tablejump2__ - -DEFUN __tablejump__ -#if defined (__AVR_HAVE_LPMX__) - lpm __tmp_reg__, Z+ - lpm r31, Z - mov r30, __tmp_reg__ -#if defined (__AVR_HAVE_EIJMP_EICALL__) - eijmp -#else - ijmp -#endif - -#else /* !HAVE_LPMX */ - lpm - adiw r30, 1 - push r0 - lpm - push r0 -#if defined (__AVR_HAVE_EIJMP_EICALL__) - in __tmp_reg__, __EIND__ - push __tmp_reg__ -#endif - ret -#endif /* !HAVE_LPMX */ -ENDF __tablejump__ -#endif /* defined (L_tablejump) */ - -#ifdef L_copy_data - .section .init4,"ax",@progbits -DEFUN __do_copy_data -#if defined(__AVR_HAVE_ELPMX__) - ldi r17, hi8(__data_end) - ldi r26, lo8(__data_start) - ldi r27, hi8(__data_start) - ldi r30, lo8(__data_load_start) - ldi r31, hi8(__data_load_start) - ldi r16, hh8(__data_load_start) - out __RAMPZ__, r16 - rjmp .L__do_copy_data_start -.L__do_copy_data_loop: - elpm r0, Z+ - st X+, r0 -.L__do_copy_data_start: - cpi r26, lo8(__data_end) - cpc r27, r17 - brne .L__do_copy_data_loop -#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) - ldi r17, hi8(__data_end) - ldi r26, lo8(__data_start) - ldi r27, hi8(__data_start) - ldi r30, lo8(__data_load_start) - ldi r31, hi8(__data_load_start) - ldi r16, hh8(__data_load_start - 0x10000) -.L__do_copy_data_carry: - inc r16 - out __RAMPZ__, r16 - rjmp .L__do_copy_data_start -.L__do_copy_data_loop: - elpm - st X+, r0 - adiw r30, 1 - brcs .L__do_copy_data_carry -.L__do_copy_data_start: - cpi r26, lo8(__data_end) - cpc r27, r17 - brne .L__do_copy_data_loop -#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) - ldi r17, hi8(__data_end) - ldi r26, lo8(__data_start) - ldi r27, hi8(__data_start) - ldi r30, lo8(__data_load_start) - ldi r31, hi8(__data_load_start) - rjmp .L__do_copy_data_start -.L__do_copy_data_loop: -#if defined (__AVR_HAVE_LPMX__) - lpm r0, Z+ -#else - lpm - adiw r30, 1 -#endif - st X+, r0 -.L__do_copy_data_start: - cpi r26, lo8(__data_end) - cpc r27, r17 - brne .L__do_copy_data_loop -#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ -ENDF __do_copy_data -#endif /* L_copy_data */ - -/* __do_clear_bss is only necessary if there is anything in .bss section. */ - -#ifdef L_clear_bss - .section .init4,"ax",@progbits -DEFUN __do_clear_bss - ldi r17, hi8(__bss_end) - ldi r26, lo8(__bss_start) - ldi r27, hi8(__bss_start) - rjmp .do_clear_bss_start -.do_clear_bss_loop: - st X+, __zero_reg__ -.do_clear_bss_start: - cpi r26, lo8(__bss_end) - cpc r27, r17 - brne .do_clear_bss_loop -ENDF __do_clear_bss -#endif /* L_clear_bss */ - -/* __do_global_ctors and __do_global_dtors are only necessary - if there are any constructors/destructors. 
*/ - -#ifdef L_ctors - .section .init6,"ax",@progbits -DEFUN __do_global_ctors -#if defined(__AVR_HAVE_RAMPZ__) - ldi r17, hi8(__ctors_start) - ldi r28, lo8(__ctors_end) - ldi r29, hi8(__ctors_end) - ldi r16, hh8(__ctors_end) - rjmp .L__do_global_ctors_start -.L__do_global_ctors_loop: - sbiw r28, 2 - sbc r16, __zero_reg__ - mov_h r31, r29 - mov_l r30, r28 - out __RAMPZ__, r16 - XCALL __tablejump_elpm__ -.L__do_global_ctors_start: - cpi r28, lo8(__ctors_start) - cpc r29, r17 - ldi r24, hh8(__ctors_start) - cpc r16, r24 - brne .L__do_global_ctors_loop -#else - ldi r17, hi8(__ctors_start) - ldi r28, lo8(__ctors_end) - ldi r29, hi8(__ctors_end) - rjmp .L__do_global_ctors_start -.L__do_global_ctors_loop: - sbiw r28, 2 - mov_h r31, r29 - mov_l r30, r28 - XCALL __tablejump__ -.L__do_global_ctors_start: - cpi r28, lo8(__ctors_start) - cpc r29, r17 - brne .L__do_global_ctors_loop -#endif /* defined(__AVR_HAVE_RAMPZ__) */ -ENDF __do_global_ctors -#endif /* L_ctors */ - -#ifdef L_dtors - .section .fini6,"ax",@progbits -DEFUN __do_global_dtors -#if defined(__AVR_HAVE_RAMPZ__) - ldi r17, hi8(__dtors_end) - ldi r28, lo8(__dtors_start) - ldi r29, hi8(__dtors_start) - ldi r16, hh8(__dtors_start) - rjmp .L__do_global_dtors_start -.L__do_global_dtors_loop: - sbiw r28, 2 - sbc r16, __zero_reg__ - mov_h r31, r29 - mov_l r30, r28 - out __RAMPZ__, r16 - XCALL __tablejump_elpm__ -.L__do_global_dtors_start: - cpi r28, lo8(__dtors_end) - cpc r29, r17 - ldi r24, hh8(__dtors_end) - cpc r16, r24 - brne .L__do_global_dtors_loop -#else - ldi r17, hi8(__dtors_end) - ldi r28, lo8(__dtors_start) - ldi r29, hi8(__dtors_start) - rjmp .L__do_global_dtors_start -.L__do_global_dtors_loop: - mov_h r31, r29 - mov_l r30, r28 - XCALL __tablejump__ - adiw r28, 2 -.L__do_global_dtors_start: - cpi r28, lo8(__dtors_end) - cpc r29, r17 - brne .L__do_global_dtors_loop -#endif /* defined(__AVR_HAVE_RAMPZ__) */ -ENDF __do_global_dtors -#endif /* L_dtors */ - -.section .text.libgcc, "ax", @progbits - -#ifdef L_tablejump_elpm -DEFUN __tablejump_elpm__ -#if defined (__AVR_HAVE_ELPM__) -#if defined (__AVR_HAVE_LPMX__) - elpm __tmp_reg__, Z+ - elpm r31, Z - mov r30, __tmp_reg__ -#if defined (__AVR_HAVE_EIJMP_EICALL__) - eijmp -#else - ijmp -#endif - -#else - elpm - adiw r30, 1 - push r0 - elpm - push r0 -#if defined (__AVR_HAVE_EIJMP_EICALL__) - in __tmp_reg__, __EIND__ - push __tmp_reg__ -#endif - ret -#endif -#endif /* defined (__AVR_HAVE_ELPM__) */ -ENDF __tablejump_elpm__ -#endif /* defined (L_tablejump_elpm) */ - - -.section .text.libgcc.builtins, "ax", @progbits - -/********************************** - * Find first set Bit (ffs) - **********************************/ - -#if defined (L_ffssi2) -;; find first set bit -;; r25:r24 = ffs32 (r25:r22) -;; clobbers: r22, r26 -DEFUN __ffssi2 - clr r26 - tst r22 - brne 1f - subi r26, -8 - or r22, r23 - brne 1f - subi r26, -8 - or r22, r24 - brne 1f - subi r26, -8 - or r22, r25 - brne 1f - ret -1: mov r24, r22 - XJMP __loop_ffsqi2 -ENDF __ffssi2 -#endif /* defined (L_ffssi2) */ - -#if defined (L_ffshi2) -;; find first set bit -;; r25:r24 = ffs16 (r25:r24) -;; clobbers: r26 -DEFUN __ffshi2 - clr r26 -#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ - ;; Some cores have problem skipping 2-word instruction - tst r24 - breq 2f -#else - cpse r24, __zero_reg__ -#endif /* __AVR_HAVE_JMP_CALL__ */ -1: XJMP __loop_ffsqi2 -2: ldi r26, 8 - or r24, r25 - brne 1b - ret -ENDF __ffshi2 -#endif /* defined (L_ffshi2) */ - -#if defined (L_loop_ffsqi2) -;; Helper for ffshi2, ffssi2 -;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) -;; 
r24 must be != 0 -;; clobbers: r26 -DEFUN __loop_ffsqi2 - inc r26 - lsr r24 - brcc __loop_ffsqi2 - mov r24, r26 - clr r25 - ret -ENDF __loop_ffsqi2 -#endif /* defined (L_loop_ffsqi2) */ - - -/********************************** - * Count trailing Zeros (ctz) - **********************************/ - -#if defined (L_ctzsi2) -;; count trailing zeros -;; r25:r24 = ctz32 (r25:r22) -;; clobbers: r26, r22 -;; ctz(0) = 255 -;; Note that ctz(0) in undefined for GCC -DEFUN __ctzsi2 - XCALL __ffssi2 - dec r24 - ret -ENDF __ctzsi2 -#endif /* defined (L_ctzsi2) */ - -#if defined (L_ctzhi2) -;; count trailing zeros -;; r25:r24 = ctz16 (r25:r24) -;; clobbers: r26 -;; ctz(0) = 255 -;; Note that ctz(0) in undefined for GCC -DEFUN __ctzhi2 - XCALL __ffshi2 - dec r24 - ret -ENDF __ctzhi2 -#endif /* defined (L_ctzhi2) */ - - -/********************************** - * Count leading Zeros (clz) - **********************************/ - -#if defined (L_clzdi2) -;; count leading zeros -;; r25:r24 = clz64 (r25:r18) -;; clobbers: r22, r23, r26 -DEFUN __clzdi2 - XCALL __clzsi2 - sbrs r24, 5 - ret - mov_l r22, r18 - mov_h r23, r19 - mov_l r24, r20 - mov_h r25, r21 - XCALL __clzsi2 - subi r24, -32 - ret -ENDF __clzdi2 -#endif /* defined (L_clzdi2) */ - -#if defined (L_clzsi2) -;; count leading zeros -;; r25:r24 = clz32 (r25:r22) -;; clobbers: r26 -DEFUN __clzsi2 - XCALL __clzhi2 - sbrs r24, 4 - ret - mov_l r24, r22 - mov_h r25, r23 - XCALL __clzhi2 - subi r24, -16 - ret -ENDF __clzsi2 -#endif /* defined (L_clzsi2) */ - -#if defined (L_clzhi2) -;; count leading zeros -;; r25:r24 = clz16 (r25:r24) -;; clobbers: r26 -DEFUN __clzhi2 - clr r26 - tst r25 - brne 1f - subi r26, -8 - or r25, r24 - brne 1f - ldi r24, 16 - ret -1: cpi r25, 16 - brsh 3f - subi r26, -3 - swap r25 -2: inc r26 -3: lsl r25 - brcc 2b - mov r24, r26 - clr r25 - ret -ENDF __clzhi2 -#endif /* defined (L_clzhi2) */ - - -/********************************** - * Parity - **********************************/ - -#if defined (L_paritydi2) -;; r25:r24 = parity64 (r25:r18) -;; clobbers: __tmp_reg__ -DEFUN __paritydi2 - eor r24, r18 - eor r24, r19 - eor r24, r20 - eor r24, r21 - XJMP __paritysi2 -ENDF __paritydi2 -#endif /* defined (L_paritydi2) */ - -#if defined (L_paritysi2) -;; r25:r24 = parity32 (r25:r22) -;; clobbers: __tmp_reg__ -DEFUN __paritysi2 - eor r24, r22 - eor r24, r23 - XJMP __parityhi2 -ENDF __paritysi2 -#endif /* defined (L_paritysi2) */ - -#if defined (L_parityhi2) -;; r25:r24 = parity16 (r25:r24) -;; clobbers: __tmp_reg__ -DEFUN __parityhi2 - eor r24, r25 -;; FALLTHRU -ENDF __parityhi2 - -;; r25:r24 = parity8 (r24) -;; clobbers: __tmp_reg__ -DEFUN __parityqi2 - ;; parity is in r24[0..7] - mov __tmp_reg__, r24 - swap __tmp_reg__ - eor r24, __tmp_reg__ - ;; parity is in r24[0..3] - subi r24, -4 - andi r24, -5 - subi r24, -6 - ;; parity is in r24[0,3] - sbrc r24, 3 - inc r24 - ;; parity is in r24[0] - andi r24, 1 - clr r25 - ret -ENDF __parityqi2 -#endif /* defined (L_parityhi2) */ - - -/********************************** - * Population Count - **********************************/ - -#if defined (L_popcounthi2) -;; population count -;; r25:r24 = popcount16 (r25:r24) -;; clobbers: __tmp_reg__ -DEFUN __popcounthi2 - XCALL __popcountqi2 - push r24 - mov r24, r25 - XCALL __popcountqi2 - clr r25 - ;; FALLTHRU -ENDF __popcounthi2 - -DEFUN __popcounthi2_tail - pop __tmp_reg__ - add r24, __tmp_reg__ - ret -ENDF __popcounthi2_tail -#endif /* defined (L_popcounthi2) */ - -#if defined (L_popcountsi2) -;; population count -;; r25:r24 = popcount32 (r25:r22) -;; 
clobbers: __tmp_reg__ -DEFUN __popcountsi2 - XCALL __popcounthi2 - push r24 - mov_l r24, r22 - mov_h r25, r23 - XCALL __popcounthi2 - XJMP __popcounthi2_tail -ENDF __popcountsi2 -#endif /* defined (L_popcountsi2) */ - -#if defined (L_popcountdi2) -;; population count -;; r25:r24 = popcount64 (r25:r18) -;; clobbers: r22, r23, __tmp_reg__ -DEFUN __popcountdi2 - XCALL __popcountsi2 - push r24 - mov_l r22, r18 - mov_h r23, r19 - mov_l r24, r20 - mov_h r25, r21 - XCALL __popcountsi2 - XJMP __popcounthi2_tail -ENDF __popcountdi2 -#endif /* defined (L_popcountdi2) */ - -#if defined (L_popcountqi2) -;; population count -;; r24 = popcount8 (r24) -;; clobbers: __tmp_reg__ -DEFUN __popcountqi2 - mov __tmp_reg__, r24 - andi r24, 1 - lsr __tmp_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __zero_reg__ - lsr __tmp_reg__ - adc r24, __tmp_reg__ - ret -ENDF __popcountqi2 -#endif /* defined (L_popcountqi2) */ - - -/********************************** - * Swap bytes - **********************************/ - -;; swap two registers with different register number -.macro bswap a, b - eor \a, \b - eor \b, \a - eor \a, \b -.endm - -#if defined (L_bswapsi2) -;; swap bytes -;; r25:r22 = bswap32 (r25:r22) -DEFUN __bswapsi2 - bswap r22, r25 - bswap r23, r24 - ret -ENDF __bswapsi2 -#endif /* defined (L_bswapsi2) */ - -#if defined (L_bswapdi2) -;; swap bytes -;; r25:r18 = bswap64 (r25:r18) -DEFUN __bswapdi2 - bswap r18, r25 - bswap r19, r24 - bswap r20, r23 - bswap r21, r22 - ret -ENDF __bswapdi2 -#endif /* defined (L_bswapdi2) */ - - -/********************************** - * 64-bit shifts - **********************************/ - -#if defined (L_ashrdi3) -;; Arithmetic shift right -;; r25:r18 = ashr64 (r25:r18, r17:r16) -DEFUN __ashrdi3 - push r16 - andi r16, 63 - breq 2f -1: asr r25 - ror r24 - ror r23 - ror r22 - ror r21 - ror r20 - ror r19 - ror r18 - dec r16 - brne 1b -2: pop r16 - ret -ENDF __ashrdi3 -#endif /* defined (L_ashrdi3) */ - -#if defined (L_lshrdi3) -;; Logic shift right -;; r25:r18 = lshr64 (r25:r18, r17:r16) -DEFUN __lshrdi3 - push r16 - andi r16, 63 - breq 2f -1: lsr r25 - ror r24 - ror r23 - ror r22 - ror r21 - ror r20 - ror r19 - ror r18 - dec r16 - brne 1b -2: pop r16 - ret -ENDF __lshrdi3 -#endif /* defined (L_lshrdi3) */ - -#if defined (L_ashldi3) -;; Shift left -;; r25:r18 = ashl64 (r25:r18, r17:r16) -DEFUN __ashldi3 - push r16 - andi r16, 63 - breq 2f -1: lsl r18 - rol r19 - rol r20 - rol r21 - rol r22 - rol r23 - rol r24 - rol r25 - dec r16 - brne 1b -2: pop r16 - ret -ENDF __ashldi3 -#endif /* defined (L_ashldi3) */ - - -.section .text.libgcc.fmul, "ax", @progbits - -/***********************************************************/ -;;; Softmul versions of FMUL, FMULS and FMULSU to implement -;;; __builtin_avr_fmul* if !AVR_HAVE_MUL -/***********************************************************/ - -#define A1 24 -#define B1 25 -#define C0 22 -#define C1 23 -#define A0 __tmp_reg__ - -#ifdef L_fmuls -;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction -;;; Clobbers: r24, r25, __tmp_reg__ -DEFUN __fmuls - ;; A0.7 = negate result? - mov A0, A1 - eor A0, B1 - ;; B1 = |B1| - sbrc B1, 7 - neg B1 - XJMP __fmulsu_exit -ENDF __fmuls -#endif /* L_fmuls */ - -#ifdef L_fmulsu -;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction -;;; Clobbers: r24, r25, __tmp_reg__ -DEFUN __fmulsu - ;; A0.7 = negate result? 
- mov A0, A1 -;; FALLTHRU -ENDF __fmulsu - -;; Helper for __fmuls and __fmulsu -DEFUN __fmulsu_exit - ;; A1 = |A1| - sbrc A1, 7 - neg A1 -#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ - ;; Some cores have problem skipping 2-word instruction - tst A0 - brmi 1f -#else - sbrs A0, 7 -#endif /* __AVR_HAVE_JMP_CALL__ */ - XJMP __fmul -1: XCALL __fmul - ;; C = -C iff A0.7 = 1 - com C1 - neg C0 - sbci C1, -1 - ret -ENDF __fmulsu_exit -#endif /* L_fmulsu */ - - -#ifdef L_fmul -;;; r22:r23 = fmul (r24, r25) like in FMUL instruction -;;; Clobbers: r24, r25, __tmp_reg__ -DEFUN __fmul - ; clear result - clr C0 - clr C1 - clr A0 -1: tst B1 - ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C. -2: brpl 3f - ;; C += A - add C0, A0 - adc C1, A1 -3: ;; A >>= 1 - lsr A1 - ror A0 - ;; B <<= 1 - lsl B1 - brne 2b - ret -ENDF __fmul -#endif /* L_fmul */ - -#undef A0 -#undef A1 -#undef B1 -#undef C0 -#undef C1 diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr index 30e8d96447e..3f37e591f8e 100644 --- a/gcc/config/avr/t-avr +++ b/gcc/config/avr/t-avr @@ -39,54 +39,6 @@ $(srcdir)/config/avr/avr-tables.opt: $(srcdir)/config/avr/genopt.sh \ $(SHELL) $(srcdir)/config/avr/genopt.sh $(srcdir)/config/avr > \ $(srcdir)/config/avr/avr-tables.opt -LIB1ASMSRC = avr/libgcc.S -LIB1ASMFUNCS = \ - _mulqi3 \ - _mulhi3 \ - _mulhisi3 \ - _umulhisi3 \ - _usmulhisi3 \ - _muluhisi3 \ - _mulshisi3 \ - _mulsi3 \ - _udivmodqi4 \ - _divmodqi4 \ - _udivmodhi4 \ - _divmodhi4 \ - _udivmodsi4 \ - _divmodsi4 \ - _prologue \ - _epilogue \ - _exit \ - _cleanup \ - _tablejump \ - _tablejump_elpm \ - _copy_data \ - _clear_bss \ - _ctors \ - _dtors \ - _ffssi2 \ - _ffshi2 \ - _loop_ffsqi2 \ - _ctzsi2 \ - _ctzhi2 \ - _clzdi2 \ - _clzsi2 \ - _clzhi2 \ - _paritydi2 \ - _paritysi2 \ - _parityhi2 \ - _popcounthi2 \ - _popcountsi2 \ - _popcountdi2 \ - _popcountqi2 \ - _bswapsi2 \ - _bswapdi2 \ - _ashldi3 \ - _ashrdi3 \ - _lshrdi3 \ - _fmul _fmuls _fmulsu - LIB2FUNCS_EXCLUDE = \ _clz diff --git a/gcc/config/bfin/lib1funcs.asm b/gcc/config/bfin/lib1funcs.asm deleted file mode 100644 index c7bf4f3f05c..00000000000 --- a/gcc/config/bfin/lib1funcs.asm +++ /dev/null @@ -1,211 +0,0 @@ -/* libgcc functions for Blackfin. - Copyright (C) 2005, 2009 Free Software Foundation, Inc. - Contributed by Analog Devices. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. 
*/ - -#ifdef L_divsi3 -.text -.align 2 -.global ___divsi3; -.type ___divsi3, STT_FUNC; - -___divsi3: - [--SP]= RETS; - [--SP] = R7; - - R2 = -R0; - CC = R0 < 0; - IF CC R0 = R2; - R7 = CC; - - R2 = -R1; - CC = R1 < 0; - IF CC R1 = R2; - R2 = CC; - R7 = R7 ^ R2; - - CALL ___udivsi3; - - CC = R7; - R1 = -R0; - IF CC R0 = R1; - - R7 = [SP++]; - RETS = [SP++]; - RTS; -#endif - -#ifdef L_modsi3 -.align 2 -.global ___modsi3; -.type ___modsi3, STT_FUNC; - -___modsi3: - [--SP] = RETS; - [--SP] = R0; - [--SP] = R1; - CALL ___divsi3; - R2 = [SP++]; - R1 = [SP++]; - R2 *= R0; - R0 = R1 - R2; - RETS = [SP++]; - RTS; -#endif - -#ifdef L_udivsi3 -.align 2 -.global ___udivsi3; -.type ___udivsi3, STT_FUNC; - -___udivsi3: - P0 = 32; - LSETUP (0f, 1f) LC0 = P0; - /* upper half of dividend */ - R3 = 0; -0: - /* The first time round in the loop we shift in garbage, but since we - perform 33 shifts, it doesn't matter. */ - R0 = ROT R0 BY 1; - R3 = ROT R3 BY 1; - R2 = R3 - R1; - CC = R3 < R1 (IU); -1: - /* Last instruction of the loop. */ - IF ! CC R3 = R2; - - /* Shift in the last bit. */ - R0 = ROT R0 BY 1; - /* R0 is the result, R3 contains the remainder. */ - R0 = ~ R0; - RTS; -#endif - -#ifdef L_umodsi3 -.align 2 -.global ___umodsi3; -.type ___umodsi3, STT_FUNC; - -___umodsi3: - [--SP] = RETS; - CALL ___udivsi3; - R0 = R3; - RETS = [SP++]; - RTS; -#endif - -#ifdef L_umulsi3_highpart -.align 2 -.global ___umulsi3_highpart; -.type ___umulsi3_highpart, STT_FUNC; - -___umulsi3_highpart: - A1 = R1.L * R0.L (FU); - A1 = A1 >> 16; - A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU); - A1 += R0.L * R1.H (FU); - A1 = A1 >> 16; - A0 += A1; - R0 = A0 (FU); - RTS; -#endif - -#ifdef L_smulsi3_highpart -.align 2 -.global ___smulsi3_highpart; -.type ___smulsi3_highpart, STT_FUNC; - -___smulsi3_highpart: - A1 = R1.L * R0.L (FU); - A1 = A1 >> 16; - A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M); - A1 += R1.H * R0.L (IS,M); - A1 = A1 >>> 16; - R0 = (A0 += A1); - RTS; -#endif - -#ifdef L_muldi3 -.align 2 -.global ___muldi3; -.type ___muldi3, STT_FUNC; - -/* - R1:R0 * R3:R2 - = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l -[X] = (R1.h * R3.h) * 2^96 -[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80 -[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64 -[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48 -[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32 -[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16 -[T4] + (R0.l * R2.l) - - We can discard the first three lines marked "X" since we produce - only a 64 bit result. So, we need ten 16-bit multiplies. - - Individual mul-acc results: -[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h -[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h -[E3] = R0.l * R2.h + R2.l * R0.h -[E4] = R0.l * R2.l - - We also need to add high parts from lower-level results to higher ones: - E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4 - - One interesting property is that all parts of the result that depend - on the sign of the multiplication are discarded. Those would be the - multiplications involving R1.h and R3.h, but only the top 16 bit of - the 32 bit result depend on the sign, and since R1.h and R3.h only - occur in E1, the top half of these results is cut off. - So, we can just use FU mode for all of the 16-bit multiplies, and - ignore questions of when to use mixed mode. */ - -___muldi3: - /* [SP] technically is part of the caller's frame, but we can - use it as scratch space. 
*/ - A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */ - A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */ - A0 += A1; /* E1 */ - R4 = A0.w; - A0 = R0.l * R3.l (FU); /* E2 */ - A0 += R2.l * R1.l (FU); /* E2 */ - - A1 = R2.L * R0.L (FU); /* E4 */ - R3 = A1.w; - A1 = A1 >> 16; /* E3c */ - A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */ - A1 += R0.L * R2.H (FU); /* E3c */ - R0 = A1.w; - A1 = A1 >> 16; /* E2c */ - A0 += A1; /* E2c */ - R1 = A0.w; - - /* low(result) = low(E3c):low(E4) */ - R0 = PACK (R0.l, R3.l); - /* high(result) = E2c + (E1 << 16) */ - R1.h = R1.h + R4.l (NS) || R4 = [SP]; - RTS; - -.size ___muldi3, .-___muldi3 -#endif diff --git a/gcc/config/bfin/t-bfin b/gcc/config/bfin/t-bfin deleted file mode 100644 index bb95ab4139e..00000000000 --- a/gcc/config/bfin/t-bfin +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2005, 2007, 2011 Free Software Foundation, Inc. -# -# This file is part of GCC. -# -# GCC is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GCC is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# <http://www.gnu.org/licenses/>. - -## Target part of the Makefile - -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart diff --git a/gcc/config/bfin/t-bfin-elf b/gcc/config/bfin/t-bfin-elf index fcf76c4ddfe..5cbcfeeb87f 100644 --- a/gcc/config/bfin/t-bfin-elf +++ b/gcc/config/bfin/t-bfin-elf @@ -18,10 +18,6 @@ ## Target part of the Makefile -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart - TARGET_LIBGCC2_CFLAGS = -fpic MULTILIB_OPTIONS=mcpu=bf532-none diff --git a/gcc/config/bfin/t-bfin-linux b/gcc/config/bfin/t-bfin-linux index a83f9f2da83..9a1d6a09437 100644 --- a/gcc/config/bfin/t-bfin-linux +++ b/gcc/config/bfin/t-bfin-linux @@ -18,10 +18,6 @@ ## Target part of the Makefile -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart - TARGET_LIBGCC2_CFLAGS = -fpic MULTILIB_OPTIONS=mcpu=bf532-none diff --git a/gcc/config/bfin/t-bfin-uclinux b/gcc/config/bfin/t-bfin-uclinux index 1be0796987b..b9fca803e0a 100644 --- a/gcc/config/bfin/t-bfin-uclinux +++ b/gcc/config/bfin/t-bfin-uclinux @@ -18,10 +18,6 @@ ## Target part of the Makefile -LIB1ASMSRC = bfin/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _muldi3 _umulsi3_highpart -LIB1ASMFUNCS += _smulsi3_highpart - TARGET_LIBGCC2_CFLAGS = -fpic MULTILIB_OPTIONS=mcpu=bf532-none diff --git a/gcc/config/c6x/lib1funcs.asm b/gcc/config/c6x/lib1funcs.asm deleted file mode 100644 index 5bf34474bbd..00000000000 --- a/gcc/config/c6x/lib1funcs.asm +++ /dev/null @@ -1,438 +0,0 @@ -/* Copyright 2010, 2011 Free Software Foundation, Inc. - Contributed by Bernd Schmidt <bernds@codesourcery.com>. 
- -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - - ;; ABI considerations for the divide functions - ;; The following registers are call-used: - ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5 - ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4 - ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4 - ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4 - ;; - ;; In our implementation, divu and remu are leaf functions, - ;; while both divi and remi call into divu. - ;; A0 is not clobbered by any of the functions. - ;; divu does not clobber B2 either, which is taken advantage of - ;; in remi. - ;; divi uses B5 to hold the original return address during - ;; the call to divu. - ;; remi uses B2 and A5 to hold the input values during the - ;; call to divu. It stores B3 in on the stack. - -#ifdef L_divsi3 -.text -.align 2 -.global __c6xabi_divi -.hidden __c6xabi_divi -.type __c6xabi_divi, STT_FUNC - -__c6xabi_divi: - call .s2 __c6xabi_divu -|| mv .d2 B3, B5 -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B1 - - [A1] neg .l1 A4, A4 -|| [B1] neg .l2 B4, B4 -|| xor .s1x A1, B1, A1 - -#ifdef _TMS320C6400 - [A1] addkpc .s2 1f, B3, 4 -#else - [A1] mvkl .s2 1f, B3 - [A1] mvkh .s2 1f, B3 - nop 2 -#endif -1: - neg .l1 A4, A4 -|| mv .l2 B3,B5 -|| ret .s2 B5 - nop 5 -#endif - -#if defined L_modsi3 || defined L_divmodsi4 -.align 2 -#ifdef L_modsi3 -#define MOD_OUTPUT_REG A4 -.global __c6xabi_remi -.hidden __c6xabi_remi -.type __c6xabi_remi, STT_FUNC -#else -#define MOD_OUTPUT_REG A5 -.global __c6xabi_divremi -.hidden __c6xabi_divremi -.type __c6xabi_divremi, STT_FUNC -__c6xabi_divremi: -#endif - -__c6xabi_remi: - stw .d2t2 B3, *B15--[2] -|| cmpgt .l1 0, A4, A1 -|| cmpgt .l2 0, B4, B2 -|| mv .s1 A4, A5 -|| call .s2 __c6xabi_divu - - [A1] neg .l1 A4, A4 -|| [B2] neg .l2 B4, B4 -|| xor .s2x B2, A1, B0 -|| mv .d2 B4, B2 - -#ifdef _TMS320C6400 - [B0] addkpc .s2 1f, B3, 1 - [!B0] addkpc .s2 2f, B3, 1 - nop 2 -#else - [B0] mvkl .s2 1f,B3 - [!B0] mvkl .s2 2f,B3 - - [B0] mvkh .s2 1f,B3 - [!B0] mvkh .s2 2f,B3 -#endif -1: - neg .l1 A4, A4 -2: - ldw .d2t2 *++B15[2], B3 - -#ifdef _TMS320C6400_PLUS - mpy32 .m1x A4, B2, A6 - nop 3 - ret .s2 B3 - sub .l1 A5, A6, MOD_OUTPUT_REG - nop 4 -#else - mpyu .m1x A4, B2, A1 - nop 1 - mpylhu .m1x A4, B2, A6 -|| mpylhu .m2x B2, A4, B2 - nop 1 - add .l1x A6, B2, A6 -|| ret .s2 B3 - shl .s1 A6, 16, A6 - add .d1 A6, A1, A6 - sub .l1 A5, A6, MOD_OUTPUT_REG - nop 2 -#endif - -#endif - -#if defined L_udivsi3 || defined L_udivmodsi4 -.align 2 -#ifdef L_udivsi3 -.global __c6xabi_divu -.hidden __c6xabi_divu -.type __c6xabi_divu, STT_FUNC -__c6xabi_divu: -#else -.global __c6xabi_divremu -.hidden __c6xabi_divremu -.type __c6xabi_divremu, STT_FUNC -__c6xabi_divremu: -#endif - ;; We use a 
series of up to 31 subc instructions. First, we find - ;; out how many leading zero bits there are in the divisor. This - ;; gives us both a shift count for aligning (shifting) the divisor - ;; to the, and the number of times we have to execute subc. - - ;; At the end, we have both the remainder and most of the quotient - ;; in A4. The top bit of the quotient is computed first and is - ;; placed in A2. - - ;; Return immediately if the dividend is zero. Setting B4 to 1 - ;; is a trick to allow us to leave the following insns in the jump - ;; delay slot without affecting the result. - mv .s2x A4, B1 - -#ifndef _TMS320C6400 -[!b1] mvk .s2 1, B4 -#endif -[b1] lmbd .l2 1, B4, B1 -||[!b1] b .s2 B3 ; RETURN A -#ifdef _TMS320C6400 -||[!b1] mvk .d2 1, B4 -#endif -#ifdef L_udivmodsi4 -||[!b1] zero .s1 A5 -#endif - mv .l1x B1, A6 -|| shl .s2 B4, B1, B4 - - ;; The loop performs a maximum of 28 steps, so we do the - ;; first 3 here. - cmpltu .l1x A4, B4, A2 -[!A2] sub .l1x A4, B4, A4 -|| shru .s2 B4, 1, B4 -|| xor .s1 1, A2, A2 - - shl .s1 A2, 31, A2 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - - ;; RETURN A may happen here (note: must happen before the next branch) -0: - cmpgt .l2 B1, 7, B0 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -|| [b0] b .s1 0b -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 -|| mvk .s1 32, A1 - sub .l1 A1, A6, A6 -#ifdef L_udivmodsi4 -|| extu .s1 A4, A6, A5 -#endif - shl .s1 A4, A6, A4 - shru .s1 A4, 1, A4 -|| sub .l1 A6, 1, A6 - or .l1 A2, A4, A4 - shru .s1 A4, A6, A4 - nop - -#endif - -#ifdef L_umodsi3 -.align 2 -.global __c6xabi_remu -.hidden __c6xabi_remu -.type __c6xabi_remu, STT_FUNC -__c6xabi_remu: - ;; The ABI seems designed to prevent these functions calling each other, - ;; so we duplicate most of the divsi3 code here. 
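For readers following the divu comments above: the routine is the classic shift-and-subtract (restoring) division — count leading zeros to align the divisor with the dividend, then perform one conditional subtract per bit, which is exactly what the subc loop does in hardware. A plain C sketch of the same scheme; the function name and signature are illustrative only.

#include <stdint.h>

/* Illustrative C model of the shift-and-subtract loop above.  */
static uint32_t
udivmod32 (uint32_t num, uint32_t den, uint32_t *rem)
{
  uint32_t quot = 0;
  int shift;

  if (den == 0 || num == 0)
    {
      if (rem)
        *rem = num;
      return 0;
    }

  /* Align the divisor with the dividend, as lmbd + shl do above.  */
  shift = __builtin_clz (den) - __builtin_clz (num);
  if (shift < 0)
    shift = 0;
  den <<= shift;

  /* One conditional subtract per bit -- the job subc performs.  */
  for (; shift >= 0; shift--)
    {
      quot <<= 1;
      if (num >= den)
        {
          num -= den;
          quot |= 1;
        }
      den >>= 1;
    }

  if (rem)
    *rem = num;
  return quot;
}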
- mv .s2x A4, B1 -#ifndef _TMS320C6400 -[!b1] mvk .s2 1, B4 -#endif - lmbd .l2 1, B4, B1 -||[!b1] b .s2 B3 ; RETURN A -#ifdef _TMS320C6400 -||[!b1] mvk .d2 1, B4 -#endif - - mv .l1x B1, A7 -|| shl .s2 B4, B1, B4 - - cmpltu .l1x A4, B4, A1 -[!a1] sub .l1x A4, B4, A4 - shru .s2 B4, 1, B4 - -0: - cmpgt .l2 B1, 7, B0 -|| [b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; RETURN A may happen here (note: must happen before the next branch) -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -|| [b0] b .s1 0b -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 - ;; loop backwards branch happens here - - ret .s2 B3 -[b1] subc .l1x A4,B4,A4 -|| [b1] add .s2 -1, B1, B1 -[b1] subc .l1x A4,B4,A4 - - extu .s1 A4, A7, A4 - nop 2 -#endif - -#if defined L_strasgi_64plus && defined _TMS320C6400_PLUS - -.align 2 -.global __c6xabi_strasgi_64plus -.hidden __c6xabi_strasgi_64plus -.type __c6xabi_strasgi_64plus, STT_FUNC -__c6xabi_strasgi_64plus: - shru .s2x a6, 2, b31 -|| mv .s1 a4, a30 -|| mv .d2 b4, b30 - - add .s2 -4, b31, b31 - - sploopd 1 -|| mvc .s2 b31, ilc - ldw .d2t2 *b30++, b31 - nop 4 - mv .s1x b31,a31 - spkernel 6, 0 -|| stw .d1t1 a31, *a30++ - - ret .s2 b3 - nop 5 -#endif - -#ifdef L_strasgi -.global __c6xabi_strasgi -.type __c6xabi_strasgi, STT_FUNC -__c6xabi_strasgi: - ;; This is essentially memcpy, with alignment known to be at least - ;; 4, and the size a multiple of 4 greater than or equal to 28. - ldw .d2t1 *B4++, A0 -|| mvk .s2 16, B1 - ldw .d2t1 *B4++, A1 -|| mvk .s2 20, B2 -|| sub .d1 A6, 24, A6 - ldw .d2t1 *B4++, A5 - ldw .d2t1 *B4++, A7 -|| mv .l2x A6, B7 - ldw .d2t1 *B4++, A8 - ldw .d2t1 *B4++, A9 -|| mv .s2x A0, B5 -|| cmpltu .l2 B2, B7, B0 - -0: - stw .d1t2 B5, *A4++ -||[b0] ldw .d2t1 *B4++, A0 -|| mv .s2x A1, B5 -|| mv .l2 B7, B6 - -[b0] sub .d2 B6, 24, B7 -||[b0] b .s2 0b -|| cmpltu .l2 B1, B6, B0 - -[b0] ldw .d2t1 *B4++, A1 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A5, B5 -|| cmpltu .l2 12, B6, B0 - -[b0] ldw .d2t1 *B4++, A5 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A7, B5 -|| cmpltu .l2 8, B6, B0 - -[b0] ldw .d2t1 *B4++, A7 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A8, B5 -|| cmpltu .l2 4, B6, B0 - -[b0] ldw .d2t1 *B4++, A8 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A9, B5 -|| cmpltu .l2 0, B6, B0 - -[b0] ldw .d2t1 *B4++, A9 -|| stw .d1t2 B5, *A4++ -|| mv .s2x A0, B5 -|| cmpltu .l2 B2, B7, B0 - - ;; loop back branch happens here - - cmpltu .l2 B1, B6, B0 -|| ret .s2 b3 - -[b0] stw .d1t1 A1, *A4++ -|| cmpltu .l2 12, B6, B0 -[b0] stw .d1t1 A5, *A4++ -|| cmpltu .l2 8, B6, B0 -[b0] stw .d1t1 A7, *A4++ -|| cmpltu .l2 4, B6, B0 -[b0] stw .d1t1 A8, *A4++ -|| cmpltu .l2 0, B6, B0 -[b0] stw .d1t1 A9, *A4++ - - ;; return happens here - -#endif - -#ifdef _TMS320C6400_PLUS -#ifdef L_push_rts -.align 2 -.global __c6xabi_push_rts -.hidden __c6xabi_push_rts -.type __c6xabi_push_rts, STT_FUNC -__c6xabi_push_rts: - stw .d2t2 B14, *B15--[2] - stdw .d2t1 A15:A14, *B15-- -|| b .s2x A3 - stdw .d2t2 B13:B12, *B15-- - stdw .d2t1 A13:A12, *B15-- - stdw .d2t2 B11:B10, *B15-- - stdw .d2t1 A11:A10, *B15-- - stdw .d2t2 B3:B2, *B15-- -#endif - -#ifdef L_pop_rts -.align 2 -.global __c6xabi_pop_rts -.hidden __c6xabi_pop_rts -.type __c6xabi_pop_rts, STT_FUNC -__c6xabi_pop_rts: - lddw .d2t2 *++B15, B3:B2 - lddw .d2t1 *++B15, A11:A10 - lddw .d2t2 *++B15, B11:B10 - lddw .d2t1 *++B15, A13:A12 - lddw .d2t2 *++B15, B13:B12 - 
lddw .d2t1 *++B15, A15:A14 -|| b .s2 B3 - ldw .d2t2 *++B15[2], B14 - nop 4 -#endif - -#ifdef L_call_stub -.align 2 -.global __c6xabi_call_stub -.type __c6xabi_call_stub, STT_FUNC -__c6xabi_call_stub: - stw .d2t1 A2, *B15--[2] - stdw .d2t1 A7:A6, *B15-- -|| call .s2 B31 - stdw .d2t1 A1:A0, *B15-- - stdw .d2t2 B7:B6, *B15-- - stdw .d2t2 B5:B4, *B15-- - stdw .d2t2 B1:B0, *B15-- - stdw .d2t2 B3:B2, *B15-- -|| addkpc .s2 1f, B3, 0 -1: - lddw .d2t2 *++B15, B3:B2 - lddw .d2t2 *++B15, B1:B0 - lddw .d2t2 *++B15, B5:B4 - lddw .d2t2 *++B15, B7:B6 - lddw .d2t1 *++B15, A1:A0 - lddw .d2t1 *++B15, A7:A6 -|| b .s2 B3 - ldw .d2t1 *++B15[2], A2 - nop 4 -#endif - -#endif - diff --git a/gcc/config/c6x/t-c6x-elf b/gcc/config/c6x/t-c6x-elf index b3b4b850fe8..6bc2832026d 100644 --- a/gcc/config/c6x/t-c6x-elf +++ b/gcc/config/c6x/t-c6x-elf @@ -18,11 +18,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = c6x/lib1funcs.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _umodsi3 _modsi3 _udivmodsi4 _divmodsi4 -LIB1ASMFUNCS += _strasgi _strasgi_64plus _clzsi2 _clzdi2 _clz -LIB1ASMFUNCS += _push_rts _pop_rts _call_stub - LIB2FUNCS_EXCLUDE = _cmpdi2 _ucmpdi2 _gcc_bcmp _eprintf _clzsi _clzdi EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h diff --git a/gcc/config/fr30/lib1funcs.asm b/gcc/config/fr30/lib1funcs.asm deleted file mode 100644 index 7c63453123a..00000000000 --- a/gcc/config/fr30/lib1funcs.asm +++ /dev/null @@ -1,115 +0,0 @@ -/* libgcc routines for the FR30. - Copyright (C) 1998, 1999, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - - .macro FUNC_START name - .text - .globl __\name - .type __\name, @function -__\name: - .endm - - .macro FUNC_END name - .size __\name, . - __\name - .endm - - .macro DIV_BODY reg number - .if \number - DIV_BODY \reg, "\number - 1" - div1 \reg - .endif - .endm - -#ifdef L_udivsi3 -FUNC_START udivsi3 - ;; Perform an unsiged division of r4 / r5 and place the result in r4. - ;; Does not handle overflow yet... - mov r4, mdl - div0u r5 - DIV_BODY r5 32 - mov mdl, r4 - ret -FUNC_END udivsi3 -#endif /* L_udivsi3 */ - -#ifdef L_divsi3 -FUNC_START divsi3 - ;; Perform a siged division of r4 / r5 and place the result in r4. - ;; Does not handle overflow yet... - mov r4, mdl - div0s r5 - DIV_BODY r5 32 - div2 r5 - div3 - div4s - mov mdl, r4 - ret -FUNC_END divsi3 -#endif /* L_divsi3 */ - -#ifdef L_umodsi3 -FUNC_START umodsi3 - ;; Perform an unsiged division of r4 / r5 and places the remainder in r4. - ;; Does not handle overflow yet... 
- mov r4, mdl - div0u r5 - DIV_BODY r5 32 - mov mdh, r4 - ret -FUNC_END umodsi3 -#endif /* L_umodsi3 */ - -#ifdef L_modsi3 -FUNC_START modsi3 - ;; Perform a siged division of r4 / r5 and place the remainder in r4. - ;; Does not handle overflow yet... - mov r4, mdl - div0s r5 - DIV_BODY r5 32 - div2 r5 - div3 - div4s - mov mdh, r4 - ret -FUNC_END modsi3 -#endif /* L_modsi3 */ - -#ifdef L_negsi2 -FUNC_START negsi2 - ldi:8 #0, r0 - sub r4, r0 - mov r0, r4 - ret -FUNC_END negsi2 -#endif /* L_negsi2 */ - -#ifdef L_one_cmplsi2 -FUNC_START one_cmplsi2 - ldi:8 #0xff, r0 - extsb r0 - eor r0, r4 - ret -FUNC_END one_cmplsi2 -#endif /* L_one_cmplsi2 */ - - diff --git a/gcc/config/fr30/t-fr30 b/gcc/config/fr30/t-fr30 index 75009d4eb70..e37921681d0 100644 --- a/gcc/config/fr30/t-fr30 +++ b/gcc/config/fr30/t-fr30 @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = fr30/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 - # If any special flags are necessary when building libgcc2 put them here. # # TARGET_LIBGCC2_CFLAGS diff --git a/gcc/config/frv/lib1funcs.asm b/gcc/config/frv/lib1funcs.asm deleted file mode 100644 index d1ffcab6133..00000000000 --- a/gcc/config/frv/lib1funcs.asm +++ /dev/null @@ -1,269 +0,0 @@ -/* Library functions. - Copyright (C) 2000, 2003, 2008, 2009 Free Software Foundation, Inc. - Contributed by Red Hat, Inc. - - This file is part of GCC. - - GCC is free software ; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY ; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#include <frv-asm.h> - - -#ifdef L_cmpll -/* icc0 = __cmpll (long long a, long long b) */ - - .globl EXT(__cmpll) - .type EXT(__cmpll),@function - .text - .p2align 4 -EXT(__cmpll): - cmp gr8, gr10, icc0 - ckeq icc0, cc4 - P(ccmp) gr9, gr11, cc4, 1 - ret -.Lend: - .size EXT(__cmpll),.Lend-EXT(__cmpll) -#endif /* L_cmpll */ - -#ifdef L_cmpf -/* icc0 = __cmpf (float a, float b) */ -/* Note, because this function returns the result in ICC0, it means it can't - handle NaNs. 
*/ - - .globl EXT(__cmpf) - .type EXT(__cmpf),@function - .text - .p2align 4 -EXT(__cmpf): -#ifdef __FRV_HARD_FLOAT__ /* floating point instructions available */ - movgf gr8, fr0 - P(movgf) gr9, fr1 - setlos #1, gr8 - fcmps fr0, fr1, fcc0 - P(fcklt) fcc0, cc0 - fckeq fcc0, cc1 - csub gr0, gr8, gr8, cc0, 1 - cmov gr0, gr8, cc1, 1 - cmpi gr8, 0, icc0 - ret -#else /* no floating point instructions available */ - movsg lr, gr4 - addi sp, #-16, sp - sti gr4, @(sp, 8) - st fp, @(sp, gr0) - mov sp, fp - call EXT(__cmpsf2) - cmpi gr8, #0, icc0 - ldi @(sp, 8), gr4 - movgs gr4, lr - ld @(sp,gr0), fp - addi sp, #16, sp - ret -#endif -.Lend: - .size EXT(__cmpf),.Lend-EXT(__cmpf) -#endif - -#ifdef L_cmpd -/* icc0 = __cmpd (double a, double b) */ -/* Note, because this function returns the result in ICC0, it means it can't - handle NaNs. */ - - .globl EXT(__cmpd) - .type EXT(__cmpd),@function - .text - .p2align 4 -EXT(__cmpd): - movsg lr, gr4 - addi sp, #-16, sp - sti gr4, @(sp, 8) - st fp, @(sp, gr0) - mov sp, fp - call EXT(__cmpdf2) - cmpi gr8, #0, icc0 - ldi @(sp, 8), gr4 - movgs gr4, lr - ld @(sp,gr0), fp - addi sp, #16, sp - ret -.Lend: - .size EXT(__cmpd),.Lend-EXT(__cmpd) -#endif - -#ifdef L_addll -/* gr8,gr9 = __addll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__addll) - .type EXT(__addll),@function - .text - .p2align -EXT(__addll): - addcc gr9, gr11, gr9, icc0 - addx gr8, gr10, gr8, icc0 - ret -.Lend: - .size EXT(__addll),.Lend-EXT(__addll) -#endif - -#ifdef L_subll -/* gr8,gr9 = __subll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__subll) - .type EXT(__subll),@function - .text - .p2align 4 -EXT(__subll): - subcc gr9, gr11, gr9, icc0 - subx gr8, gr10, gr8, icc0 - ret -.Lend: - .size EXT(__subll),.Lend-EXT(__subll) -#endif - -#ifdef L_andll -/* gr8,gr9 = __andll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__andll) - .type EXT(__andll),@function - .text - .p2align 4 -EXT(__andll): - P(and) gr9, gr11, gr9 - P2(and) gr8, gr10, gr8 - ret -.Lend: - .size EXT(__andll),.Lend-EXT(__andll) -#endif - -#ifdef L_orll -/* gr8,gr9 = __orll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__orll) - .type EXT(__orll),@function - .text - .p2align 4 -EXT(__orll): - P(or) gr9, gr11, gr9 - P2(or) gr8, gr10, gr8 - ret -.Lend: - .size EXT(__orll),.Lend-EXT(__orll) -#endif - -#ifdef L_xorll -/* gr8,gr9 = __xorll (long long a, long long b) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. */ - - .globl EXT(__xorll) - .type EXT(__xorll),@function - .text - .p2align 4 -EXT(__xorll): - P(xor) gr9, gr11, gr9 - P2(xor) gr8, gr10, gr8 - ret -.Lend: - .size EXT(__xorll),.Lend-EXT(__xorll) -#endif - -#ifdef L_notll -/* gr8,gr9 = __notll (long long a) */ -/* Note, gcc will never call this function, but it is present in case an - ABI program calls it. 
*/ - - .globl EXT(__notll) - .type EXT(__notll),@function - .text - .p2align 4 -EXT(__notll): - P(not) gr9, gr9 - P2(not) gr8, gr8 - ret -.Lend: - .size EXT(__notll),.Lend-EXT(__notll) -#endif - -#ifdef L_cmov -/* (void) __cmov (char *dest, const char *src, size_t len) */ -/* - * void __cmov (char *dest, const char *src, size_t len) - * { - * size_t i; - * - * if (dest < src || dest > src+len) - * { - * for (i = 0; i < len; i++) - * dest[i] = src[i]; - * } - * else - * { - * while (len-- > 0) - * dest[len] = src[len]; - * } - * } - */ - - .globl EXT(__cmov) - .type EXT(__cmov),@function - .text - .p2align 4 -EXT(__cmov): - P(cmp) gr8, gr9, icc0 - add gr9, gr10, gr4 - P(cmp) gr8, gr4, icc1 - bc icc0, 0, .Lfwd - bls icc1, 0, .Lback -.Lfwd: - /* move bytes in a forward direction */ - P(setlos) #0, gr5 - cmp gr0, gr10, icc0 - P(subi) gr9, #1, gr9 - P2(subi) gr8, #1, gr8 - bnc icc0, 0, .Lret -.Lfloop: - /* forward byte move loop */ - addi gr5, #1, gr5 - P(ldsb) @(gr9, gr5), gr4 - cmp gr5, gr10, icc0 - P(stb) gr4, @(gr8, gr5) - bc icc0, 0, .Lfloop - ret -.Lbloop: - /* backward byte move loop body */ - ldsb @(gr9,gr10),gr4 - stb gr4,@(gr8,gr10) -.Lback: - P(cmpi) gr10, #0, icc0 - addi gr10, #-1, gr10 - bne icc0, 0, .Lbloop -.Lret: - ret -.Lend: - .size EXT(__cmov),.Lend-EXT(__cmov) -#endif diff --git a/gcc/config/frv/t-frv b/gcc/config/frv/t-frv index 03f3cd8cde1..e31f823c30a 100644 --- a/gcc/config/frv/t-frv +++ b/gcc/config/frv/t-frv @@ -16,15 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -# Name of assembly file containing libgcc1 functions. -# This entry must be present, but it can be empty if the target does -# not need any assembler functions to support its code generation. -# -# Alternatively if assembler functions *are* needed then define the -# entries below: -CROSS_LIBGCC1 = libgcc1-asm.a -LIB1ASMSRC = frv/lib1funcs.asm -LIB1ASMFUNCS = _cmpll _cmpf _cmpd _addll _subll _andll _orll _xorll _notll _cmov LIB2FUNCS_EXTRA = cmovh.c cmovw.c cmovd.c modi.c umodi.c uitof.c uitod.c ulltof.c ulltod.c # If any special flags are necessary when building libgcc2 put them here. diff --git a/gcc/config/h8300/fixunssfsi.c b/gcc/config/h8300/fixunssfsi.c index 2fe62b7a1a8..940d0c6dc6a 100644 --- a/gcc/config/h8300/fixunssfsi.c +++ b/gcc/config/h8300/fixunssfsi.c @@ -1,6 +1,6 @@ /* More subroutines needed by GCC output code on some machines. */ /* Compile this one with gcc. */ -/* Copyright (C) 1989, 1992, 2001, 2002, 2003, 2004, 2009 +/* Copyright (C) 1989, 1992, 2001, 2002, 2003, 2004, 2009, 2011 Free Software Foundation, Inc. This file is part of GCC. @@ -26,7 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* The libgcc2.c implementation gets confused by our type setup and creates a directly recursive call, so we do our own implementation. For - the H8/300, that's in lib1funcs.asm, for H8/300H and H8S, it's here. */ + the H8/300, that's in lib1funcs.S, for H8/300H and H8S, it's here. */ #ifndef __H8300__ long __fixunssfsi (float a); diff --git a/gcc/config/h8300/lib1funcs.asm b/gcc/config/h8300/lib1funcs.asm deleted file mode 100644 index 1b75b73269d..00000000000 --- a/gcc/config/h8300/lib1funcs.asm +++ /dev/null @@ -1,838 +0,0 @@ -;; libgcc routines for the Renesas H8/300 CPU. -;; Contributed by Steve Chamberlain <sac@cygnus.com> -;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com> - -/* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009 - Free Software Foundation, Inc. 
- -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -/* Assembler register definitions. */ - -#define A0 r0 -#define A0L r0l -#define A0H r0h - -#define A1 r1 -#define A1L r1l -#define A1H r1h - -#define A2 r2 -#define A2L r2l -#define A2H r2h - -#define A3 r3 -#define A3L r3l -#define A3H r3h - -#define S0 r4 -#define S0L r4l -#define S0H r4h - -#define S1 r5 -#define S1L r5l -#define S1H r5h - -#define S2 r6 -#define S2L r6l -#define S2H r6h - -#ifdef __H8300__ -#define PUSHP push -#define POPP pop - -#define A0P r0 -#define A1P r1 -#define A2P r2 -#define A3P r3 -#define S0P r4 -#define S1P r5 -#define S2P r6 -#endif - -#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__) -#define PUSHP push.l -#define POPP pop.l - -#define A0P er0 -#define A1P er1 -#define A2P er2 -#define A3P er3 -#define S0P er4 -#define S1P er5 -#define S2P er6 - -#define A0E e0 -#define A1E e1 -#define A2E e2 -#define A3E e3 -#endif - -#ifdef __H8300H__ -#ifdef __NORMAL_MODE__ - .h8300hn -#else - .h8300h -#endif -#endif - -#ifdef __H8300S__ -#ifdef __NORMAL_MODE__ - .h8300sn -#else - .h8300s -#endif -#endif -#ifdef __H8300SX__ -#ifdef __NORMAL_MODE__ - .h8300sxn -#else - .h8300sx -#endif -#endif - -#ifdef L_cmpsi2 -#ifdef __H8300__ - .section .text - .align 2 - .global ___cmpsi2 -___cmpsi2: - cmp.w A0,A2 - bne .L2 - cmp.w A1,A3 - bne .L4 - mov.w #1,A0 - rts -.L2: - bgt .L5 -.L3: - mov.w #2,A0 - rts -.L4: - bls .L3 -.L5: - sub.w A0,A0 - rts - .end -#endif -#endif /* L_cmpsi2 */ - -#ifdef L_ucmpsi2 -#ifdef __H8300__ - .section .text - .align 2 - .global ___ucmpsi2 -___ucmpsi2: - cmp.w A0,A2 - bne .L2 - cmp.w A1,A3 - bne .L4 - mov.w #1,A0 - rts -.L2: - bhi .L5 -.L3: - mov.w #2,A0 - rts -.L4: - bls .L3 -.L5: - sub.w A0,A0 - rts - .end -#endif -#endif /* L_ucmpsi2 */ - -#ifdef L_divhi3 - -;; HImode divides for the H8/300. -;; We bunch all of this into one object file since there are several -;; "supporting routines". - -; general purpose normalize routine -; -; divisor in A0 -; dividend in A1 -; turns both into +ve numbers, and leaves what the answer sign -; should be in A2L - -#ifdef __H8300__ - .section .text - .align 2 -divnorm: - or A0H,A0H ; is divisor > 0 - stc ccr,A2L - bge _lab1 - not A0H ; no - then make it +ve - not A0L - adds #1,A0 -_lab1: or A1H,A1H ; look at dividend - bge _lab2 - not A1H ; it is -ve, make it positive - not A1L - adds #1,A1 - xor #0x8,A2L; and toggle sign of result -_lab2: rts -;; Basically the same, except that the sign of the divisor determines -;; the sign. 
-modnorm: - or A0H,A0H ; is divisor > 0 - stc ccr,A2L - bge _lab7 - not A0H ; no - then make it +ve - not A0L - adds #1,A0 -_lab7: or A1H,A1H ; look at dividend - bge _lab8 - not A1H ; it is -ve, make it positive - not A1L - adds #1,A1 -_lab8: rts - -; A0=A0/A1 signed - - .global ___divhi3 -___divhi3: - bsr divnorm - bsr ___udivhi3 -negans: btst #3,A2L ; should answer be negative ? - beq _lab4 - not A0H ; yes, so make it so - not A0L - adds #1,A0 -_lab4: rts - -; A0=A0%A1 signed - - .global ___modhi3 -___modhi3: - bsr modnorm - bsr ___udivhi3 - mov A3,A0 - bra negans - -; A0=A0%A1 unsigned - - .global ___umodhi3 -___umodhi3: - bsr ___udivhi3 - mov A3,A0 - rts - -; A0=A0/A1 unsigned -; A3=A0%A1 unsigned -; A2H trashed -; D high 8 bits of denom -; d low 8 bits of denom -; N high 8 bits of num -; n low 8 bits of num -; M high 8 bits of mod -; m low 8 bits of mod -; Q high 8 bits of quot -; q low 8 bits of quot -; P preserve - -; The H8/300 only has a 16/8 bit divide, so we look at the incoming and -; see how to partition up the expression. - - .global ___udivhi3 -___udivhi3: - ; A0 A1 A2 A3 - ; Nn Dd P - sub.w A3,A3 ; Nn Dd xP 00 - or A1H,A1H - bne divlongway - or A0H,A0H - beq _lab6 - -; we know that D == 0 and N is != 0 - mov.b A0H,A3L ; Nn Dd xP 0N - divxu A1L,A3 ; MQ - mov.b A3L,A0H ; Q -; dealt with N, do n -_lab6: mov.b A0L,A3L ; n - divxu A1L,A3 ; mq - mov.b A3L,A0L ; Qq - mov.b A3H,A3L ; m - mov.b #0x0,A3H ; Qq 0m - rts - -; D != 0 - which means the denominator is -; loop around to get the result. - -divlongway: - mov.b A0H,A3L ; Nn Dd xP 0N - mov.b #0x0,A0H ; high byte of answer has to be zero - mov.b #0x8,A2H ; 8 -div8: add.b A0L,A0L ; n*=2 - rotxl A3L ; Make remainder bigger - rotxl A3H - sub.w A1,A3 ; Q-=N - bhs setbit ; set a bit ? - add.w A1,A3 ; no : too far , Q+=N - - dec A2H - bne div8 ; next bit - rts - -setbit: inc A0L ; do insert bit - dec A2H - bne div8 ; next bit - rts - -#endif /* __H8300__ */ -#endif /* L_divhi3 */ - -#ifdef L_divsi3 - -;; 4 byte integer divides for the H8/300. -;; -;; We have one routine which does all the work and lots of -;; little ones which prepare the args and massage the sign. -;; We bunch all of this into one object file since there are several -;; "supporting routines". - - .section .text - .align 2 - -; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest. -; This function is here to keep branch displacements small. - -#ifdef __H8300__ - -divnorm: - mov.b A0H,A0H ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge postive - - ; negate arg - not A0H - not A1H - not A0L - not A1L - - add #1,A1L - addx #0,A1H - addx #0,A0L - addx #0,A0H -postive: - mov.b A2H,A2H ; is the denominator -ve - bge postive2 - not A2L - not A2H - not A3L - not A3H - add.b #1,A3L - addx #0,A3H - addx #0,A2L - addx #0,A2H - xor.b #0x08,S2L ; toggle the result sign -postive2: - rts - -;; Basically the same, except that the sign of the divisor determines -;; the sign. 
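The divnorm/modnorm/negans sequences above amount to the usual trick: do the work with an unsigned divide, then fix up the sign — the quotient is negated when the operand signs differ, and the remainder keeps the sign of the dividend. A rough C equivalent, with __udivmodsi4 standing in for whatever unsigned divide/modulo helper a port provides (names here are illustrative, not the libgcc entry points):

#include <stdint.h>

extern uint32_t __udivmodsi4 (uint32_t n, uint32_t d, uint32_t *rem);

int32_t
divsi3_sketch (int32_t a, int32_t b)
{
  uint32_t ua = a < 0 ? -(uint32_t) a : (uint32_t) a;
  uint32_t ub = b < 0 ? -(uint32_t) b : (uint32_t) b;
  uint32_t r, q = __udivmodsi4 (ua, ub, &r);

  /* negans: the quotient is negative exactly when the signs differ.  */
  return (a < 0) != (b < 0) ? -(int32_t) q : (int32_t) q;
}

int32_t
modsi3_sketch (int32_t a, int32_t b)
{
  uint32_t ua = a < 0 ? -(uint32_t) a : (uint32_t) a;
  uint32_t ub = b < 0 ? -(uint32_t) b : (uint32_t) b;
  uint32_t r;

  __udivmodsi4 (ua, ub, &r);
  /* The remainder keeps the sign of the dividend.  */
  return a < 0 ? -(int32_t) r : (int32_t) r;
}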
-modnorm: - mov.b A0H,A0H ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge mpostive - - ; negate arg - not A0H - not A1H - not A0L - not A1L - - add #1,A1L - addx #0,A1H - addx #0,A0L - addx #0,A0H -mpostive: - mov.b A2H,A2H ; is the denominator -ve - bge mpostive2 - not A2L - not A2H - not A3L - not A3H - add.b #1,A3L - addx #0,A3H - addx #0,A2L - addx #0,A2H -mpostive2: - rts - -#else /* __H8300H__ */ - -divnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge postive - - neg.l A0P ; negate arg - -postive: - mov.l A1P,A1P ; is the denominator -ve - bge postive2 - - neg.l A1P ; negate arg - xor.b #0x08,S2L ; toggle the result sign - -postive2: - rts - -;; Basically the same, except that the sign of the divisor determines -;; the sign. -modnorm: - mov.l A0P,A0P ; is the numerator -ve - stc ccr,S2L ; keep the sign in bit 3 of S2L - bge mpostive - - neg.l A0P ; negate arg - -mpostive: - mov.l A1P,A1P ; is the denominator -ve - bge mpostive2 - - neg.l A1P ; negate arg - -mpostive2: - rts - -#endif - -; numerator in A0/A1 -; denominator in A2/A3 - .global ___modsi3 -___modsi3: -#ifdef __H8300__ - PUSHP S2P - PUSHP S0P - PUSHP S1P - bsr modnorm - bsr divmodsi4 - mov S0,A0 - mov S1,A1 - bra exitdiv -#else - PUSHP S2P - bsr modnorm - bsr ___udivsi3 - mov.l er3,er0 - bra exitdiv -#endif - - ;; H8/300H and H8S version of ___udivsi3 is defined later in - ;; the file. -#ifdef __H8300__ - .global ___udivsi3 -___udivsi3: - PUSHP S2P - PUSHP S0P - PUSHP S1P - bsr divmodsi4 - bra reti -#endif - - .global ___umodsi3 -___umodsi3: -#ifdef __H8300__ - PUSHP S2P - PUSHP S0P - PUSHP S1P - bsr divmodsi4 - mov S0,A0 - mov S1,A1 - bra reti -#else - bsr ___udivsi3 - mov.l er3,er0 - rts -#endif - - .global ___divsi3 -___divsi3: -#ifdef __H8300__ - PUSHP S2P - PUSHP S0P - PUSHP S1P - jsr divnorm - jsr divmodsi4 -#else - PUSHP S2P - jsr divnorm - bsr ___udivsi3 -#endif - - ; examine what the sign should be -exitdiv: - btst #3,S2L - beq reti - - ; should be -ve -#ifdef __H8300__ - not A0H - not A1H - not A0L - not A1L - - add #1,A1L - addx #0,A1H - addx #0,A0L - addx #0,A0H -#else /* __H8300H__ */ - neg.l A0P -#endif - -reti: -#ifdef __H8300__ - POPP S1P - POPP S0P -#endif - POPP S2P - rts - - ; takes A0/A1 numerator (A0P for H8/300H) - ; A2/A3 denominator (A1P for H8/300H) - ; returns A0/A1 quotient (A0P for H8/300H) - ; S0/S1 remainder (S0P for H8/300H) - ; trashes S2H - -#ifdef __H8300__ - -divmodsi4: - sub.w S0,S0 ; zero play area - mov.w S0,S1 - mov.b A2H,S2H - or A2L,S2H - or A3H,S2H - bne DenHighNonZero - mov.b A0H,A0H - bne NumByte0Zero - mov.b A0L,A0L - bne NumByte1Zero - mov.b A1H,A1H - bne NumByte2Zero - bra NumByte3Zero -NumByte0Zero: - mov.b A0H,S1L - divxu A3L,S1 - mov.b S1L,A0H -NumByte1Zero: - mov.b A0L,S1L - divxu A3L,S1 - mov.b S1L,A0L -NumByte2Zero: - mov.b A1H,S1L - divxu A3L,S1 - mov.b S1L,A1H -NumByte3Zero: - mov.b A1L,S1L - divxu A3L,S1 - mov.b S1L,A1L - - mov.b S1H,S1L - mov.b #0x0,S1H - rts - -; have to do the divide by shift and test -DenHighNonZero: - mov.b A0H,S1L - mov.b A0L,A0H - mov.b A1H,A0L - mov.b A1L,A1H - - mov.b #0,A1L - mov.b #24,S2H ; only do 24 iterations - -nextbit: - add.w A1,A1 ; double the answer guess - rotxl A0L - rotxl A0H - - rotxl S1L ; double remainder - rotxl S1H - rotxl S0L - rotxl S0H - sub.w A3,S1 ; does it all fit - subx A2L,S0L - subx A2H,S0H - bhs setone - - add.w A3,S1 ; no, restore mistake - addx A2L,S0L - addx A2H,S0H - - dec S2H - bne nextbit - rts - -setone: - inc A1L - dec S2H - bne 
nextbit - rts - -#else /* __H8300H__ */ - - ;; This function also computes the remainder and stores it in er3. - .global ___udivsi3 -___udivsi3: - mov.w A1E,A1E ; denominator top word 0? - bne DenHighNonZero - - ; do it the easy way, see page 107 in manual - mov.w A0E,A2 - extu.l A2P - divxu.w A1,A2P - mov.w A2E,A0E - divxu.w A1,A0P - mov.w A0E,A3 - mov.w A2,A0E - extu.l A3P - rts - - ; er0 = er0 / er1 - ; er3 = er0 % er1 - ; trashes er1 er2 - ; expects er1 >= 2^16 -DenHighNonZero: - mov.l er0,er3 - mov.l er1,er2 -#ifdef __H8300H__ -divmod_L21: - shlr.l er0 - shlr.l er2 ; make divisor < 2^16 - mov.w e2,e2 - bne divmod_L21 -#else - shlr.l #2,er2 ; make divisor < 2^16 - mov.w e2,e2 - beq divmod_L22A -divmod_L21: - shlr.l #2,er0 -divmod_L22: - shlr.l #2,er2 ; make divisor < 2^16 - mov.w e2,e2 - bne divmod_L21 -divmod_L22A: - rotxl.w r2 - bcs divmod_L23 - shlr.l er0 - bra divmod_L24 -divmod_L23: - rotxr.w r2 - shlr.l #2,er0 -divmod_L24: -#endif - ;; At this point, - ;; er0 contains shifted dividend - ;; er1 contains divisor - ;; er2 contains shifted divisor - ;; er3 contains dividend, later remainder - divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ) - extu.l er0 - beq divmod_L25 - subs #1,er0 ; er0 = AQ - 1 - mov.w e1,r2 - mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor - sub.w r2,e3 ; dividend - 65536 * er2 - mov.w r1,r2 - mulxu.w r0,er2 ; compute er3 = remainder (tentative) - sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor -divmod_L25: - cmp.l er1,er3 ; is divisor < remainder? - blo divmod_L26 - adds #1,er0 - sub.l er1,er3 ; correct the remainder -divmod_L26: - rts - -#endif -#endif /* L_divsi3 */ - -#ifdef L_mulhi3 - -;; HImode multiply. -; The H8/300 only has an 8*8->16 multiply. -; The answer is the same as: -; -; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256 -; (we can ignore A1.h * A0.h cause that will all off the top) -; A0 in -; A1 in -; A0 answer - -#ifdef __H8300__ - .section .text - .align 2 - .global ___mulhi3 -___mulhi3: - mov.b A1L,A2L ; A2l gets srcb.l - mulxu A0L,A2 ; A2 gets first sub product - - mov.b A0H,A3L ; prepare for - mulxu A1L,A3 ; second sub product - - add.b A3L,A2H ; sum first two terms - - mov.b A1H,A3L ; third sub product - mulxu A0L,A3 - - add.b A3L,A2H ; almost there - mov.w A2,A0 ; that is - rts - -#endif -#endif /* L_mulhi3 */ - -#ifdef L_mulsi3 - -;; SImode multiply. -;; -;; I think that shift and add may be sufficient for this. Using the -;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way -;; the inner loop uses maybe 20 cycles + overhead, but terminates -;; quickly on small args. 
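The SImode multiply that follows is the classic shift-and-add loop; here is a runnable C rendering of the pseudocode sketched in the comments below (the function name is illustrative, not the libgcc entry point):

#include <stdint.h>

uint32_t
mulsi3_sketch (uint32_t a, uint32_t b)
{
  uint32_t r = 0;

  while (a)
    {
      if (a & 1)
        r += b;          /* add the current shifted multiplicand */
      a >>= 1;           /* consume one bit of the multiplier */
      b <<= 1;           /* double the multiplicand */
    }
  return r;
}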
-;; -;; A0/A1 src_a -;; A2/A3 src_b -;; -;; while (a) -;; { -;; if (a & 1) -;; r += b; -;; a >>= 1; -;; b <<= 1; -;; } - - .section .text - .align 2 - -#ifdef __H8300__ - - .global ___mulsi3 -___mulsi3: - PUSHP S0P - PUSHP S1P - - sub.w S0,S0 - sub.w S1,S1 - - ; while (a) -_top: mov.w A0,A0 - bne _more - mov.w A1,A1 - beq _done -_more: ; if (a & 1) - bld #0,A1L - bcc _nobit - ; r += b - add.w A3,S1 - addx A2L,S0L - addx A2H,S0H -_nobit: - ; a >>= 1 - shlr A0H - rotxr A0L - rotxr A1H - rotxr A1L - - ; b <<= 1 - add.w A3,A3 - addx A2L,A2L - addx A2H,A2H - bra _top - -_done: - mov.w S0,A0 - mov.w S1,A1 - POPP S1P - POPP S0P - rts - -#else /* __H8300H__ */ - -; -; mulsi3 for H8/300H - based on Renesas SH implementation -; -; by Toshiyasu Morita -; -; Old code: -; -; 16b * 16b = 372 states (worst case) -; 32b * 32b = 724 states (worst case) -; -; New code: -; -; 16b * 16b = 48 states -; 16b * 32b = 72 states -; 32b * 32b = 92 states -; - - .global ___mulsi3 -___mulsi3: - mov.w r1,r2 ; ( 2 states) b * d - mulxu r0,er2 ; (22 states) - - mov.w e0,r3 ; ( 2 states) a * d - beq L_skip1 ; ( 4 states) - mulxu r1,er3 ; (22 states) - add.w r3,e2 ; ( 2 states) - -L_skip1: - mov.w e1,r3 ; ( 2 states) c * b - beq L_skip2 ; ( 4 states) - mulxu r0,er3 ; (22 states) - add.w r3,e2 ; ( 2 states) - -L_skip2: - mov.l er2,er0 ; ( 2 states) - rts ; (10 states) - -#endif -#endif /* L_mulsi3 */ -#ifdef L_fixunssfsi_asm -/* For the h8300 we use asm to save some bytes, to - allow more programs to fit into the tiny address - space. For the H8/300H and H8S, the C version is good enough. */ -#ifdef __H8300__ -/* We still treat NANs different than libgcc2.c, but then, the - behavior is undefined anyways. */ - .global ___fixunssfsi -___fixunssfsi: - cmp.b #0x4f,r0h - bge Large_num - jmp @___fixsfsi -Large_num: - bhi L_huge_num - xor.b #0x80,A0L - bmi L_shift8 -L_huge_num: - mov.w #65535,A0 - mov.w A0,A1 - rts -L_shift8: - mov.b A0L,A0H - mov.b A1H,A0L - mov.b A1L,A1H - mov.b #0,A1L - rts -#endif -#endif /* L_fixunssfsi_asm */ diff --git a/gcc/config/h8300/t-h8300 b/gcc/config/h8300/t-h8300 index 616849007b4..7083c673acf 100644 --- a/gcc/config/h8300/t-h8300 +++ b/gcc/config/h8300/t-h8300 @@ -17,10 +17,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = h8300/lib1funcs.asm -LIB1ASMFUNCS = _cmpsi2 _ucmpsi2 _divhi3 _divsi3 _mulhi3 _mulsi3 \ - _fixunssfsi_asm - LIB2FUNCS_EXTRA = \ $(srcdir)/config/h8300/clzhi2.c \ $(srcdir)/config/h8300/ctzhi2.c \ diff --git a/gcc/config/i386/cygwin.asm b/gcc/config/i386/cygwin.asm deleted file mode 100644 index 8f9c486850e..00000000000 --- a/gcc/config/i386/cygwin.asm +++ /dev/null @@ -1,188 +0,0 @@ -/* stuff needed for libgcc on win32. - * - * Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009, 2010 - * Free Software Foundation, Inc. - * Written By Steve Chamberlain - * - * This file is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) any - * later version. - * - * This file is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. - */ - -#include "auto-host.h" - -#ifdef HAVE_GAS_CFI_SECTIONS_DIRECTIVE - .cfi_sections .debug_frame -# define cfi_startproc() .cfi_startproc -# define cfi_endproc() .cfi_endproc -# define cfi_adjust_cfa_offset(X) .cfi_adjust_cfa_offset X -# define cfi_def_cfa_register(X) .cfi_def_cfa_register X -# define cfi_register(D,S) .cfi_register D, S -# ifdef _WIN64 -# define cfi_push(X) .cfi_adjust_cfa_offset 8; .cfi_rel_offset X, 0 -# define cfi_pop(X) .cfi_adjust_cfa_offset -8; .cfi_restore X -# else -# define cfi_push(X) .cfi_adjust_cfa_offset 4; .cfi_rel_offset X, 0 -# define cfi_pop(X) .cfi_adjust_cfa_offset -4; .cfi_restore X -# endif -#else -# define cfi_startproc() -# define cfi_endproc() -# define cfi_adjust_cfa_offset(X) -# define cfi_def_cfa_register(X) -# define cfi_register(D,S) -# define cfi_push(X) -# define cfi_pop(X) -#endif /* HAVE_GAS_CFI_SECTIONS_DIRECTIVE */ - -#ifdef L_chkstk -/* Function prologue calls __chkstk to probe the stack when allocating more - than CHECK_STACK_LIMIT bytes in one go. Touching the stack at 4K - increments is necessary to ensure that the guard pages used - by the OS virtual memory manger are allocated in correct sequence. */ - - .global ___chkstk - .global __alloca -#ifdef _WIN64 -/* __alloca is a normal function call, which uses %rcx as the argument. */ - cfi_startproc() -__alloca: - movq %rcx, %rax - /* FALLTHRU */ - -/* ___chkstk is a *special* function call, which uses %rax as the argument. - We avoid clobbering the 4 integer argument registers, %rcx, %rdx, - %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */ - .align 4 -___chkstk: - popq %r11 /* pop return address */ - cfi_adjust_cfa_offset(-8) /* indicate return address in r11 */ - cfi_register(%rip, %r11) - movq %rsp, %r10 - cmpq $0x1000, %rax /* > 4k ?*/ - jb 2f - -1: subq $0x1000, %r10 /* yes, move pointer down 4k*/ - orl $0x0, (%r10) /* probe there */ - subq $0x1000, %rax /* decrement count */ - cmpq $0x1000, %rax - ja 1b /* and do it again */ - -2: subq %rax, %r10 - movq %rsp, %rax /* hold CFA until return */ - cfi_def_cfa_register(%rax) - orl $0x0, (%r10) /* less than 4k, just peek here */ - movq %r10, %rsp /* decrement stack */ - - /* Push the return value back. Doing this instead of just - jumping to %r11 preserves the cached call-return stack - used by most modern processors. */ - pushq %r11 - ret - cfi_endproc() -#else - cfi_startproc() -___chkstk: -__alloca: - pushl %ecx /* save temp */ - cfi_push(%eax) - leal 8(%esp), %ecx /* point past return addr */ - cmpl $0x1000, %eax /* > 4k ?*/ - jb 2f - -1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ - orl $0x0, (%ecx) /* probe there */ - subl $0x1000, %eax /* decrement count */ - cmpl $0x1000, %eax - ja 1b /* and do it again */ - -2: subl %eax, %ecx - orl $0x0, (%ecx) /* less than 4k, just peek here */ - movl %esp, %eax /* save current stack pointer */ - cfi_def_cfa_register(%eax) - movl %ecx, %esp /* decrement stack */ - movl (%eax), %ecx /* recover saved temp */ - - /* Copy the return register. Doing this instead of just jumping to - the address preserves the cached call-return stack used by most - modern processors. 
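The probing scheme described at the top of L_chkstk can be modelled loosely in C: before the stack pointer is moved by SIZE bytes, every intervening 4 KiB page is touched so the OS guard page is faulted in order. The real routine must be assembly since it manipulates the stack pointer itself; this sketch only illustrates the 4 KiB stepping.

#include <stddef.h>

/* Illustrative model of the 4 KiB probe loop above.  */
static void
probe_stack_sketch (volatile char *sp, size_t size)
{
  while (size > 0x1000)
    {
      sp -= 0x1000;      /* move down one page */
      (void) *sp;        /* probe it */
      size -= 0x1000;
    }
  sp -= size;
  (void) *sp;            /* final probe at the new stack pointer */
}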
*/ - pushl 4(%eax) - ret - cfi_endproc() -#endif /* _WIN64 */ -#endif /* L_chkstk */ - -#ifdef L_chkstk_ms -/* ___chkstk_ms is a *special* function call, which uses %rax as the argument. - We avoid clobbering any registers. Unlike ___chkstk, it just probes the - stack and does no stack allocation. */ - .global ___chkstk_ms -#ifdef _WIN64 - cfi_startproc() -___chkstk_ms: - pushq %rcx /* save temps */ - cfi_push(%rcx) - pushq %rax - cfi_push(%rax) - cmpq $0x1000, %rax /* > 4k ?*/ - leaq 24(%rsp), %rcx /* point past return addr */ - jb 2f - -1: subq $0x1000, %rcx /* yes, move pointer down 4k */ - orq $0x0, (%rcx) /* probe there */ - subq $0x1000, %rax /* decrement count */ - cmpq $0x1000, %rax - ja 1b /* and do it again */ - -2: subq %rax, %rcx - orq $0x0, (%rcx) /* less than 4k, just peek here */ - - popq %rax - cfi_pop(%rax) - popq %rcx - cfi_pop(%rcx) - ret - cfi_endproc() -#else - cfi_startproc() -___chkstk_ms: - pushl %ecx /* save temp */ - cfi_push(%ecx) - pushl %eax - cfi_push(%eax) - cmpl $0x1000, %eax /* > 4k ?*/ - leal 12(%esp), %ecx /* point past return addr */ - jb 2f - -1: subl $0x1000, %ecx /* yes, move pointer down 4k*/ - orl $0x0, (%ecx) /* probe there */ - subl $0x1000, %eax /* decrement count */ - cmpl $0x1000, %eax - ja 1b /* and do it again */ - -2: subl %eax, %ecx - orl $0x0, (%ecx) /* less than 4k, just peek here */ - - popl %eax - cfi_pop(%eax) - popl %ecx - cfi_pop(%ecx) - ret - cfi_endproc() -#endif /* _WIN64 */ -#endif /* L_chkstk_ms */ diff --git a/gcc/config/i386/t-cygming b/gcc/config/i386/t-cygming index 242d7f27f65..3e7f7cdd036 100644 --- a/gcc/config/i386/t-cygming +++ b/gcc/config/i386/t-cygming @@ -17,9 +17,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = i386/cygwin.asm -LIB1ASMFUNCS = _chkstk _chkstk_ms - # cygwin and mingw always have a limits.h, but, depending upon how we are # doing the build, it may not be installed yet. LIMITS_H_TEST = true diff --git a/gcc/config/i386/t-interix b/gcc/config/i386/t-interix index e7b016f1e7a..09c9127f6af 100644 --- a/gcc/config/i386/t-interix +++ b/gcc/config/i386/t-interix @@ -1,6 +1,3 @@ -LIB1ASMSRC = i386/cygwin.asm -LIB1ASMFUNCS = _chkstk _chkstk_ms - winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \ $(TM_P_H) $(HASHTAB_H) $(GGC_H) diff --git a/gcc/config/ia64/lib1funcs.asm b/gcc/config/ia64/lib1funcs.asm deleted file mode 100644 index b7eaa6eca3c..00000000000 --- a/gcc/config/ia64/lib1funcs.asm +++ /dev/null @@ -1,795 +0,0 @@ -/* Copyright (C) 2000, 2001, 2003, 2005, 2009 Free Software Foundation, Inc. - Contributed by James E. Wilson <wilson@cygnus.com>. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. 
- - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifdef L__divxf3 -// Compute a 80-bit IEEE double-extended quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// farg0 holds the dividend. farg1 holds the divisor. -// -// __divtf3 is an alternate symbol name for backward compatibility. - - .text - .align 16 - .global __divxf3 - .proc __divxf3 -__divxf3: -#ifdef SHARED - .global __divtf3 -__divtf3: -#endif - cmp.eq p7, p0 = r0, r0 - frcpa.s0 f10, p6 = farg0, farg1 - ;; -(p6) cmp.ne p7, p0 = r0, r0 - .pred.rel.mutex p6, p7 -(p6) fnma.s1 f11 = farg1, f10, f1 -(p6) fma.s1 f12 = farg0, f10, f0 - ;; -(p6) fma.s1 f13 = f11, f11, f0 -(p6) fma.s1 f14 = f11, f11, f11 - ;; -(p6) fma.s1 f11 = f13, f13, f11 -(p6) fma.s1 f13 = f14, f10, f10 - ;; -(p6) fma.s1 f10 = f13, f11, f10 -(p6) fnma.s1 f11 = farg1, f12, farg0 - ;; -(p6) fma.s1 f11 = f11, f10, f12 -(p6) fnma.s1 f12 = farg1, f10, f1 - ;; -(p6) fma.s1 f10 = f12, f10, f10 -(p6) fnma.s1 f12 = farg1, f11, farg0 - ;; -(p6) fma.s0 fret0 = f12, f10, f11 -(p7) mov fret0 = f10 - br.ret.sptk rp - .endp __divxf3 -#endif - -#ifdef L__divdf3 -// Compute a 64-bit IEEE double quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// farg0 holds the dividend. farg1 holds the divisor. - - .text - .align 16 - .global __divdf3 - .proc __divdf3 -__divdf3: - cmp.eq p7, p0 = r0, r0 - frcpa.s0 f10, p6 = farg0, farg1 - ;; -(p6) cmp.ne p7, p0 = r0, r0 - .pred.rel.mutex p6, p7 -(p6) fmpy.s1 f11 = farg0, f10 -(p6) fnma.s1 f12 = farg1, f10, f1 - ;; -(p6) fma.s1 f11 = f12, f11, f11 -(p6) fmpy.s1 f13 = f12, f12 - ;; -(p6) fma.s1 f10 = f12, f10, f10 -(p6) fma.s1 f11 = f13, f11, f11 - ;; -(p6) fmpy.s1 f12 = f13, f13 -(p6) fma.s1 f10 = f13, f10, f10 - ;; -(p6) fma.d.s1 f11 = f12, f11, f11 -(p6) fma.s1 f10 = f12, f10, f10 - ;; -(p6) fnma.d.s1 f8 = farg1, f11, farg0 - ;; -(p6) fma.d fret0 = f8, f10, f11 -(p7) mov fret0 = f10 - br.ret.sptk rp - ;; - .endp __divdf3 -#endif - -#ifdef L__divsf3 -// Compute a 32-bit IEEE float quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// farg0 holds the dividend. farg1 holds the divisor. - - .text - .align 16 - .global __divsf3 - .proc __divsf3 -__divsf3: - cmp.eq p7, p0 = r0, r0 - frcpa.s0 f10, p6 = farg0, farg1 - ;; -(p6) cmp.ne p7, p0 = r0, r0 - .pred.rel.mutex p6, p7 -(p6) fmpy.s1 f8 = farg0, f10 -(p6) fnma.s1 f9 = farg1, f10, f1 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fmpy.s1 f9 = f9, f9 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fmpy.s1 f9 = f9, f9 - ;; -(p6) fma.d.s1 f10 = f9, f8, f8 - ;; -(p6) fnorm.s.s0 fret0 = f10 -(p7) mov fret0 = f10 - br.ret.sptk rp - ;; - .endp __divsf3 -#endif - -#ifdef L__divdi3 -// Compute a 64-bit integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __divdi3 - .proc __divdi3 -__divdi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f8 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, so that they won't be treated as unsigned. - fcvt.xf f8 = f8 - fcvt.xf f9 = f9 -(p7) break 1 - ;; - // Compute the reciprocal approximation. 
- frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fnma.s1 f11 = f9, f10, f1 -(p6) fmpy.s1 f12 = f8, f10 - ;; -(p6) fmpy.s1 f13 = f11, f11 -(p6) fma.s1 f12 = f11, f12, f12 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; -(p6) fma.s1 f10 = f12, f10, f11 - ;; - // Round quotient to an integer. - fcvt.fx.trunc.s1 f10 = f10 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __divdi3 -#endif - -#ifdef L__moddi3 -// Compute a 64-bit integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend (a). in1 holds the divisor (b). - - .text - .align 16 - .global __moddi3 - .proc __moddi3 -__moddi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f14 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, so that they won't be treated as unsigned. - fcvt.xf f8 = f14 - fcvt.xf f9 = f9 -(p7) break 1 - ;; - // Compute the reciprocal approximation. - frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f11 = f9, f10, f1 - ;; -(p6) fma.s1 f12 = f11, f12, f12 -(p6) fmpy.s1 f13 = f11, f11 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; - sub in1 = r0, in1 -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f10 = f12, f10, f11 - ;; - fcvt.fx.trunc.s1 f10 = f10 - ;; - // r = q * (-b) + a - xma.l f10 = f10, f9, f14 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __moddi3 -#endif - -#ifdef L__udivdi3 -// Compute a 64-bit unsigned integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __udivdi3 - .proc __udivdi3 -__udivdi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f8 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. - fcvt.xuf.s1 f8 = f8 - fcvt.xuf.s1 f9 = f9 -(p7) break 1 - ;; - // Compute the reciprocal approximation. - frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fnma.s1 f11 = f9, f10, f1 -(p6) fmpy.s1 f12 = f8, f10 - ;; -(p6) fmpy.s1 f13 = f11, f11 -(p6) fma.s1 f12 = f11, f12, f12 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; -(p6) fma.s1 f10 = f12, f10, f11 - ;; - // Round quotient to an unsigned integer. - fcvt.fxu.trunc.s1 f10 = f10 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __udivdi3 -#endif - -#ifdef L__umoddi3 -// Compute a 64-bit unsigned integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend (a). in1 holds the divisor (b). - - .text - .align 16 - .global __umoddi3 - .proc __umoddi3 -__umoddi3: - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f14 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - // Convert the inputs to FP, to avoid FP software assist faults. 
- fcvt.xuf.s1 f8 = f14 - fcvt.xuf.s1 f9 = f9 -(p7) break 1; - ;; - // Compute the reciprocal approximation. - frcpa.s1 f10, p6 = f8, f9 - ;; - // 3 Newton-Raphson iterations. -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f11 = f9, f10, f1 - ;; -(p6) fma.s1 f12 = f11, f12, f12 -(p6) fmpy.s1 f13 = f11, f11 - ;; -(p6) fma.s1 f10 = f11, f10, f10 -(p6) fma.s1 f11 = f13, f12, f12 - ;; - sub in1 = r0, in1 -(p6) fma.s1 f10 = f13, f10, f10 -(p6) fnma.s1 f12 = f9, f11, f8 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f10 = f12, f10, f11 - ;; - // Round quotient to an unsigned integer. - fcvt.fxu.trunc.s1 f10 = f10 - ;; - // r = q * (-b) + a - xma.l f10 = f10, f9, f14 - ;; - // Transfer result to GP registers. - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __umoddi3 -#endif - -#ifdef L__divsi3 -// Compute a 32-bit integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __divsi3 - .proc __divsi3 -__divsi3: - .regstk 2,0,0,0 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - sxt4 in0 = in0 - sxt4 in1 = in1 - ;; - setf.sig f8 = in0 - setf.sig f9 = in1 -(p7) break 1 - ;; - mov r2 = 0x0ffdd - fcvt.xf f8 = f8 - fcvt.xf f9 = f9 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 - ;; -(p6) fmpy.s1 f8 = f8, f10 -(p6) fnma.s1 f9 = f9, f10, f1 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fma.s1 f9 = f9, f9, f11 - ;; -(p6) fma.s1 f10 = f9, f8, f8 - ;; - fcvt.fx.trunc.s1 f10 = f10 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __divsi3 -#endif - -#ifdef L__modsi3 -// Compute a 32-bit integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __modsi3 - .proc __modsi3 -__modsi3: - .regstk 2,0,0,0 - mov r2 = 0x0ffdd - sxt4 in0 = in0 - sxt4 in1 = in1 - ;; - setf.sig f13 = r32 - setf.sig f9 = r33 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - sub in1 = r0, in1 - fcvt.xf f8 = f13 - fcvt.xf f9 = f9 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 -(p7) break 1 - ;; -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f10 = f9, f10, f1 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f12 = f10, f12, f12 -(p6) fma.s1 f10 = f10, f10, f11 - ;; -(p6) fma.s1 f10 = f10, f12, f12 - ;; - fcvt.fx.trunc.s1 f10 = f10 - ;; - xma.l f10 = f10, f9, f13 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __modsi3 -#endif - -#ifdef L__udivsi3 -// Compute a 32-bit unsigned integer quotient. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. - - .text - .align 16 - .global __udivsi3 - .proc __udivsi3 -__udivsi3: - .regstk 2,0,0,0 - mov r2 = 0x0ffdd - zxt4 in0 = in0 - zxt4 in1 = in1 - ;; - setf.sig f8 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - fcvt.xf f8 = f8 - fcvt.xf f9 = f9 -(p7) break 1 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 - ;; -(p6) fmpy.s1 f8 = f8, f10 -(p6) fnma.s1 f9 = f9, f10, f1 - ;; -(p6) fma.s1 f8 = f9, f8, f8 -(p6) fma.s1 f9 = f9, f9, f11 - ;; -(p6) fma.s1 f10 = f9, f8, f8 - ;; - fcvt.fxu.trunc.s1 f10 = f10 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __udivsi3 -#endif - -#ifdef L__umodsi3 -// Compute a 32-bit unsigned integer modulus. -// -// From the Intel IA-64 Optimization Guide, choose the minimum latency -// alternative. -// -// in0 holds the dividend. in1 holds the divisor. 
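All of these ia64 division routines follow the same numerical recipe: an frcpa reciprocal seed, a few Newton-Raphson refinement steps, truncation to integer, and — for the modulus variants — the correction r = q * (-b) + a done by xma.l. A loose C model is below; it uses double as a stand-in for the 82-bit register format and plain multiplies instead of fused multiply-adds, so it is not bit-exact and omits the rounding safeguards of the real code.

#include <stdint.h>

/* Rough model of the frcpa + Newton-Raphson scheme; illustration only.  */
static uint64_t
udivmoddi_sketch (uint64_t a, uint64_t b, uint64_t *rem)
{
  double fa = (double) a, fb = (double) b;
  double x = 1.0 / fb;               /* stands in for the frcpa seed */
  uint64_t q;
  int i;

  /* Each Newton-Raphson step roughly doubles the number of correct
     bits in the reciprocal approximation.  */
  for (i = 0; i < 3; i++)
    x = x + x * (1.0 - fb * x);

  q = (uint64_t) (fa * x);           /* truncate toward zero */

  /* r = a - q * b, the same correction xma.l performs above.  */
  if (rem)
    *rem = a - q * b;
  return q;
}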
- - .text - .align 16 - .global __umodsi3 - .proc __umodsi3 -__umodsi3: - .regstk 2,0,0,0 - mov r2 = 0x0ffdd - zxt4 in0 = in0 - zxt4 in1 = in1 - ;; - setf.sig f13 = in0 - setf.sig f9 = in1 - // Check divide by zero. - cmp.ne.unc p0,p7=0,in1 - ;; - sub in1 = r0, in1 - fcvt.xf f8 = f13 - fcvt.xf f9 = f9 - ;; - setf.exp f11 = r2 - frcpa.s1 f10, p6 = f8, f9 -(p7) break 1; - ;; -(p6) fmpy.s1 f12 = f8, f10 -(p6) fnma.s1 f10 = f9, f10, f1 - ;; - setf.sig f9 = in1 -(p6) fma.s1 f12 = f10, f12, f12 -(p6) fma.s1 f10 = f10, f10, f11 - ;; -(p6) fma.s1 f10 = f10, f12, f12 - ;; - fcvt.fxu.trunc.s1 f10 = f10 - ;; - xma.l f10 = f10, f9, f13 - ;; - getf.sig ret0 = f10 - br.ret.sptk rp - ;; - .endp __umodsi3 -#endif - -#ifdef L__save_stack_nonlocal -// Notes on save/restore stack nonlocal: We read ar.bsp but write -// ar.bspstore. This is because ar.bsp can be read at all times -// (independent of the RSE mode) but since it's read-only we need to -// restore the value via ar.bspstore. This is OK because -// ar.bsp==ar.bspstore after executing "flushrs". - -// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer) - - .text - .align 16 - .global __ia64_save_stack_nonlocal - .proc __ia64_save_stack_nonlocal -__ia64_save_stack_nonlocal: - { .mmf - alloc r18 = ar.pfs, 2, 0, 0, 0 - mov r19 = ar.rsc - ;; - } - { .mmi - flushrs - st8 [in0] = in1, 24 - and r19 = 0x1c, r19 - ;; - } - { .mmi - st8 [in0] = r18, -16 - mov ar.rsc = r19 - or r19 = 0x3, r19 - ;; - } - { .mmi - mov r16 = ar.bsp - mov r17 = ar.rnat - adds r2 = 8, in0 - ;; - } - { .mmi - st8 [in0] = r16 - st8 [r2] = r17 - } - { .mib - mov ar.rsc = r19 - br.ret.sptk.few rp - ;; - } - .endp __ia64_save_stack_nonlocal -#endif - -#ifdef L__nonlocal_goto -// void __ia64_nonlocal_goto(void *target_label, void *save_area, -// void *static_chain); - - .text - .align 16 - .global __ia64_nonlocal_goto - .proc __ia64_nonlocal_goto -__ia64_nonlocal_goto: - { .mmi - alloc r20 = ar.pfs, 3, 0, 0, 0 - ld8 r12 = [in1], 8 - mov.ret.sptk rp = in0, .L0 - ;; - } - { .mmf - ld8 r16 = [in1], 8 - mov r19 = ar.rsc - ;; - } - { .mmi - flushrs - ld8 r17 = [in1], 8 - and r19 = 0x1c, r19 - ;; - } - { .mmi - ld8 r18 = [in1] - mov ar.rsc = r19 - or r19 = 0x3, r19 - ;; - } - { .mmi - mov ar.bspstore = r16 - ;; - mov ar.rnat = r17 - ;; - } - { .mmi - loadrs - invala - mov r15 = in2 - ;; - } -.L0: { .mib - mov ar.rsc = r19 - mov ar.pfs = r18 - br.ret.sptk.few rp - ;; - } - .endp __ia64_nonlocal_goto -#endif - -#ifdef L__restore_stack_nonlocal -// This is mostly the same as nonlocal_goto above. -// ??? This has not been tested yet. - -// void __ia64_restore_stack_nonlocal(void *save_area) - - .text - .align 16 - .global __ia64_restore_stack_nonlocal - .proc __ia64_restore_stack_nonlocal -__ia64_restore_stack_nonlocal: - { .mmf - alloc r20 = ar.pfs, 4, 0, 0, 0 - ld8 r12 = [in0], 8 - ;; - } - { .mmb - ld8 r16=[in0], 8 - mov r19 = ar.rsc - ;; - } - { .mmi - flushrs - ld8 r17 = [in0], 8 - and r19 = 0x1c, r19 - ;; - } - { .mmf - ld8 r18 = [in0] - mov ar.rsc = r19 - ;; - } - { .mmi - mov ar.bspstore = r16 - ;; - mov ar.rnat = r17 - or r19 = 0x3, r19 - ;; - } - { .mmf - loadrs - invala - ;; - } -.L0: { .mib - mov ar.rsc = r19 - mov ar.pfs = r18 - br.ret.sptk.few rp - ;; - } - .endp __ia64_restore_stack_nonlocal -#endif - -#ifdef L__trampoline -// Implement the nested function trampoline. This is out of line -// so that we don't have to bother with flushing the icache, as -// well as making the on-stack trampoline smaller. 
-// -// The trampoline has the following form: -// -// +-------------------+ > -// TRAMP: | __ia64_trampoline | | -// +-------------------+ > fake function descriptor -// | TRAMP+16 | | -// +-------------------+ > -// | target descriptor | -// +-------------------+ -// | static link | -// +-------------------+ - - .text - .align 16 - .global __ia64_trampoline - .proc __ia64_trampoline -__ia64_trampoline: - { .mmi - ld8 r2 = [r1], 8 - ;; - ld8 r15 = [r1] - } - { .mmi - ld8 r3 = [r2], 8 - ;; - ld8 r1 = [r2] - mov b6 = r3 - } - { .bbb - br.sptk.many b6 - ;; - } - .endp __ia64_trampoline -#endif - -#ifdef SHARED -// Thunks for backward compatibility. -#ifdef L_fixtfdi - .text - .align 16 - .global __fixtfti - .proc __fixtfti -__fixtfti: - { .bbb - br.sptk.many __fixxfti - ;; - } - .endp __fixtfti -#endif -#ifdef L_fixunstfdi - .align 16 - .global __fixunstfti - .proc __fixunstfti -__fixunstfti: - { .bbb - br.sptk.many __fixunsxfti - ;; - } - .endp __fixunstfti -#endif -#ifdef L_floatditf - .align 16 - .global __floattitf - .proc __floattitf -__floattitf: - { .bbb - br.sptk.many __floattixf - ;; - } - .endp __floattitf -#endif -#endif diff --git a/gcc/config/ia64/t-hpux b/gcc/config/ia64/t-hpux index e1554861d18..23691f3856c 100644 --- a/gcc/config/ia64/t-hpux +++ b/gcc/config/ia64/t-hpux @@ -26,12 +26,6 @@ MULTILIB_OPTIONS = milp32/mlp64 MULTILIB_DIRNAMES = hpux32 hpux64 MULTILIB_MATCHES = -# On HP-UX we do not want _fixtfdi, _fixunstfdi, or _floatditf from -# LIB1ASMSRC. These functions map the 128 bit conversion function names -# to 80 bit conversions and were done for Linux backwards compatibility. - -LIB1ASMFUNCS := $(filter-out _fixtfdi _fixunstfdi _floatditf,$(LIB1ASMFUNCS)) - # Support routines for HP-UX 128 bit floats. LIB2FUNCS_EXTRA=quadlib.c $(srcdir)/config/floatunsitf.c @@ -39,12 +33,6 @@ LIB2FUNCS_EXTRA=quadlib.c $(srcdir)/config/floatunsitf.c quadlib.c: $(srcdir)/config/ia64/quadlib.c cat $(srcdir)/config/ia64/quadlib.c > quadlib.c -# We get an undefined main when building a cross compiler because our -# linkspec has "-u main" and we want that for linking but it makes -# LIBGCC1_TEST fail because it uses -nostdlib -nostartup. - -LIBGCC1_TEST = - # We do not want to include the EH stuff that linux uses, we want to use # the HP-UX libunwind library. diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64 index a143d43d56c..8a54d46b458 100644 --- a/gcc/config/ia64/t-ia64 +++ b/gcc/config/ia64/t-ia64 @@ -18,19 +18,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = ia64/lib1funcs.asm - -# We use different names for the DImode div/mod files so that they won't -# conflict with libgcc2.c files. We used to use __ia64 as a prefix, now -# we use __ as the prefix. Note that L_divdi3 in libgcc2.c actually defines -# a TImode divide function, so there is no actual overlap here between -# libgcc2.c and lib1funcs.asm. -LIB1ASMFUNCS = __divxf3 __divdf3 __divsf3 \ - __divdi3 __moddi3 __udivdi3 __umoddi3 \ - __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \ - __nonlocal_goto __restore_stack_nonlocal __trampoline \ - _fixtfdi _fixunstfdi _floatditf - # ??? Hack to get -P option used when compiling lib1funcs.asm, because Intel # assembler does not accept # line number as a comment. # ??? 
This breaks C++ pragma interface/implementation, which is used in the diff --git a/gcc/config/iq2000/t-iq2000 b/gcc/config/iq2000/t-iq2000 index 03d8c703f86..c634e58646e 100644 --- a/gcc/config/iq2000/t-iq2000 +++ b/gcc/config/iq2000/t-iq2000 @@ -16,11 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -# Suppress building libgcc1.a, since the MIPS compiler port is complete -# and does not need anything from libgcc1.a. -LIBGCC1 = -CROSS_LIBGCC1 = - LIB2FUNCS_EXTRA = $(srcdir)/config/udivmod.c $(srcdir)/config/divmod.c $(srcdir)/config/udivmodsi4.c $(srcdir)/config/iq2000/lib2extra-funcs.c # Enable the following if multilibs are needed. diff --git a/gcc/config/m32c/m32c-lib1.S b/gcc/config/m32c/m32c-lib1.S deleted file mode 100644 index 9b657787187..00000000000 --- a/gcc/config/m32c/m32c-lib1.S +++ /dev/null @@ -1,231 +0,0 @@ -/* libgcc routines for R8C/M16C/M32C - Copyright (C) 2005, 2009, 2010 - Free Software Foundation, Inc. - Contributed by Red Hat. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3, or (at your - option) any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#if defined(__r8c_cpu__) || defined(__m16c_cpu__) -#define A16 -#define A(n,w) n -#define W w -#else -#define A24 -#define A(n,w) w -#define W l -#endif - - -#ifdef L__m32c_memregs - -/* Warning: these memory locations are used as a register bank. They - *must* end up consecutive in any final executable, so you may *not* - use the otherwise obvious ".comm" directive to allocate space for - them. */ - - .bss - .global mem0 -mem0: .space 1 - .global mem1 -mem1: .space 1 - .global mem2 -mem2: .space 1 - .global mem3 -mem3: .space 1 - .global mem4 -mem4: .space 1 - .global mem5 -mem5: .space 1 - .global mem6 -mem6: .space 1 - .global mem7 -mem7: .space 1 - .global mem8 -mem8: .space 1 - .global mem9 -mem9: .space 1 - .global mem10 -mem10: .space 1 - .global mem11 -mem11: .space 1 - .global mem12 -mem12: .space 1 - .global mem13 -mem13: .space 1 - .global mem14 -mem14: .space 1 - .global mem15 -mem15: .space 1 - -#endif - -#ifdef L__m32c_eh_return - .text - .global __m32c_eh_return -__m32c_eh_return: - - /* At this point, r0 has the stack adjustment, r1r3 has the - address to return to. The stack looks like this: - - old_ra - old_fp - <- unwound sp - ... - fb - through - r0 - <- sp - - What we need to do is restore all the registers, update the - stack, and return to the right place. - */ - - stc sp,a0 - - add.W A(#16,#24),a0 - /* a0 points to the current stack, just above the register - save areas */ - - mov.w a0,a1 - exts.w r0 - sub.W A(r0,r2r0),a1 - sub.W A(#3,#4),a1 - /* a1 points to the new stack. */ - - /* This is for the "rts" below. 
*/ - mov.w r1,[a1] -#ifdef A16 - mov.w r2,r1 - mov.b r1l,2[a1] -#else - mov.w r2,2[a1] -#endif - - /* This is for the "popc sp" below. */ - mov.W a1,[a0] - - popm r0,r1,r2,r3,a0,a1,sb,fb - popc sp - rts -#endif - -/* SImode arguments for SI foo(SI,SI) functions. */ -#ifdef A16 -#define SAL 5[fb] -#define SAH 7[fb] -#define SBL 9[fb] -#define SBH 11[fb] -#else -#define SAL 8[fb] -#define SAH 10[fb] -#define SBL 12[fb] -#define SBH 14[fb] -#endif - -#ifdef L__m32c_mulsi3 - .text - .global ___mulsi3 -___mulsi3: - enter #0 - push.w r2 - mov.w SAL,r0 - mulu.w SBL,r0 /* writes to r2r0 */ - mov.w r0,mem0 - mov.w r2,mem2 - mov.w SAL,r0 - mulu.w SBH,r0 /* writes to r2r0 */ - add.w r0,mem2 - mov.w SAH,r0 - mulu.w SBL,r0 /* writes to r2r0 */ - add.w r0,mem2 - pop.w r2 - exitd -#endif - -#ifdef L__m32c_cmpsi2 - .text - .global ___cmpsi2 -___cmpsi2: - enter #0 - cmp.w SBH,SAH - jgt cmpsi_gt - jlt cmpsi_lt - cmp.w SBL,SAL - jgt cmpsi_gt - jlt cmpsi_lt - mov.w #1,r0 - exitd -cmpsi_gt: - mov.w #2,r0 - exitd -cmpsi_lt: - mov.w #0,r0 - exitd -#endif - -#ifdef L__m32c_ucmpsi2 - .text - .global ___ucmpsi2 -___ucmpsi2: - enter #0 - cmp.w SBH,SAH - jgtu cmpsi_gt - jltu cmpsi_lt - cmp.w SBL,SAL - jgtu cmpsi_gt - jltu cmpsi_lt - mov.w #1,r0 - exitd -cmpsi_gt: - mov.w #2,r0 - exitd -cmpsi_lt: - mov.w #0,r0 - exitd -#endif - -#ifdef L__m32c_jsri16 - .text -#ifdef A16 - .global m32c_jsri16 -m32c_jsri16: - add.w #-1, sp - - /* Read the address (16 bits) and return address (24 bits) off - the stack. */ - mov.w 4[sp], r0 - mov.w 1[sp], r3 - mov.b 3[sp], a0 /* This zero-extends, so the high byte has - zero in it. */ - - /* Write the return address, then new address, to the stack. */ - mov.w a0, 1[sp] /* Just to get the zero in 2[sp]. */ - mov.w r0, 0[sp] - mov.w r3, 3[sp] - mov.b a0, 5[sp] - - /* This "returns" to the target address, leaving the pending - return address on the stack. */ - rts -#endif - -#endif diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c index 7040df69fcf..04f69050609 100644 --- a/gcc/config/m32c/m32c.c +++ b/gcc/config/m32c/m32c.c @@ -391,7 +391,7 @@ class_can_hold_mode (reg_class_t rclass, enum machine_mode mode) we allow the user to limit the number of memregs available, in order to try to persuade gcc to try harder to use real registers. - Memregs are provided by m32c-lib1.S. + Memregs are provided by lib1funcs.S. */ int ok_to_change_target_memregs = TRUE; diff --git a/gcc/config/m32c/t-m32c b/gcc/config/m32c/t-m32c index b11f34d674f..aad972a2575 100644 --- a/gcc/config/m32c/t-m32c +++ b/gcc/config/m32c/t-m32c @@ -19,16 +19,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = m32c/m32c-lib1.S - -LIB1ASMFUNCS = \ - __m32c_memregs \ - __m32c_eh_return \ - __m32c_mulsi3 \ - __m32c_cmpsi2 \ - __m32c_ucmpsi2 \ - __m32c_jsri16 - LIB2FUNCS_EXTRA = $(srcdir)/config/m32c/m32c-lib2.c $(srcdir)/config/m32c/m32c-lib2-trapv.c # target-specific files diff --git a/gcc/config/m32r/t-linux b/gcc/config/m32r/t-linux index 487c0198786..f3b89d21d0b 100644 --- a/gcc/config/m32r/t-linux +++ b/gcc/config/m32r/t-linux @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -# lib1funcs.asm is currently empty. -CROSS_LIBGCC1 = - # Turn off the SDA while compiling libgcc2. There are no headers for it # and we want maximal upward compatibility here. @@ -26,9 +23,3 @@ TARGET_LIBGCC2_CFLAGS = -G 0 -fPIC # Don't install "assert.h" in gcc. We use the one in glibc. INSTALL_ASSERT_H = - -# Do not build libgcc1. 
Let gcc generate those functions. The GNU/Linux -# C library can handle them. -LIBGCC1 = -CROSS_LIBGCC1 = -LIBGCC1_TEST = diff --git a/gcc/config/m68k/lb1sf68.asm b/gcc/config/m68k/lb1sf68.asm deleted file mode 100644 index 0339a092c4f..00000000000 --- a/gcc/config/m68k/lb1sf68.asm +++ /dev/null @@ -1,4116 +0,0 @@ -/* libgcc routines for 68000 w/o floating-point hardware. - Copyright (C) 1994, 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -/* Use this one for any 680x0; assumes no floating point hardware. - The trailing " '" appearing on some lines is for ANSI preprocessors. Yuk. - Some of this code comes from MINIX, via the folks at ericsson. - D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992 -*/ - -/* These are predefined by new versions of GNU cpp. */ - -#ifndef __USER_LABEL_PREFIX__ -#define __USER_LABEL_PREFIX__ _ -#endif - -#ifndef __REGISTER_PREFIX__ -#define __REGISTER_PREFIX__ -#endif - -#ifndef __IMMEDIATE_PREFIX__ -#define __IMMEDIATE_PREFIX__ # -#endif - -/* ANSI concatenation macros. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ - -#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) - -/* Note that X is a function. */ - -#ifdef __ELF__ -#define FUNC(x) .type SYM(x),function -#else -/* The .proc pseudo-op is accepted, but ignored, by GAS. We could just - define this to the empty string for non-ELF systems, but defining it - to .proc means that the information is available to the assembler if - the need arises. */ -#define FUNC(x) .proc -#endif - -/* Use the right prefix for registers. */ - -#define REG(x) CONCAT1 (__REGISTER_PREFIX__, x) - -/* Use the right prefix for immediate values. */ - -#define IMM(x) CONCAT1 (__IMMEDIATE_PREFIX__, x) - -#define d0 REG (d0) -#define d1 REG (d1) -#define d2 REG (d2) -#define d3 REG (d3) -#define d4 REG (d4) -#define d5 REG (d5) -#define d6 REG (d6) -#define d7 REG (d7) -#define a0 REG (a0) -#define a1 REG (a1) -#define a2 REG (a2) -#define a3 REG (a3) -#define a4 REG (a4) -#define a5 REG (a5) -#define a6 REG (a6) -#define fp REG (fp) -#define sp REG (sp) -#define pc REG (pc) - -/* Provide a few macros to allow for PIC code support. - * With PIC, data is stored A5 relative so we've got to take a bit of special - * care to ensure that all loads of global data is via A5. PIC also requires - * jumps and subroutine calls to be PC relative rather than absolute. We cheat - * a little on this and in the PIC case, we use short offset branches and - * hope that the final object code is within range (which it should be). 
- */ -#ifndef __PIC__ - - /* Non PIC (absolute/relocatable) versions */ - - .macro PICCALL addr - jbsr \addr - .endm - - .macro PICJUMP addr - jmp \addr - .endm - - .macro PICLEA sym, reg - lea \sym, \reg - .endm - - .macro PICPEA sym, areg - pea \sym - .endm - -#else /* __PIC__ */ - -# if defined (__uClinux__) - - /* Versions for uClinux */ - -# if defined(__ID_SHARED_LIBRARY__) - - /* -mid-shared-library versions */ - - .macro PICLEA sym, reg - movel a5@(_current_shared_library_a5_offset_), \reg - movel \sym@GOT(\reg), \reg - .endm - - .macro PICPEA sym, areg - movel a5@(_current_shared_library_a5_offset_), \areg - movel \sym@GOT(\areg), sp@- - .endm - - .macro PICCALL addr - PICLEA \addr,a0 - jsr a0@ - .endm - - .macro PICJUMP addr - PICLEA \addr,a0 - jmp a0@ - .endm - -# else /* !__ID_SHARED_LIBRARY__ */ - - /* Versions for -msep-data */ - - .macro PICLEA sym, reg - movel \sym@GOT(a5), \reg - .endm - - .macro PICPEA sym, areg - movel \sym@GOT(a5), sp@- - .endm - - .macro PICCALL addr -#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__) - lea \addr-.-8,a0 - jsr pc@(a0) -#else - jbsr \addr -#endif - .endm - - .macro PICJUMP addr - /* ISA C has no bra.l instruction, and since this assembly file - gets assembled into multiple object files, we avoid the - bra instruction entirely. */ -#if defined (__mcoldfire__) && !defined (__mcfisab__) - lea \addr-.-8,a0 - jmp pc@(a0) -#else - bra \addr -#endif - .endm - -# endif - -# else /* !__uClinux__ */ - - /* Versions for Linux */ - - .macro PICLEA sym, reg - movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \reg - lea (-6, pc, \reg), \reg - movel \sym@GOT(\reg), \reg - .endm - - .macro PICPEA sym, areg - movel #_GLOBAL_OFFSET_TABLE_@GOTPC, \areg - lea (-6, pc, \areg), \areg - movel \sym@GOT(\areg), sp@- - .endm - - .macro PICCALL addr -#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__) - lea \addr-.-8,a0 - jsr pc@(a0) -#else - jbsr \addr -#endif - .endm - - .macro PICJUMP addr - /* ISA C has no bra.l instruction, and since this assembly file - gets assembled into multiple object files, we avoid the - bra instruction entirely. */ -#if defined (__mcoldfire__) && !defined (__mcfisab__) - lea \addr-.-8,a0 - jmp pc@(a0) -#else - bra \addr -#endif - .endm - -# endif -#endif /* __PIC__ */ - - -#ifdef L_floatex - -| This is an attempt at a decent floating point (single, double and -| extended double) code for the GNU C compiler. It should be easy to -| adapt to other compilers (but beware of the local labels!). - -| Starting date: 21 October, 1990 - -| It is convenient to introduce the notation (s,e,f) for a floating point -| number, where s=sign, e=exponent, f=fraction. We will call a floating -| point number fpn to abbreviate, independently of the precision. -| Let MAX_EXP be in each case the maximum exponent (255 for floats, 1023 -| for doubles and 16383 for long doubles). We then have the following -| different cases: -| 1. Normalized fpns have 0 < e < MAX_EXP. They correspond to -| (-1)^s x 1.f x 2^(e-bias-1). -| 2. Denormalized fpns have e=0. They correspond to numbers of the form -| (-1)^s x 0.f x 2^(-bias). -| 3. +/-INFINITY have e=MAX_EXP, f=0. -| 4. Quiet NaN (Not a Number) have all bits set. -| 5. Signaling NaN (Not a Number) have s=0, e=MAX_EXP, f=1. 
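For reference, a minimal C sketch of the (s,e,f) classification enumerated above, using IEEE-754 single precision (MAX_EXP = 255); the helper name and the union-based bit access are illustrative and not taken from the removed file.

#include <stdint.h>
#include <stdio.h>

/* Classify an IEEE-754 single-precision value into the cases listed
   above (MAX_EXP is 255, the bias is 127).  */
static const char *classify (float x)
{
  union { float f; uint32_t u; } v = { x };   /* bit 31 is the sign s */
  uint32_t e = (v.u >> 23) & 0xff;            /* biased exponent e    */
  uint32_t f = v.u & 0x7fffff;                /* 23-bit fraction f    */

  if (e == 0)
    return f ? "denormalized: (-1)^s x 0.f x 2^-126" : "zero";
  if (e == 0xff)
    return f ? "NaN" : "infinity";
  return "normalized: (-1)^s x 1.f x 2^(e-127)";
}

int main (void)
{
  printf ("%s\n", classify (1.5f));
  printf ("%s\n", classify (1e-45f));         /* smallest denormal */
  return 0;
}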
- -|============================================================================= -| exceptions -|============================================================================= - -| This is the floating point condition code register (_fpCCR): -| -| struct { -| short _exception_bits; -| short _trap_enable_bits; -| short _sticky_bits; -| short _rounding_mode; -| short _format; -| short _last_operation; -| union { -| float sf; -| double df; -| } _operand1; -| union { -| float sf; -| double df; -| } _operand2; -| } _fpCCR; - - .data - .even - - .globl SYM (_fpCCR) - -SYM (_fpCCR): -__exception_bits: - .word 0 -__trap_enable_bits: - .word 0 -__sticky_bits: - .word 0 -__rounding_mode: - .word ROUND_TO_NEAREST -__format: - .word NIL -__last_operation: - .word NOOP -__operand1: - .long 0 - .long 0 -__operand2: - .long 0 - .long 0 - -| Offsets: -EBITS = __exception_bits - SYM (_fpCCR) -TRAPE = __trap_enable_bits - SYM (_fpCCR) -STICK = __sticky_bits - SYM (_fpCCR) -ROUND = __rounding_mode - SYM (_fpCCR) -FORMT = __format - SYM (_fpCCR) -LASTO = __last_operation - SYM (_fpCCR) -OPER1 = __operand1 - SYM (_fpCCR) -OPER2 = __operand2 - SYM (_fpCCR) - -| The following exception types are supported: -INEXACT_RESULT = 0x0001 -UNDERFLOW = 0x0002 -OVERFLOW = 0x0004 -DIVIDE_BY_ZERO = 0x0008 -INVALID_OPERATION = 0x0010 - -| The allowed rounding modes are: -UNKNOWN = -1 -ROUND_TO_NEAREST = 0 | round result to nearest representable value -ROUND_TO_ZERO = 1 | round result towards zero -ROUND_TO_PLUS = 2 | round result towards plus infinity -ROUND_TO_MINUS = 3 | round result towards minus infinity - -| The allowed values of format are: -NIL = 0 -SINGLE_FLOAT = 1 -DOUBLE_FLOAT = 2 -LONG_FLOAT = 3 - -| The allowed values for the last operation are: -NOOP = 0 -ADD = 1 -MULTIPLY = 2 -DIVIDE = 3 -NEGATE = 4 -COMPARE = 5 -EXTENDSFDF = 6 -TRUNCDFSF = 7 - -|============================================================================= -| __clear_sticky_bits -|============================================================================= - -| The sticky bits are normally not cleared (thus the name), whereas the -| exception type and exception value reflect the last computation. -| This routine is provided to clear them (you can also write to _fpCCR, -| since it is globally visible). - - .globl SYM (__clear_sticky_bit) - - .text - .even - -| void __clear_sticky_bits(void); -SYM (__clear_sticky_bit): - PICLEA SYM (_fpCCR),a0 -#ifndef __mcoldfire__ - movew IMM (0),a0@(STICK) -#else - clr.w a0@(STICK) -#endif - rts - -|============================================================================= -| $_exception_handler -|============================================================================= - - .globl $_exception_handler - - .text - .even - -| This is the common exit point if an exception occurs. -| NOTE: it is NOT callable from C! -| It expects the exception type in d7, the format (SINGLE_FLOAT, -| DOUBLE_FLOAT or LONG_FLOAT) in d6, and the last operation code in d5. -| It sets the corresponding exception and sticky bits, and the format. -| Depending on the format if fills the corresponding slots for the -| operands which produced the exception (all this information is provided -| so if you write your own exception handlers you have enough information -| to deal with the problem). -| Then checks to see if the corresponding exception is trap-enabled, -| in which case it pushes the address of _fpCCR and traps through -| trap FPTRAP (15 for the moment). 
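Assuming the _fpCCR layout shown in the comment above, the following C sketch approximates what $_exception_handler does with the flag words before deciding whether to trap; only the flag bookkeeping is modelled, and the struct and function names are illustrative.

#include <stdio.h>

struct fpccr
{
  short exception_bits;
  short trap_enable_bits;
  short sticky_bits;
  short rounding_mode;
  short format;
  short last_operation;
  union { float sf; double df; } operand1;
  union { float sf; double df; } operand2;
};

static struct fpccr fpCCR;          /* stands in for the real _fpCCR */

/* Record an exception of the given type/format/operation and report
   whether it is trap-enabled (the assembly then does "trap #15").  */
static int record_exception (short type, short fmt, short op)
{
  fpCCR.exception_bits = type;
  fpCCR.sticky_bits   |= type;      /* sticky bits only accumulate   */
  fpCCR.format         = fmt;
  fpCCR.last_operation = op;
  return (fpCCR.trap_enable_bits & type) != 0;
}

int main (void)
{
  fpCCR.trap_enable_bits = 0x0004;  /* enable OVERFLOW               */
  printf ("trap? %d\n", record_exception (0x0004, 2 /* DOUBLE_FLOAT */,
                                          1 /* ADD */));
  return 0;
}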
- -FPTRAP = 15 - -$_exception_handler: - PICLEA SYM (_fpCCR),a0 - movew d7,a0@(EBITS) | set __exception_bits -#ifndef __mcoldfire__ - orw d7,a0@(STICK) | and __sticky_bits -#else - movew a0@(STICK),d4 - orl d7,d4 - movew d4,a0@(STICK) -#endif - movew d6,a0@(FORMT) | and __format - movew d5,a0@(LASTO) | and __last_operation - -| Now put the operands in place: -#ifndef __mcoldfire__ - cmpw IMM (SINGLE_FLOAT),d6 -#else - cmpl IMM (SINGLE_FLOAT),d6 -#endif - beq 1f - movel a6@(8),a0@(OPER1) - movel a6@(12),a0@(OPER1+4) - movel a6@(16),a0@(OPER2) - movel a6@(20),a0@(OPER2+4) - bra 2f -1: movel a6@(8),a0@(OPER1) - movel a6@(12),a0@(OPER2) -2: -| And check whether the exception is trap-enabled: -#ifndef __mcoldfire__ - andw a0@(TRAPE),d7 | is exception trap-enabled? -#else - clrl d6 - movew a0@(TRAPE),d6 - andl d6,d7 -#endif - beq 1f | no, exit - PICPEA SYM (_fpCCR),a1 | yes, push address of _fpCCR - trap IMM (FPTRAP) | and trap -#ifndef __mcoldfire__ -1: moveml sp@+,d2-d7 | restore data registers -#else -1: moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts -#endif /* L_floatex */ - -#ifdef L_mulsi3 - .text - FUNC(__mulsi3) - .globl SYM (__mulsi3) -SYM (__mulsi3): - movew sp@(4), d0 /* x0 -> d0 */ - muluw sp@(10), d0 /* x0*y1 */ - movew sp@(6), d1 /* x1 -> d1 */ - muluw sp@(8), d1 /* x1*y0 */ -#ifndef __mcoldfire__ - addw d1, d0 -#else - addl d1, d0 -#endif - swap d0 - clrw d0 - movew sp@(6), d1 /* x1 -> d1 */ - muluw sp@(10), d1 /* x1*y1 */ - addl d1, d0 - - rts -#endif /* L_mulsi3 */ - -#ifdef L_udivsi3 - .text - FUNC(__udivsi3) - .globl SYM (__udivsi3) -SYM (__udivsi3): -#ifndef __mcoldfire__ - movel d2, sp@- - movel sp@(12), d1 /* d1 = divisor */ - movel sp@(8), d0 /* d0 = dividend */ - - cmpl IMM (0x10000), d1 /* divisor >= 2 ^ 16 ? */ - jcc L3 /* then try next algorithm */ - movel d0, d2 - clrw d2 - swap d2 - divu d1, d2 /* high quotient in lower word */ - movew d2, d0 /* save high quotient */ - swap d0 - movew sp@(10), d2 /* get low dividend + high rest */ - divu d1, d2 /* low quotient */ - movew d2, d0 - jra L6 - -L3: movel d1, d2 /* use d2 as divisor backup */ -L4: lsrl IMM (1), d1 /* shift divisor */ - lsrl IMM (1), d0 /* shift dividend */ - cmpl IMM (0x10000), d1 /* still divisor >= 2 ^ 16 ? */ - jcc L4 - divu d1, d0 /* now we have 16-bit divisor */ - andl IMM (0xffff), d0 /* mask out divisor, ignore remainder */ - -/* Multiply the 16-bit tentative quotient with the 32-bit divisor. Because of - the operand ranges, this might give a 33-bit product. If this product is - greater than the dividend, the tentative quotient was too large. */ - movel d2, d1 - mulu d0, d1 /* low part, 32 bits */ - swap d2 - mulu d0, d2 /* high part, at most 17 bits */ - swap d2 /* align high part with low part */ - tstw d2 /* high part 17 bits? */ - jne L5 /* if 17 bits, quotient was too large */ - addl d2, d1 /* add parts */ - jcs L5 /* if sum is 33 bits, quotient was too large */ - cmpl sp@(8), d1 /* compare the sum with the dividend */ - jls L6 /* if sum > dividend, quotient was too large */ -L5: subql IMM (1), d0 /* adjust quotient */ - -L6: movel sp@+, d2 - rts - -#else /* __mcoldfire__ */ - -/* ColdFire implementation of non-restoring division algorithm from - Hennessy & Patterson, Appendix A. 
*/ - link a6,IMM (-12) - moveml d2-d4,sp@ - movel a6@(8),d0 - movel a6@(12),d1 - clrl d2 | clear p - moveq IMM (31),d4 -L1: addl d0,d0 | shift reg pair (p,a) one bit left - addxl d2,d2 - movl d2,d3 | subtract b from p, store in tmp. - subl d1,d3 - jcs L2 | if no carry, - bset IMM (0),d0 | set the low order bit of a to 1, - movl d3,d2 | and store tmp in p. -L2: subql IMM (1),d4 - jcc L1 - moveml sp@,d2-d4 | restore data registers - unlk a6 | and return - rts -#endif /* __mcoldfire__ */ - -#endif /* L_udivsi3 */ - -#ifdef L_divsi3 - .text - FUNC(__divsi3) - .globl SYM (__divsi3) -SYM (__divsi3): - movel d2, sp@- - - moveq IMM (1), d2 /* sign of result stored in d2 (=1 or =-1) */ - movel sp@(12), d1 /* d1 = divisor */ - jpl L1 - negl d1 -#ifndef __mcoldfire__ - negb d2 /* change sign because divisor <0 */ -#else - negl d2 /* change sign because divisor <0 */ -#endif -L1: movel sp@(8), d0 /* d0 = dividend */ - jpl L2 - negl d0 -#ifndef __mcoldfire__ - negb d2 -#else - negl d2 -#endif - -L2: movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__udivsi3) /* divide abs(dividend) by abs(divisor) */ - addql IMM (8), sp - - tstb d2 - jpl L3 - negl d0 - -L3: movel sp@+, d2 - rts -#endif /* L_divsi3 */ - -#ifdef L_umodsi3 - .text - FUNC(__umodsi3) - .globl SYM (__umodsi3) -SYM (__umodsi3): - movel sp@(8), d1 /* d1 = divisor */ - movel sp@(4), d0 /* d0 = dividend */ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__udivsi3) - addql IMM (8), sp - movel sp@(8), d1 /* d1 = divisor */ -#ifndef __mcoldfire__ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */ - addql IMM (8), sp -#else - mulsl d1,d0 -#endif - movel sp@(4), d1 /* d1 = dividend */ - subl d0, d1 /* d1 = a - (a/b)*b */ - movel d1, d0 - rts -#endif /* L_umodsi3 */ - -#ifdef L_modsi3 - .text - FUNC(__modsi3) - .globl SYM (__modsi3) -SYM (__modsi3): - movel sp@(8), d1 /* d1 = divisor */ - movel sp@(4), d0 /* d0 = dividend */ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__divsi3) - addql IMM (8), sp - movel sp@(8), d1 /* d1 = divisor */ -#ifndef __mcoldfire__ - movel d1, sp@- - movel d0, sp@- - PICCALL SYM (__mulsi3) /* d0 = (a/b)*b */ - addql IMM (8), sp -#else - mulsl d1,d0 -#endif - movel sp@(4), d1 /* d1 = dividend */ - subl d0, d1 /* d1 = a - (a/b)*b */ - movel d1, d0 - rts -#endif /* L_modsi3 */ - - -#ifdef L_double - - .globl SYM (_fpCCR) - .globl $_exception_handler - -QUIET_NaN = 0xffffffff - -D_MAX_EXP = 0x07ff -D_BIAS = 1022 -DBL_MAX_EXP = D_MAX_EXP - D_BIAS -DBL_MIN_EXP = 1 - D_BIAS -DBL_MANT_DIG = 53 - -INEXACT_RESULT = 0x0001 -UNDERFLOW = 0x0002 -OVERFLOW = 0x0004 -DIVIDE_BY_ZERO = 0x0008 -INVALID_OPERATION = 0x0010 - -DOUBLE_FLOAT = 2 - -NOOP = 0 -ADD = 1 -MULTIPLY = 2 -DIVIDE = 3 -NEGATE = 4 -COMPARE = 5 -EXTENDSFDF = 6 -TRUNCDFSF = 7 - -UNKNOWN = -1 -ROUND_TO_NEAREST = 0 | round result to nearest representable value -ROUND_TO_ZERO = 1 | round result towards zero -ROUND_TO_PLUS = 2 | round result towards plus infinity -ROUND_TO_MINUS = 3 | round result towards minus infinity - -| Entry points: - - .globl SYM (__adddf3) - .globl SYM (__subdf3) - .globl SYM (__muldf3) - .globl SYM (__divdf3) - .globl SYM (__negdf2) - .globl SYM (__cmpdf2) - .globl SYM (__cmpdf2_internal) - .hidden SYM (__cmpdf2_internal) - - .text - .even - -| These are common routines to return and signal exceptions. 
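Looking back at the integer routines above: the ColdFire path of __udivsi3 forms the quotient one bit per iteration, and __umodsi3/__modsi3 then recover the remainder as a - (a/b)*b. A minimal C re-expression of that loop, with illustrative names not taken from the removed file, is:

#include <stdint.h>
#include <stdio.h>

/* One quotient bit per iteration: shift the (remainder, dividend) pair
   left and subtract the divisor whenever it fits, which is what the
   32-iteration ColdFire loop does with the (d2,d0) register pair.  */
static uint32_t udiv32 (uint32_t a, uint32_t b, uint32_t *rem)
{
  uint32_t p = 0;                    /* partial remainder ("p" above) */
  for (int i = 0; i < 32; i++)
    {
      p = (p << 1) | (a >> 31);      /* shift pair (p,a) one bit left */
      a <<= 1;
      if (p >= b)                    /* divisor fits: emit a 1 bit    */
        {
          p -= b;
          a |= 1;
        }
    }
  *rem = p;
  return a;                          /* a now holds the quotient      */
}

int main (void)
{
  uint32_t r, q = udiv32 (100000u, 7u, &r);
  /* __umodsi3 recovers the remainder as a - (a/b)*b.  */
  printf ("q=%u r=%u check=%u\n", q, r, 100000u - q * 7u);
  return 0;
}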
- -Ld$den: -| Return and signal a denormalized number - orl d7,d0 - movew IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$infty: -Ld$overflow: -| Return a properly signed INFINITY and set the exception flags - movel IMM (0x7ff00000),d0 - movel IMM (0),d1 - orl d7,d0 - movew IMM (INEXACT_RESULT+OVERFLOW),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$underflow: -| Return 0 and set the exception flags - movel IMM (0),d0 - movel d0,d1 - movew IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$inop: -| Return a quiet NaN and set the exception flags - movel IMM (QUIET_NaN),d0 - movel d0,d1 - movew IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -Ld$div$0: -| Return a properly signed INFINITY and set the exception flags - movel IMM (0x7ff00000),d0 - movel IMM (0),d1 - orl d7,d0 - movew IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -|============================================================================= -|============================================================================= -| double precision routines -|============================================================================= -|============================================================================= - -| A double precision floating point number (double) has the format: -| -| struct _double { -| unsigned int sign : 1; /* sign bit */ -| unsigned int exponent : 11; /* exponent, shifted by 126 */ -| unsigned int fraction : 52; /* fraction */ -| } double; -| -| Thus sizeof(double) = 8 (64 bits). -| -| All the routines are callable from C programs, and return the result -| in the register pair d0-d1. They also preserve all registers except -| d0-d1 and a0-a1. 
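A compact C sketch of the 64-bit layout described above, together with the base bit patterns the error stubs construct (0x7ff00000:00000000 for infinity before the sign is or'd in, all-ones for the quiet NaN); the function and variable names here are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Split a double into the sign/exponent/fraction fields described
   above; on m68k the 64 bits travel as the high word in d0 and the
   low word in d1.  */
static void unpack (double x, unsigned *s, unsigned *e, uint64_t *f)
{
  uint64_t u;
  memcpy (&u, &x, sizeof u);
  *s = (unsigned) (u >> 63);                  /* 1-bit sign      */
  *e = (unsigned) ((u >> 52) & 0x7ff);        /* 11-bit exponent */
  *f = u & 0xfffffffffffffULL;                /* 52-bit fraction */
}

int main (void)
{
  unsigned s, e;
  uint64_t f;

  unpack (-2.5, &s, &e, &f);
  printf ("sign=%u exp=0x%x frac=0x%llx\n", s, e, (unsigned long long) f);

  /* The patterns built by Ld$infty and Ld$inop, written as one word.  */
  uint64_t inf = 0x7ff0000000000000ULL;       /* d0=0x7ff00000, d1=0   */
  uint64_t nan = 0xffffffffffffffffULL;       /* QUIET_NaN, both words */
  double dinf, dnan;
  memcpy (&dinf, &inf, sizeof dinf);
  memcpy (&dnan, &nan, sizeof dnan);
  printf ("%g %g\n", dinf, dnan);
  return 0;
}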
- -|============================================================================= -| __subdf3 -|============================================================================= - -| double __subdf3(double, double); - FUNC(__subdf3) -SYM (__subdf3): - bchg IMM (31),sp@(12) | change sign of second operand - | and fall through, so we always add -|============================================================================= -| __adddf3 -|============================================================================= - -| double __adddf3(double, double); - FUNC(__adddf3) -SYM (__adddf3): -#ifndef __mcoldfire__ - link a6,IMM (0) | everything will be done in registers - moveml d2-d7,sp@- | save all data registers and a2 (but d0-d1) -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | - movel a6@(16),d2 | get second operand - movel a6@(20),d3 | - - movel d0,d7 | get d0's sign bit in d7 ' - addl d1,d1 | check and clear sign bit of a, and gain one - addxl d0,d0 | bit of extra precision - beq Ladddf$b | if zero return second operand - - movel d2,d6 | save sign in d6 - addl d3,d3 | get rid of sign bit and gain one bit of - addxl d2,d2 | extra precision - beq Ladddf$a | if zero return first operand - - andl IMM (0x80000000),d7 | isolate a's sign bit ' - swap d6 | and also b's sign bit ' -#ifndef __mcoldfire__ - andw IMM (0x8000),d6 | - orw d6,d7 | and combine them into d7, so that a's sign ' - | bit is in the high word and b's is in the ' - | low word, so d6 is free to be used -#else - andl IMM (0x8000),d6 - orl d6,d7 -#endif - movel d7,a0 | now save d7 into a0, so d7 is free to - | be used also - -| Get the exponents and check for denormalized and/or infinity. - - movel IMM (0x001fffff),d6 | mask for the fraction - movel IMM (0x00200000),d7 | mask to put hidden bit back - - movel d0,d4 | - andl d6,d0 | get fraction in d0 - notl d6 | make d6 into mask for the exponent - andl d6,d4 | get exponent in d4 - beq Ladddf$a$den | branch if a is denormalized - cmpl d6,d4 | check for INFINITY or NaN - beq Ladddf$nf | - orl d7,d0 | and put hidden bit back -Ladddf$1: - swap d4 | shift right exponent so that it starts -#ifndef __mcoldfire__ - lsrw IMM (5),d4 | in bit 0 and not bit 20 -#else - lsrl IMM (5),d4 | in bit 0 and not bit 20 -#endif -| Now we have a's exponent in d4 and fraction in d0-d1 ' - movel d2,d5 | save b to get exponent - andl d6,d5 | get exponent in d5 - beq Ladddf$b$den | branch if b is denormalized - cmpl d6,d5 | check for INFINITY or NaN - beq Ladddf$nf - notl d6 | make d6 into mask for the fraction again - andl d6,d2 | and get fraction in d2 - orl d7,d2 | and put hidden bit back -Ladddf$2: - swap d5 | shift right exponent so that it starts -#ifndef __mcoldfire__ - lsrw IMM (5),d5 | in bit 0 and not bit 20 -#else - lsrl IMM (5),d5 | in bit 0 and not bit 20 -#endif - -| Now we have b's exponent in d5 and fraction in d2-d3. ' - -| The situation now is as follows: the signs are combined in a0, the -| numbers are in d0-d1 (a) and d2-d3 (b), and the exponents in d4 (a) -| and d5 (b). To do the rounding correctly we need to keep all the -| bits until the end, so we need to use d0-d1-d2-d3 for the first number -| and d4-d5-d6-d7 for the second. To do this we store (temporarily) the -| exponents in a2-a3. 
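In C terms, the alignment step described above shifts the operand with the smaller exponent right while keeping every shifted-out bit around for rounding; a simplified sketch over a 128-bit hi:lo pair (names illustrative, not part of the removed file) is:

#include <stdint.h>
#include <stdio.h>

/* A mantissa widened to a hi:lo pair of 64-bit words so that no bits
   are lost while aligning; everything shifted out stays in the low
   word and feeds the rounding step later.  */
struct wide { uint64_t hi, lo; };

static void shift_right (struct wide *m, unsigned n)
{
  while (n--)
    {
      m->lo = (m->lo >> 1) | (m->hi << 63);
      m->hi >>= 1;
    }
}

/* Give both operands a common exponent before adding, as the code
   above does with the d0-d3 and d4-d7 register groups.  */
static int align_operands (struct wide *a, int ea, struct wide *b, int eb)
{
  if (ea >= eb)
    shift_right (b, (unsigned) (ea - eb));
  else
    shift_right (a, (unsigned) (eb - ea));
  return ea >= eb ? ea : eb;         /* the larger exponent survives */
}

int main (void)
{
  struct wide a = { 1ull << 52, 0 }, b = { 1ull << 52, 0 };
  int e = align_operands (&a, 5, &b, 3);      /* b is shifted right by 2 */
  printf ("e=%d b.hi=0x%llx\n", e, (unsigned long long) b.hi);
  return 0;
}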
- -#ifndef __mcoldfire__ - moveml a2-a3,sp@- | save the address registers -#else - movel a2,sp@- - movel a3,sp@- - movel a4,sp@- -#endif - - movel d4,a2 | save the exponents - movel d5,a3 | - - movel IMM (0),d7 | and move the numbers around - movel d7,d6 | - movel d3,d5 | - movel d2,d4 | - movel d7,d3 | - movel d7,d2 | - -| Here we shift the numbers until the exponents are the same, and put -| the largest exponent in a2. -#ifndef __mcoldfire__ - exg d4,a2 | get exponents back - exg d5,a3 | - cmpw d4,d5 | compare the exponents -#else - movel d4,a4 | get exponents back - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 - cmpl d4,d5 | compare the exponents -#endif - beq Ladddf$3 | if equal don't shift ' - bhi 9f | branch if second exponent is higher - -| Here we have a's exponent larger than b's, so we have to shift b. We do -| this by using as counter d2: -1: movew d4,d2 | move largest exponent to d2 -#ifndef __mcoldfire__ - subw d5,d2 | and subtract second exponent - exg d4,a2 | get back the longs we saved - exg d5,a3 | -#else - subl d5,d2 | and subtract second exponent - movel d4,a4 | get back the longs we saved - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 -#endif -| if difference is too large we don't shift (actually, we can just exit) ' -#ifndef __mcoldfire__ - cmpw IMM (DBL_MANT_DIG+2),d2 -#else - cmpl IMM (DBL_MANT_DIG+2),d2 -#endif - bge Ladddf$b$small -#ifndef __mcoldfire__ - cmpw IMM (32),d2 | if difference >= 32, shift by longs -#else - cmpl IMM (32),d2 | if difference >= 32, shift by longs -#endif - bge 5f -2: -#ifndef __mcoldfire__ - cmpw IMM (16),d2 | if difference >= 16, shift by words -#else - cmpl IMM (16),d2 | if difference >= 16, shift by words -#endif - bge 6f - bra 3f | enter dbra loop - -4: -#ifndef __mcoldfire__ - lsrl IMM (1),d4 - roxrl IMM (1),d5 - roxrl IMM (1),d6 - roxrl IMM (1),d7 -#else - lsrl IMM (1),d7 - btst IMM (0),d6 - beq 10f - bset IMM (31),d7 -10: lsrl IMM (1),d6 - btst IMM (0),d5 - beq 11f - bset IMM (31),d6 -11: lsrl IMM (1),d5 - btst IMM (0),d4 - beq 12f - bset IMM (31),d5 -12: lsrl IMM (1),d4 -#endif -3: -#ifndef __mcoldfire__ - dbra d2,4b -#else - subql IMM (1),d2 - bpl 4b -#endif - movel IMM (0),d2 - movel d2,d3 - bra Ladddf$4 -5: - movel d6,d7 - movel d5,d6 - movel d4,d5 - movel IMM (0),d4 -#ifndef __mcoldfire__ - subw IMM (32),d2 -#else - subl IMM (32),d2 -#endif - bra 2b -6: - movew d6,d7 - swap d7 - movew d5,d6 - swap d6 - movew d4,d5 - swap d5 - movew IMM (0),d4 - swap d4 -#ifndef __mcoldfire__ - subw IMM (16),d2 -#else - subl IMM (16),d2 -#endif - bra 3b - -9: -#ifndef __mcoldfire__ - exg d4,d5 - movew d4,d6 - subw d5,d6 | keep d5 (largest exponent) in d4 - exg d4,a2 - exg d5,a3 -#else - movel d5,d6 - movel d4,d5 - movel d6,d4 - subl d5,d6 - movel d4,a4 - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 -#endif -| if difference is too large we don't shift (actually, we can just exit) ' -#ifndef __mcoldfire__ - cmpw IMM (DBL_MANT_DIG+2),d6 -#else - cmpl IMM (DBL_MANT_DIG+2),d6 -#endif - bge Ladddf$a$small -#ifndef __mcoldfire__ - cmpw IMM (32),d6 | if difference >= 32, shift by longs -#else - cmpl IMM (32),d6 | if difference >= 32, shift by longs -#endif - bge 5f -2: -#ifndef __mcoldfire__ - cmpw IMM (16),d6 | if difference >= 16, shift by words -#else - cmpl IMM (16),d6 | if difference >= 16, shift by words -#endif - bge 6f - bra 3f | enter dbra loop - -4: -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 -#else - lsrl IMM (1),d3 - 
btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 -#endif -3: -#ifndef __mcoldfire__ - dbra d6,4b -#else - subql IMM (1),d6 - bpl 4b -#endif - movel IMM (0),d7 - movel d7,d6 - bra Ladddf$4 -5: - movel d2,d3 - movel d1,d2 - movel d0,d1 - movel IMM (0),d0 -#ifndef __mcoldfire__ - subw IMM (32),d6 -#else - subl IMM (32),d6 -#endif - bra 2b -6: - movew d2,d3 - swap d3 - movew d1,d2 - swap d2 - movew d0,d1 - swap d1 - movew IMM (0),d0 - swap d0 -#ifndef __mcoldfire__ - subw IMM (16),d6 -#else - subl IMM (16),d6 -#endif - bra 3b -Ladddf$3: -#ifndef __mcoldfire__ - exg d4,a2 - exg d5,a3 -#else - movel d4,a4 - movel a2,d4 - movel a4,a2 - movel d5,a4 - movel a3,d5 - movel a4,a3 -#endif -Ladddf$4: -| Now we have the numbers in d0--d3 and d4--d7, the exponent in a2, and -| the signs in a4. - -| Here we have to decide whether to add or subtract the numbers: -#ifndef __mcoldfire__ - exg d7,a0 | get the signs - exg d6,a3 | a3 is free to be used -#else - movel d7,a4 - movel a0,d7 - movel a4,a0 - movel d6,a4 - movel a3,d6 - movel a4,a3 -#endif - movel d7,d6 | - movew IMM (0),d7 | get a's sign in d7 ' - swap d6 | - movew IMM (0),d6 | and b's sign in d6 ' - eorl d7,d6 | compare the signs - bmi Lsubdf$0 | if the signs are different we have - | to subtract -#ifndef __mcoldfire__ - exg d7,a0 | else we add the numbers - exg d6,a3 | -#else - movel d7,a4 - movel a0,d7 - movel a4,a0 - movel d6,a4 - movel a3,d6 - movel a4,a3 -#endif - addl d7,d3 | - addxl d6,d2 | - addxl d5,d1 | - addxl d4,d0 | - - movel a2,d4 | return exponent to d4 - movel a0,d7 | - andl IMM (0x80000000),d7 | d7 now has the sign - -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider -| the case of denormalized numbers in the rounding routine itself). -| As in the addition (not in the subtraction!) we could have set -| one more bit we check this: - btst IMM (DBL_MANT_DIG+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - addw IMM (1),d4 -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: - lea pc@(Ladddf$5),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Ladddf$5: -| Put back the exponent and check for overflow -#ifndef __mcoldfire__ - cmpw IMM (0x7ff),d4 | is the exponent big? -#else - cmpl IMM (0x7ff),d4 | is the exponent big? -#endif - bge 1f - bclr IMM (DBL_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (4),d4 | put exponent back into position -#else - lsll IMM (4),d4 | put exponent back into position -#endif - swap d0 | -#ifndef __mcoldfire__ - orw d4,d0 | -#else - orl d4,d0 | -#endif - swap d0 | - bra Ladddf$ret -1: - moveq IMM (ADD),d5 - bra Ld$overflow - -Lsubdf$0: -| Here we do the subtraction. 
-#ifndef __mcoldfire__ - exg d7,a0 | put sign back in a0 - exg d6,a3 | -#else - movel d7,a4 - movel a0,d7 - movel a4,a0 - movel d6,a4 - movel a3,d6 - movel a4,a3 -#endif - subl d7,d3 | - subxl d6,d2 | - subxl d5,d1 | - subxl d4,d0 | - beq Ladddf$ret$1 | if zero just exit - bpl 1f | if positive skip the following - movel a0,d7 | - bchg IMM (31),d7 | change sign bit in d7 - movel d7,a0 | - negl d3 | - negxl d2 | - negxl d1 | and negate result - negxl d0 | -1: - movel a2,d4 | return exponent to d4 - movel a0,d7 - andl IMM (0x80000000),d7 | isolate sign bit -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 | -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider -| the case of denormalized numbers in the rounding routine itself). -| As in the addition (not in the subtraction!) we could have set -| one more bit we check this: - btst IMM (DBL_MANT_DIG+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - addw IMM (1),d4 -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: - lea pc@(Lsubdf$1),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lsubdf$1: -| Put back the exponent and sign (we don't have overflow). ' - bclr IMM (DBL_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (4),d4 | put exponent back into position -#else - lsll IMM (4),d4 | put exponent back into position -#endif - swap d0 | -#ifndef __mcoldfire__ - orw d4,d0 | -#else - orl d4,d0 | -#endif - swap d0 | - bra Ladddf$ret - -| If one of the numbers was too small (difference of exponents >= -| DBL_MANT_DIG+1) we return the other (and now we don't have to ' -| check for finiteness or zero). -Ladddf$a$small: -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - movel a6@(16),d0 - movel a6@(20),d1 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Ladddf$b$small: -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - movel a6@(8),d0 - movel a6@(12),d1 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Ladddf$a$den: - movel d7,d4 | d7 contains 0x00200000 - bra Ladddf$1 - -Ladddf$b$den: - movel d7,d5 | d7 contains 0x00200000 - notl d6 - bra Ladddf$2 - -Ladddf$b: -| Return b (if a is zero) - movel d2,d0 - movel d3,d1 - bne 1f | Check if b is -0 - cmpl IMM (0x80000000),d0 - bne 1f - andl IMM (0x80000000),d7 | Use the sign of a - clrl d0 - bra Ladddf$ret -Ladddf$a: - movel a6@(8),d0 - movel a6@(12),d1 -1: - moveq IMM (ADD),d5 -| Check for NaN and +/-INFINITY. 
- movel d0,d7 | - andl IMM (0x80000000),d7 | - bclr IMM (31),d0 | - cmpl IMM (0x7ff00000),d0 | - bge 2f | - movel d0,d0 | check for zero, since we don't ' - bne Ladddf$ret | want to return -0 by mistake - bclr IMM (31),d7 | - bra Ladddf$ret | -2: - andl IMM (0x000fffff),d0 | check for NaN (nonzero fraction) - orl d1,d0 | - bne Ld$inop | - bra Ld$infty | - -Ladddf$ret$1: -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 | restore regs and exit -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -Ladddf$ret: -| Normal exit. - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ - orl d7,d0 | put sign bit back -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -Ladddf$ret$den: -| Return a denormalized number. -#ifndef __mcoldfire__ - lsrl IMM (1),d0 | shift right once more - roxrl IMM (1),d1 | -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 -#endif - bra Ladddf$ret - -Ladddf$nf: - moveq IMM (ADD),d5 -| This could be faster but it is not worth the effort, since it is not -| executed very often. We sacrifice speed for clarity here. - movel a6@(8),d0 | get the numbers back (remember that we - movel a6@(12),d1 | did some processing already) - movel a6@(16),d2 | - movel a6@(20),d3 | - movel IMM (0x7ff00000),d4 | useful constant (INFINITY) - movel d0,d7 | save sign bits - movel d2,d6 | - bclr IMM (31),d0 | clear sign bits - bclr IMM (31),d2 | -| We know that one of them is either NaN of +/-INFINITY -| Check for NaN (if either one is NaN return NaN) - cmpl d4,d0 | check first a (d0) - bhi Ld$inop | if d0 > 0x7ff00000 or equal and - bne 2f - tstl d1 | d1 > 0, a is NaN - bne Ld$inop | -2: cmpl d4,d2 | check now b (d1) - bhi Ld$inop | - bne 3f - tstl d3 | - bne Ld$inop | -3: -| Now comes the check for +/-INFINITY. We know that both are (maybe not -| finite) numbers, but we have to check if both are infinite whether we -| are adding or subtracting them. - eorl d7,d6 | to check sign bits - bmi 1f - andl IMM (0x80000000),d7 | get (common) sign bit - bra Ld$infty -1: -| We know one (or both) are infinite, so we test for equality between the -| two numbers (if they are equal they have to be infinite both, so we -| return NaN). - cmpl d2,d0 | are both infinite? 
- bne 1f | if d0 <> d2 they are not equal - cmpl d3,d1 | if d0 == d2 test d3 and d1 - beq Ld$inop | if equal return NaN -1: - andl IMM (0x80000000),d7 | get a's sign bit ' - cmpl d4,d0 | test now for infinity - beq Ld$infty | if a is INFINITY return with this sign - bchg IMM (31),d7 | else we know b is INFINITY and has - bra Ld$infty | the opposite sign - -|============================================================================= -| __muldf3 -|============================================================================= - -| double __muldf3(double, double); - FUNC(__muldf3) -SYM (__muldf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0-d1 - movel a6@(12),d1 | - movel a6@(16),d2 | and b into d2-d3 - movel a6@(20),d3 | - movel d0,d7 | d7 will hold the sign of the product - eorl d2,d7 | - andl IMM (0x80000000),d7 | - movel d7,a0 | save sign bit into a0 - movel IMM (0x7ff00000),d7 | useful constant (+INFINITY) - movel d7,d6 | another (mask for fraction) - notl d6 | - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d4 | - orl d1,d4 | - beq Lmuldf$a$0 | branch if a is zero - movel d0,d4 | - bclr IMM (31),d2 | get rid of b's sign bit ' - movel d2,d5 | - orl d3,d5 | - beq Lmuldf$b$0 | branch if b is zero - movel d2,d5 | - cmpl d7,d0 | is a big? - bhi Lmuldf$inop | if a is NaN return NaN - beq Lmuldf$a$nf | we still have to check d1 and b ... - cmpl d7,d2 | now compare b with INFINITY - bhi Lmuldf$inop | is b NaN? - beq Lmuldf$b$nf | we still have to check d3 ... -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d4 and d5. - andl d7,d4 | isolate exponent in d4 - beq Lmuldf$a$den | if exponent zero, have denormalized - andl d6,d0 | isolate fraction - orl IMM (0x00100000),d0 | and put hidden bit back - swap d4 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (4),d4 | -#else - lsrl IMM (4),d4 | -#endif -Lmuldf$1: - andl d7,d5 | - beq Lmuldf$b$den | - andl d6,d2 | - orl IMM (0x00100000),d2 | and put hidden bit back - swap d5 | -#ifndef __mcoldfire__ - lsrw IMM (4),d5 | -#else - lsrl IMM (4),d5 | -#endif -Lmuldf$2: | -#ifndef __mcoldfire__ - addw d5,d4 | add exponents - subw IMM (D_BIAS+1),d4 | and subtract bias (plus one) -#else - addl d5,d4 | add exponents - subl IMM (D_BIAS+1),d4 | and subtract bias (plus one) -#endif - -| We are now ready to do the multiplication. The situation is as follows: -| both a and b have bit 52 ( bit 20 of d0 and d2) set (even if they were -| denormalized to start with!), which means that in the product bit 104 -| (which will correspond to bit 8 of the fourth long) is set. - -| Here we have to do the product. -| To do it we have to juggle the registers back and forth, as there are not -| enough to keep everything in them. So we use the address registers to keep -| some intermediate data. 
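The loop that follows is a classic shift-and-add multiply over multi-word operands; a rough C model, with 64-bit words standing in for the d-register pairs and with illustrative names, is:

#include <stdint.h>
#include <stdio.h>

/* Multiply two mantissas into a 128-bit hi:lo product by scanning the
   multiplier MSB-first: shift the running sum left and add the other
   operand whenever a 1 bit falls out, the same scheme as the dbf loop
   in __muldf3.  */
static void mul64x64 (uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
  uint64_t h = 0, l = 0;
  for (int i = 0; i < 64; i++)
    {
      h = (h << 1) | (l >> 63);      /* shift the accumulator left    */
      l <<= 1;
      if (b & (1ull << 63))          /* top bit of the multiplier set */
        {
          uint64_t t = l + a;
          h += (t < l);              /* propagate the carry into h    */
          l = t;
        }
      b <<= 1;
    }
  *hi = h;
  *lo = l;
}

int main (void)
{
  uint64_t hi, lo;
  /* Two 53-bit mantissas with the hidden bit (bit 52) set.  */
  mul64x64 (1ull << 52, 3ull << 51, &hi, &lo);
  printf ("hi=0x%llx lo=0x%llx\n",
          (unsigned long long) hi, (unsigned long long) lo);
  return 0;
}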
- -#ifndef __mcoldfire__ - moveml a2-a3,sp@- | save a2 and a3 for temporary use -#else - movel a2,sp@- - movel a3,sp@- - movel a4,sp@- -#endif - movel IMM (0),a2 | a2 is a null register - movel d4,a3 | and a3 will preserve the exponent - -| First, shift d2-d3 so bit 20 becomes bit 31: -#ifndef __mcoldfire__ - rorl IMM (5),d2 | rotate d2 5 places right - swap d2 | and swap it - rorl IMM (5),d3 | do the same thing with d3 - swap d3 | - movew d3,d6 | get the rightmost 11 bits of d3 - andw IMM (0x07ff),d6 | - orw d6,d2 | and put them into d2 - andw IMM (0xf800),d3 | clear those bits in d3 -#else - moveq IMM (11),d7 | left shift d2 11 bits - lsll d7,d2 - movel d3,d6 | get a copy of d3 - lsll d7,d3 | left shift d3 11 bits - andl IMM (0xffe00000),d6 | get the top 11 bits of d3 - moveq IMM (21),d7 | right shift them 21 bits - lsrl d7,d6 - orl d6,d2 | stick them at the end of d2 -#endif - - movel d2,d6 | move b into d6-d7 - movel d3,d7 | move a into d4-d5 - movel d0,d4 | and clear d0-d1-d2-d3 (to put result) - movel d1,d5 | - movel IMM (0),d3 | - movel d3,d2 | - movel d3,d1 | - movel d3,d0 | - -| We use a1 as counter: - movel IMM (DBL_MANT_DIG-1),a1 -#ifndef __mcoldfire__ - exg d7,a1 -#else - movel d7,a4 - movel a1,d7 - movel a4,a1 -#endif - -1: -#ifndef __mcoldfire__ - exg d7,a1 | put counter back in a1 -#else - movel d7,a4 - movel a1,d7 - movel a4,a1 -#endif - addl d3,d3 | shift sum once left - addxl d2,d2 | - addxl d1,d1 | - addxl d0,d0 | - addl d7,d7 | - addxl d6,d6 | - bcc 2f | if bit clear skip the following -#ifndef __mcoldfire__ - exg d7,a2 | -#else - movel d7,a4 - movel a2,d7 - movel a4,a2 -#endif - addl d5,d3 | else add a to the sum - addxl d4,d2 | - addxl d7,d1 | - addxl d7,d0 | -#ifndef __mcoldfire__ - exg d7,a2 | -#else - movel d7,a4 - movel a2,d7 - movel a4,a2 -#endif -2: -#ifndef __mcoldfire__ - exg d7,a1 | put counter in d7 - dbf d7,1b | decrement and branch -#else - movel d7,a4 - movel a1,d7 - movel a4,a1 - subql IMM (1),d7 - bpl 1b -#endif - - movel a3,d4 | restore exponent -#ifndef __mcoldfire__ - moveml sp@+,a2-a3 -#else - movel sp@+,a4 - movel sp@+,a3 - movel sp@+,a2 -#endif - -| Now we have the product in d0-d1-d2-d3, with bit 8 of d0 set. The -| first thing to do now is to normalize it so bit 8 becomes bit -| DBL_MANT_DIG-32 (to do the rounding); later we will shift right. - swap d0 - swap d1 - movew d1,d0 - swap d2 - movew d2,d1 - swap d3 - movew d3,d2 - movew IMM (0),d3 -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 -#else - moveq IMM (29),d6 - lsrl IMM (3),d3 - movel d2,d7 - lsll d6,d7 - orl d7,d3 - lsrl IMM (3),d2 - movel d1,d7 - lsll d6,d7 - orl d7,d2 - lsrl IMM (3),d1 - movel d0,d7 - lsll d6,d7 - orl d7,d1 - lsrl IMM (3),d0 -#endif - -| Now round, check for over- and underflow, and exit. 
- movel a0,d7 | get sign bit back into d7 - moveq IMM (MULTIPLY),d5 - - btst IMM (DBL_MANT_DIG+1-32),d0 - beq Lround$exit -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - addw IMM (1),d4 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif - bra Lround$exit - -Lmuldf$inop: - moveq IMM (MULTIPLY),d5 - bra Ld$inop - -Lmuldf$b$nf: - moveq IMM (MULTIPLY),d5 - movel a0,d7 | get sign bit back into d7 - tstl d3 | we know d2 == 0x7ff00000, so check d3 - bne Ld$inop | if d3 <> 0 b is NaN - bra Ld$overflow | else we have overflow (since a is finite) - -Lmuldf$a$nf: - moveq IMM (MULTIPLY),d5 - movel a0,d7 | get sign bit back into d7 - tstl d1 | we know d0 == 0x7ff00000, so check d1 - bne Ld$inop | if d1 <> 0 a is NaN - bra Ld$overflow | else signal overflow - -| If either number is zero return zero, unless the other is +/-INFINITY or -| NaN, in which case we return NaN. -Lmuldf$b$0: - moveq IMM (MULTIPLY),d5 -#ifndef __mcoldfire__ - exg d2,d0 | put b (==0) into d0-d1 - exg d3,d1 | and a (with sign bit cleared) into d2-d3 - movel a0,d0 | set result sign -#else - movel d0,d2 | put a into d2-d3 - movel d1,d3 - movel a0,d0 | put result zero into d0-d1 - movq IMM(0),d1 -#endif - bra 1f -Lmuldf$a$0: - movel a0,d0 | set result sign - movel a6@(16),d2 | put b into d2-d3 again - movel a6@(20),d3 | - bclr IMM (31),d2 | clear sign bit -1: cmpl IMM (0x7ff00000),d2 | check for non-finiteness - bge Ld$inop | in case NaN or +/-INFINITY return NaN - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -| If a number is denormalized we put an exponent of 1 but do not put the -| hidden bit back into the fraction; instead we shift left until bit 21 -| (the hidden bit) is set, adjusting the exponent accordingly. We do this -| to ensure that the product of the fractions is close to 1. 
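In C terms, that pre-normalization of a denormalized operand looks roughly like the sketch below, with bit 52 playing the role of the hidden bit in a 64-bit fraction; the names are illustrative, and zero fractions are excluded here because the callers branch to the zero cases earlier.

#include <stdint.h>
#include <stdio.h>

/* Bring a (nonzero) denormalized fraction into normalized form before
   multiplying or dividing: start from exponent 1 and shift left until
   the hidden bit -- bit 52 of a 64-bit fraction here -- is set,
   decrementing the exponent for every shift.  */
static uint64_t prenormalize (uint64_t frac, int *exp)
{
  *exp = 1;
  while (!(frac & (1ull << 52)))
    {
      frac <<= 1;
      --*exp;        /* may go below 1; the shared rounding/exit code
                        handles that later */
    }
  return frac;
}

int main (void)
{
  int e;
  uint64_t f = prenormalize (1, &e);          /* smallest denormal fraction */
  printf ("frac=0x%llx exp=%d\n", (unsigned long long) f, e);
  return 0;
}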
-Lmuldf$a$den: - movel IMM (1),d4 - andl d6,d0 -1: addl d1,d1 | shift a left until bit 20 is set - addxl d0,d0 | -#ifndef __mcoldfire__ - subw IMM (1),d4 | and adjust exponent -#else - subl IMM (1),d4 | and adjust exponent -#endif - btst IMM (20),d0 | - bne Lmuldf$1 | - bra 1b - -Lmuldf$b$den: - movel IMM (1),d5 - andl d6,d2 -1: addl d3,d3 | shift b left until bit 20 is set - addxl d2,d2 | -#ifndef __mcoldfire__ - subw IMM (1),d5 | and adjust exponent -#else - subql IMM (1),d5 | and adjust exponent -#endif - btst IMM (20),d2 | - bne Lmuldf$2 | - bra 1b - - -|============================================================================= -| __divdf3 -|============================================================================= - -| double __divdf3(double, double); - FUNC(__divdf3) -SYM (__divdf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0-d1 - movel a6@(12),d1 | - movel a6@(16),d2 | and b into d2-d3 - movel a6@(20),d3 | - movel d0,d7 | d7 will hold the sign of the result - eorl d2,d7 | - andl IMM (0x80000000),d7 - movel d7,a0 | save sign into a0 - movel IMM (0x7ff00000),d7 | useful constant (+INFINITY) - movel d7,d6 | another (mask for fraction) - notl d6 | - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d4 | - orl d1,d4 | - beq Ldivdf$a$0 | branch if a is zero - movel d0,d4 | - bclr IMM (31),d2 | get rid of b's sign bit ' - movel d2,d5 | - orl d3,d5 | - beq Ldivdf$b$0 | branch if b is zero - movel d2,d5 - cmpl d7,d0 | is a big? - bhi Ldivdf$inop | if a is NaN return NaN - beq Ldivdf$a$nf | if d0 == 0x7ff00000 we check d1 - cmpl d7,d2 | now compare b with INFINITY - bhi Ldivdf$inop | if b is NaN return NaN - beq Ldivdf$b$nf | if d2 == 0x7ff00000 we check d3 -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d4 and d5 and normalize the numbers to -| ensure that the ratio of the fractions is around 1. We do this by -| making sure that both numbers have bit #DBL_MANT_DIG-32-1 (hidden bit) -| set, even if they were denormalized to start with. -| Thus, the result will satisfy: 2 > result > 1/2. - andl d7,d4 | and isolate exponent in d4 - beq Ldivdf$a$den | if exponent is zero we have a denormalized - andl d6,d0 | and isolate fraction - orl IMM (0x00100000),d0 | and put hidden bit back - swap d4 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (4),d4 | -#else - lsrl IMM (4),d4 | -#endif -Ldivdf$1: | - andl d7,d5 | - beq Ldivdf$b$den | - andl d6,d2 | - orl IMM (0x00100000),d2 - swap d5 | -#ifndef __mcoldfire__ - lsrw IMM (4),d5 | -#else - lsrl IMM (4),d5 | -#endif -Ldivdf$2: | -#ifndef __mcoldfire__ - subw d5,d4 | subtract exponents - addw IMM (D_BIAS),d4 | and add bias -#else - subl d5,d4 | subtract exponents - addl IMM (D_BIAS),d4 | and add bias -#endif - -| We are now ready to do the division. We have prepared things in such a way -| that the ratio of the fractions will be less than 2 but greater than 1/2. -| At this point the registers in use are: -| d0-d1 hold a (first operand, bit DBL_MANT_DIG-32=0, bit -| DBL_MANT_DIG-1-32=1) -| d2-d3 hold b (second operand, bit DBL_MANT_DIG-32=1) -| d4 holds the difference of the exponents, corrected by the bias -| a0 holds the sign of the ratio - -| To do the rounding correctly we need to keep information about the -| nonsignificant bits. 
One way to do this would be to do the division -| using four registers; another is to use two registers (as originally -| I did), but use a sticky bit to preserve information about the -| fractional part. Note that we can keep that info in a1, which is not -| used. - movel IMM (0),d6 | d6-d7 will hold the result - movel d6,d7 | - movel IMM (0),a1 | and a1 will hold the sticky bit - - movel IMM (DBL_MANT_DIG-32+1),d5 - -1: cmpl d0,d2 | is a < b? - bhi 3f | if b > a skip the following - beq 4f | if d0==d2 check d1 and d3 -2: subl d3,d1 | - subxl d2,d0 | a <-- a - b - bset d5,d6 | set the corresponding bit in d6 -3: addl d1,d1 | shift a by 1 - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d5,1b | and branch back -#else - subql IMM (1), d5 - bpl 1b -#endif - bra 5f -4: cmpl d1,d3 | here d0==d2, so check d1 and d3 - bhi 3b | if d1 > d2 skip the subtraction - bra 2b | else go do it -5: -| Here we have to start setting the bits in the second long. - movel IMM (31),d5 | again d5 is counter - -1: cmpl d0,d2 | is a < b? - bhi 3f | if b > a skip the following - beq 4f | if d0==d2 check d1 and d3 -2: subl d3,d1 | - subxl d2,d0 | a <-- a - b - bset d5,d7 | set the corresponding bit in d7 -3: addl d1,d1 | shift a by 1 - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d5,1b | and branch back -#else - subql IMM (1), d5 - bpl 1b -#endif - bra 5f -4: cmpl d1,d3 | here d0==d2, so check d1 and d3 - bhi 3b | if d1 > d2 skip the subtraction - bra 2b | else go do it -5: -| Now go ahead checking until we hit a one, which we store in d2. - movel IMM (DBL_MANT_DIG),d5 -1: cmpl d2,d0 | is a < b? - bhi 4f | if b < a, exit - beq 3f | if d0==d2 check d1 and d3 -2: addl d1,d1 | shift a by 1 - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d5,1b | and branch back -#else - subql IMM (1), d5 - bpl 1b -#endif - movel IMM (0),d2 | here no sticky bit was found - movel d2,d3 - bra 5f -3: cmpl d1,d3 | here d0==d2, so check d1 and d3 - bhi 2b | if d1 > d2 go back -4: -| Here put the sticky bit in d2-d3 (in the position which actually corresponds -| to it; if you don't do this the algorithm loses in some cases). ' - movel IMM (0),d2 - movel d2,d3 -#ifndef __mcoldfire__ - subw IMM (DBL_MANT_DIG),d5 - addw IMM (63),d5 - cmpw IMM (31),d5 -#else - subl IMM (DBL_MANT_DIG),d5 - addl IMM (63),d5 - cmpl IMM (31),d5 -#endif - bhi 2f -1: bset d5,d3 - bra 5f -#ifndef __mcoldfire__ - subw IMM (32),d5 -#else - subl IMM (32),d5 -#endif -2: bset d5,d2 -5: -| Finally we are finished! Move the longs in the address registers to -| their final destination: - movel d6,d0 - movel d7,d1 - movel IMM (0),d3 - -| Here we have finished the division, with the result in d0-d1-d2-d3, with -| 2^21 <= d6 < 2^23. Thus bit 23 is not set, but bit 22 could be set. -| If it is not, then definitely bit 21 is set. Normalize so bit 22 is -| not set: - btst IMM (DBL_MANT_DIG-32+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - roxrl IMM (1),d2 - roxrl IMM (1),d3 - addw IMM (1),d4 -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: -| Now round, check for over- and underflow, and exit. - movel a0,d7 | restore sign bit to d7 - moveq IMM (DIVIDE),d5 - bra Lround$exit - -Ldivdf$inop: - moveq IMM (DIVIDE),d5 - bra Ld$inop - -Ldivdf$a$0: -| If a is zero check to see whether b is zero also. 
In that case return -| NaN; then check if b is NaN, and return NaN also in that case. Else -| return a properly signed zero. - moveq IMM (DIVIDE),d5 - bclr IMM (31),d2 | - movel d2,d4 | - orl d3,d4 | - beq Ld$inop | if b is also zero return NaN - cmpl IMM (0x7ff00000),d2 | check for NaN - bhi Ld$inop | - blt 1f | - tstl d3 | - bne Ld$inop | -1: movel a0,d0 | else return signed zero - moveq IMM(0),d1 | - PICLEA SYM (_fpCCR),a0 | clear exception flags - movew IMM (0),a0@ | -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | -#else - moveml sp@,d2-d7 | - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | - rts | - -Ldivdf$b$0: - moveq IMM (DIVIDE),d5 -| If we got here a is not zero. Check if a is NaN; in that case return NaN, -| else return +/-INFINITY. Remember that a is in d0 with the sign bit -| cleared already. - movel a0,d7 | put a's sign bit back in d7 ' - cmpl IMM (0x7ff00000),d0 | compare d0 with INFINITY - bhi Ld$inop | if larger it is NaN - tstl d1 | - bne Ld$inop | - bra Ld$div$0 | else signal DIVIDE_BY_ZERO - -Ldivdf$b$nf: - moveq IMM (DIVIDE),d5 -| If d2 == 0x7ff00000 we have to check d3. - tstl d3 | - bne Ld$inop | if d3 <> 0, b is NaN - bra Ld$underflow | else b is +/-INFINITY, so signal underflow - -Ldivdf$a$nf: - moveq IMM (DIVIDE),d5 -| If d0 == 0x7ff00000 we have to check d1. - tstl d1 | - bne Ld$inop | if d1 <> 0, a is NaN -| If a is INFINITY we have to check b - cmpl d7,d2 | compare b with INFINITY - bge Ld$inop | if b is NaN or INFINITY return NaN - tstl d3 | - bne Ld$inop | - bra Ld$overflow | else return overflow - -| If a number is denormalized we put an exponent of 1 but do not put the -| bit back into the fraction. -Ldivdf$a$den: - movel IMM (1),d4 - andl d6,d0 -1: addl d1,d1 | shift a left until bit 20 is set - addxl d0,d0 -#ifndef __mcoldfire__ - subw IMM (1),d4 | and adjust exponent -#else - subl IMM (1),d4 | and adjust exponent -#endif - btst IMM (DBL_MANT_DIG-32-1),d0 - bne Ldivdf$1 - bra 1b - -Ldivdf$b$den: - movel IMM (1),d5 - andl d6,d2 -1: addl d3,d3 | shift b left until bit 20 is set - addxl d2,d2 -#ifndef __mcoldfire__ - subw IMM (1),d5 | and adjust exponent -#else - subql IMM (1),d5 | and adjust exponent -#endif - btst IMM (DBL_MANT_DIG-32-1),d2 - bne Ldivdf$2 - bra 1b - -Lround$exit: -| This is a common exit point for __muldf3 and __divdf3. When they enter -| this point the sign of the result is in d7, the result in d0-d1, normalized -| so that 2^21 <= d0 < 2^22, and the exponent is in the lower byte of d4. - -| First check for underlow in the exponent: -#ifndef __mcoldfire__ - cmpw IMM (-DBL_MANT_DIG-1),d4 -#else - cmpl IMM (-DBL_MANT_DIG-1),d4 -#endif - blt Ld$underflow -| It could happen that the exponent is less than 1, in which case the -| number is denormalized. In this case we shift right and adjust the -| exponent until it becomes 1 or the fraction is zero (in the latter case -| we signal underflow and return zero). - movel d7,a0 | - movel IMM (0),d6 | use d6-d7 to collect bits flushed right - movel d6,d7 | use d6-d7 to collect bits flushed right -#ifndef __mcoldfire__ - cmpw IMM (1),d4 | if the exponent is less than 1 we -#else - cmpl IMM (1),d4 | if the exponent is less than 1 we -#endif - bge 2f | have to shift right (denormalize) -1: -#ifndef __mcoldfire__ - addw IMM (1),d4 | adjust the exponent - lsrl IMM (1),d0 | shift right once - roxrl IMM (1),d1 | - roxrl IMM (1),d2 | - roxrl IMM (1),d3 | - roxrl IMM (1),d6 | - roxrl IMM (1),d7 | - cmpw IMM (1),d4 | is the exponent 1 already? 
-#else - addl IMM (1),d4 | adjust the exponent - lsrl IMM (1),d7 - btst IMM (0),d6 - beq 13f - bset IMM (31),d7 -13: lsrl IMM (1),d6 - btst IMM (0),d3 - beq 14f - bset IMM (31),d6 -14: lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - btst IMM (0),d1 - beq 11f - bset IMM (31),d2 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 12f - bset IMM (31),d1 -12: lsrl IMM (1),d0 - cmpl IMM (1),d4 | is the exponent 1 already? -#endif - beq 2f | if not loop back - bra 1b | - bra Ld$underflow | safety check, shouldn't execute ' -2: orl d6,d2 | this is a trick so we don't lose ' - orl d7,d3 | the bits which were flushed right - movel a0,d7 | get back sign bit into d7 -| Now call the rounding routine (which takes care of denormalized numbers): - lea pc@(Lround$0),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lround$0: -| Here we have a correctly rounded result (either normalized or denormalized). - -| Here we should have either a normalized number or a denormalized one, and -| the exponent is necessarily larger or equal to 1 (so we don't have to ' -| check again for underflow!). We have to check for overflow or for a -| denormalized number (which also signals underflow). -| Check for overflow (i.e., exponent >= 0x7ff). -#ifndef __mcoldfire__ - cmpw IMM (0x07ff),d4 -#else - cmpl IMM (0x07ff),d4 -#endif - bge Ld$overflow -| Now check for a denormalized number (exponent==0): - movew d4,d4 - beq Ld$den -1: -| Put back the exponents and sign and return. -#ifndef __mcoldfire__ - lslw IMM (4),d4 | exponent back to fourth byte -#else - lsll IMM (4),d4 | exponent back to fourth byte -#endif - bclr IMM (DBL_MANT_DIG-32-1),d0 - swap d0 | and put back exponent -#ifndef __mcoldfire__ - orw d4,d0 | -#else - orl d4,d0 | -#endif - swap d0 | - orl d7,d0 | and sign also - - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -|============================================================================= -| __negdf2 -|============================================================================= - -| double __negdf2(double, double); - FUNC(__negdf2) -SYM (__negdf2): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (NEGATE),d5 - movel a6@(8),d0 | get number to negate in d0-d1 - movel a6@(12),d1 | - bchg IMM (31),d0 | negate - movel d0,d2 | make a positive copy (for the tests) - bclr IMM (31),d2 | - movel d2,d4 | check for zero - orl d1,d4 | - beq 2f | if zero (either sign) return +zero - cmpl IMM (0x7ff00000),d2 | compare to +INFINITY - blt 1f | if finite, return - bhi Ld$inop | if larger (fraction not zero) is NaN - tstl d1 | if d2 == 0x7ff00000 check d1 - bne Ld$inop | - movel d0,d7 | else get sign and return INFINITY - andl IMM (0x80000000),d7 - bra Ld$infty -1: PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 - rts -2: bclr IMM (31),d0 - bra 1b - -|============================================================================= -| __cmpdf2 -|============================================================================= - -GREATER = 1 -LESS = -1 -EQUAL = 0 - -| int __cmpdf2_internal(double, double, int); -SYM (__cmpdf2_internal): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- | save registers -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (COMPARE),d5 - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | - movel a6@(16),d2 | get second operand - movel a6@(20),d3 | -| First check if a and/or b are (+/-) zero and in that case clear -| the sign bit. - movel d0,d6 | copy signs into d6 (a) and d7(b) - bclr IMM (31),d0 | and clear signs in d0 and d2 - movel d2,d7 | - bclr IMM (31),d2 | - cmpl IMM (0x7ff00000),d0 | check for a == NaN - bhi Lcmpd$inop | if d0 > 0x7ff00000, a is NaN - beq Lcmpdf$a$nf | if equal can be INFINITY, so check d1 - movel d0,d4 | copy into d4 to test for zero - orl d1,d4 | - beq Lcmpdf$a$0 | -Lcmpdf$0: - cmpl IMM (0x7ff00000),d2 | check for b == NaN - bhi Lcmpd$inop | if d2 > 0x7ff00000, b is NaN - beq Lcmpdf$b$nf | if equal can be INFINITY, so check d3 - movel d2,d4 | - orl d3,d4 | - beq Lcmpdf$b$0 | -Lcmpdf$1: -| Check the signs - eorl d6,d7 - bpl 1f -| If the signs are not equal check if a >= 0 - tstl d6 - bpl Lcmpdf$a$gt$b | if (a >= 0 && b < 0) => a > b - bmi Lcmpdf$b$gt$a | if (a < 0 && b >= 0) => a < b -1: -| If the signs are equal check for < 0 - tstl d6 - bpl 1f -| If both are negative exchange them -#ifndef __mcoldfire__ - exg d0,d2 - exg d1,d3 -#else - movel d0,d7 - movel d2,d0 - movel d7,d2 - movel d1,d7 - movel d3,d1 - movel d7,d3 -#endif -1: -| Now that they are positive we just compare them as longs (does this also -| work for denormalized numbers?). - cmpl d0,d2 - bhi Lcmpdf$b$gt$a | |b| > |a| - bne Lcmpdf$a$gt$b | |b| < |a| -| If we got here d0 == d2, so we compare d1 and d3. - cmpl d1,d3 - bhi Lcmpdf$b$gt$a | |b| > |a| - bne Lcmpdf$a$gt$b | |b| < |a| -| If we got here a == b. - movel IMM (EQUAL),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -Lcmpdf$a$gt$b: - movel IMM (GREATER),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -Lcmpdf$b$gt$a: - movel IMM (LESS),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 - rts - -Lcmpdf$a$0: - bclr IMM (31),d6 - bra Lcmpdf$0 -Lcmpdf$b$0: - bclr IMM (31),d7 - bra Lcmpdf$1 - -Lcmpdf$a$nf: - tstl d1 - bne Ld$inop - bra Lcmpdf$0 - -Lcmpdf$b$nf: - tstl d3 - bne Ld$inop - bra Lcmpdf$1 - -Lcmpd$inop: - movl a6@(24),d0 - moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (DOUBLE_FLOAT),d6 - PICJUMP $_exception_handler - -| int __cmpdf2(double, double); - FUNC(__cmpdf2) -SYM (__cmpdf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts - -|============================================================================= -| rounding routines -|============================================================================= - -| The rounding routines expect the number to be normalized in registers -| d0-d1-d2-d3, with the exponent in register d4. They assume that the -| exponent is larger or equal to 1. They return a properly normalized number -| if possible, and a denormalized number otherwise. The exponent is returned -| in d4. - -Lround$to$nearest: -| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"): -| Here we assume that the exponent is not too small (this should be checked -| before entering the rounding routine), but the number could be denormalized. - -| Check for denormalized numbers: -1: btst IMM (DBL_MANT_DIG-32),d0 - bne 2f | if set the number is normalized -| Normalize shifting left until bit #DBL_MANT_DIG-32 is set or the exponent -| is one (remember that a denormalized number corresponds to an -| exponent of -D_BIAS+1). -#ifndef __mcoldfire__ - cmpw IMM (1),d4 | remember that the exponent is at least one -#else - cmpl IMM (1),d4 | remember that the exponent is at least one -#endif - beq 2f | an exponent of one means denormalized - addl d3,d3 | else shift and adjust the exponent - addxl d2,d2 | - addxl d1,d1 | - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d4,1b | -#else - subql IMM (1), d4 - bpl 1b -#endif -2: -| Now round: we do it as follows: after the shifting we can write the -| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2. -| If delta < 1, do nothing. If delta > 1, add 1 to f. -| If delta == 1, we make sure the rounded number will be even (odd?) -| (after shifting). - btst IMM (0),d1 | is delta < 1? - beq 2f | if so, do not do anything - orl d2,d3 | is delta == 1? - bne 1f | if so round to even - movel d1,d3 | - andl IMM (2),d3 | bit 1 is the last significant bit - movel IMM (0),d2 | - addl d3,d1 | - addxl d2,d0 | - bra 2f | -1: movel IMM (1),d3 | else add 1 - movel IMM (0),d2 | - addl d3,d1 | - addxl d2,d0 -| Shift right once (because we used bit #DBL_MANT_DIG-32!). -2: -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 -#endif - -| Now check again bit #DBL_MANT_DIG-32 (rounding could have produced a -| 'fraction overflow' ...). - btst IMM (DBL_MANT_DIG-32),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - addw IMM (1),d4 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - addl IMM (1),d4 -#endif -1: -| If bit #DBL_MANT_DIG-32-1 is clear we have a denormalized number, so we -| have to put the exponent to zero and return a denormalized number. 
- btst IMM (DBL_MANT_DIG-32-1),d0 - beq 1f - jmp a0@ -1: movel IMM (0),d4 - jmp a0@ - -Lround$to$zero: -Lround$to$plus: -Lround$to$minus: - jmp a0@ -#endif /* L_double */ - -#ifdef L_float - - .globl SYM (_fpCCR) - .globl $_exception_handler - -QUIET_NaN = 0xffffffff -SIGNL_NaN = 0x7f800001 -INFINITY = 0x7f800000 - -F_MAX_EXP = 0xff -F_BIAS = 126 -FLT_MAX_EXP = F_MAX_EXP - F_BIAS -FLT_MIN_EXP = 1 - F_BIAS -FLT_MANT_DIG = 24 - -INEXACT_RESULT = 0x0001 -UNDERFLOW = 0x0002 -OVERFLOW = 0x0004 -DIVIDE_BY_ZERO = 0x0008 -INVALID_OPERATION = 0x0010 - -SINGLE_FLOAT = 1 - -NOOP = 0 -ADD = 1 -MULTIPLY = 2 -DIVIDE = 3 -NEGATE = 4 -COMPARE = 5 -EXTENDSFDF = 6 -TRUNCDFSF = 7 - -UNKNOWN = -1 -ROUND_TO_NEAREST = 0 | round result to nearest representable value -ROUND_TO_ZERO = 1 | round result towards zero -ROUND_TO_PLUS = 2 | round result towards plus infinity -ROUND_TO_MINUS = 3 | round result towards minus infinity - -| Entry points: - - .globl SYM (__addsf3) - .globl SYM (__subsf3) - .globl SYM (__mulsf3) - .globl SYM (__divsf3) - .globl SYM (__negsf2) - .globl SYM (__cmpsf2) - .globl SYM (__cmpsf2_internal) - .hidden SYM (__cmpsf2_internal) - -| These are common routines to return and signal exceptions. - - .text - .even - -Lf$den: -| Return and signal a denormalized number - orl d7,d0 - moveq IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$infty: -Lf$overflow: -| Return a properly signed INFINITY and set the exception flags - movel IMM (INFINITY),d0 - orl d7,d0 - moveq IMM (INEXACT_RESULT+OVERFLOW),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$underflow: -| Return 0 and set the exception flags - moveq IMM (0),d0 - moveq IMM (INEXACT_RESULT+UNDERFLOW),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$inop: -| Return a quiet NaN and set the exception flags - movel IMM (QUIET_NaN),d0 - moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -Lf$div$0: -| Return a properly signed INFINITY and set the exception flags - movel IMM (INFINITY),d0 - orl d7,d0 - moveq IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -|============================================================================= -|============================================================================= -| single precision routines -|============================================================================= -|============================================================================= - -| A single precision floating point number (float) has the format: -| -| struct _float { -| unsigned int sign : 1; /* sign bit */ -| unsigned int exponent : 8; /* exponent, shifted by 126 */ -| unsigned int fraction : 23; /* fraction */ -| } float; -| -| Thus sizeof(float) = 4 (32 bits). -| -| All the routines are callable from C programs, and return the result -| in the single register d0. They also preserve all registers except -| d0-d1 and a0-a1. 
- -|============================================================================= -| __subsf3 -|============================================================================= - -| float __subsf3(float, float); - FUNC(__subsf3) -SYM (__subsf3): - bchg IMM (31),sp@(8) | change sign of second operand - | and fall through -|============================================================================= -| __addsf3 -|============================================================================= - -| float __addsf3(float, float); - FUNC(__addsf3) -SYM (__addsf3): -#ifndef __mcoldfire__ - link a6,IMM (0) | everything will be done in registers - moveml d2-d7,sp@- | save all data registers but d0-d1 -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | get second operand - movel d0,a0 | get d0's sign bit ' - addl d0,d0 | check and clear sign bit of a - beq Laddsf$b | if zero return second operand - movel d1,a1 | save b's sign bit ' - addl d1,d1 | get rid of sign bit - beq Laddsf$a | if zero return first operand - -| Get the exponents and check for denormalized and/or infinity. - - movel IMM (0x00ffffff),d4 | mask to get fraction - movel IMM (0x01000000),d5 | mask to put hidden bit back - - movel d0,d6 | save a to get exponent - andl d4,d0 | get fraction in d0 - notl d4 | make d4 into a mask for the exponent - andl d4,d6 | get exponent in d6 - beq Laddsf$a$den | branch if a is denormalized - cmpl d4,d6 | check for INFINITY or NaN - beq Laddsf$nf - swap d6 | put exponent into first word - orl d5,d0 | and put hidden bit back -Laddsf$1: -| Now we have a's exponent in d6 (second byte) and the mantissa in d0. ' - movel d1,d7 | get exponent in d7 - andl d4,d7 | - beq Laddsf$b$den | branch if b is denormalized - cmpl d4,d7 | check for INFINITY or NaN - beq Laddsf$nf - swap d7 | put exponent into first word - notl d4 | make d4 into a mask for the fraction - andl d4,d1 | get fraction in d1 - orl d5,d1 | and put hidden bit back -Laddsf$2: -| Now we have b's exponent in d7 (second byte) and the mantissa in d1. ' - -| Note that the hidden bit corresponds to bit #FLT_MANT_DIG-1, and we -| shifted right once, so bit #FLT_MANT_DIG is set (so we have one extra -| bit). - - movel d1,d2 | move b to d2, since we want to use - | two registers to do the sum - movel IMM (0),d1 | and clear the new ones - movel d1,d3 | - -| Here we shift the numbers in registers d0 and d1 so the exponents are the -| same, and put the largest exponent in d6. Note that we are using two -| registers for each number (see the discussion by D. Knuth in "Seminumerical -| Algorithms"). 
-#ifndef __mcoldfire__ - cmpw d6,d7 | compare exponents -#else - cmpl d6,d7 | compare exponents -#endif - beq Laddsf$3 | if equal don't shift ' - bhi 5f | branch if second exponent largest -1: - subl d6,d7 | keep the largest exponent - negl d7 -#ifndef __mcoldfire__ - lsrw IMM (8),d7 | put difference in lower byte -#else - lsrl IMM (8),d7 | put difference in lower byte -#endif -| if difference is too large we don't shift (actually, we can just exit) ' -#ifndef __mcoldfire__ - cmpw IMM (FLT_MANT_DIG+2),d7 -#else - cmpl IMM (FLT_MANT_DIG+2),d7 -#endif - bge Laddsf$b$small -#ifndef __mcoldfire__ - cmpw IMM (16),d7 | if difference >= 16 swap -#else - cmpl IMM (16),d7 | if difference >= 16 swap -#endif - bge 4f -2: -#ifndef __mcoldfire__ - subw IMM (1),d7 -#else - subql IMM (1), d7 -#endif -3: -#ifndef __mcoldfire__ - lsrl IMM (1),d2 | shift right second operand - roxrl IMM (1),d3 - dbra d7,3b -#else - lsrl IMM (1),d3 - btst IMM (0),d2 - beq 10f - bset IMM (31),d3 -10: lsrl IMM (1),d2 - subql IMM (1), d7 - bpl 3b -#endif - bra Laddsf$3 -4: - movew d2,d3 - swap d3 - movew d3,d2 - swap d2 -#ifndef __mcoldfire__ - subw IMM (16),d7 -#else - subl IMM (16),d7 -#endif - bne 2b | if still more bits, go back to normal case - bra Laddsf$3 -5: -#ifndef __mcoldfire__ - exg d6,d7 | exchange the exponents -#else - eorl d6,d7 - eorl d7,d6 - eorl d6,d7 -#endif - subl d6,d7 | keep the largest exponent - negl d7 | -#ifndef __mcoldfire__ - lsrw IMM (8),d7 | put difference in lower byte -#else - lsrl IMM (8),d7 | put difference in lower byte -#endif -| if difference is too large we don't shift (and exit!) ' -#ifndef __mcoldfire__ - cmpw IMM (FLT_MANT_DIG+2),d7 -#else - cmpl IMM (FLT_MANT_DIG+2),d7 -#endif - bge Laddsf$a$small -#ifndef __mcoldfire__ - cmpw IMM (16),d7 | if difference >= 16 swap -#else - cmpl IMM (16),d7 | if difference >= 16 swap -#endif - bge 8f -6: -#ifndef __mcoldfire__ - subw IMM (1),d7 -#else - subl IMM (1),d7 -#endif -7: -#ifndef __mcoldfire__ - lsrl IMM (1),d0 | shift right first operand - roxrl IMM (1),d1 - dbra d7,7b -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - subql IMM (1),d7 - bpl 7b -#endif - bra Laddsf$3 -8: - movew d0,d1 - swap d1 - movew d1,d0 - swap d0 -#ifndef __mcoldfire__ - subw IMM (16),d7 -#else - subl IMM (16),d7 -#endif - bne 6b | if still more bits, go back to normal case - | otherwise we fall through - -| Now we have a in d0-d1, b in d2-d3, and the largest exponent in d6 (the -| signs are stored in a0 and a1). - -Laddsf$3: -| Here we have to decide whether to add or subtract the numbers -#ifndef __mcoldfire__ - exg d6,a0 | get signs back - exg d7,a1 | and save the exponents -#else - movel d6,d4 - movel a0,d6 - movel d4,a0 - movel d7,d4 - movel a1,d7 - movel d4,a1 -#endif - eorl d6,d7 | combine sign bits - bmi Lsubsf$0 | if negative a and b have opposite - | sign so we actually subtract the - | numbers - -| Here we have both positive or both negative -#ifndef __mcoldfire__ - exg d6,a0 | now we have the exponent in d6 -#else - movel d6,d4 - movel a0,d6 - movel d4,a0 -#endif - movel a0,d7 | and sign in d7 - andl IMM (0x80000000),d7 -| Here we do the addition. - addl d3,d1 - addxl d2,d0 -| Note: now we have d2, d3, d4 and d5 to play with! 
- -| Put the exponent, in the first byte, in d2, to use the "standard" rounding -| routines: - movel d6,d2 -#ifndef __mcoldfire__ - lsrw IMM (8),d2 -#else - lsrl IMM (8),d2 -#endif - -| Before rounding normalize so bit #FLT_MANT_DIG is set (we will consider -| the case of denormalized numbers in the rounding routine itself). -| As in the addition (not in the subtraction!) we could have set -| one more bit we check this: - btst IMM (FLT_MANT_DIG+1),d0 - beq 1f -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 -#endif - addl IMM (1),d2 -1: - lea pc@(Laddsf$4),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Laddsf$4: -| Put back the exponent, but check for overflow. -#ifndef __mcoldfire__ - cmpw IMM (0xff),d2 -#else - cmpl IMM (0xff),d2 -#endif - bhi 1f - bclr IMM (FLT_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (7),d2 -#else - lsll IMM (7),d2 -#endif - swap d2 - orl d2,d0 - bra Laddsf$ret -1: - moveq IMM (ADD),d5 - bra Lf$overflow - -Lsubsf$0: -| We are here if a > 0 and b < 0 (sign bits cleared). -| Here we do the subtraction. - movel d6,d7 | put sign in d7 - andl IMM (0x80000000),d7 - - subl d3,d1 | result in d0-d1 - subxl d2,d0 | - beq Laddsf$ret | if zero just exit - bpl 1f | if positive skip the following - bchg IMM (31),d7 | change sign bit in d7 - negl d1 - negxl d0 -1: -#ifndef __mcoldfire__ - exg d2,a0 | now we have the exponent in d2 - lsrw IMM (8),d2 | put it in the first byte -#else - movel d2,d4 - movel a0,d2 - movel d4,a0 - lsrl IMM (8),d2 | put it in the first byte -#endif - -| Now d0-d1 is positive and the sign bit is in d7. - -| Note that we do not have to normalize, since in the subtraction bit -| #FLT_MANT_DIG+1 is never set, and denormalized numbers are handled by -| the rounding routines themselves. - lea pc@(Lsubsf$1),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lsubsf$1: -| Put back the exponent (we can't have overflow!). ' - bclr IMM (FLT_MANT_DIG-1),d0 -#ifndef __mcoldfire__ - lslw IMM (7),d2 -#else - lsll IMM (7),d2 -#endif - swap d2 - orl d2,d0 - bra Laddsf$ret - -| If one of the numbers was too small (difference of exponents >= -| FLT_MANT_DIG+2) we return the other (and now we don't have to ' -| check for finiteness or zero). -Laddsf$a$small: - movel a6@(12),d0 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Laddsf$b$small: - movel a6@(8),d0 - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 | and return - rts - -| If the numbers are denormalized remember to put exponent equal to 1. - -Laddsf$a$den: - movel d5,d6 | d5 contains 0x01000000 - swap d6 - bra Laddsf$1 - -Laddsf$b$den: - movel d5,d7 - swap d7 - notl d4 | make d4 into a mask for the fraction - | (this was not executed after the jump) - bra Laddsf$2 - -| The rest is mainly code for the different results which can be -| returned (checking always for +/-INFINITY and NaN). - -Laddsf$b: -| Return b (if a is zero). - movel a6@(12),d0 - cmpl IMM (0x80000000),d0 | Check if b is -0 - bne 1f - movel a0,d7 - andl IMM (0x80000000),d7 | Use the sign of a - clrl d0 - bra Laddsf$ret -Laddsf$a: -| Return a (if b is zero). - movel a6@(8),d0 -1: - moveq IMM (ADD),d5 -| We have to check for NaN and +/-infty. - movel d0,d7 - andl IMM (0x80000000),d7 | put sign in d7 - bclr IMM (31),d0 | clear sign - cmpl IMM (INFINITY),d0 | check for infty or NaN - bge 2f - movel d0,d0 | check for zero (we do this because we don't ' - bne Laddsf$ret | want to return -0 by mistake - bclr IMM (31),d7 | if zero be sure to clear sign - bra Laddsf$ret | if everything OK just return -2: -| The value to be returned is either +/-infty or NaN - andl IMM (0x007fffff),d0 | check for NaN - bne Lf$inop | if mantissa not zero is NaN - bra Lf$infty - -Laddsf$ret: -| Normal exit (a and b nonzero, result is not NaN nor +/-infty). -| We have to clear the exception flags (just the exception type). - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ - orl d7,d0 | put sign bit -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | restore data registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | and return - rts - -Laddsf$ret$den: -| Return a denormalized number (for addition we don't signal underflow) ' - lsrl IMM (1),d0 | remember to shift right back once - bra Laddsf$ret | and return - -| Note: when adding two floats of the same sign if either one is -| NaN we return NaN without regard to whether the other is finite or -| not. When subtracting them (i.e., when adding two numbers of -| opposite signs) things are more complicated: if both are INFINITY -| we return NaN, if only one is INFINITY and the other is NaN we return -| NaN, but if it is finite we return INFINITY with the corresponding sign. - -Laddsf$nf: - moveq IMM (ADD),d5 -| This could be faster but it is not worth the effort, since it is not -| executed very often. We sacrifice speed for clarity here. - movel a6@(8),d0 | get the numbers back (remember that we - movel a6@(12),d1 | did some processing already) - movel IMM (INFINITY),d4 | useful constant (INFINITY) - movel d0,d2 | save sign bits - movel d1,d3 - bclr IMM (31),d0 | clear sign bits - bclr IMM (31),d1 -| We know that one of them is either NaN of +/-INFINITY -| Check for NaN (if either one is NaN return NaN) - cmpl d4,d0 | check first a (d0) - bhi Lf$inop - cmpl d4,d1 | check now b (d1) - bhi Lf$inop -| Now comes the check for +/-INFINITY. We know that both are (maybe not -| finite) numbers, but we have to check if both are infinite whether we -| are adding or subtracting them. - eorl d3,d2 | to check sign bits - bmi 1f - movel d0,d7 - andl IMM (0x80000000),d7 | get (common) sign bit - bra Lf$infty -1: -| We know one (or both) are infinite, so we test for equality between the -| two numbers (if they are equal they have to be infinite both, so we -| return NaN). - cmpl d1,d0 | are both infinite? 
- beq Lf$inop | if so return NaN - - movel d0,d7 - andl IMM (0x80000000),d7 | get a's sign bit ' - cmpl d4,d0 | test now for infinity - beq Lf$infty | if a is INFINITY return with this sign - bchg IMM (31),d7 | else we know b is INFINITY and has - bra Lf$infty | the opposite sign - -|============================================================================= -| __mulsf3 -|============================================================================= - -| float __mulsf3(float, float); - FUNC(__mulsf3) -SYM (__mulsf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0 - movel a6@(12),d1 | and b into d1 - movel d0,d7 | d7 will hold the sign of the product - eorl d1,d7 | - andl IMM (0x80000000),d7 - movel IMM (INFINITY),d6 | useful constant (+INFINITY) - movel d6,d5 | another (mask for fraction) - notl d5 | - movel IMM (0x00800000),d4 | this is to put hidden bit back - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d2 | - beq Lmulsf$a$0 | branch if a is zero - bclr IMM (31),d1 | get rid of b's sign bit ' - movel d1,d3 | - beq Lmulsf$b$0 | branch if b is zero - cmpl d6,d0 | is a big? - bhi Lmulsf$inop | if a is NaN return NaN - beq Lmulsf$inf | if a is INFINITY we have to check b - cmpl d6,d1 | now compare b with INFINITY - bhi Lmulsf$inop | is b NaN? - beq Lmulsf$overflow | is b INFINITY? -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d2 and d3. - andl d6,d2 | and isolate exponent in d2 - beq Lmulsf$a$den | if exponent is zero we have a denormalized - andl d5,d0 | and isolate fraction - orl d4,d0 | and put hidden bit back - swap d2 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (7),d2 | -#else - lsrl IMM (7),d2 | -#endif -Lmulsf$1: | number - andl d6,d3 | - beq Lmulsf$b$den | - andl d5,d1 | - orl d4,d1 | - swap d3 | -#ifndef __mcoldfire__ - lsrw IMM (7),d3 | -#else - lsrl IMM (7),d3 | -#endif -Lmulsf$2: | -#ifndef __mcoldfire__ - addw d3,d2 | add exponents - subw IMM (F_BIAS+1),d2 | and subtract bias (plus one) -#else - addl d3,d2 | add exponents - subl IMM (F_BIAS+1),d2 | and subtract bias (plus one) -#endif - -| We are now ready to do the multiplication. The situation is as follows: -| both a and b have bit FLT_MANT_DIG-1 set (even if they were -| denormalized to start with!), which means that in the product -| bit 2*(FLT_MANT_DIG-1) (that is, bit 2*FLT_MANT_DIG-2-32 of the -| high long) is set. - -| To do the multiplication let us move the number a little bit around ... - movel d1,d6 | second operand in d6 - movel d0,d5 | first operand in d4-d5 - movel IMM (0),d4 - movel d4,d1 | the sums will go in d0-d1 - movel d4,d0 - -| now bit FLT_MANT_DIG-1 becomes bit 31: - lsll IMM (31-FLT_MANT_DIG+1),d6 - -| Start the loop (we loop #FLT_MANT_DIG times): - moveq IMM (FLT_MANT_DIG-1),d3 -1: addl d1,d1 | shift sum - addxl d0,d0 - lsll IMM (1),d6 | get bit bn - bcc 2f | if not set skip sum - addl d5,d1 | add a - addxl d4,d0 -2: -#ifndef __mcoldfire__ - dbf d3,1b | loop back -#else - subql IMM (1),d3 - bpl 1b -#endif - -| Now we have the product in d0-d1, with bit (FLT_MANT_DIG - 1) + FLT_MANT_DIG -| (mod 32) of d0 set. The first thing to do now is to normalize it so bit -| FLT_MANT_DIG is set (to do the rounding). 
-#ifndef __mcoldfire__ - rorl IMM (6),d1 - swap d1 - movew d1,d3 - andw IMM (0x03ff),d3 - andw IMM (0xfd00),d1 -#else - movel d1,d3 - lsll IMM (8),d1 - addl d1,d1 - addl d1,d1 - moveq IMM (22),d5 - lsrl d5,d3 - orl d3,d1 - andl IMM (0xfffffd00),d1 -#endif - lsll IMM (8),d0 - addl d0,d0 - addl d0,d0 -#ifndef __mcoldfire__ - orw d3,d0 -#else - orl d3,d0 -#endif - - moveq IMM (MULTIPLY),d5 - - btst IMM (FLT_MANT_DIG+1),d0 - beq Lround$exit -#ifndef __mcoldfire__ - lsrl IMM (1),d0 - roxrl IMM (1),d1 - addw IMM (1),d2 -#else - lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - addql IMM (1),d2 -#endif - bra Lround$exit - -Lmulsf$inop: - moveq IMM (MULTIPLY),d5 - bra Lf$inop - -Lmulsf$overflow: - moveq IMM (MULTIPLY),d5 - bra Lf$overflow - -Lmulsf$inf: - moveq IMM (MULTIPLY),d5 -| If either is NaN return NaN; else both are (maybe infinite) numbers, so -| return INFINITY with the correct sign (which is in d7). - cmpl d6,d1 | is b NaN? - bhi Lf$inop | if so return NaN - bra Lf$overflow | else return +/-INFINITY - -| If either number is zero return zero, unless the other is +/-INFINITY, -| or NaN, in which case we return NaN. -Lmulsf$b$0: -| Here d1 (==b) is zero. - movel a6@(8),d1 | get a again to check for non-finiteness - bra 1f -Lmulsf$a$0: - movel a6@(12),d1 | get b again to check for non-finiteness -1: bclr IMM (31),d1 | clear sign bit - cmpl IMM (INFINITY),d1 | and check for a large exponent - bge Lf$inop | if b is +/-INFINITY or NaN return NaN - movel d7,d0 | else return signed zero - PICLEA SYM (_fpCCR),a0 | - movew IMM (0),a0@ | -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | - rts | - -| If a number is denormalized we put an exponent of 1 but do not put the -| hidden bit back into the fraction; instead we shift left until bit 23 -| (the hidden bit) is set, adjusting the exponent accordingly. We do this -| to ensure that the product of the fractions is close to 1. 
-Lmulsf$a$den: - movel IMM (1),d2 - andl d5,d0 -1: addl d0,d0 | shift a left (until bit 23 is set) -#ifndef __mcoldfire__ - subw IMM (1),d2 | and adjust exponent -#else - subql IMM (1),d2 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d0 - bne Lmulsf$1 | - bra 1b | else loop back - -Lmulsf$b$den: - movel IMM (1),d3 - andl d5,d1 -1: addl d1,d1 | shift b left until bit 23 is set -#ifndef __mcoldfire__ - subw IMM (1),d3 | and adjust exponent -#else - subql IMM (1),d3 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d1 - bne Lmulsf$2 | - bra 1b | else loop back - -|============================================================================= -| __divsf3 -|============================================================================= - -| float __divsf3(float, float); - FUNC(__divsf3) -SYM (__divsf3): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - movel a6@(8),d0 | get a into d0 - movel a6@(12),d1 | and b into d1 - movel d0,d7 | d7 will hold the sign of the result - eorl d1,d7 | - andl IMM (0x80000000),d7 | - movel IMM (INFINITY),d6 | useful constant (+INFINITY) - movel d6,d5 | another (mask for fraction) - notl d5 | - movel IMM (0x00800000),d4 | this is to put hidden bit back - bclr IMM (31),d0 | get rid of a's sign bit ' - movel d0,d2 | - beq Ldivsf$a$0 | branch if a is zero - bclr IMM (31),d1 | get rid of b's sign bit ' - movel d1,d3 | - beq Ldivsf$b$0 | branch if b is zero - cmpl d6,d0 | is a big? - bhi Ldivsf$inop | if a is NaN return NaN - beq Ldivsf$inf | if a is INFINITY we have to check b - cmpl d6,d1 | now compare b with INFINITY - bhi Ldivsf$inop | if b is NaN return NaN - beq Ldivsf$underflow -| Here we have both numbers finite and nonzero (and with no sign bit). -| Now we get the exponents into d2 and d3 and normalize the numbers to -| ensure that the ratio of the fractions is close to 1. We do this by -| making sure that bit #FLT_MANT_DIG-1 (hidden bit) is set. - andl d6,d2 | and isolate exponent in d2 - beq Ldivsf$a$den | if exponent is zero we have a denormalized - andl d5,d0 | and isolate fraction - orl d4,d0 | and put hidden bit back - swap d2 | I like exponents in the first byte -#ifndef __mcoldfire__ - lsrw IMM (7),d2 | -#else - lsrl IMM (7),d2 | -#endif -Ldivsf$1: | - andl d6,d3 | - beq Ldivsf$b$den | - andl d5,d1 | - orl d4,d1 | - swap d3 | -#ifndef __mcoldfire__ - lsrw IMM (7),d3 | -#else - lsrl IMM (7),d3 | -#endif -Ldivsf$2: | -#ifndef __mcoldfire__ - subw d3,d2 | subtract exponents - addw IMM (F_BIAS),d2 | and add bias -#else - subl d3,d2 | subtract exponents - addl IMM (F_BIAS),d2 | and add bias -#endif - -| We are now ready to do the division. We have prepared things in such a way -| that the ratio of the fractions will be less than 2 but greater than 1/2. -| At this point the registers in use are: -| d0 holds a (first operand, bit FLT_MANT_DIG=0, bit FLT_MANT_DIG-1=1) -| d1 holds b (second operand, bit FLT_MANT_DIG=1) -| d2 holds the difference of the exponents, corrected by the bias -| d7 holds the sign of the ratio -| d4, d5, d6 hold some constants - movel d7,a0 | d6-d7 will hold the ratio of the fractions - movel IMM (0),d6 | - movel d6,d7 - - moveq IMM (FLT_MANT_DIG+1),d3 -1: cmpl d0,d1 | is a < b? - bhi 2f | - bset d3,d6 | set a bit in d6 - subl d1,d0 | if a >= b a <-- a-b - beq 3f | if a is zero, exit -2: addl d0,d0 | multiply a by 2 -#ifndef __mcoldfire__ - dbra d3,1b -#else - subql IMM (1),d3 - bpl 1b -#endif - -| Now we keep going to set the sticky bit ... 
- moveq IMM (FLT_MANT_DIG),d3 -1: cmpl d0,d1 - ble 2f - addl d0,d0 -#ifndef __mcoldfire__ - dbra d3,1b -#else - subql IMM(1),d3 - bpl 1b -#endif - movel IMM (0),d1 - bra 3f -2: movel IMM (0),d1 -#ifndef __mcoldfire__ - subw IMM (FLT_MANT_DIG),d3 - addw IMM (31),d3 -#else - subl IMM (FLT_MANT_DIG),d3 - addl IMM (31),d3 -#endif - bset d3,d1 -3: - movel d6,d0 | put the ratio in d0-d1 - movel a0,d7 | get sign back - -| Because of the normalization we did before we are guaranteed that -| d0 is smaller than 2^26 but larger than 2^24. Thus bit 26 is not set, -| bit 25 could be set, and if it is not set then bit 24 is necessarily set. - btst IMM (FLT_MANT_DIG+1),d0 - beq 1f | if it is not set, then bit 24 is set - lsrl IMM (1),d0 | -#ifndef __mcoldfire__ - addw IMM (1),d2 | -#else - addl IMM (1),d2 | -#endif -1: -| Now round, check for over- and underflow, and exit. - moveq IMM (DIVIDE),d5 - bra Lround$exit - -Ldivsf$inop: - moveq IMM (DIVIDE),d5 - bra Lf$inop - -Ldivsf$overflow: - moveq IMM (DIVIDE),d5 - bra Lf$overflow - -Ldivsf$underflow: - moveq IMM (DIVIDE),d5 - bra Lf$underflow - -Ldivsf$a$0: - moveq IMM (DIVIDE),d5 -| If a is zero check to see whether b is zero also. In that case return -| NaN; then check if b is NaN, and return NaN also in that case. Else -| return a properly signed zero. - andl IMM (0x7fffffff),d1 | clear sign bit and test b - beq Lf$inop | if b is also zero return NaN - cmpl IMM (INFINITY),d1 | check for NaN - bhi Lf$inop | - movel d7,d0 | else return signed zero - PICLEA SYM (_fpCCR),a0 | - movew IMM (0),a0@ | -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | -#else - moveml sp@,d2-d7 | - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 | - rts | - -Ldivsf$b$0: - moveq IMM (DIVIDE),d5 -| If we got here a is not zero. Check if a is NaN; in that case return NaN, -| else return +/-INFINITY. Remember that a is in d0 with the sign bit -| cleared already. - cmpl IMM (INFINITY),d0 | compare d0 with INFINITY - bhi Lf$inop | if larger it is NaN - bra Lf$div$0 | else signal DIVIDE_BY_ZERO - -Ldivsf$inf: - moveq IMM (DIVIDE),d5 -| If a is INFINITY we have to check b - cmpl IMM (INFINITY),d1 | compare b with INFINITY - bge Lf$inop | if b is NaN or INFINITY return NaN - bra Lf$overflow | else return overflow - -| If a number is denormalized we put an exponent of 1 but do not put the -| bit back into the fraction. -Ldivsf$a$den: - movel IMM (1),d2 - andl d5,d0 -1: addl d0,d0 | shift a left until bit FLT_MANT_DIG-1 is set -#ifndef __mcoldfire__ - subw IMM (1),d2 | and adjust exponent -#else - subl IMM (1),d2 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d0 - bne Ldivsf$1 - bra 1b - -Ldivsf$b$den: - movel IMM (1),d3 - andl d5,d1 -1: addl d1,d1 | shift b left until bit FLT_MANT_DIG is set -#ifndef __mcoldfire__ - subw IMM (1),d3 | and adjust exponent -#else - subl IMM (1),d3 | and adjust exponent -#endif - btst IMM (FLT_MANT_DIG-1),d1 - bne Ldivsf$2 - bra 1b - -Lround$exit: -| This is a common exit point for __mulsf3 and __divsf3. - -| First check for underlow in the exponent: -#ifndef __mcoldfire__ - cmpw IMM (-FLT_MANT_DIG-1),d2 -#else - cmpl IMM (-FLT_MANT_DIG-1),d2 -#endif - blt Lf$underflow -| It could happen that the exponent is less than 1, in which case the -| number is denormalized. In this case we shift right and adjust the -| exponent until it becomes 1 or the fraction is zero (in the latter case -| we signal underflow and return zero). 
- movel IMM (0),d6 | d6 is used temporarily -#ifndef __mcoldfire__ - cmpw IMM (1),d2 | if the exponent is less than 1 we -#else - cmpl IMM (1),d2 | if the exponent is less than 1 we -#endif - bge 2f | have to shift right (denormalize) -1: -#ifndef __mcoldfire__ - addw IMM (1),d2 | adjust the exponent - lsrl IMM (1),d0 | shift right once - roxrl IMM (1),d1 | - roxrl IMM (1),d6 | d6 collect bits we would lose otherwise - cmpw IMM (1),d2 | is the exponent 1 already? -#else - addql IMM (1),d2 | adjust the exponent - lsrl IMM (1),d6 - btst IMM (0),d1 - beq 11f - bset IMM (31),d6 -11: lsrl IMM (1),d1 - btst IMM (0),d0 - beq 10f - bset IMM (31),d1 -10: lsrl IMM (1),d0 - cmpl IMM (1),d2 | is the exponent 1 already? -#endif - beq 2f | if not loop back - bra 1b | - bra Lf$underflow | safety check, shouldn't execute ' -2: orl d6,d1 | this is a trick so we don't lose ' - | the extra bits which were flushed right -| Now call the rounding routine (which takes care of denormalized numbers): - lea pc@(Lround$0),a0 | to return from rounding routine - PICLEA SYM (_fpCCR),a1 | check the rounding mode -#ifdef __mcoldfire__ - clrl d6 -#endif - movew a1@(6),d6 | rounding mode in d6 - beq Lround$to$nearest -#ifndef __mcoldfire__ - cmpw IMM (ROUND_TO_PLUS),d6 -#else - cmpl IMM (ROUND_TO_PLUS),d6 -#endif - bhi Lround$to$minus - blt Lround$to$zero - bra Lround$to$plus -Lround$0: -| Here we have a correctly rounded result (either normalized or denormalized). - -| Here we should have either a normalized number or a denormalized one, and -| the exponent is necessarily larger or equal to 1 (so we don't have to ' -| check again for underflow!). We have to check for overflow or for a -| denormalized number (which also signals underflow). -| Check for overflow (i.e., exponent >= 255). -#ifndef __mcoldfire__ - cmpw IMM (0x00ff),d2 -#else - cmpl IMM (0x00ff),d2 -#endif - bge Lf$overflow -| Now check for a denormalized number (exponent==0). - movew d2,d2 - beq Lf$den -1: -| Put back the exponents and sign and return. -#ifndef __mcoldfire__ - lslw IMM (7),d2 | exponent back to fourth byte -#else - lsll IMM (7),d2 | exponent back to fourth byte -#endif - bclr IMM (FLT_MANT_DIG-1),d0 - swap d0 | and put back exponent -#ifndef __mcoldfire__ - orw d2,d0 | -#else - orl d2,d0 -#endif - swap d0 | - orl d7,d0 | and sign also - - PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts - -|============================================================================= -| __negsf2 -|============================================================================= - -| This is trivial and could be shorter if we didn't bother checking for NaN ' -| and +/-INFINITY. 
- -| float __negsf2(float); - FUNC(__negsf2) -SYM (__negsf2): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (NEGATE),d5 - movel a6@(8),d0 | get number to negate in d0 - bchg IMM (31),d0 | negate - movel d0,d1 | make a positive copy - bclr IMM (31),d1 | - tstl d1 | check for zero - beq 2f | if zero (either sign) return +zero - cmpl IMM (INFINITY),d1 | compare to +INFINITY - blt 1f | - bhi Lf$inop | if larger (fraction not zero) is NaN - movel d0,d7 | else get sign and return INFINITY - andl IMM (0x80000000),d7 - bra Lf$infty -1: PICLEA SYM (_fpCCR),a0 - movew IMM (0),a0@ -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -2: bclr IMM (31),d0 - bra 1b - -|============================================================================= -| __cmpsf2 -|============================================================================= - -GREATER = 1 -LESS = -1 -EQUAL = 0 - -| int __cmpsf2_internal(float, float, int); -SYM (__cmpsf2_internal): -#ifndef __mcoldfire__ - link a6,IMM (0) - moveml d2-d7,sp@- | save registers -#else - link a6,IMM (-24) - moveml d2-d7,sp@ -#endif - moveq IMM (COMPARE),d5 - movel a6@(8),d0 | get first operand - movel a6@(12),d1 | get second operand -| Check if either is NaN, and in that case return garbage and signal -| INVALID_OPERATION. Check also if either is zero, and clear the signs -| if necessary. - movel d0,d6 - andl IMM (0x7fffffff),d0 - beq Lcmpsf$a$0 - cmpl IMM (0x7f800000),d0 - bhi Lcmpf$inop -Lcmpsf$1: - movel d1,d7 - andl IMM (0x7fffffff),d1 - beq Lcmpsf$b$0 - cmpl IMM (0x7f800000),d1 - bhi Lcmpf$inop -Lcmpsf$2: -| Check the signs - eorl d6,d7 - bpl 1f -| If the signs are not equal check if a >= 0 - tstl d6 - bpl Lcmpsf$a$gt$b | if (a >= 0 && b < 0) => a > b - bmi Lcmpsf$b$gt$a | if (a < 0 && b >= 0) => a < b -1: -| If the signs are equal check for < 0 - tstl d6 - bpl 1f -| If both are negative exchange them -#ifndef __mcoldfire__ - exg d0,d1 -#else - movel d0,d7 - movel d1,d0 - movel d7,d1 -#endif -1: -| Now that they are positive we just compare them as longs (does this also -| work for denormalized numbers?). - cmpl d0,d1 - bhi Lcmpsf$b$gt$a | |b| > |a| - bne Lcmpsf$a$gt$b | |b| < |a| -| If we got here a == b. - movel IMM (EQUAL),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 -#endif - unlk a6 - rts -Lcmpsf$a$gt$b: - movel IMM (GREATER),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. -#endif - unlk a6 - rts -Lcmpsf$b$gt$a: - movel IMM (LESS),d0 -#ifndef __mcoldfire__ - moveml sp@+,d2-d7 | put back the registers -#else - moveml sp@,d2-d7 - | XXX if frame pointer is ever removed, stack pointer must - | be adjusted here. 
-#endif - unlk a6 - rts - -Lcmpsf$a$0: - bclr IMM (31),d6 - bra Lcmpsf$1 -Lcmpsf$b$0: - bclr IMM (31),d7 - bra Lcmpsf$2 - -Lcmpf$inop: - movl a6@(16),d0 - moveq IMM (INEXACT_RESULT+INVALID_OPERATION),d7 - moveq IMM (SINGLE_FLOAT),d6 - PICJUMP $_exception_handler - -| int __cmpsf2(float, float); - FUNC(__cmpsf2) -SYM (__cmpsf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts - -|============================================================================= -| rounding routines -|============================================================================= - -| The rounding routines expect the number to be normalized in registers -| d0-d1, with the exponent in register d2. They assume that the -| exponent is larger or equal to 1. They return a properly normalized number -| if possible, and a denormalized number otherwise. The exponent is returned -| in d2. - -Lround$to$nearest: -| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"): -| Here we assume that the exponent is not too small (this should be checked -| before entering the rounding routine), but the number could be denormalized. - -| Check for denormalized numbers: -1: btst IMM (FLT_MANT_DIG),d0 - bne 2f | if set the number is normalized -| Normalize shifting left until bit #FLT_MANT_DIG is set or the exponent -| is one (remember that a denormalized number corresponds to an -| exponent of -F_BIAS+1). -#ifndef __mcoldfire__ - cmpw IMM (1),d2 | remember that the exponent is at least one -#else - cmpl IMM (1),d2 | remember that the exponent is at least one -#endif - beq 2f | an exponent of one means denormalized - addl d1,d1 | else shift and adjust the exponent - addxl d0,d0 | -#ifndef __mcoldfire__ - dbra d2,1b | -#else - subql IMM (1),d2 - bpl 1b -#endif -2: -| Now round: we do it as follows: after the shifting we can write the -| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2. -| If delta < 1, do nothing. If delta > 1, add 1 to f. -| If delta == 1, we make sure the rounded number will be even (odd?) -| (after shifting). - btst IMM (0),d0 | is delta < 1? - beq 2f | if so, do not do anything - tstl d1 | is delta == 1? - bne 1f | if so round to even - movel d0,d1 | - andl IMM (2),d1 | bit 1 is the last significant bit - addl d1,d0 | - bra 2f | -1: movel IMM (1),d1 | else add 1 - addl d1,d0 | -| Shift right once (because we used bit #FLT_MANT_DIG!). -2: lsrl IMM (1),d0 -| Now check again bit #FLT_MANT_DIG (rounding could have produced a -| 'fraction overflow' ...). - btst IMM (FLT_MANT_DIG),d0 - beq 1f - lsrl IMM (1),d0 -#ifndef __mcoldfire__ - addw IMM (1),d2 -#else - addql IMM (1),d2 -#endif -1: -| If bit #FLT_MANT_DIG-1 is clear we have a denormalized number, so we -| have to put the exponent to zero and return a denormalized number. - btst IMM (FLT_MANT_DIG-1),d0 - beq 1f - jmp a0@ -1: movel IMM (0),d2 - jmp a0@ - -Lround$to$zero: -Lround$to$plus: -Lround$to$minus: - jmp a0@ -#endif /* L_float */ - -| gcc expects the routines __eqdf2, __nedf2, __gtdf2, __gedf2, -| __ledf2, __ltdf2 to all return the same value as a direct call to -| __cmpdf2 would. In this implementation, each of these routines -| simply calls __cmpdf2. It would be more efficient to give the -| __cmpdf2 routine several names, but separating them out will make it -| easier to write efficient versions of these routines someday. -| If the operands recompare unordered unordered __gtdf2 and __gedf2 return -1. -| The other routines return 1. 
- -#ifdef L_eqdf2 - .text - FUNC(__eqdf2) - .globl SYM (__eqdf2) -SYM (__eqdf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_eqdf2 */ - -#ifdef L_nedf2 - .text - FUNC(__nedf2) - .globl SYM (__nedf2) -SYM (__nedf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_nedf2 */ - -#ifdef L_gtdf2 - .text - FUNC(__gtdf2) - .globl SYM (__gtdf2) -SYM (__gtdf2): - link a6,IMM (0) - pea -1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_gtdf2 */ - -#ifdef L_gedf2 - .text - FUNC(__gedf2) - .globl SYM (__gedf2) -SYM (__gedf2): - link a6,IMM (0) - pea -1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_gedf2 */ - -#ifdef L_ltdf2 - .text - FUNC(__ltdf2) - .globl SYM (__ltdf2) -SYM (__ltdf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_ltdf2 */ - -#ifdef L_ledf2 - .text - FUNC(__ledf2) - .globl SYM (__ledf2) -SYM (__ledf2): - link a6,IMM (0) - pea 1 - movl a6@(20),sp@- - movl a6@(16),sp@- - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpdf2_internal) - unlk a6 - rts -#endif /* L_ledf2 */ - -| The comments above about __eqdf2, et. al., also apply to __eqsf2, -| et. al., except that the latter call __cmpsf2 rather than __cmpdf2. - -#ifdef L_eqsf2 - .text - FUNC(__eqsf2) - .globl SYM (__eqsf2) -SYM (__eqsf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_eqsf2 */ - -#ifdef L_nesf2 - .text - FUNC(__nesf2) - .globl SYM (__nesf2) -SYM (__nesf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_nesf2 */ - -#ifdef L_gtsf2 - .text - FUNC(__gtsf2) - .globl SYM (__gtsf2) -SYM (__gtsf2): - link a6,IMM (0) - pea -1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_gtsf2 */ - -#ifdef L_gesf2 - .text - FUNC(__gesf2) - .globl SYM (__gesf2) -SYM (__gesf2): - link a6,IMM (0) - pea -1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_gesf2 */ - -#ifdef L_ltsf2 - .text - FUNC(__ltsf2) - .globl SYM (__ltsf2) -SYM (__ltsf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_ltsf2 */ - -#ifdef L_lesf2 - .text - FUNC(__lesf2) - .globl SYM (__lesf2) -SYM (__lesf2): - link a6,IMM (0) - pea 1 - movl a6@(12),sp@- - movl a6@(8),sp@- - PICCALL SYM (__cmpsf2_internal) - unlk a6 - rts -#endif /* L_lesf2 */ - -#if defined (__ELF__) && defined (__linux__) - /* Make stack non-executable for ELF linux targets. */ - .section .note.GNU-stack,"",@progbits -#endif diff --git a/gcc/config/m68k/t-floatlib b/gcc/config/m68k/t-floatlib index 2039d1d0dc4..23734be40bd 100644 --- a/gcc/config/m68k/t-floatlib +++ b/gcc/config/m68k/t-floatlib @@ -1,4 +1,4 @@ -# Copyright (C) 2007 Free Software Foundation, Inc. +# Copyright (C) 2007, 2011 Free Software Foundation, Inc. # # This file is part of GCC. # @@ -16,12 +16,6 @@ # along with GCC; see the file COPYING3. 
If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = m68k/lb1sf68.asm -LIB1ASMFUNCS = _mulsi3 _udivsi3 _divsi3 _umodsi3 _modsi3 \ - _double _float _floatex \ - _eqdf2 _nedf2 _gtdf2 _gedf2 _ltdf2 _ledf2 \ - _eqsf2 _nesf2 _gtsf2 _gesf2 _ltsf2 _lesf2 - LIB2FUNCS_EXTRA = fpgnulib.c xfgnulib.c fpgnulib.c: $(srcdir)/config/m68k/fpgnulib.c diff --git a/gcc/config/mcore/lib1.asm b/gcc/config/mcore/lib1.asm deleted file mode 100644 index 701762f2a3c..00000000000 --- a/gcc/config/mcore/lib1.asm +++ /dev/null @@ -1,303 +0,0 @@ -/* libgcc routines for the MCore. - Copyright (C) 1993, 1999, 2000, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ - -#define SYM(x) CONCAT1 (__, x) - -#ifdef __ELF__ -#define TYPE(x) .type SYM (x),@function -#define SIZE(x) .size SYM (x), . - SYM (x) -#else -#define TYPE(x) -#define SIZE(x) -#endif - -.macro FUNC_START name - .text - .globl SYM (\name) - TYPE (\name) -SYM (\name): -.endm - -.macro FUNC_END name - SIZE (\name) -.endm - -#ifdef L_udivsi3 -FUNC_START udiv32 -FUNC_START udivsi32 - - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -// appears to be wrong... -// tested out incorrectly in our OS work... -// mov r7,r3 // looking at divisor -// ff1 r7 // I can move 32-r7 more bits to left. -// addi r7,1 // ok, one short of that... -// mov r1,r2 -// lsr r1,r7 // bits that came from low order... -// rsubi r7,31 // r7 == "32-n" == LEFT distance -// addi r7,1 // this is (32-n) -// lsl r4,r7 // fixes the high 32 (quotient) -// lsl r2,r7 -// cmpnei r4,0 -// bf 4f // the sentinel went away... - - // run the remaining bits - -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? 
- bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - -4: mov r2,r4 // return quotient - mov r3,r1 // and piggyback the remainder - jmp r15 -FUNC_END udiv32 -FUNC_END udivsi32 -#endif - -#ifdef L_umodsi3 -FUNC_START urem32 -FUNC_START umodsi3 - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? - bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - mov r2,r1 // return remainder - jmp r15 -FUNC_END urem32 -FUNC_END umodsi3 -#endif - -#ifdef L_divsi3 -FUNC_START div32 -FUNC_START divsi3 - mov r5,r2 // calc sign of quotient - xor r5,r3 - abs r2 // do unsigned divide - abs r3 - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -// tested out incorrectly in our OS work... -// mov r7,r3 // looking at divisor -// ff1 r7 // I can move 32-r7 more bits to left. -// addi r7,1 // ok, one short of that... -// mov r1,r2 -// lsr r1,r7 // bits that came from low order... -// rsubi r7,31 // r7 == "32-n" == LEFT distance -// addi r7,1 // this is (32-n) -// lsl r4,r7 // fixes the high 32 (quotient) -// lsl r2,r7 -// cmpnei r4,0 -// bf 4f // the sentinel went away... - - // run the remaining bits -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? - bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - -4: mov r2,r4 // return quotient - mov r3,r1 // piggyback the remainder - btsti r5,31 // after adjusting for sign - bf 3f - rsubi r2,0 - rsubi r3,0 -3: jmp r15 -FUNC_END div32 -FUNC_END divsi3 -#endif - -#ifdef L_modsi3 -FUNC_START rem32 -FUNC_START modsi3 - mov r5,r2 // calc sign of remainder - abs r2 // do unsigned divide - abs r3 - movi r1,0 // r1-r2 form 64 bit dividend - movi r4,1 // r4 is quotient (1 for a sentinel) - cmpnei r3,0 // look for 0 divisor - bt 9f - trap 3 // divide by 0 -9: - // control iterations; skip across high order 0 bits in dividend - mov r7,r2 - cmpnei r7,0 - bt 8f - movi r2,0 // 0 dividend - jmp r15 // quick return -8: - ff1 r7 // figure distance to skip - lsl r4,r7 // move the sentinel along (with 0's behind) - lsl r2,r7 // and the low 32 bits of numerator - -1: lslc r2,1 // 1 bit left shift of r1-r2 - addc r1,r1 - cmphs r1,r3 // upper 32 of dividend >= divisor? 
- bf 2f - sub r1,r3 // if yes, subtract divisor -2: addc r4,r4 // shift by 1 and count subtracts - bf 1b // if sentinel falls out of quotient, stop - mov r2,r1 // return remainder - btsti r5,31 // after adjusting for sign - bf 3f - rsubi r2,0 -3: jmp r15 -FUNC_END rem32 -FUNC_END modsi3 -#endif - - -/* GCC expects that {__eq,__ne,__gt,__ge,__le,__lt}{df2,sf2} - will behave as __cmpdf2. So, we stub the implementations to - jump on to __cmpdf2 and __cmpsf2. - - All of these shortcircuit the return path so that __cmp{sd}f2 - will go directly back to the caller. */ - -.macro COMPARE_DF_JUMP name - .import SYM (cmpdf2) -FUNC_START \name - jmpi SYM (cmpdf2) -FUNC_END \name -.endm - -#ifdef L_eqdf2 -COMPARE_DF_JUMP eqdf2 -#endif /* L_eqdf2 */ - -#ifdef L_nedf2 -COMPARE_DF_JUMP nedf2 -#endif /* L_nedf2 */ - -#ifdef L_gtdf2 -COMPARE_DF_JUMP gtdf2 -#endif /* L_gtdf2 */ - -#ifdef L_gedf2 -COMPARE_DF_JUMP gedf2 -#endif /* L_gedf2 */ - -#ifdef L_ltdf2 -COMPARE_DF_JUMP ltdf2 -#endif /* L_ltdf2 */ - -#ifdef L_ledf2 -COMPARE_DF_JUMP ledf2 -#endif /* L_ledf2 */ - -/* SINGLE PRECISION FLOATING POINT STUBS */ - -.macro COMPARE_SF_JUMP name - .import SYM (cmpsf2) -FUNC_START \name - jmpi SYM (cmpsf2) -FUNC_END \name -.endm - -#ifdef L_eqsf2 -COMPARE_SF_JUMP eqsf2 -#endif /* L_eqsf2 */ - -#ifdef L_nesf2 -COMPARE_SF_JUMP nesf2 -#endif /* L_nesf2 */ - -#ifdef L_gtsf2 -COMPARE_SF_JUMP gtsf2 -#endif /* L_gtsf2 */ - -#ifdef L_gesf2 -COMPARE_SF_JUMP __gesf2 -#endif /* L_gesf2 */ - -#ifdef L_ltsf2 -COMPARE_SF_JUMP __ltsf2 -#endif /* L_ltsf2 */ - -#ifdef L_lesf2 -COMPARE_SF_JUMP lesf2 -#endif /* L_lesf2 */ diff --git a/gcc/config/mcore/t-mcore b/gcc/config/mcore/t-mcore index 9c84d850f20..265399cecfe 100644 --- a/gcc/config/mcore/t-mcore +++ b/gcc/config/mcore/t-mcore @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = mcore/lib1.asm -LIB1ASMFUNCS = _divsi3 _udivsi3 _modsi3 _umodsi3 - # could use -msifilter to be safe from interrupt/jmp interactions and others. TARGET_LIBGCC2_CFLAGS=-O3 -DNO_FLOATLIB_FIXUNSDFSI #-msifilter diff --git a/gcc/config/mep/mep-lib1.asm b/gcc/config/mep/mep-lib1.asm deleted file mode 100644 index 0a18913f927..00000000000 --- a/gcc/config/mep/mep-lib1.asm +++ /dev/null @@ -1,125 +0,0 @@ -/* libgcc routines for Toshiba Media Processor. - Copyright (C) 2001, 2002, 2005, 2009 Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. 
*/ - -#define SAVEALL \ - add3 $sp, $sp, -16*4 ; \ - sw $0, ($sp) ; \ - sw $1, 4($sp) ; \ - sw $2, 8($sp) ; \ - sw $3, 12($sp) ; \ - sw $4, 16($sp) ; \ - sw $5, 20($sp) ; \ - sw $6, 24($sp) ; \ - sw $7, 28($sp) ; \ - sw $8, 32($sp) ; \ - sw $9, 36($sp) ; \ - sw $10, 40($sp) ; \ - sw $11, 44($sp) ; \ - sw $12, 48($sp) ; \ - sw $13, 52($sp) ; \ - sw $14, 56($sp) ; \ - ldc $5, $lp ; \ - add $5, 3 ; \ - mov $6, -4 ; \ - and $5, $6 - -#define RESTOREALL \ - stc $5, $lp ; \ - lw $14, 56($sp) ; \ - lw $13, 52($sp) ; \ - lw $12, 48($sp) ; \ - lw $11, 44($sp) ; \ - lw $10, 40($sp) ; \ - lw $9, 36($sp) ; \ - lw $8, 32($sp) ; \ - lw $7, 28($sp) ; \ - lw $6, 24($sp) ; \ - lw $5, 20($sp) ; \ - lw $4, 16($sp) ; \ - lw $3, 12($sp) ; \ - lw $2, 8($sp) ; \ - lw $1, 4($sp) ; \ - lw $0, ($sp) ; \ - add3 $sp, $sp, 16*4 ; \ - ret - -#ifdef L_mep_profile - .text - .global __mep_mcount -__mep_mcount: - SAVEALL - ldc $1, $lp - mov $2, $0 - bsr __mep_mcount_2 - RESTOREALL -#endif - -#ifdef L_mep_bb_init_trace - .text - .global __mep_bb_init_trace_func -__mep_bb_init_trace_func: - SAVEALL - lw $1, ($5) - lw $2, 4($5) - add $5, 8 - bsr __bb_init_trace_func - RESTOREALL -#endif - -#ifdef L_mep_bb_init - .text - .global __mep_bb_init_func -__mep_bb_init_func: - SAVEALL - lw $1, ($5) - add $5, 4 - bsr __bb_init_func - RESTOREALL -#endif - -#ifdef L_mep_bb_trace - .text - .global __mep_bb_trace_func -__mep_bb_trace_func: - SAVEALL - movu $3, __bb - lw $1, ($5) - sw $1, ($3) - lw $2, 4($5) - sw $2, 4($3) - add $5, 8 - bsr __bb_trace_func - RESTOREALL -#endif - -#ifdef L_mep_bb_increment - .text - .global __mep_bb_increment_func -__mep_bb_increment_func: - SAVEALL - lw $1, ($5) - lw $0, ($1) - add $0, 1 - sw $0, ($1) - add $5, 4 - RESTOREALL -#endif diff --git a/gcc/config/mep/t-mep b/gcc/config/mep/t-mep index d560db0aa4b..ac4ad95bc87 100644 --- a/gcc/config/mep/t-mep +++ b/gcc/config/mep/t-mep @@ -32,16 +32,6 @@ mep-pragma.o: $(srcdir)/config/mep/mep-pragma.c $(CONFIG_H) $(SYSTEM_H) \ function.h insn-config.h reload.h $(TARGET_H) $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< -# profiling support - -LIB1ASMSRC = mep/mep-lib1.asm - -LIB1ASMFUNCS = _mep_profile \ - _mep_bb_init_trace \ - _mep_bb_init \ - _mep_bb_trace \ - _mep_bb_increment - # multiply and divide routines LIB2FUNCS_EXTRA = \ diff --git a/gcc/config/mips/mips16.S b/gcc/config/mips/mips16.S deleted file mode 100644 index ec331b5f65e..00000000000 --- a/gcc/config/mips/mips16.S +++ /dev/null @@ -1,712 +0,0 @@ -/* mips16 floating point support code - Copyright (C) 1996, 1997, 1998, 2008, 2009, 2010 - Free Software Foundation, Inc. - Contributed by Cygnus Support - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. 
*/ - -/* This file contains mips16 floating point support functions. These - functions are called by mips16 code to handle floating point when - -msoft-float is not used. They accept the arguments and return - values using the soft-float calling convention, but do the actual - operation using the hard floating point instructions. */ - -#if defined _MIPS_SIM && (_MIPS_SIM == _ABIO32 || _MIPS_SIM == _ABIO64) - -/* This file contains 32-bit assembly code. */ - .set nomips16 - -/* Start a function. */ - -#define STARTFN(NAME) .globl NAME; .ent NAME; NAME: - -/* Finish a function. */ - -#define ENDFN(NAME) .end NAME - -/* ARG1 - The FPR that holds the first floating-point argument. - - ARG2 - The FPR that holds the second floating-point argument. - - RET - The FPR that holds a floating-point return value. */ - -#define RET $f0 -#define ARG1 $f12 -#ifdef __mips64 -#define ARG2 $f13 -#else -#define ARG2 $f14 -#endif - -/* Set 64-bit register GPR so that its high 32 bits contain HIGH_FPR - and so that its low 32 bits contain LOW_FPR. */ -#define MERGE_GPRf(GPR, HIGH_FPR, LOW_FPR) \ - .set noat; \ - mfc1 $1, LOW_FPR; \ - mfc1 GPR, HIGH_FPR; \ - dsll $1, $1, 32; \ - dsll GPR, GPR, 32; \ - dsrl $1, $1, 32; \ - or GPR, GPR, $1; \ - .set at - -/* Move the high 32 bits of GPR to HIGH_FPR and the low 32 bits of - GPR to LOW_FPR. */ -#define MERGE_GPRt(GPR, HIGH_FPR, LOW_FPR) \ - .set noat; \ - dsrl $1, GPR, 32; \ - mtc1 GPR, LOW_FPR; \ - mtc1 $1, HIGH_FPR; \ - .set at - -/* Jump to T, and use "OPCODE, OP2" to implement a delayed move. */ -#define DELAYt(T, OPCODE, OP2) \ - .set noreorder; \ - jr T; \ - OPCODE, OP2; \ - .set reorder - -/* Use "OPCODE. OP2" and jump to T. */ -#define DELAYf(T, OPCODE, OP2) OPCODE, OP2; jr T - -/* MOVE_SF_BYTE0(D) - Move the first single-precision floating-point argument between - GPRs and FPRs. - - MOVE_SI_BYTE0(D) - Likewise the first single-precision integer argument. - - MOVE_SF_BYTE4(D) - Move the second single-precision floating-point argument between - GPRs and FPRs, given that the first argument occupies 4 bytes. - - MOVE_SF_BYTE8(D) - Move the second single-precision floating-point argument between - GPRs and FPRs, given that the first argument occupies 8 bytes. - - MOVE_DF_BYTE0(D) - Move the first double-precision floating-point argument between - GPRs and FPRs. - - MOVE_DF_BYTE8(D) - Likewise the second double-precision floating-point argument. - - MOVE_SF_RET(D, T) - Likewise a single-precision floating-point return value, - then jump to T. - - MOVE_SC_RET(D, T) - Likewise a complex single-precision floating-point return value. - - MOVE_DF_RET(D, T) - Likewise a double-precision floating-point return value. - - MOVE_DC_RET(D, T) - Likewise a complex double-precision floating-point return value. - - MOVE_SI_RET(D, T) - Likewise a single-precision integer return value. - - The D argument is "t" to move to FPRs and "f" to move from FPRs. - The return macros may assume that the target of the jump does not - use a floating-point register. */ - -#define MOVE_SF_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) -#define MOVE_SI_RET(D, T) DELAY##D (T, m##D##c1 $2,$f0) - -#if defined(__mips64) && defined(__MIPSEB__) -#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f0, $f1); jr T -#elif defined(__mips64) -/* The high 32 bits of $2 correspond to the second word in memory; - i.e. the imaginary part. 
*/ -#define MOVE_SC_RET(D, T) MERGE_GPR##D ($2, $f1, $f0); jr T -#elif __mips_fpr == 64 -#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1) -#else -#define MOVE_SC_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f2) -#endif - -#if defined(__mips64) -#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12 -#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f13 -#define MOVE_SF_BYTE8(D) m##D##c1 $5,$f13 -#else -#define MOVE_SF_BYTE0(D) m##D##c1 $4,$f12 -#define MOVE_SF_BYTE4(D) m##D##c1 $5,$f14 -#define MOVE_SF_BYTE8(D) m##D##c1 $6,$f14 -#endif -#define MOVE_SI_BYTE0(D) MOVE_SF_BYTE0(D) - -#if defined(__mips64) -#define MOVE_DF_BYTE0(D) dm##D##c1 $4,$f12 -#define MOVE_DF_BYTE8(D) dm##D##c1 $5,$f13 -#define MOVE_DF_RET(D, T) DELAY##D (T, dm##D##c1 $2,$f0) -#define MOVE_DC_RET(D, T) dm##D##c1 $3,$f1; MOVE_DF_RET (D, T) -#elif __mips_fpr == 64 && defined(__MIPSEB__) -#define MOVE_DF_BYTE0(D) m##D##c1 $5,$f12; m##D##hc1 $4,$f12 -#define MOVE_DF_BYTE8(D) m##D##c1 $7,$f14; m##D##hc1 $6,$f14 -#define MOVE_DF_RET(D, T) m##D##c1 $3,$f0; DELAY##D (T, m##D##hc1 $2,$f0) -#define MOVE_DC_RET(D, T) m##D##c1 $5,$f1; m##D##hc1 $4,$f1; MOVE_DF_RET (D, T) -#elif __mips_fpr == 64 -#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##hc1 $5,$f12 -#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##hc1 $7,$f14 -#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##hc1 $3,$f0) -#define MOVE_DC_RET(D, T) m##D##c1 $4,$f1; m##D##hc1 $5,$f1; MOVE_DF_RET (D, T) -#elif defined(__MIPSEB__) -/* FPRs are little-endian. */ -#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12 -#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f15; m##D##c1 $7,$f14 -#define MOVE_DF_RET(D, T) m##D##c1 $2,$f1; DELAY##D (T, m##D##c1 $3,$f0) -#define MOVE_DC_RET(D, T) m##D##c1 $4,$f3; m##D##c1 $5,$f2; MOVE_DF_RET (D, T) -#else -#define MOVE_DF_BYTE0(D) m##D##c1 $4,$f12; m##D##c1 $5,$f13 -#define MOVE_DF_BYTE8(D) m##D##c1 $6,$f14; m##D##c1 $7,$f15 -#define MOVE_DF_RET(D, T) m##D##c1 $2,$f0; DELAY##D (T, m##D##c1 $3,$f1) -#define MOVE_DC_RET(D, T) m##D##c1 $4,$f2; m##D##c1 $5,$f3; MOVE_DF_RET (D, T) -#endif - -/* Single-precision math. */ - -/* Define a function NAME that loads two single-precision values, - performs FPU operation OPCODE on them, and returns the single- - precision result. */ - -#define OPSF3(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - MOVE_SF_BYTE4 (t); \ - OPCODE RET,ARG1,ARG2; \ - MOVE_SF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16addsf3 -OPSF3 (__mips16_addsf3, add.s) -#endif -#ifdef L_m16subsf3 -OPSF3 (__mips16_subsf3, sub.s) -#endif -#ifdef L_m16mulsf3 -OPSF3 (__mips16_mulsf3, mul.s) -#endif -#ifdef L_m16divsf3 -OPSF3 (__mips16_divsf3, div.s) -#endif - -/* Define a function NAME that loads a single-precision value, - performs FPU operation OPCODE on it, and returns the single- - precision result. */ - -#define OPSF2(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - OPCODE RET,ARG1; \ - MOVE_SF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16negsf2 -OPSF2 (__mips16_negsf2, neg.s) -#endif -#ifdef L_m16abssf2 -OPSF2 (__mips16_abssf2, abs.s) -#endif - -/* Single-precision comparisons. */ - -/* Define a function NAME that loads two single-precision values, - performs floating point comparison OPCODE, and returns TRUE or - FALSE depending on the result. 
*/ - -#define CMPSF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - MOVE_SF_BYTE4 (t); \ - OPCODE ARG1,ARG2; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -/* Like CMPSF, but reverse the comparison operands. */ - -#define REVCMPSF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_SF_BYTE0 (t); \ - MOVE_SF_BYTE4 (t); \ - OPCODE ARG2,ARG1; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -#ifdef L_m16eqsf2 -CMPSF (__mips16_eqsf2, c.eq.s, 0, 1) -#endif -#ifdef L_m16nesf2 -CMPSF (__mips16_nesf2, c.eq.s, 0, 1) -#endif -#ifdef L_m16gtsf2 -REVCMPSF (__mips16_gtsf2, c.lt.s, 1, 0) -#endif -#ifdef L_m16gesf2 -REVCMPSF (__mips16_gesf2, c.le.s, 0, -1) -#endif -#ifdef L_m16lesf2 -CMPSF (__mips16_lesf2, c.le.s, 0, 1) -#endif -#ifdef L_m16ltsf2 -CMPSF (__mips16_ltsf2, c.lt.s, -1, 0) -#endif -#ifdef L_m16unordsf2 -CMPSF(__mips16_unordsf2, c.un.s, 1, 0) -#endif - - -/* Single-precision conversions. */ - -#ifdef L_m16fltsisf -STARTFN (__mips16_floatsisf) - MOVE_SF_BYTE0 (t) - cvt.s.w RET,ARG1 - MOVE_SF_RET (f, $31) - ENDFN (__mips16_floatsisf) -#endif - -#ifdef L_m16fltunsisf -STARTFN (__mips16_floatunsisf) - .set noreorder - bltz $4,1f - MOVE_SF_BYTE0 (t) - .set reorder - cvt.s.w RET,ARG1 - MOVE_SF_RET (f, $31) -1: - and $2,$4,1 - srl $3,$4,1 - or $2,$2,$3 - mtc1 $2,RET - cvt.s.w RET,RET - add.s RET,RET,RET - MOVE_SF_RET (f, $31) - ENDFN (__mips16_floatunsisf) -#endif - -#ifdef L_m16fix_truncsfsi -STARTFN (__mips16_fix_truncsfsi) - MOVE_SF_BYTE0 (t) - trunc.w.s RET,ARG1,$4 - MOVE_SI_RET (f, $31) - ENDFN (__mips16_fix_truncsfsi) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) - -/* Double-precision math. */ - -/* Define a function NAME that loads two double-precision values, - performs FPU operation OPCODE on them, and returns the double- - precision result. */ - -#define OPDF3(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - MOVE_DF_BYTE8 (t); \ - OPCODE RET,ARG1,ARG2; \ - MOVE_DF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16adddf3 -OPDF3 (__mips16_adddf3, add.d) -#endif -#ifdef L_m16subdf3 -OPDF3 (__mips16_subdf3, sub.d) -#endif -#ifdef L_m16muldf3 -OPDF3 (__mips16_muldf3, mul.d) -#endif -#ifdef L_m16divdf3 -OPDF3 (__mips16_divdf3, div.d) -#endif - -/* Define a function NAME that loads a double-precision value, - performs FPU operation OPCODE on it, and returns the double- - precision result. */ - -#define OPDF2(NAME, OPCODE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - OPCODE RET,ARG1; \ - MOVE_DF_RET (f, $31); \ - ENDFN (NAME) - -#ifdef L_m16negdf2 -OPDF2 (__mips16_negdf2, neg.d) -#endif -#ifdef L_m16absdf2 -OPDF2 (__mips16_absdf2, abs.d) -#endif - -/* Conversions between single and double precision. */ - -#ifdef L_m16extsfdf2 -STARTFN (__mips16_extendsfdf2) - MOVE_SF_BYTE0 (t) - cvt.d.s RET,ARG1 - MOVE_DF_RET (f, $31) - ENDFN (__mips16_extendsfdf2) -#endif - -#ifdef L_m16trdfsf2 -STARTFN (__mips16_truncdfsf2) - MOVE_DF_BYTE0 (t) - cvt.s.d RET,ARG1 - MOVE_SF_RET (f, $31) - ENDFN (__mips16_truncdfsf2) -#endif - -/* Double-precision comparisons. */ - -/* Define a function NAME that loads two double-precision values, - performs floating point comparison OPCODE, and returns TRUE or - FALSE depending on the result. 
*/ - -#define CMPDF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - MOVE_DF_BYTE8 (t); \ - OPCODE ARG1,ARG2; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -/* Like CMPDF, but reverse the comparison operands. */ - -#define REVCMPDF(NAME, OPCODE, TRUE, FALSE) \ -STARTFN (NAME); \ - MOVE_DF_BYTE0 (t); \ - MOVE_DF_BYTE8 (t); \ - OPCODE ARG2,ARG1; \ - li $2,TRUE; \ - bc1t 1f; \ - li $2,FALSE; \ -1:; \ - j $31; \ - ENDFN (NAME) - -#ifdef L_m16eqdf2 -CMPDF (__mips16_eqdf2, c.eq.d, 0, 1) -#endif -#ifdef L_m16nedf2 -CMPDF (__mips16_nedf2, c.eq.d, 0, 1) -#endif -#ifdef L_m16gtdf2 -REVCMPDF (__mips16_gtdf2, c.lt.d, 1, 0) -#endif -#ifdef L_m16gedf2 -REVCMPDF (__mips16_gedf2, c.le.d, 0, -1) -#endif -#ifdef L_m16ledf2 -CMPDF (__mips16_ledf2, c.le.d, 0, 1) -#endif -#ifdef L_m16ltdf2 -CMPDF (__mips16_ltdf2, c.lt.d, -1, 0) -#endif -#ifdef L_m16unorddf2 -CMPDF(__mips16_unorddf2, c.un.d, 1, 0) -#endif - -/* Double-precision conversions. */ - -#ifdef L_m16fltsidf -STARTFN (__mips16_floatsidf) - MOVE_SI_BYTE0 (t) - cvt.d.w RET,ARG1 - MOVE_DF_RET (f, $31) - ENDFN (__mips16_floatsidf) -#endif - -#ifdef L_m16fltunsidf -STARTFN (__mips16_floatunsidf) - MOVE_SI_BYTE0 (t) - cvt.d.w RET,ARG1 - bgez $4,1f - li.d ARG1, 4.294967296e+9 - add.d RET, RET, ARG1 -1: MOVE_DF_RET (f, $31) - ENDFN (__mips16_floatunsidf) -#endif - -#ifdef L_m16fix_truncdfsi -STARTFN (__mips16_fix_truncdfsi) - MOVE_DF_BYTE0 (t) - trunc.w.d RET,ARG1,$4 - MOVE_SI_RET (f, $31) - ENDFN (__mips16_fix_truncdfsi) -#endif -#endif /* !__mips_single_float */ - -/* Define a function NAME that moves a return value of mode MODE from - FPRs to GPRs. */ - -#define RET_FUNCTION(NAME, MODE) \ -STARTFN (NAME); \ - MOVE_##MODE##_RET (t, $31); \ - ENDFN (NAME) - -#ifdef L_m16retsf -RET_FUNCTION (__mips16_ret_sf, SF) -#endif - -#ifdef L_m16retsc -RET_FUNCTION (__mips16_ret_sc, SC) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16retdf -RET_FUNCTION (__mips16_ret_df, DF) -#endif - -#ifdef L_m16retdc -RET_FUNCTION (__mips16_ret_dc, DC) -#endif -#endif /* !__mips_single_float */ - -/* STUB_ARGS_X copies the arguments from GPRs to FPRs for argument - code X. X is calculated as ARG1 + ARG2 * 4, where ARG1 and ARG2 - classify the first and second arguments as follows: - - 1: a single-precision argument - 2: a double-precision argument - 0: no argument, or not one of the above. */ - -#define STUB_ARGS_0 /* () */ -#define STUB_ARGS_1 MOVE_SF_BYTE0 (t) /* (sf) */ -#define STUB_ARGS_5 MOVE_SF_BYTE0 (t); MOVE_SF_BYTE4 (t) /* (sf, sf) */ -#define STUB_ARGS_9 MOVE_SF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (sf, df) */ -#define STUB_ARGS_2 MOVE_DF_BYTE0 (t) /* (df) */ -#define STUB_ARGS_6 MOVE_DF_BYTE0 (t); MOVE_SF_BYTE8 (t) /* (df, sf) */ -#define STUB_ARGS_10 MOVE_DF_BYTE0 (t); MOVE_DF_BYTE8 (t) /* (df, df) */ - -/* These functions are used by 16-bit code when calling via a function - pointer. They must copy the floating point arguments from the GPRs - to FPRs and then call function $2. 
*/ - -#define CALL_STUB_NO_RET(NAME, CODE) \ -STARTFN (NAME); \ - STUB_ARGS_##CODE; \ - .set noreorder; \ - jr $2; \ - move $25,$2; \ - .set reorder; \ - ENDFN (NAME) - -#ifdef L_m16stub1 -CALL_STUB_NO_RET (__mips16_call_stub_1, 1) -#endif - -#ifdef L_m16stub5 -CALL_STUB_NO_RET (__mips16_call_stub_5, 5) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) - -#ifdef L_m16stub2 -CALL_STUB_NO_RET (__mips16_call_stub_2, 2) -#endif - -#ifdef L_m16stub6 -CALL_STUB_NO_RET (__mips16_call_stub_6, 6) -#endif - -#ifdef L_m16stub9 -CALL_STUB_NO_RET (__mips16_call_stub_9, 9) -#endif - -#ifdef L_m16stub10 -CALL_STUB_NO_RET (__mips16_call_stub_10, 10) -#endif -#endif /* !__mips_single_float */ - -/* Now we have the same set of functions, except that this time the - function being called returns an SFmode, SCmode, DFmode or DCmode - value; we need to instantiate a set for each case. The calling - function will arrange to preserve $18, so these functions are free - to use it to hold the return address. - - Note that we do not know whether the function we are calling is 16 - bit or 32 bit. However, it does not matter, because 16-bit - functions always return floating point values in both the gp and - the fp regs. It would be possible to check whether the function - being called is 16 bits, in which case the copy is unnecessary; - however, it's faster to always do the copy. */ - -#define CALL_STUB_RET(NAME, CODE, MODE) \ -STARTFN (NAME); \ - move $18,$31; \ - STUB_ARGS_##CODE; \ - .set noreorder; \ - jalr $2; \ - move $25,$2; \ - .set reorder; \ - MOVE_##MODE##_RET (f, $18); \ - ENDFN (NAME) - -/* First, instantiate the single-float set. */ - -#ifdef L_m16stubsf0 -CALL_STUB_RET (__mips16_call_stub_sf_0, 0, SF) -#endif - -#ifdef L_m16stubsf1 -CALL_STUB_RET (__mips16_call_stub_sf_1, 1, SF) -#endif - -#ifdef L_m16stubsf5 -CALL_STUB_RET (__mips16_call_stub_sf_5, 5, SF) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubsf2 -CALL_STUB_RET (__mips16_call_stub_sf_2, 2, SF) -#endif - -#ifdef L_m16stubsf6 -CALL_STUB_RET (__mips16_call_stub_sf_6, 6, SF) -#endif - -#ifdef L_m16stubsf9 -CALL_STUB_RET (__mips16_call_stub_sf_9, 9, SF) -#endif - -#ifdef L_m16stubsf10 -CALL_STUB_RET (__mips16_call_stub_sf_10, 10, SF) -#endif -#endif /* !__mips_single_float */ - - -/* Now we have the same set of functions again, except that this time - the function being called returns an DFmode value. */ - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubdf0 -CALL_STUB_RET (__mips16_call_stub_df_0, 0, DF) -#endif - -#ifdef L_m16stubdf1 -CALL_STUB_RET (__mips16_call_stub_df_1, 1, DF) -#endif - -#ifdef L_m16stubdf5 -CALL_STUB_RET (__mips16_call_stub_df_5, 5, DF) -#endif - -#ifdef L_m16stubdf2 -CALL_STUB_RET (__mips16_call_stub_df_2, 2, DF) -#endif - -#ifdef L_m16stubdf6 -CALL_STUB_RET (__mips16_call_stub_df_6, 6, DF) -#endif - -#ifdef L_m16stubdf9 -CALL_STUB_RET (__mips16_call_stub_df_9, 9, DF) -#endif - -#ifdef L_m16stubdf10 -CALL_STUB_RET (__mips16_call_stub_df_10, 10, DF) -#endif -#endif /* !__mips_single_float */ - - -/* Ho hum. Here we have the same set of functions again, this time - for when the function being called returns an SCmode value. 
*/ - -#ifdef L_m16stubsc0 -CALL_STUB_RET (__mips16_call_stub_sc_0, 0, SC) -#endif - -#ifdef L_m16stubsc1 -CALL_STUB_RET (__mips16_call_stub_sc_1, 1, SC) -#endif - -#ifdef L_m16stubsc5 -CALL_STUB_RET (__mips16_call_stub_sc_5, 5, SC) -#endif - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubsc2 -CALL_STUB_RET (__mips16_call_stub_sc_2, 2, SC) -#endif - -#ifdef L_m16stubsc6 -CALL_STUB_RET (__mips16_call_stub_sc_6, 6, SC) -#endif - -#ifdef L_m16stubsc9 -CALL_STUB_RET (__mips16_call_stub_sc_9, 9, SC) -#endif - -#ifdef L_m16stubsc10 -CALL_STUB_RET (__mips16_call_stub_sc_10, 10, SC) -#endif -#endif /* !__mips_single_float */ - - -/* Finally, another set of functions for DCmode. */ - -#if !defined(__mips_single_float) && !defined(__SINGLE_FLOAT) -#ifdef L_m16stubdc0 -CALL_STUB_RET (__mips16_call_stub_dc_0, 0, DC) -#endif - -#ifdef L_m16stubdc1 -CALL_STUB_RET (__mips16_call_stub_dc_1, 1, DC) -#endif - -#ifdef L_m16stubdc5 -CALL_STUB_RET (__mips16_call_stub_dc_5, 5, DC) -#endif - -#ifdef L_m16stubdc2 -CALL_STUB_RET (__mips16_call_stub_dc_2, 2, DC) -#endif - -#ifdef L_m16stubdc6 -CALL_STUB_RET (__mips16_call_stub_dc_6, 6, DC) -#endif - -#ifdef L_m16stubdc9 -CALL_STUB_RET (__mips16_call_stub_dc_9, 9, DC) -#endif - -#ifdef L_m16stubdc10 -CALL_STUB_RET (__mips16_call_stub_dc_10, 10, DC) -#endif -#endif /* !__mips_single_float */ -#endif diff --git a/gcc/config/mips/t-libgcc-mips16 b/gcc/config/mips/t-libgcc-mips16 deleted file mode 100644 index 31a042bb75e..00000000000 --- a/gcc/config/mips/t-libgcc-mips16 +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2007, 2008, 2011 Free Software Foundation, Inc. -# -# This file is part of GCC. -# -# GCC is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GCC is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# <http://www.gnu.org/licenses/>. - -LIB1ASMSRC = mips/mips16.S -LIB1ASMFUNCS = _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \ - _m16eqsf2 _m16nesf2 _m16gtsf2 _m16gesf2 _m16lesf2 _m16ltsf2 \ - _m16unordsf2 \ - _m16fltsisf _m16fix_truncsfsi _m16fltunsisf \ - _m16adddf3 _m16subdf3 _m16muldf3 _m16divdf3 \ - _m16extsfdf2 _m16trdfsf2 \ - _m16eqdf2 _m16nedf2 _m16gtdf2 _m16gedf2 _m16ledf2 _m16ltdf2 \ - _m16unorddf2 \ - _m16fltsidf _m16fix_truncdfsi _m16fltunsidf \ - _m16retsf _m16retdf \ - _m16retsc _m16retdc \ - _m16stub1 _m16stub2 _m16stub5 _m16stub6 _m16stub9 _m16stub10 \ - _m16stubsf0 _m16stubsf1 _m16stubsf2 _m16stubsf5 _m16stubsf6 \ - _m16stubsf9 _m16stubsf10 \ - _m16stubdf0 _m16stubdf1 _m16stubdf2 _m16stubdf5 _m16stubdf6 \ - _m16stubdf9 _m16stubdf10 \ - _m16stubsc0 _m16stubsc1 _m16stubsc2 _m16stubsc5 _m16stubsc6 \ - _m16stubsc9 _m16stubsc10 \ - _m16stubdc0 _m16stubdc1 _m16stubdc2 _m16stubdc5 _m16stubdc6 \ - _m16stubdc9 _m16stubdc10 diff --git a/gcc/config/mips/t-sr71k b/gcc/config/mips/t-sr71k index 7b8669fefd2..f204017faa8 100644 --- a/gcc/config/mips/t-sr71k +++ b/gcc/config/mips/t-sr71k @@ -16,11 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. 
-# Suppress building libgcc1.a, since the MIPS compiler port is complete -# and does not need anything from libgcc1.a. -LIBGCC1 = -CROSS_LIBGCC1 = - # We must build libgcc2.a with -G 0, in case the user wants to link # without the $gp register. TARGET_LIBGCC2_CFLAGS = -G 0 diff --git a/gcc/config/pa/milli64.S b/gcc/config/pa/milli64.S deleted file mode 100644 index 2e9c4f741b6..00000000000 --- a/gcc/config/pa/milli64.S +++ /dev/null @@ -1,2134 +0,0 @@ -/* 32 and 64-bit millicode, original author Hewlett-Packard - adapted for gcc by Paul Bame <bame@debian.org> - and Alan Modra <alan@linuxcare.com.au>. - - Copyright 2001, 2002, 2003, 2007, 2009 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -#ifdef pa64 - .level 2.0w -#endif - -/* Hardware General Registers. */ -r0: .reg %r0 -r1: .reg %r1 -r2: .reg %r2 -r3: .reg %r3 -r4: .reg %r4 -r5: .reg %r5 -r6: .reg %r6 -r7: .reg %r7 -r8: .reg %r8 -r9: .reg %r9 -r10: .reg %r10 -r11: .reg %r11 -r12: .reg %r12 -r13: .reg %r13 -r14: .reg %r14 -r15: .reg %r15 -r16: .reg %r16 -r17: .reg %r17 -r18: .reg %r18 -r19: .reg %r19 -r20: .reg %r20 -r21: .reg %r21 -r22: .reg %r22 -r23: .reg %r23 -r24: .reg %r24 -r25: .reg %r25 -r26: .reg %r26 -r27: .reg %r27 -r28: .reg %r28 -r29: .reg %r29 -r30: .reg %r30 -r31: .reg %r31 - -/* Hardware Space Registers. */ -sr0: .reg %sr0 -sr1: .reg %sr1 -sr2: .reg %sr2 -sr3: .reg %sr3 -sr4: .reg %sr4 -sr5: .reg %sr5 -sr6: .reg %sr6 -sr7: .reg %sr7 - -/* Hardware Floating Point Registers. */ -fr0: .reg %fr0 -fr1: .reg %fr1 -fr2: .reg %fr2 -fr3: .reg %fr3 -fr4: .reg %fr4 -fr5: .reg %fr5 -fr6: .reg %fr6 -fr7: .reg %fr7 -fr8: .reg %fr8 -fr9: .reg %fr9 -fr10: .reg %fr10 -fr11: .reg %fr11 -fr12: .reg %fr12 -fr13: .reg %fr13 -fr14: .reg %fr14 -fr15: .reg %fr15 - -/* Hardware Control Registers. */ -cr11: .reg %cr11 -sar: .reg %cr11 /* Shift Amount Register */ - -/* Software Architecture General Registers. */ -rp: .reg r2 /* return pointer */ -#ifdef pa64 -mrp: .reg r2 /* millicode return pointer */ -#else -mrp: .reg r31 /* millicode return pointer */ -#endif -ret0: .reg r28 /* return value */ -ret1: .reg r29 /* return value (high part of double) */ -sp: .reg r30 /* stack pointer */ -dp: .reg r27 /* data pointer */ -arg0: .reg r26 /* argument */ -arg1: .reg r25 /* argument or high part of double argument */ -arg2: .reg r24 /* argument */ -arg3: .reg r23 /* argument or high part of double argument */ - -/* Software Architecture Space Registers. */ -/* sr0 ; return link from BLE */ -sret: .reg sr1 /* return value */ -sarg: .reg sr1 /* argument */ -/* sr4 ; PC SPACE tracker */ -/* sr5 ; process private data */ - -/* Frame Offsets (millicode convention!) 
Used when calling other - millicode routines. Stack unwinding is dependent upon these - definitions. */ -r31_slot: .equ -20 /* "current RP" slot */ -sr0_slot: .equ -16 /* "static link" slot */ -#if defined(pa64) -mrp_slot: .equ -16 /* "current RP" slot */ -psp_slot: .equ -8 /* "previous SP" slot */ -#else -mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */ -#endif - - -#define DEFINE(name,value)name: .EQU value -#define RDEFINE(name,value)name: .REG value -#ifdef milliext -#define MILLI_BE(lbl) BE lbl(sr7,r0) -#define MILLI_BEN(lbl) BE,n lbl(sr7,r0) -#define MILLI_BLE(lbl) BLE lbl(sr7,r0) -#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0) -#define MILLIRETN BE,n 0(sr0,mrp) -#define MILLIRET BE 0(sr0,mrp) -#define MILLI_RETN BE,n 0(sr0,mrp) -#define MILLI_RET BE 0(sr0,mrp) -#else -#define MILLI_BE(lbl) B lbl -#define MILLI_BEN(lbl) B,n lbl -#define MILLI_BLE(lbl) BL lbl,mrp -#define MILLI_BLEN(lbl) BL,n lbl,mrp -#define MILLIRETN BV,n 0(mrp) -#define MILLIRET BV 0(mrp) -#define MILLI_RETN BV,n 0(mrp) -#define MILLI_RET BV 0(mrp) -#endif - -#ifdef __STDC__ -#define CAT(a,b) a##b -#else -#define CAT(a,b) a/**/b -#endif - -#ifdef ELF -#define SUBSPA_MILLI .section .text -#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16 -#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16 -#define ATTR_MILLI -#define SUBSPA_DATA .section .data -#define ATTR_DATA -#define GLOBAL $global$ -#define GSYM(sym) !sym: -#define LSYM(sym) !CAT(.L,sym:) -#define LREF(sym) CAT(.L,sym) - -#else - -#ifdef coff -/* This used to be .milli but since link32 places different named - sections in different segments millicode ends up a long ways away - from .text (1meg?). This way they will be a lot closer. - - The SUBSPA_MILLI_* specify locality sets for certain millicode - modules in order to ensure that modules that call one another are - placed close together. Without locality sets this is unlikely to - happen because of the Dynamite linker library search algorithm. We - want these modules close together so that short calls always reach - (we don't want to require long calls or use long call stubs). 
*/ - -#define SUBSPA_MILLI .subspa .text -#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16 -#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16 -#define ATTR_MILLI .attr code,read,execute -#define SUBSPA_DATA .subspa .data -#define ATTR_DATA .attr init_data,read,write -#define GLOBAL _gp -#else -#define SUBSPA_MILLI .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8 -#define SUBSPA_MILLI_DIV SUBSPA_MILLI -#define SUBSPA_MILLI_MUL SUBSPA_MILLI -#define ATTR_MILLI -#define SUBSPA_DATA .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero -#define ATTR_DATA -#define GLOBAL $global$ -#endif -#define SPACE_DATA .space $PRIVATE$,spnum=1,sort=16 - -#define GSYM(sym) !sym -#define LSYM(sym) !CAT(L$,sym) -#define LREF(sym) CAT(L$,sym) -#endif - -#ifdef L_dyncall - SUBSPA_MILLI - ATTR_DATA -GSYM($$dyncall) - .export $$dyncall,millicode - .proc - .callinfo millicode - .entry - bb,>=,n %r22,30,LREF(1) ; branch if not plabel address - depi 0,31,2,%r22 ; clear the two least significant bits - ldw 4(%r22),%r19 ; load new LTP value - ldw 0(%r22),%r22 ; load address of target -LSYM(1) -#ifdef LINUX - bv %r0(%r22) ; branch to the real target -#else - ldsid (%sr0,%r22),%r1 ; get the "space ident" selected by r22 - mtsp %r1,%sr0 ; move that space identifier into sr0 - be 0(%sr0,%r22) ; branch to the real target -#endif - stw %r2,-24(%r30) ; save return address into frame marker - .exit - .procend -#endif - -#ifdef L_divI -/* ROUTINES: $$divI, $$divoI - - Single precision divide for signed binary integers. - - The quotient is truncated towards zero. - The sign of the quotient is the XOR of the signs of the dividend and - divisor. - Divide by zero is trapped. - Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI. - - INPUT REGISTERS: - . arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = quotient - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: - . divisor is zero (traps with ADDIT,= 0,25,0) - . dividend==-2**31 and divisor==-1 and routine is $$divoI - . (traps with ADDO 26,25,0) - . Changes memory at the following places: - . NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Branchs to other millicode routines using BE - . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15 - . - . For selected divisors, calls a divide by constant routine written by - . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13. - . - . The only overflow case is -2**31 divided by -1. - . Both routines return -2**31 but only $$divoI traps. */ - -RDEFINE(temp,r1) -RDEFINE(retreg,ret1) /* r29 */ -RDEFINE(temp1,arg0) - SUBSPA_MILLI_DIV - ATTR_MILLI - .import $$divI_2,millicode - .import $$divI_3,millicode - .import $$divI_4,millicode - .import $$divI_5,millicode - .import $$divI_6,millicode - .import $$divI_7,millicode - .import $$divI_8,millicode - .import $$divI_9,millicode - .import $$divI_10,millicode - .import $$divI_12,millicode - .import $$divI_14,millicode - .import $$divI_15,millicode - .export $$divI,millicode - .export $$divoI,millicode - .proc - .callinfo millicode - .entry -GSYM($$divoI) - comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */ -GSYM($$divI) - ldo -1(arg1),temp /* is there at most one bit set ? 
*/ - and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */ - addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */ - b,n LREF(neg_denom) -LSYM(pow2) - addi,>= 0,arg0,retreg /* if numerator is negative, add the */ - add arg0,temp,retreg /* (denominaotr -1) to correct for shifts */ - extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ - extrs retreg,15,16,retreg /* retreg = retreg >> 16 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ - ldi 0xcc,temp1 /* setup 0xcc in temp1 */ - extru,= arg1,23,8,temp /* test denominator with 0xff00 */ - extrs retreg,23,24,retreg /* retreg = retreg >> 8 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ - ldi 0xaa,temp /* setup 0xaa in temp */ - extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ - extrs retreg,27,28,retreg /* retreg = retreg >> 4 */ - and,= arg1,temp1,r0 /* test denominator with 0xcc */ - extrs retreg,29,30,retreg /* retreg = retreg >> 2 */ - and,= arg1,temp,r0 /* test denominator with 0xaa */ - extrs retreg,30,31,retreg /* retreg = retreg >> 1 */ - MILLIRETN -LSYM(neg_denom) - addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */ - b,n LREF(regular_seq) - sub r0,arg1,temp /* make denominator positive */ - comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */ - ldo -1(temp),retreg /* is there at most one bit set ? */ - and,= temp,retreg,r0 /* if so, the denominator is power of 2 */ - b,n LREF(regular_seq) - sub r0,arg0,retreg /* negate numerator */ - comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */ - copy retreg,arg0 /* set up arg0, arg1 and temp */ - copy temp,arg1 /* before branching to pow2 */ - b LREF(pow2) - ldo -1(arg1),temp -LSYM(regular_seq) - comib,>>=,n 15,arg1,LREF(small_divisor) - add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ -LSYM(normal) - subi 0,retreg,retreg /* make it positive */ - sub 0,arg1,temp /* clear carry, */ - /* negate the divisor */ - ds 0,temp,0 /* set V-bit to the comple- */ - /* ment of the divisor sign */ - add retreg,retreg,retreg /* shift msb bit into carry */ - ds r0,arg1,temp /* 1st divide step, if no carry */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 2nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 3rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 4th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 5th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 6th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 7th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 8th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 9th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 10th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 11th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 12th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 13th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 14th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ 
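Each ds/addc pair above and below retires one quotient bit: ds conditionally adds or subtracts the divisor against the running partial remainder (steered by the V-bit set up before the first step), and the following addc shifts the resulting quotient bit from the carry into retreg while the partial remainder is shifted left. After 32 steps retreg holds the unsigned quotient; for the signed $$divI entry the operands were made positive first and the xor/sub at the end of the sequence restores the sign. A rough C model of the unsigned case — plain restoring long division with illustrative names, not the exact non-restoring ds semantics or the millicode register conventions:

/* Simplified model: one quotient bit per step, high bit first.  Names are
   illustrative; the real millicode returns the quotient in ret1 and traps
   on a zero divisor instead of returning.  */
static unsigned int
div_step_model (unsigned int dividend, unsigned int divisor,
                unsigned int *remainder)
{
  unsigned int rem = 0, quot = 0;
  int i;

  for (i = 31; i >= 0; i--)
    {
      /* Shift the next dividend bit into the partial remainder
         (the add/addc shifting in the millicode).  */
      rem = (rem << 1) | ((dividend >> i) & 1);
      quot <<= 1;

      /* Conditional subtract of the divisor (the ds step); a successful
         subtract contributes a 1 bit to the quotient.  */
      if (rem >= divisor)
        {
          rem -= divisor;
          quot |= 1;
        }
    }

  *remainder = rem;   /* $$remI/$$remU return this value instead.  */
  return quot;
}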
- ds temp,arg1,temp /* 15th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 16th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 17th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 18th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 19th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 20th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 21st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 22nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 23rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 24th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 25th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 26th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 27th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 28th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 29th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 30th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 31st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 32nd divide step, */ - addc retreg,retreg,retreg /* shift last retreg bit into retreg */ - xor,>= arg0,arg1,0 /* get correct sign of quotient */ - sub 0,retreg,retreg /* based on operand signs */ - MILLIRETN - nop - -LSYM(small_divisor) - -#if defined(pa64) -/* Clear the upper 32 bits of the arg1 register. We are working with */ -/* small divisors (and 32-bit integers) We must not be mislead */ -/* by "1" bits left in the upper 32 bits. */ - depd %r0,31,32,%r25 -#endif - blr,n arg1,r0 - nop -/* table for divisor == 0,1, ... ,15 */ - addit,= 0,arg1,r0 /* trap if divisor == 0 */ - nop - MILLIRET /* divisor == 1 */ - copy arg0,retreg - MILLI_BEN($$divI_2) /* divisor == 2 */ - nop - MILLI_BEN($$divI_3) /* divisor == 3 */ - nop - MILLI_BEN($$divI_4) /* divisor == 4 */ - nop - MILLI_BEN($$divI_5) /* divisor == 5 */ - nop - MILLI_BEN($$divI_6) /* divisor == 6 */ - nop - MILLI_BEN($$divI_7) /* divisor == 7 */ - nop - MILLI_BEN($$divI_8) /* divisor == 8 */ - nop - MILLI_BEN($$divI_9) /* divisor == 9 */ - nop - MILLI_BEN($$divI_10) /* divisor == 10 */ - nop - b LREF(normal) /* divisor == 11 */ - add,>= 0,arg0,retreg - MILLI_BEN($$divI_12) /* divisor == 12 */ - nop - b LREF(normal) /* divisor == 13 */ - add,>= 0,arg0,retreg - MILLI_BEN($$divI_14) /* divisor == 14 */ - nop - MILLI_BEN($$divI_15) /* divisor == 15 */ - nop - -LSYM(negative1) - sub 0,arg0,retreg /* result is negation of dividend */ - MILLIRET - addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */ - .exit - .procend - .end -#endif - -#ifdef L_divU -/* ROUTINE: $$divU - . - . Single precision divide for unsigned integers. - . - . Quotient is truncated towards zero. - . Traps on divide by zero. - - INPUT REGISTERS: - . 
arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = quotient - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: - . divisor is zero - . Changes memory at the following places: - . NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Does not create a stack frame. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Branchs to other millicode routines using BE: - . $$divU_# for 3,5,6,7,9,10,12,14,15 - . - . For selected small divisors calls the special divide by constant - . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */ - -RDEFINE(temp,r1) -RDEFINE(retreg,ret1) /* r29 */ -RDEFINE(temp1,arg0) - SUBSPA_MILLI_DIV - ATTR_MILLI - .export $$divU,millicode - .import $$divU_3,millicode - .import $$divU_5,millicode - .import $$divU_6,millicode - .import $$divU_7,millicode - .import $$divU_9,millicode - .import $$divU_10,millicode - .import $$divU_12,millicode - .import $$divU_14,millicode - .import $$divU_15,millicode - .proc - .callinfo millicode - .entry -GSYM($$divU) -/* The subtract is not nullified since it does no harm and can be used - by the two cases that branch back to "normal". */ - ldo -1(arg1),temp /* is there at most one bit set ? */ - and,= arg1,temp,r0 /* if so, denominator is power of 2 */ - b LREF(regular_seq) - addit,= 0,arg1,0 /* trap for zero dvr */ - copy arg0,retreg - extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */ - extru retreg,15,16,retreg /* retreg = retreg >> 16 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */ - ldi 0xcc,temp1 /* setup 0xcc in temp1 */ - extru,= arg1,23,8,temp /* test denominator with 0xff00 */ - extru retreg,23,24,retreg /* retreg = retreg >> 8 */ - or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */ - ldi 0xaa,temp /* setup 0xaa in temp */ - extru,= arg1,27,4,r0 /* test denominator with 0xf0 */ - extru retreg,27,28,retreg /* retreg = retreg >> 4 */ - and,= arg1,temp1,r0 /* test denominator with 0xcc */ - extru retreg,29,30,retreg /* retreg = retreg >> 2 */ - and,= arg1,temp,r0 /* test denominator with 0xaa */ - extru retreg,30,31,retreg /* retreg = retreg >> 1 */ - MILLIRETN - nop -LSYM(regular_seq) - comib,>= 15,arg1,LREF(special_divisor) - subi 0,arg1,temp /* clear carry, negate the divisor */ - ds r0,temp,r0 /* set V-bit to 1 */ -LSYM(normal) - add arg0,arg0,retreg /* shift msb bit into carry */ - ds r0,arg1,temp /* 1st divide step, if no carry */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 2nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 3rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 4th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 5th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 6th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 7th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 8th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 9th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds 
temp,arg1,temp /* 10th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 11th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 12th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 13th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 14th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 15th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 16th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 17th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 18th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 19th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 20th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 21st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 22nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 23rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 24th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 25th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 26th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 27th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 28th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 29th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 30th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 31st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds temp,arg1,temp /* 32nd divide step, */ - MILLIRET - addc retreg,retreg,retreg /* shift last retreg bit into retreg */ - -/* Handle the cases where divisor is a small constant or has high bit on. */ -LSYM(special_divisor) -/* blr arg1,r0 */ -/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */ - -/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from - generating such a blr, comib sequence. A problem in nullification. So I - rewrote this code. */ - -#if defined(pa64) -/* Clear the upper 32 bits of the arg1 register. We are working with - small divisors (and 32-bit unsigned integers) We must not be mislead - by "1" bits left in the upper 32 bits. 
*/ - depd %r0,31,32,%r25 -#endif - comib,> 0,arg1,LREF(big_divisor) - nop - blr arg1,r0 - nop - -LSYM(zero_divisor) /* this label is here to provide external visibility */ - addit,= 0,arg1,0 /* trap for zero dvr */ - nop - MILLIRET /* divisor == 1 */ - copy arg0,retreg - MILLIRET /* divisor == 2 */ - extru arg0,30,31,retreg - MILLI_BEN($$divU_3) /* divisor == 3 */ - nop - MILLIRET /* divisor == 4 */ - extru arg0,29,30,retreg - MILLI_BEN($$divU_5) /* divisor == 5 */ - nop - MILLI_BEN($$divU_6) /* divisor == 6 */ - nop - MILLI_BEN($$divU_7) /* divisor == 7 */ - nop - MILLIRET /* divisor == 8 */ - extru arg0,28,29,retreg - MILLI_BEN($$divU_9) /* divisor == 9 */ - nop - MILLI_BEN($$divU_10) /* divisor == 10 */ - nop - b LREF(normal) /* divisor == 11 */ - ds r0,temp,r0 /* set V-bit to 1 */ - MILLI_BEN($$divU_12) /* divisor == 12 */ - nop - b LREF(normal) /* divisor == 13 */ - ds r0,temp,r0 /* set V-bit to 1 */ - MILLI_BEN($$divU_14) /* divisor == 14 */ - nop - MILLI_BEN($$divU_15) /* divisor == 15 */ - nop - -/* Handle the case where the high bit is on in the divisor. - Compute: if( dividend>=divisor) quotient=1; else quotient=0; - Note: dividend>==divisor iff dividend-divisor does not borrow - and not borrow iff carry. */ -LSYM(big_divisor) - sub arg0,arg1,r0 - MILLIRET - addc r0,r0,retreg - .exit - .procend - .end -#endif - -#ifdef L_remI -/* ROUTINE: $$remI - - DESCRIPTION: - . $$remI returns the remainder of the division of two signed 32-bit - . integers. The sign of the remainder is the same as the sign of - . the dividend. - - - INPUT REGISTERS: - . arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = destroyed - . arg1 = destroyed - . ret1 = remainder - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: DIVIDE BY ZERO - . Changes memory at the following places: NONE - - PERMISSIBLE CONTEXT: - . Unwindable - . Does not create a stack frame - . Is usable for internal or external microcode - - DISCUSSION: - . Calls other millicode routines via mrp: NONE - . Calls other millicode routines: NONE */ - -RDEFINE(tmp,r1) -RDEFINE(retreg,ret1) - - SUBSPA_MILLI - ATTR_MILLI - .proc - .callinfo millicode - .entry -GSYM($$remI) -GSYM($$remoI) - .export $$remI,MILLICODE - .export $$remoI,MILLICODE - ldo -1(arg1),tmp /* is there at most one bit set ? */ - and,<> arg1,tmp,r0 /* if not, don't use power of 2 */ - addi,> 0,arg1,r0 /* if denominator > 0, use power */ - /* of 2 */ - b,n LREF(neg_denom) -LSYM(pow2) - comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */ - and arg0,tmp,retreg /* get the result */ - MILLIRETN -LSYM(neg_num) - subi 0,arg0,arg0 /* negate numerator */ - and arg0,tmp,retreg /* get the result */ - subi 0,retreg,retreg /* negate result */ - MILLIRETN -LSYM(neg_denom) - addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */ - /* of 2 */ - b,n LREF(regular_seq) - sub r0,arg1,tmp /* make denominator positive */ - comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */ - ldo -1(tmp),retreg /* is there at most one bit set ? 
*/ - and,= tmp,retreg,r0 /* if not, go to regular_seq */ - b,n LREF(regular_seq) - comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */ - and arg0,retreg,retreg - MILLIRETN -LSYM(neg_num_2) - subi 0,arg0,tmp /* test against 0x80000000 */ - and tmp,retreg,retreg - subi 0,retreg,retreg - MILLIRETN -LSYM(regular_seq) - addit,= 0,arg1,0 /* trap if div by zero */ - add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */ - sub 0,retreg,retreg /* make it positive */ - sub 0,arg1, tmp /* clear carry, */ - /* negate the divisor */ - ds 0, tmp,0 /* set V-bit to the comple- */ - /* ment of the divisor sign */ - or 0,0, tmp /* clear tmp */ - add retreg,retreg,retreg /* shift msb bit into carry */ - ds tmp,arg1, tmp /* 1st divide step, if no carry */ - /* out, msb of quotient = 0 */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ -LSYM(t1) - ds tmp,arg1, tmp /* 2nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 3rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 4th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 5th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 6th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 7th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 8th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 9th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 10th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 11th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 12th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 13th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 14th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 15th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 16th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 17th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 18th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 19th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 20th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 21st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 22nd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 23rd divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 24th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 25th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 26th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 27th divide step */ - addc retreg,retreg,retreg /* 
shift retreg with/into carry */ - ds tmp,arg1, tmp /* 28th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 29th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 30th divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 31st divide step */ - addc retreg,retreg,retreg /* shift retreg with/into carry */ - ds tmp,arg1, tmp /* 32nd divide step, */ - addc retreg,retreg,retreg /* shift last bit into retreg */ - movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */ - add,< arg1,0,0 /* if arg1 > 0, add arg1 */ - add,tr tmp,arg1,retreg /* for correcting remainder tmp */ - sub tmp,arg1,retreg /* else add absolute value arg1 */ -LSYM(finish) - add,>= arg0,0,0 /* set sign of remainder */ - sub 0,retreg,retreg /* to sign of dividend */ - MILLIRET - nop - .exit - .procend -#ifdef milliext - .origin 0x00000200 -#endif - .end -#endif - -#ifdef L_remU -/* ROUTINE: $$remU - . Single precision divide for remainder with unsigned binary integers. - . - . The remainder must be dividend-(dividend/divisor)*divisor. - . Divide by zero is trapped. - - INPUT REGISTERS: - . arg0 == dividend - . arg1 == divisor - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = remainder - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: DIVIDE BY ZERO - . Changes memory at the following places: NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Does not create a stack frame. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Calls other millicode routines using mrp: NONE - . Calls other millicode routines: NONE */ - - -RDEFINE(temp,r1) -RDEFINE(rmndr,ret1) /* r29 */ - SUBSPA_MILLI - ATTR_MILLI - .export $$remU,millicode - .proc - .callinfo millicode - .entry -GSYM($$remU) - ldo -1(arg1),temp /* is there at most one bit set ? 
*/ - and,= arg1,temp,r0 /* if not, don't use power of 2 */ - b LREF(regular_seq) - addit,= 0,arg1,r0 /* trap on div by zero */ - and arg0,temp,rmndr /* get the result for power of 2 */ - MILLIRETN -LSYM(regular_seq) - comib,>=,n 0,arg1,LREF(special_case) - subi 0,arg1,rmndr /* clear carry, negate the divisor */ - ds r0,rmndr,r0 /* set V-bit to 1 */ - add arg0,arg0,temp /* shift msb bit into carry */ - ds r0,arg1,rmndr /* 1st divide step, if no carry */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 2nd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 3rd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 4th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 5th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 6th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 7th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 8th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 9th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 10th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 11th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 12th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 13th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 14th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 15th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 16th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 17th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 18th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 19th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 20th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 21st divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 22nd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 23rd divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 24th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 25th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 26th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 27th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 28th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 29th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 30th divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 31st divide step */ - addc temp,temp,temp /* shift temp with/into carry */ - ds rmndr,arg1,rmndr /* 32nd divide step, */ - 
comiclr,<= 0,rmndr,r0 - add rmndr,arg1,rmndr /* correction */ - MILLIRETN - nop - -/* Putting >= on the last DS and deleting COMICLR does not work! */ -LSYM(special_case) - sub,>>= arg0,arg1,rmndr - copy arg0,rmndr - MILLIRETN - nop - .exit - .procend - .end -#endif - -#ifdef L_div_const -/* ROUTINE: $$divI_2 - . $$divI_3 $$divU_3 - . $$divI_4 - . $$divI_5 $$divU_5 - . $$divI_6 $$divU_6 - . $$divI_7 $$divU_7 - . $$divI_8 - . $$divI_9 $$divU_9 - . $$divI_10 $$divU_10 - . - . $$divI_12 $$divU_12 - . - . $$divI_14 $$divU_14 - . $$divI_15 $$divU_15 - . $$divI_16 - . $$divI_17 $$divU_17 - . - . Divide by selected constants for single precision binary integers. - - INPUT REGISTERS: - . arg0 == dividend - . mrp == return pc - . sr0 == return space when called externally - - OUTPUT REGISTERS: - . arg0 = undefined - . arg1 = undefined - . ret1 = quotient - - OTHER REGISTERS AFFECTED: - . r1 = undefined - - SIDE EFFECTS: - . Causes a trap under the following conditions: NONE - . Changes memory at the following places: NONE - - PERMISSIBLE CONTEXT: - . Unwindable. - . Does not create a stack frame. - . Suitable for internal or external millicode. - . Assumes the special millicode register conventions. - - DISCUSSION: - . Calls other millicode routines using mrp: NONE - . Calls other millicode routines: NONE */ - - -/* TRUNCATED DIVISION BY SMALL INTEGERS - - We are interested in q(x) = floor(x/y), where x >= 0 and y > 0 - (with y fixed). - - Let a = floor(z/y), for some choice of z. Note that z will be - chosen so that division by z is cheap. - - Let r be the remainder(z/y). In other words, r = z - ay. - - Now, our method is to choose a value for b such that - - q'(x) = floor((ax+b)/z) - - is equal to q(x) over as large a range of x as possible. If the - two are equal over a sufficiently large range, and if it is easy to - form the product (ax), and it is easy to divide by z, then we can - perform the division much faster than the general division algorithm. - - So, we want the following to be true: - - . For x in the following range: - . - . ky <= x < (k+1)y - . - . implies that - . - . k <= (ax+b)/z < (k+1) - - We want to determine b such that this is true for all k in the - range {0..K} for some maximum K. - - Since (ax+b) is an increasing function of x, we can take each - bound separately to determine the "best" value for b. - - (ax+b)/z < (k+1) implies - - (a((k+1)y-1)+b < (k+1)z implies - - b < a + (k+1)(z-ay) implies - - b < a + (k+1)r - - This needs to be true for all k in the range {0..K}. In - particular, it is true for k = 0 and this leads to a maximum - acceptable value for b. - - b < a+r or b <= a+r-1 - - Taking the other bound, we have - - k <= (ax+b)/z implies - - k <= (aky+b)/z implies - - k(z-ay) <= b implies - - kr <= b - - Clearly, the largest range for k will be achieved by maximizing b, - when r is not zero. When r is zero, then the simplest choice for b - is 0. When r is not 0, set - - . b = a+r-1 - - Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y) - for all x in the range: - - . 0 <= x < (K+1)y - - We need to determine what K is. Of our two bounds, - - . b < a+(k+1)r is satisfied for all k >= 0, by construction. - - The other bound is - - . kr <= b - - This is always true if r = 0. If r is not 0 (the usual case), then - K = floor((a+r-1)/r), is the maximum value for k. 
- - Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct - answer for q(x) = floor(x/y) when x is in the range - - (0,(K+1)y-1) K = floor((a+r-1)/r) - - To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that - the formula for q'(x) yields the correct value of q(x) for all x - representable by a single word in HPPA. - - We are also constrained in that computing the product (ax), adding - b, and dividing by z must all be done quickly, otherwise we will be - better off going through the general algorithm using the DS - instruction, which uses approximately 70 cycles. - - For each y, there is a choice of z which satisfies the constraints - for (K+1)y >= 2**32. We may not, however, be able to satisfy the - timing constraints for arbitrary y. It seems that z being equal to - a power of 2 or a power of 2 minus 1 is as good as we can do, since - it minimizes the time to do division by z. We want the choice of z - to also result in a value for (a) that minimizes the computation of - the product (ax). This is best achieved if (a) has a regular bit - pattern (so the multiplication can be done with shifts and adds). - The value of (a) also needs to be less than 2**32 so the product is - always guaranteed to fit in 2 words. - - In actual practice, the following should be done: - - 1) For negative x, you should take the absolute value and remember - . the fact so that the result can be negated. This obviously does - . not apply in the unsigned case. - 2) For even y, you should factor out the power of 2 that divides y - . and divide x by it. You can then proceed by dividing by the - . odd factor of y. - - Here is a table of some odd values of y, and corresponding choices - for z which are "good". - - y z r a (hex) max x (hex) - - 3 2**32 1 55555555 100000001 - 5 2**32 1 33333333 100000003 - 7 2**24-1 0 249249 (infinite) - 9 2**24-1 0 1c71c7 (infinite) - 11 2**20-1 0 1745d (infinite) - 13 2**24-1 0 13b13b (infinite) - 15 2**32 1 11111111 10000000d - 17 2**32 1 f0f0f0f 10000000f - - If r is 1, then b = a+r-1 = a. This simplifies the computation - of (ax+b), since you can compute (x+1)(a) instead. If r is 0, - then b = 0 is ok to use which simplifies (ax+b). - - The bit patterns for 55555555, 33333333, and 11111111 are obviously - very regular. The bit patterns for the other values of a above are: - - y (hex) (binary) - - 7 249249 001001001001001001001001 << regular >> - 9 1c71c7 000111000111000111000111 << regular >> - 11 1745d 000000010111010001011101 << irregular >> - 13 13b13b 000100111011000100111011 << irregular >> - - The bit patterns for (a) corresponding to (y) of 11 and 13 may be - too irregular to warrant using this method. - - When z is a power of 2 minus 1, then the division by z is slightly - more complicated, involving an iterative solution. - - The code presented here solves division by 1 through 17, except for - 11 and 13. There are algorithms for both signed and unsigned - quantities given. - - TIMINGS (cycles) - - divisor positive negative unsigned - - . 1 2 2 2 - . 2 4 4 2 - . 3 19 21 19 - . 4 4 4 2 - . 5 18 22 19 - . 6 19 22 19 - . 8 4 4 2 - . 10 18 19 17 - . 12 18 20 18 - . 15 16 18 16 - . 16 4 4 2 - . 17 16 18 16 - - Now, the algorithm for 7, 9, and 14 is an iterative one. That is, - a loop body is executed until the tentative quotient is 0. The - number of times the loop body is executed varies depending on the - dividend, but is never more than two times. If the dividend is - less than the divisor, then the loop body is not executed at all. 
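A minimal cross-check of the derivation above, as a Python sketch; it assumes only the (y, z, a) values from the table and verifies the arithmetic identity q'(x) = floor((ax+b)/z) = floor(x/y), not the millicode instruction sequences. For the z = 2**32 rows r = 1, so b = a+r-1 = a; for the z = 2**24-1 rows r = 0, so b = 0.

    import random

    # (y, a) pairs taken from the table above
    recip_2_32   = {3: 0x55555555, 5: 0x33333333, 15: 0x11111111, 17: 0x0F0F0F0F}
    recip_2_24m1 = {7: 0x249249, 9: 0x1C71C7}

    def q_prime(x, a, b, z):
        # q'(x) = floor((ax + b)/z)
        return (a * x + b) // z

    xs = [0, 1, 2**31, 2**32 - 1] + [random.getrandbits(32) for _ in range(100000)]
    for y, a in recip_2_32.items():            # z = 2**32, r = 1, b = a
        assert all(q_prime(x, a, a, 2**32) == x // y for x in xs), y
    for y, a in recip_2_24m1.items():          # z = 2**24 - 1, r = 0, b = 0
        assert all(q_prime(x, a, 0, 2**24 - 1) == x // y for x in xs), y
    print("q'(x) == floor(x/y) for every sampled 32-bit x")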
- Each iteration adds 4 cycles to the timings. - - divisor positive negative unsigned - - . 7 19+4n 20+4n 20+4n n = number of iterations - . 9 21+4n 22+4n 21+4n - . 14 21+4n 22+4n 20+4n - - To give an idea of how the number of iterations varies, here is a - table of dividend versus number of iterations when dividing by 7. - - smallest largest required - dividend dividend iterations - - . 0 6 0 - . 7 0x6ffffff 1 - 0x1000006 0xffffffff 2 - - There is some overlap in the range of numbers requiring 1 and 2 - iterations. */ - -RDEFINE(t2,r1) -RDEFINE(x2,arg0) /* r26 */ -RDEFINE(t1,arg1) /* r25 */ -RDEFINE(x1,ret1) /* r29 */ - - SUBSPA_MILLI_DIV - ATTR_MILLI - - .proc - .callinfo millicode - .entry -/* NONE of these routines require a stack frame - ALL of these routines are unwindable from millicode */ - -GSYM($$divide_by_constant) - .export $$divide_by_constant,millicode -/* Provides a "nice" label for the code covered by the unwind descriptor - for things like gprof. */ - -/* DIVISION BY 2 (shift by 1) */ -GSYM($$divI_2) - .export $$divI_2,millicode - comclr,>= arg0,0,0 - addi 1,arg0,arg0 - MILLIRET - extrs arg0,30,31,ret1 - - -/* DIVISION BY 4 (shift by 2) */ -GSYM($$divI_4) - .export $$divI_4,millicode - comclr,>= arg0,0,0 - addi 3,arg0,arg0 - MILLIRET - extrs arg0,29,30,ret1 - - -/* DIVISION BY 8 (shift by 3) */ -GSYM($$divI_8) - .export $$divI_8,millicode - comclr,>= arg0,0,0 - addi 7,arg0,arg0 - MILLIRET - extrs arg0,28,29,ret1 - -/* DIVISION BY 16 (shift by 4) */ -GSYM($$divI_16) - .export $$divI_16,millicode - comclr,>= arg0,0,0 - addi 15,arg0,arg0 - MILLIRET - extrs arg0,27,28,ret1 - -/**************************************************************************** -* -* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these -* -* includes 3,5,15,17 and also 6,10,12 -* -****************************************************************************/ - -/* DIVISION BY 3 (use z = 2**32; a = 55555555) */ - -GSYM($$divI_3) - .export $$divI_3,millicode - comb,<,N x2,0,LREF(neg3) - - addi 1,x2,x2 /* this cannot overflow */ - extru x2,1,2,x1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(pos) - addc x1,0,x1 - -LSYM(neg3) - subi 1,x2,x2 /* this cannot overflow */ - extru x2,1,2,x1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(neg) - addc x1,0,x1 - -GSYM($$divU_3) - .export $$divU_3,millicode - addi 1,x2,x2 /* this CAN overflow */ - addc 0,0,x1 - shd x1,x2,30,t1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(pos) - addc x1,t1,x1 - -/* DIVISION BY 5 (use z = 2**32; a = 33333333) */ - -GSYM($$divI_5) - .export $$divI_5,millicode - comb,<,N x2,0,LREF(neg5) - - addi 3,x2,t1 /* this cannot overflow */ - sh1add x2,t1,x2 /* multiply by 3 to get started */ - b LREF(pos) - addc 0,0,x1 - -LSYM(neg5) - sub 0,x2,x2 /* negate x2 */ - addi 1,x2,x2 /* this cannot overflow */ - shd 0,x2,31,x1 /* get top bit (can be 1) */ - sh1add x2,x2,x2 /* multiply by 3 to get started */ - b LREF(neg) - addc x1,0,x1 - -GSYM($$divU_5) - .export $$divU_5,millicode - addi 1,x2,x2 /* this CAN overflow */ - addc 0,0,x1 - shd x1,x2,31,t1 /* multiply by 3 to get started */ - sh1add x2,x2,x2 - b LREF(pos) - addc t1,x1,x1 - -/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */ -GSYM($$divI_6) - .export $$divI_6,millicode - comb,<,N x2,0,LREF(neg6) - extru x2,30,31,x2 /* divide by 2 */ - addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */ - sh2add x2,t1,x2 /* multiply by 5 to get started */ - b LREF(pos) - addc 0,0,x1 - -LSYM(neg6) - subi 2,x2,x2 /* negate, divide by 2, and add 1 */ - /* 
negation and adding 1 are done */ - /* at the same time by the SUBI */ - extru x2,30,31,x2 - shd 0,x2,30,x1 - sh2add x2,x2,x2 /* multiply by 5 to get started */ - b LREF(neg) - addc x1,0,x1 - -GSYM($$divU_6) - .export $$divU_6,millicode - extru x2,30,31,x2 /* divide by 2 */ - addi 1,x2,x2 /* cannot carry */ - shd 0,x2,30,x1 /* multiply by 5 to get started */ - sh2add x2,x2,x2 - b LREF(pos) - addc x1,0,x1 - -/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */ -GSYM($$divU_10) - .export $$divU_10,millicode - extru x2,30,31,x2 /* divide by 2 */ - addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */ - sh1add x2,t1,x2 /* multiply by 3 to get started */ - addc 0,0,x1 -LSYM(pos) - shd x1,x2,28,t1 /* multiply by 0x11 */ - shd x2,0,28,t2 - add x2,t2,x2 - addc x1,t1,x1 -LSYM(pos_for_17) - shd x1,x2,24,t1 /* multiply by 0x101 */ - shd x2,0,24,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,16,t1 /* multiply by 0x10001 */ - shd x2,0,16,t2 - add x2,t2,x2 - MILLIRET - addc x1,t1,x1 - -GSYM($$divI_10) - .export $$divI_10,millicode - comb,< x2,0,LREF(neg10) - copy 0,x1 - extru x2,30,31,x2 /* divide by 2 */ - addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */ - sh1add x2,x2,x2 /* multiply by 3 to get started */ - -LSYM(neg10) - subi 2,x2,x2 /* negate, divide by 2, and add 1 */ - /* negation and adding 1 are done */ - /* at the same time by the SUBI */ - extru x2,30,31,x2 - sh1add x2,x2,x2 /* multiply by 3 to get started */ -LSYM(neg) - shd x1,x2,28,t1 /* multiply by 0x11 */ - shd x2,0,28,t2 - add x2,t2,x2 - addc x1,t1,x1 -LSYM(neg_for_17) - shd x1,x2,24,t1 /* multiply by 0x101 */ - shd x2,0,24,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,16,t1 /* multiply by 0x10001 */ - shd x2,0,16,t2 - add x2,t2,x2 - addc x1,t1,x1 - MILLIRET - sub 0,x1,x1 - -/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */ -GSYM($$divI_12) - .export $$divI_12,millicode - comb,< x2,0,LREF(neg12) - copy 0,x1 - extru x2,29,30,x2 /* divide by 4 */ - addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */ - sh2add x2,x2,x2 /* multiply by 5 to get started */ - -LSYM(neg12) - subi 4,x2,x2 /* negate, divide by 4, and add 1 */ - /* negation and adding 1 are done */ - /* at the same time by the SUBI */ - extru x2,29,30,x2 - b LREF(neg) - sh2add x2,x2,x2 /* multiply by 5 to get started */ - -GSYM($$divU_12) - .export $$divU_12,millicode - extru x2,29,30,x2 /* divide by 4 */ - addi 5,x2,t1 /* cannot carry */ - sh2add x2,t1,x2 /* multiply by 5 to get started */ - b LREF(pos) - addc 0,0,x1 - -/* DIVISION BY 15 (use z = 2**32; a = 11111111) */ -GSYM($$divI_15) - .export $$divI_15,millicode - comb,< x2,0,LREF(neg15) - copy 0,x1 - addib,tr 1,x2,LREF(pos)+4 - shd x1,x2,28,t1 - -LSYM(neg15) - b LREF(neg) - subi 1,x2,x2 - -GSYM($$divU_15) - .export $$divU_15,millicode - addi 1,x2,x2 /* this CAN overflow */ - b LREF(pos) - addc 0,0,x1 - -/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */ -GSYM($$divI_17) - .export $$divI_17,millicode - comb,<,n x2,0,LREF(neg17) - addi 1,x2,x2 /* this cannot overflow */ - shd 0,x2,28,t1 /* multiply by 0xf to get started */ - shd x2,0,28,t2 - sub t2,x2,x2 - b LREF(pos_for_17) - subb t1,0,x1 - -LSYM(neg17) - subi 1,x2,x2 /* this cannot overflow */ - shd 0,x2,28,t1 /* multiply by 0xf to get started */ - shd x2,0,28,t2 - sub t2,x2,x2 - b LREF(neg_for_17) - subb t1,0,x1 - -GSYM($$divU_17) - .export $$divU_17,millicode - addi 1,x2,x2 /* this CAN overflow */ - addc 0,0,x1 - shd x1,x2,28,t1 /* multiply by 0xf to get started */ -LSYM(u17) - shd x2,0,28,t2 - sub t2,x2,x2 - b LREF(pos_for_17) - subb t1,x1,x1 - - -/* 
DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these - includes 7,9 and also 14 - - - z = 2**24-1 - r = z mod x = 0 - - so choose b = 0 - - Also, in order to divide by z = 2**24-1, we approximate by dividing - by (z+1) = 2**24 (which is easy), and then correcting. - - (ax) = (z+1)q' + r - . = zq' + (q'+r) - - So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1) - Then the true remainder of (ax)/z is (q'+r). Repeat the process - with this new remainder, adding the tentative quotients together, - until a tentative quotient is 0 (and then we are done). There is - one last correction to be done. It is possible that (q'+r) = z. - If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But, - in fact, we need to add 1 more to the quotient. Now, it turns - out that this happens if and only if the original value x is - an exact multiple of y. So, to avoid a three instruction test at - the end, instead use 1 instruction to add 1 to x at the beginning. */ - -/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */ -GSYM($$divI_7) - .export $$divI_7,millicode - comb,<,n x2,0,LREF(neg7) -LSYM(7) - addi 1,x2,x2 /* cannot overflow */ - shd 0,x2,29,x1 - sh3add x2,x2,x2 - addc x1,0,x1 -LSYM(pos7) - shd x1,x2,26,t1 - shd x2,0,26,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,20,t1 - shd x2,0,20,t2 - add x2,t2,x2 - addc x1,t1,t1 - - /* computed <t1,x2>. Now divide it by (2**24 - 1) */ - - copy 0,x1 - shd,= t1,x2,24,t1 /* tentative quotient */ -LSYM(1) - addb,tr t1,x1,LREF(2) /* add to previous quotient */ - extru x2,31,24,x2 /* new remainder (unadjusted) */ - - MILLIRETN - -LSYM(2) - addb,tr t1,x2,LREF(1) /* adjust remainder */ - extru,= x2,7,8,t1 /* new quotient */ - -LSYM(neg7) - subi 1,x2,x2 /* negate x2 and add 1 */ -LSYM(8) - shd 0,x2,29,x1 - sh3add x2,x2,x2 - addc x1,0,x1 - -LSYM(neg7_shift) - shd x1,x2,26,t1 - shd x2,0,26,t2 - add x2,t2,x2 - addc x1,t1,x1 - - shd x1,x2,20,t1 - shd x2,0,20,t2 - add x2,t2,x2 - addc x1,t1,t1 - - /* computed <t1,x2>. 
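A minimal Python sketch of the repeated-correction scheme described at the top of this block: the double-word product is divided by z = 2**24-1 by shifting right 24 bits and folding the tentative quotient back into the remainder, with 1 added to x up front so the exact-multiple case needs no final fix-up. It models the arithmetic only; the helper name is illustrative and not part of the millicode.

    def div_by_ffffff_factor(x, y, a):
        # Requires y * a == 2**24 - 1 exactly, e.g. y = 7, a = 0x249249.
        n = a * (x + 1)               # double-word product a*(x+1)
        quotient = 0
        while True:
            q = n >> 24               # tentative quotient: n // (z+1)
            r = n & 0xFFFFFF          # n mod (z+1)
            if q == 0:
                return quotient
            quotient += q
            n = q + r                 # q + r carries the value forward mod z

    assert all(div_by_ffffff_factor(x, 7, 0x249249) == x // 7
               for x in range(200000))
    assert div_by_ffffff_factor(2**32 - 1, 9, 0x1C71C7) == (2**32 - 1) // 9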
Now divide it by (2**24 - 1) */ - - copy 0,x1 - shd,= t1,x2,24,t1 /* tentative quotient */ -LSYM(3) - addb,tr t1,x1,LREF(4) /* add to previous quotient */ - extru x2,31,24,x2 /* new remainder (unadjusted) */ - - MILLIRET - sub 0,x1,x1 /* negate result */ - -LSYM(4) - addb,tr t1,x2,LREF(3) /* adjust remainder */ - extru,= x2,7,8,t1 /* new quotient */ - -GSYM($$divU_7) - .export $$divU_7,millicode - addi 1,x2,x2 /* can carry */ - addc 0,0,x1 - shd x1,x2,29,t1 - sh3add x2,x2,x2 - b LREF(pos7) - addc t1,x1,x1 - -/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */ -GSYM($$divI_9) - .export $$divI_9,millicode - comb,<,n x2,0,LREF(neg9) - addi 1,x2,x2 /* cannot overflow */ - shd 0,x2,29,t1 - shd x2,0,29,t2 - sub t2,x2,x2 - b LREF(pos7) - subb t1,0,x1 - -LSYM(neg9) - subi 1,x2,x2 /* negate and add 1 */ - shd 0,x2,29,t1 - shd x2,0,29,t2 - sub t2,x2,x2 - b LREF(neg7_shift) - subb t1,0,x1 - -GSYM($$divU_9) - .export $$divU_9,millicode - addi 1,x2,x2 /* can carry */ - addc 0,0,x1 - shd x1,x2,29,t1 - shd x2,0,29,t2 - sub t2,x2,x2 - b LREF(pos7) - subb t1,x1,x1 - -/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */ -GSYM($$divI_14) - .export $$divI_14,millicode - comb,<,n x2,0,LREF(neg14) -GSYM($$divU_14) - .export $$divU_14,millicode - b LREF(7) /* go to 7 case */ - extru x2,30,31,x2 /* divide by 2 */ - -LSYM(neg14) - subi 2,x2,x2 /* negate (and add 2) */ - b LREF(8) - extru x2,30,31,x2 /* divide by 2 */ - .exit - .procend - .end -#endif - -#ifdef L_mulI -/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */ -/****************************************************************************** -This routine is used on PA2.0 processors when gcc -mno-fpregs is used - -ROUTINE: $$mulI - - -DESCRIPTION: - - $$mulI multiplies two single word integers, giving a single - word result. 
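A minimal Python sketch, under the assumption drawn from the code below, of what the byte-at-a-time scheme computes: each 8-bit chunk of the multiplier selects one of the x0..x255 shift-and-add sequences, the partial product is accumulated, and the multiplicand is shifted left by 8 between chunks. The swap/negate preamble in the real routine rearranges the operands without changing the low 32 bits of the product, so this sketch skips it and simply returns the low 32 bits.

    def muli32(a0, a1):
        r = 0
        while a1 != 0:
            chunk = a1 & 0xFF                     # "case on these 8 bits"
            r = (r + chunk * a0) & 0xFFFFFFFF     # one x0..x255 sequence
            a0 = (a0 << 8) & 0xFFFFFFFF           # a0 <<= 8
            a1 >>= 8                              # a1 >>= 8
        return r

    assert muli32(123456789, 987654) == (123456789 * 987654) & 0xFFFFFFFF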
- - -INPUT REGISTERS: - - arg0 = Operand 1 - arg1 = Operand 2 - r31 == return pc - sr0 == return space when called externally - - -OUTPUT REGISTERS: - - arg0 = undefined - arg1 = undefined - ret1 = result - -OTHER REGISTERS AFFECTED: - - r1 = undefined - -SIDE EFFECTS: - - Causes a trap under the following conditions: NONE - Changes memory at the following places: NONE - -PERMISSIBLE CONTEXT: - - Unwindable - Does not create a stack frame - Is usable for internal or external microcode - -DISCUSSION: - - Calls other millicode routines via mrp: NONE - Calls other millicode routines: NONE - -***************************************************************************/ - - -#define a0 %arg0 -#define a1 %arg1 -#define t0 %r1 -#define r %ret1 - -#define a0__128a0 zdep a0,24,25,a0 -#define a0__256a0 zdep a0,23,24,a0 -#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0) -#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1) -#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2) -#define b_n_ret_t0 b,n LREF(ret_t0) -#define b_e_shift b LREF(e_shift) -#define b_e_t0ma0 b LREF(e_t0ma0) -#define b_e_t0 b LREF(e_t0) -#define b_e_t0a0 b LREF(e_t0a0) -#define b_e_t02a0 b LREF(e_t02a0) -#define b_e_t04a0 b LREF(e_t04a0) -#define b_e_2t0 b LREF(e_2t0) -#define b_e_2t0a0 b LREF(e_2t0a0) -#define b_e_2t04a0 b LREF(e2t04a0) -#define b_e_3t0 b LREF(e_3t0) -#define b_e_4t0 b LREF(e_4t0) -#define b_e_4t0a0 b LREF(e_4t0a0) -#define b_e_4t08a0 b LREF(e4t08a0) -#define b_e_5t0 b LREF(e_5t0) -#define b_e_8t0 b LREF(e_8t0) -#define b_e_8t0a0 b LREF(e_8t0a0) -#define r__r_a0 add r,a0,r -#define r__r_2a0 sh1add a0,r,r -#define r__r_4a0 sh2add a0,r,r -#define r__r_8a0 sh3add a0,r,r -#define r__r_t0 add r,t0,r -#define r__r_2t0 sh1add t0,r,r -#define r__r_4t0 sh2add t0,r,r -#define r__r_8t0 sh3add t0,r,r -#define t0__3a0 sh1add a0,a0,t0 -#define t0__4a0 sh2add a0,0,t0 -#define t0__5a0 sh2add a0,a0,t0 -#define t0__8a0 sh3add a0,0,t0 -#define t0__9a0 sh3add a0,a0,t0 -#define t0__16a0 zdep a0,27,28,t0 -#define t0__32a0 zdep a0,26,27,t0 -#define t0__64a0 zdep a0,25,26,t0 -#define t0__128a0 zdep a0,24,25,t0 -#define t0__t0ma0 sub t0,a0,t0 -#define t0__t0_a0 add t0,a0,t0 -#define t0__t0_2a0 sh1add a0,t0,t0 -#define t0__t0_4a0 sh2add a0,t0,t0 -#define t0__t0_8a0 sh3add a0,t0,t0 -#define t0__2t0_a0 sh1add t0,a0,t0 -#define t0__3t0 sh1add t0,t0,t0 -#define t0__4t0 sh2add t0,0,t0 -#define t0__4t0_a0 sh2add t0,a0,t0 -#define t0__5t0 sh2add t0,t0,t0 -#define t0__8t0 sh3add t0,0,t0 -#define t0__8t0_a0 sh3add t0,a0,t0 -#define t0__9t0 sh3add t0,t0,t0 -#define t0__16t0 zdep t0,27,28,t0 -#define t0__32t0 zdep t0,26,27,t0 -#define t0__256a0 zdep a0,23,24,t0 - - - SUBSPA_MILLI - ATTR_MILLI - .align 16 - .proc - .callinfo millicode - .export $$mulI,millicode -GSYM($$mulI) - combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */ - copy 0,r /* zero out the result */ - xor a0,a1,a0 /* swap a0 & a1 using the */ - xor a0,a1,a1 /* old xor trick */ - xor a0,a1,a0 -LSYM(l4) - combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */ - zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ - sub,> 0,a1,t0 /* otherwise negate both and */ - combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */ - sub 0,a0,a1 - movb,tr,n t0,a0,LREF(l2) /* 10th inst. */ - -LSYM(l0) r__r_t0 /* add in this partial product */ -LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */ -LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */ -LSYM(l3) blr t0,0 /* case on these 8 bits ****** */ - extru a1,23,24,a1 /* a1 >>= 8 ****************** */ - -/*16 insts before this. 
*/ -/* a0 <<= 8 ************************** */ -LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop -LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop -LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop -LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0 -LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop -LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0 -LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN -LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0 -LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop -LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0 -LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN -LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 -LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN -LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0 -LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0 -LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0 -LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN -LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0 -LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN -LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 -LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN -LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 -LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0 -LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 -LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 -LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 -LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0 -LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN -LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0 -LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN -LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 -LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0 -LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0 -LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0 -LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0 -LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0 -LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 -LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0 -LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0 -LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0 -LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 -LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 -LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! 
t0__t0ma0 ! b_n_ret_t0 -LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0 -LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0 -LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN -LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0 -LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0 -LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0 -LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0 -LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0 -LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0 -LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0 -LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 -LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0 -LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 -LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 -LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0 -LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0 -LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0 -LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0 -LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0 -LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0 -LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0 -LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 -LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0 -LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0 -LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 -LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 -LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 -LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 -LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 -LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN -LSYM(x129) t0__128a0 ! 
a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0 -LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0 -LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0 -LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0 -LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0 -LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0 -LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0 -LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0 -LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 -LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0 -LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0 -LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0 -LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0 -LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0 -LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0 -LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0 -LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0 -LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0 -LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0 -LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0 -LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0 -LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0 -LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0 -LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0 -LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0 -LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0 -LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0 -LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0 -LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0 -LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0 -LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0 -LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0 -LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0 -LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0 -LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0 -LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0 -LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0 -LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 -LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x194) t0__8a0 ! t0__3t0 ! 
b_e_2t0 ! t0__4t0_a0 -LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0 -LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0 -LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0 -LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0 -LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 -LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0 -LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0 -LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 -LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0 -LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0 -LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0 -LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0 -LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0 -LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0 -LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0 -LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0 -LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0 -LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0 -LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0 -LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0 -LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0 -LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0 -LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0 -LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0 -LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0 -LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0 -LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0 -LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0 -LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0 -LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0 -LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0 -LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0 -LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0 -LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0 -LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0 -LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0 -LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0 -LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0 -LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0 -LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0 -LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0 -LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0 -LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0 -LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0 -LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0 -LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0 -LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0 -LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0 -LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0 -LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0 -LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0 -LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0 -LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0 -LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0 -/*1040 insts before this. 
*/ -LSYM(ret_t0) MILLIRET -LSYM(e_t0) r__r_t0 -LSYM(e_shift) a1_ne_0_b_l2 - a0__256a0 /* a0 <<= 8 *********** */ - MILLIRETN -LSYM(e_t0ma0) a1_ne_0_b_l0 - t0__t0ma0 - MILLIRET - r__r_t0 -LSYM(e_t0a0) a1_ne_0_b_l0 - t0__t0_a0 - MILLIRET - r__r_t0 -LSYM(e_t02a0) a1_ne_0_b_l0 - t0__t0_2a0 - MILLIRET - r__r_t0 -LSYM(e_t04a0) a1_ne_0_b_l0 - t0__t0_4a0 - MILLIRET - r__r_t0 -LSYM(e_2t0) a1_ne_0_b_l1 - r__r_2t0 - MILLIRETN -LSYM(e_2t0a0) a1_ne_0_b_l0 - t0__2t0_a0 - MILLIRET - r__r_t0 -LSYM(e2t04a0) t0__t0_2a0 - a1_ne_0_b_l1 - r__r_2t0 - MILLIRETN -LSYM(e_3t0) a1_ne_0_b_l0 - t0__3t0 - MILLIRET - r__r_t0 -LSYM(e_4t0) a1_ne_0_b_l1 - r__r_4t0 - MILLIRETN -LSYM(e_4t0a0) a1_ne_0_b_l0 - t0__4t0_a0 - MILLIRET - r__r_t0 -LSYM(e4t08a0) t0__t0_2a0 - a1_ne_0_b_l1 - r__r_4t0 - MILLIRETN -LSYM(e_5t0) a1_ne_0_b_l0 - t0__5t0 - MILLIRET - r__r_t0 -LSYM(e_8t0) a1_ne_0_b_l1 - r__r_8t0 - MILLIRETN -LSYM(e_8t0a0) a1_ne_0_b_l0 - t0__8t0_a0 - MILLIRET - r__r_t0 - - .procend - .end -#endif diff --git a/gcc/config/pa/t-linux b/gcc/config/pa/t-linux index df351e11458..b94ebd250a8 100644 --- a/gcc/config/pa/t-linux +++ b/gcc/config/pa/t-linux @@ -16,13 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -#Plug millicode routines into libgcc.a We want these on both native and -#cross compiles. We use the "64-bit" routines because the "32-bit" code -#is broken for certain corner cases. - -LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall -LIB1ASMSRC = pa/milli64.S - # Compile libgcc2.a as PIC. TARGET_LIBGCC2_CFLAGS = -fPIC -DELF=1 -DLINUX=1 diff --git a/gcc/config/pa/t-linux64 b/gcc/config/pa/t-linux64 index d40546cabcc..af803a27ed3 100644 --- a/gcc/config/pa/t-linux64 +++ b/gcc/config/pa/t-linux64 @@ -16,12 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -#Plug millicode routines into libgcc.a We want these on both native and -#cross compiles. - -LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI -LIB1ASMSRC = pa/milli64.S - LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/pa/linux-atomic.c # Compile libgcc2.a as PIC. diff --git a/gcc/config/picochip/libgccExtras/fake_libgcc.asm b/gcc/config/picochip/libgccExtras/fake_libgcc.asm deleted file mode 100644 index e4b78f1e1f1..00000000000 --- a/gcc/config/picochip/libgccExtras/fake_libgcc.asm +++ /dev/null @@ -1,6 +0,0 @@ -// picoChip ASM file -// Fake libgcc asm file. This contains nothing, but is used to prevent gcc -// getting upset about the lack of a libgcc.S file when LIB1ASMFUNCS is defined -// to switch off the compilation of parts of libgcc. - - diff --git a/gcc/config/picochip/t-picochip b/gcc/config/picochip/t-picochip index 222d7a646b9..0f3fe8c3d81 100644 --- a/gcc/config/picochip/t-picochip +++ b/gcc/config/picochip/t-picochip @@ -35,14 +35,6 @@ LIB2FUNCS_EXTRA = \ $(srcdir)/config/picochip/libgccExtras/parityhi2.asm \ $(srcdir)/config/picochip/libgccExtras/popcounthi2.asm -# Prevent some of the more complicated libgcc functions from being -# compiled. This is because they are generally too big to fit into an -# AE anyway, so there is no point in having them. Also, some don't -# compile properly so we'll ignore them for the moment. - -LIB1ASMFUNCS = _mulsc3 _divsc3 -LIB1ASMSRC = picochip/libgccExtras/fake_libgcc.asm - # Turn off ranlib on target libraries. 
RANLIB_FOR_TARGET = cat diff --git a/gcc/config/sh/lib1funcs.asm b/gcc/config/sh/lib1funcs.asm deleted file mode 100644 index 2f0ca16cd91..00000000000 --- a/gcc/config/sh/lib1funcs.asm +++ /dev/null @@ -1,3933 +0,0 @@ -/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2009 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - - -!! libgcc routines for the Renesas / SuperH SH CPUs. -!! Contributed by Steve Chamberlain. -!! sac@cygnus.com - -!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines -!! recoded in assembly by Toshiyasu Morita -!! tm@netcom.com - -#if defined(__ELF__) && defined(__linux__) -.section .note.GNU-stack,"",%progbits -.previous -#endif - -/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and - ELF local label prefixes by J"orn Rennecke - amylaar@cygnus.com */ - -#include "lib1funcs.h" - -/* t-vxworks needs to build both PIC and non-PIC versions of libgcc, - so it is more convenient to define NO_FPSCR_VALUES here than to - define it on the command line. */ -#if defined __vxworks && defined __PIC__ -#define NO_FPSCR_VALUES -#endif - -#if ! 
__SH5__ -#ifdef L_ashiftrt - .global GLOBAL(ashiftrt_r4_0) - .global GLOBAL(ashiftrt_r4_1) - .global GLOBAL(ashiftrt_r4_2) - .global GLOBAL(ashiftrt_r4_3) - .global GLOBAL(ashiftrt_r4_4) - .global GLOBAL(ashiftrt_r4_5) - .global GLOBAL(ashiftrt_r4_6) - .global GLOBAL(ashiftrt_r4_7) - .global GLOBAL(ashiftrt_r4_8) - .global GLOBAL(ashiftrt_r4_9) - .global GLOBAL(ashiftrt_r4_10) - .global GLOBAL(ashiftrt_r4_11) - .global GLOBAL(ashiftrt_r4_12) - .global GLOBAL(ashiftrt_r4_13) - .global GLOBAL(ashiftrt_r4_14) - .global GLOBAL(ashiftrt_r4_15) - .global GLOBAL(ashiftrt_r4_16) - .global GLOBAL(ashiftrt_r4_17) - .global GLOBAL(ashiftrt_r4_18) - .global GLOBAL(ashiftrt_r4_19) - .global GLOBAL(ashiftrt_r4_20) - .global GLOBAL(ashiftrt_r4_21) - .global GLOBAL(ashiftrt_r4_22) - .global GLOBAL(ashiftrt_r4_23) - .global GLOBAL(ashiftrt_r4_24) - .global GLOBAL(ashiftrt_r4_25) - .global GLOBAL(ashiftrt_r4_26) - .global GLOBAL(ashiftrt_r4_27) - .global GLOBAL(ashiftrt_r4_28) - .global GLOBAL(ashiftrt_r4_29) - .global GLOBAL(ashiftrt_r4_30) - .global GLOBAL(ashiftrt_r4_31) - .global GLOBAL(ashiftrt_r4_32) - - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31)) - HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32)) - - .align 1 -GLOBAL(ashiftrt_r4_32): -GLOBAL(ashiftrt_r4_31): - rotcl r4 - rts - subc r4,r4 - -GLOBAL(ashiftrt_r4_30): - shar r4 -GLOBAL(ashiftrt_r4_29): - shar r4 -GLOBAL(ashiftrt_r4_28): - shar r4 -GLOBAL(ashiftrt_r4_27): - shar r4 -GLOBAL(ashiftrt_r4_26): - shar r4 -GLOBAL(ashiftrt_r4_25): - shar r4 -GLOBAL(ashiftrt_r4_24): - shlr16 r4 - shlr8 r4 - rts - exts.b r4,r4 - -GLOBAL(ashiftrt_r4_23): - shar r4 -GLOBAL(ashiftrt_r4_22): - shar r4 -GLOBAL(ashiftrt_r4_21): - shar r4 -GLOBAL(ashiftrt_r4_20): - shar r4 -GLOBAL(ashiftrt_r4_19): - shar r4 -GLOBAL(ashiftrt_r4_18): - shar r4 -GLOBAL(ashiftrt_r4_17): - shar r4 -GLOBAL(ashiftrt_r4_16): - shlr16 r4 - rts - exts.w r4,r4 - -GLOBAL(ashiftrt_r4_15): - shar r4 -GLOBAL(ashiftrt_r4_14): - shar r4 -GLOBAL(ashiftrt_r4_13): - shar r4 -GLOBAL(ashiftrt_r4_12): - shar r4 -GLOBAL(ashiftrt_r4_11): - shar r4 -GLOBAL(ashiftrt_r4_10): - shar r4 -GLOBAL(ashiftrt_r4_9): - shar r4 -GLOBAL(ashiftrt_r4_8): - shar r4 -GLOBAL(ashiftrt_r4_7): - shar r4 -GLOBAL(ashiftrt_r4_6): - shar r4 -GLOBAL(ashiftrt_r4_5): - shar r4 -GLOBAL(ashiftrt_r4_4): - shar r4 -GLOBAL(ashiftrt_r4_3): - shar r4 -GLOBAL(ashiftrt_r4_2): - shar r4 -GLOBAL(ashiftrt_r4_1): - rts - shar r4 
- -GLOBAL(ashiftrt_r4_0): - rts - nop - - ENDFUNC(GLOBAL(ashiftrt_r4_0)) - ENDFUNC(GLOBAL(ashiftrt_r4_1)) - ENDFUNC(GLOBAL(ashiftrt_r4_2)) - ENDFUNC(GLOBAL(ashiftrt_r4_3)) - ENDFUNC(GLOBAL(ashiftrt_r4_4)) - ENDFUNC(GLOBAL(ashiftrt_r4_5)) - ENDFUNC(GLOBAL(ashiftrt_r4_6)) - ENDFUNC(GLOBAL(ashiftrt_r4_7)) - ENDFUNC(GLOBAL(ashiftrt_r4_8)) - ENDFUNC(GLOBAL(ashiftrt_r4_9)) - ENDFUNC(GLOBAL(ashiftrt_r4_10)) - ENDFUNC(GLOBAL(ashiftrt_r4_11)) - ENDFUNC(GLOBAL(ashiftrt_r4_12)) - ENDFUNC(GLOBAL(ashiftrt_r4_13)) - ENDFUNC(GLOBAL(ashiftrt_r4_14)) - ENDFUNC(GLOBAL(ashiftrt_r4_15)) - ENDFUNC(GLOBAL(ashiftrt_r4_16)) - ENDFUNC(GLOBAL(ashiftrt_r4_17)) - ENDFUNC(GLOBAL(ashiftrt_r4_18)) - ENDFUNC(GLOBAL(ashiftrt_r4_19)) - ENDFUNC(GLOBAL(ashiftrt_r4_20)) - ENDFUNC(GLOBAL(ashiftrt_r4_21)) - ENDFUNC(GLOBAL(ashiftrt_r4_22)) - ENDFUNC(GLOBAL(ashiftrt_r4_23)) - ENDFUNC(GLOBAL(ashiftrt_r4_24)) - ENDFUNC(GLOBAL(ashiftrt_r4_25)) - ENDFUNC(GLOBAL(ashiftrt_r4_26)) - ENDFUNC(GLOBAL(ashiftrt_r4_27)) - ENDFUNC(GLOBAL(ashiftrt_r4_28)) - ENDFUNC(GLOBAL(ashiftrt_r4_29)) - ENDFUNC(GLOBAL(ashiftrt_r4_30)) - ENDFUNC(GLOBAL(ashiftrt_r4_31)) - ENDFUNC(GLOBAL(ashiftrt_r4_32)) -#endif - -#ifdef L_ashiftrt_n - -! -! GLOBAL(ashrsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - - .global GLOBAL(ashrsi3) - HIDDEN_FUNC(GLOBAL(ashrsi3)) - .align 2 -GLOBAL(ashrsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(ashrsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(ashrsi3_table): - .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) - .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) - -LOCAL(ashrsi3_31): - rotcl r0 - rts - subc r0,r0 - -LOCAL(ashrsi3_30): - shar r0 -LOCAL(ashrsi3_29): - shar r0 -LOCAL(ashrsi3_28): - shar r0 -LOCAL(ashrsi3_27): - shar r0 -LOCAL(ashrsi3_26): - shar r0 -LOCAL(ashrsi3_25): - shar r0 -LOCAL(ashrsi3_24): - shlr16 r0 - shlr8 r0 - rts - exts.b r0,r0 - -LOCAL(ashrsi3_23): - shar r0 -LOCAL(ashrsi3_22): - shar r0 -LOCAL(ashrsi3_21): - shar r0 -LOCAL(ashrsi3_20): - shar r0 -LOCAL(ashrsi3_19): - shar r0 
-LOCAL(ashrsi3_18): - shar r0 -LOCAL(ashrsi3_17): - shar r0 -LOCAL(ashrsi3_16): - shlr16 r0 - rts - exts.w r0,r0 - -LOCAL(ashrsi3_15): - shar r0 -LOCAL(ashrsi3_14): - shar r0 -LOCAL(ashrsi3_13): - shar r0 -LOCAL(ashrsi3_12): - shar r0 -LOCAL(ashrsi3_11): - shar r0 -LOCAL(ashrsi3_10): - shar r0 -LOCAL(ashrsi3_9): - shar r0 -LOCAL(ashrsi3_8): - shar r0 -LOCAL(ashrsi3_7): - shar r0 -LOCAL(ashrsi3_6): - shar r0 -LOCAL(ashrsi3_5): - shar r0 -LOCAL(ashrsi3_4): - shar r0 -LOCAL(ashrsi3_3): - shar r0 -LOCAL(ashrsi3_2): - shar r0 -LOCAL(ashrsi3_1): - rts - shar r0 - -LOCAL(ashrsi3_0): - rts - nop - - ENDFUNC(GLOBAL(ashrsi3)) -#endif - -#ifdef L_ashiftlt - -! -! GLOBAL(ashlsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - .global GLOBAL(ashlsi3) - HIDDEN_FUNC(GLOBAL(ashlsi3)) - .align 2 -GLOBAL(ashlsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(ashlsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(ashlsi3_table): - .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) - .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) - -LOCAL(ashlsi3_6): - shll2 r0 -LOCAL(ashlsi3_4): - shll2 r0 -LOCAL(ashlsi3_2): - rts - shll2 r0 - -LOCAL(ashlsi3_7): - shll2 r0 -LOCAL(ashlsi3_5): - shll2 r0 -LOCAL(ashlsi3_3): - shll2 r0 -LOCAL(ashlsi3_1): - rts - shll r0 - -LOCAL(ashlsi3_14): - shll2 r0 -LOCAL(ashlsi3_12): - shll2 r0 -LOCAL(ashlsi3_10): - shll2 r0 -LOCAL(ashlsi3_8): - rts - shll8 r0 - -LOCAL(ashlsi3_15): - shll2 r0 -LOCAL(ashlsi3_13): - shll2 r0 -LOCAL(ashlsi3_11): - shll2 r0 -LOCAL(ashlsi3_9): - shll8 r0 - rts - shll r0 - -LOCAL(ashlsi3_22): - shll2 r0 -LOCAL(ashlsi3_20): - shll2 r0 -LOCAL(ashlsi3_18): - shll2 r0 -LOCAL(ashlsi3_16): - rts - shll16 r0 - -LOCAL(ashlsi3_23): - shll2 r0 -LOCAL(ashlsi3_21): - shll2 r0 -LOCAL(ashlsi3_19): - shll2 r0 -LOCAL(ashlsi3_17): - shll16 r0 - rts - shll r0 - -LOCAL(ashlsi3_30): - shll2 r0 -LOCAL(ashlsi3_28): - shll2 r0 -LOCAL(ashlsi3_26): - shll2 r0 -LOCAL(ashlsi3_24): - shll16 r0 - rts - shll8 r0 - -LOCAL(ashlsi3_31): - shll2 r0 -LOCAL(ashlsi3_29): - shll2 r0 
-LOCAL(ashlsi3_27): - shll2 r0 -LOCAL(ashlsi3_25): - shll16 r0 - shll8 r0 - rts - shll r0 - -LOCAL(ashlsi3_0): - rts - nop - - ENDFUNC(GLOBAL(ashlsi3)) -#endif - -#ifdef L_lshiftrt - -! -! GLOBAL(lshrsi3) -! -! Entry: -! -! r4: Value to shift -! r5: Shifts -! -! Exit: -! -! r0: Result -! -! Destroys: -! -! (none) -! - .global GLOBAL(lshrsi3) - HIDDEN_FUNC(GLOBAL(lshrsi3)) - .align 2 -GLOBAL(lshrsi3): - mov #31,r0 - and r0,r5 - mova LOCAL(lshrsi3_table),r0 - mov.b @(r0,r5),r5 -#ifdef __sh1__ - add r5,r0 - jmp @r0 -#else - braf r5 -#endif - mov r4,r0 - - .align 2 -LOCAL(lshrsi3_table): - .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) - .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) - -LOCAL(lshrsi3_6): - shlr2 r0 -LOCAL(lshrsi3_4): - shlr2 r0 -LOCAL(lshrsi3_2): - rts - shlr2 r0 - -LOCAL(lshrsi3_7): - shlr2 r0 -LOCAL(lshrsi3_5): - shlr2 r0 -LOCAL(lshrsi3_3): - shlr2 r0 -LOCAL(lshrsi3_1): - rts - shlr r0 - -LOCAL(lshrsi3_14): - shlr2 r0 -LOCAL(lshrsi3_12): - shlr2 r0 -LOCAL(lshrsi3_10): - shlr2 r0 -LOCAL(lshrsi3_8): - rts - shlr8 r0 - -LOCAL(lshrsi3_15): - shlr2 r0 -LOCAL(lshrsi3_13): - shlr2 r0 -LOCAL(lshrsi3_11): - shlr2 r0 -LOCAL(lshrsi3_9): - shlr8 r0 - rts - shlr r0 - -LOCAL(lshrsi3_22): - shlr2 r0 -LOCAL(lshrsi3_20): - shlr2 r0 -LOCAL(lshrsi3_18): - shlr2 r0 -LOCAL(lshrsi3_16): - rts - shlr16 r0 - -LOCAL(lshrsi3_23): - shlr2 r0 -LOCAL(lshrsi3_21): - shlr2 r0 -LOCAL(lshrsi3_19): - shlr2 r0 -LOCAL(lshrsi3_17): - shlr16 r0 - rts - shlr r0 - -LOCAL(lshrsi3_30): - shlr2 r0 -LOCAL(lshrsi3_28): - shlr2 r0 -LOCAL(lshrsi3_26): - shlr2 r0 -LOCAL(lshrsi3_24): - shlr16 r0 - rts - shlr8 r0 - -LOCAL(lshrsi3_31): - shlr2 r0 -LOCAL(lshrsi3_29): - shlr2 r0 -LOCAL(lshrsi3_27): - shlr2 r0 -LOCAL(lshrsi3_25): - shlr16 r0 - shlr8 r0 - rts - shlr r0 - -LOCAL(lshrsi3_0): - rts - nop - - ENDFUNC(GLOBAL(lshrsi3)) -#endif - -#ifdef L_movmem - .text - .balign 4 - .global GLOBAL(movmem) - HIDDEN_FUNC(GLOBAL(movmem)) - HIDDEN_ALIAS(movstr,movmem) - /* This would be a lot simpler if r6 contained the byte count - minus 64, and we wouldn't be called here for a byte count of 64. 
*/ -GLOBAL(movmem): - sts.l pr,@-r15 - shll2 r6 - bsr GLOBAL(movmemSI52+2) - mov.l @(48,r5),r0 - .balign 4 -LOCAL(movmem_loop): /* Reached with rts */ - mov.l @(60,r5),r0 - add #-64,r6 - mov.l r0,@(60,r4) - tst r6,r6 - mov.l @(56,r5),r0 - bt LOCAL(movmem_done) - mov.l r0,@(56,r4) - cmp/pl r6 - mov.l @(52,r5),r0 - add #64,r5 - mov.l r0,@(52,r4) - add #64,r4 - bt GLOBAL(movmemSI52) -! done all the large groups, do the remainder -! jump to movmem+ - mova GLOBAL(movmemSI4)+4,r0 - add r6,r0 - jmp @r0 -LOCAL(movmem_done): ! share slot insn, works out aligned. - lds.l @r15+,pr - mov.l r0,@(56,r4) - mov.l @(52,r5),r0 - rts - mov.l r0,@(52,r4) - .balign 4 -! ??? We need aliases movstr* for movmem* for the older libraries. These -! aliases will be removed at the some point in the future. - .global GLOBAL(movmemSI64) - HIDDEN_FUNC(GLOBAL(movmemSI64)) - HIDDEN_ALIAS(movstrSI64,movmemSI64) -GLOBAL(movmemSI64): - mov.l @(60,r5),r0 - mov.l r0,@(60,r4) - .global GLOBAL(movmemSI60) - HIDDEN_FUNC(GLOBAL(movmemSI60)) - HIDDEN_ALIAS(movstrSI60,movmemSI60) -GLOBAL(movmemSI60): - mov.l @(56,r5),r0 - mov.l r0,@(56,r4) - .global GLOBAL(movmemSI56) - HIDDEN_FUNC(GLOBAL(movmemSI56)) - HIDDEN_ALIAS(movstrSI56,movmemSI56) -GLOBAL(movmemSI56): - mov.l @(52,r5),r0 - mov.l r0,@(52,r4) - .global GLOBAL(movmemSI52) - HIDDEN_FUNC(GLOBAL(movmemSI52)) - HIDDEN_ALIAS(movstrSI52,movmemSI52) -GLOBAL(movmemSI52): - mov.l @(48,r5),r0 - mov.l r0,@(48,r4) - .global GLOBAL(movmemSI48) - HIDDEN_FUNC(GLOBAL(movmemSI48)) - HIDDEN_ALIAS(movstrSI48,movmemSI48) -GLOBAL(movmemSI48): - mov.l @(44,r5),r0 - mov.l r0,@(44,r4) - .global GLOBAL(movmemSI44) - HIDDEN_FUNC(GLOBAL(movmemSI44)) - HIDDEN_ALIAS(movstrSI44,movmemSI44) -GLOBAL(movmemSI44): - mov.l @(40,r5),r0 - mov.l r0,@(40,r4) - .global GLOBAL(movmemSI40) - HIDDEN_FUNC(GLOBAL(movmemSI40)) - HIDDEN_ALIAS(movstrSI40,movmemSI40) -GLOBAL(movmemSI40): - mov.l @(36,r5),r0 - mov.l r0,@(36,r4) - .global GLOBAL(movmemSI36) - HIDDEN_FUNC(GLOBAL(movmemSI36)) - HIDDEN_ALIAS(movstrSI36,movmemSI36) -GLOBAL(movmemSI36): - mov.l @(32,r5),r0 - mov.l r0,@(32,r4) - .global GLOBAL(movmemSI32) - HIDDEN_FUNC(GLOBAL(movmemSI32)) - HIDDEN_ALIAS(movstrSI32,movmemSI32) -GLOBAL(movmemSI32): - mov.l @(28,r5),r0 - mov.l r0,@(28,r4) - .global GLOBAL(movmemSI28) - HIDDEN_FUNC(GLOBAL(movmemSI28)) - HIDDEN_ALIAS(movstrSI28,movmemSI28) -GLOBAL(movmemSI28): - mov.l @(24,r5),r0 - mov.l r0,@(24,r4) - .global GLOBAL(movmemSI24) - HIDDEN_FUNC(GLOBAL(movmemSI24)) - HIDDEN_ALIAS(movstrSI24,movmemSI24) -GLOBAL(movmemSI24): - mov.l @(20,r5),r0 - mov.l r0,@(20,r4) - .global GLOBAL(movmemSI20) - HIDDEN_FUNC(GLOBAL(movmemSI20)) - HIDDEN_ALIAS(movstrSI20,movmemSI20) -GLOBAL(movmemSI20): - mov.l @(16,r5),r0 - mov.l r0,@(16,r4) - .global GLOBAL(movmemSI16) - HIDDEN_FUNC(GLOBAL(movmemSI16)) - HIDDEN_ALIAS(movstrSI16,movmemSI16) -GLOBAL(movmemSI16): - mov.l @(12,r5),r0 - mov.l r0,@(12,r4) - .global GLOBAL(movmemSI12) - HIDDEN_FUNC(GLOBAL(movmemSI12)) - HIDDEN_ALIAS(movstrSI12,movmemSI12) -GLOBAL(movmemSI12): - mov.l @(8,r5),r0 - mov.l r0,@(8,r4) - .global GLOBAL(movmemSI8) - HIDDEN_FUNC(GLOBAL(movmemSI8)) - HIDDEN_ALIAS(movstrSI8,movmemSI8) -GLOBAL(movmemSI8): - mov.l @(4,r5),r0 - mov.l r0,@(4,r4) - .global GLOBAL(movmemSI4) - HIDDEN_FUNC(GLOBAL(movmemSI4)) - HIDDEN_ALIAS(movstrSI4,movmemSI4) -GLOBAL(movmemSI4): - mov.l @(0,r5),r0 - rts - mov.l r0,@(0,r4) - - ENDFUNC(GLOBAL(movmemSI64)) - ENDFUNC(GLOBAL(movmemSI60)) - ENDFUNC(GLOBAL(movmemSI56)) - ENDFUNC(GLOBAL(movmemSI52)) - ENDFUNC(GLOBAL(movmemSI48)) - ENDFUNC(GLOBAL(movmemSI44)) - 
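Each movmemSIn entry point above copies the word at byte offset n-4 and falls through to the next smaller entry, so branching to movmemSI{n} copies exactly n bytes (a multiple of 4, at most 64) of longword-aligned data with straight-line code; the movmem loop consumes longer buffers in 64-byte groups and finishes by jumping into this chain. A C sketch of what one entry computes (the name is illustrative; r4 and r5 are the destination and source pointers in the SH calling convention):

#include <stdint.h>

/* Model of the movmemSI4..movmemSI64 fall-through chain: the highest
   remaining word is stored first, then control falls through to the
   entry for the next lower word, ending at offset 0. */
void movmemSIn_ref(uint32_t *dst /* r4 */, const uint32_t *src /* r5 */,
                   unsigned nbytes /* 4..64, multiple of 4 */)
{
    for (unsigned off = nbytes; off >= 4; off -= 4)
        dst[off / 4 - 1] = src[off / 4 - 1];  /* mov.l @(off-4,r5),r0 ; mov.l r0,@(off-4,r4) */
}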
ENDFUNC(GLOBAL(movmemSI40)) - ENDFUNC(GLOBAL(movmemSI36)) - ENDFUNC(GLOBAL(movmemSI32)) - ENDFUNC(GLOBAL(movmemSI28)) - ENDFUNC(GLOBAL(movmemSI24)) - ENDFUNC(GLOBAL(movmemSI20)) - ENDFUNC(GLOBAL(movmemSI16)) - ENDFUNC(GLOBAL(movmemSI12)) - ENDFUNC(GLOBAL(movmemSI8)) - ENDFUNC(GLOBAL(movmemSI4)) - ENDFUNC(GLOBAL(movmem)) -#endif - -#ifdef L_movmem_i4 - .text - .global GLOBAL(movmem_i4_even) - .global GLOBAL(movmem_i4_odd) - .global GLOBAL(movmemSI12_i4) - - HIDDEN_FUNC(GLOBAL(movmem_i4_even)) - HIDDEN_FUNC(GLOBAL(movmem_i4_odd)) - HIDDEN_FUNC(GLOBAL(movmemSI12_i4)) - - HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) - HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) - HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) - - .p2align 5 -L_movmem_2mod4_end: - mov.l r0,@(16,r4) - rts - mov.l r1,@(20,r4) - - .p2align 2 - -GLOBAL(movmem_i4_even): - mov.l @r5+,r0 - bra L_movmem_start_even - mov.l @r5+,r1 - -GLOBAL(movmem_i4_odd): - mov.l @r5+,r1 - add #-4,r4 - mov.l @r5+,r2 - mov.l @r5+,r3 - mov.l r1,@(4,r4) - mov.l r2,@(8,r4) - -L_movmem_loop: - mov.l r3,@(12,r4) - dt r6 - mov.l @r5+,r0 - bt/s L_movmem_2mod4_end - mov.l @r5+,r1 - add #16,r4 -L_movmem_start_even: - mov.l @r5+,r2 - mov.l @r5+,r3 - mov.l r0,@r4 - dt r6 - mov.l r1,@(4,r4) - bf/s L_movmem_loop - mov.l r2,@(8,r4) - rts - mov.l r3,@(12,r4) - - ENDFUNC(GLOBAL(movmem_i4_even)) - ENDFUNC(GLOBAL(movmem_i4_odd)) - - .p2align 4 -GLOBAL(movmemSI12_i4): - mov.l @r5,r0 - mov.l @(4,r5),r1 - mov.l @(8,r5),r2 - mov.l r0,@r4 - mov.l r1,@(4,r4) - rts - mov.l r2,@(8,r4) - - ENDFUNC(GLOBAL(movmemSI12_i4)) -#endif - -#ifdef L_mulsi3 - - - .global GLOBAL(mulsi3) - HIDDEN_FUNC(GLOBAL(mulsi3)) - -! r4 = aabb -! r5 = ccdd -! r0 = aabb*ccdd via partial products -! -! if aa == 0 and cc = 0 -! r0 = bb*dd -! -! else -! aa = bb*dd + (aa*dd*65536) + (cc*bb*65536) -! - -GLOBAL(mulsi3): - mulu.w r4,r5 ! multiply the lsws macl=bb*dd - mov r5,r3 ! r3 = ccdd - swap.w r4,r2 ! r2 = bbaa - xtrct r2,r3 ! r3 = aacc - tst r3,r3 ! msws zero ? - bf hiset - rts ! yes - then we have the answer - sts macl,r0 - -hiset: sts macl,r0 ! r0 = bb*dd - mulu.w r2,r5 ! brewing macl = aa*dd - sts macl,r1 - mulu.w r3,r4 ! brewing macl = cc*bb - sts macl,r2 - add r1,r2 - shll16 r2 - rts - add r2,r0 - - ENDFUNC(GLOBAL(mulsi3)) -#endif -#endif /* ! __SH5__ */ -#ifdef L_sdivsi3_i4 - .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH -#ifdef __SH4__ -!! args in r4 and r5, result in fpul, clobber dr0, dr2 - - .global GLOBAL(sdivsi3_i4) - HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) -GLOBAL(sdivsi3_i4): - lds r4,fpul - float fpul,dr0 - lds r5,fpul - float fpul,dr2 - fdiv dr2,dr0 - rts - ftrc dr0,fpul - - ENDFUNC(GLOBAL(sdivsi3_i4)) -#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) -!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 - -#if ! __SH5__ || __SH5__ == 32 -#if __SH5__ - .mode SHcompact -#endif - .global GLOBAL(sdivsi3_i4) - HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) -GLOBAL(sdivsi3_i4): - sts.l fpscr,@-r15 - mov #8,r2 - swap.w r2,r2 - lds r2,fpscr - lds r4,fpul - float fpul,dr0 - lds r5,fpul - float fpul,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul - rts - lds.l @r15+,fpscr - - ENDFUNC(GLOBAL(sdivsi3_i4)) -#endif /* ! __SH5__ || __SH5__ == 32 */ -#endif /* ! __SH4__ */ -#endif - -#ifdef L_sdivsi3 -/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with - sh2e/sh3e code. */ -#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) -!! -!! Steve Chamberlain -!! sac@cygnus.com -!! -!! - -!! 
args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit - - .global GLOBAL(sdivsi3) -#if __SHMEDIA__ -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 2 -#if 0 -/* The assembly code that follows is a hand-optimized version of the C - code that follows. Note that the registers that are modified are - exactly those listed as clobbered in the patterns divsi3_i1 and - divsi3_i1_media. - -int __sdivsi3 (i, j) - int i, j; -{ - register unsigned long long r18 asm ("r18"); - register unsigned long long r19 asm ("r19"); - register unsigned long long r0 asm ("r0") = 0; - register unsigned long long r1 asm ("r1") = 1; - register int r2 asm ("r2") = i >> 31; - register int r3 asm ("r3") = j >> 31; - - r2 = r2 ? r2 : r1; - r3 = r3 ? r3 : r1; - r18 = i * r2; - r19 = j * r3; - r2 *= r3; - - r19 <<= 31; - r1 <<= 31; - do - if (r18 >= r19) - r0 |= r1, r18 -= r19; - while (r19 >>= 1, r1 >>= 1); - - return r2 * (int)r0; -} -*/ -GLOBAL(sdivsi3): - pt/l LOCAL(sdivsi3_dontadd), tr2 - pt/l LOCAL(sdivsi3_loop), tr1 - ptabs/l r18, tr0 - movi 0, r0 - movi 1, r1 - shari.l r4, 31, r2 - shari.l r5, 31, r3 - cmveq r2, r1, r2 - cmveq r3, r1, r3 - muls.l r4, r2, r18 - muls.l r5, r3, r19 - muls.l r2, r3, r2 - shlli r19, 31, r19 - shlli r1, 31, r1 -LOCAL(sdivsi3_loop): - bgtu r19, r18, tr2 - or r0, r1, r0 - sub r18, r19, r18 -LOCAL(sdivsi3_dontadd): - shlri r1, 1, r1 - shlri r19, 1, r19 - bnei r1, 0, tr1 - muls.l r0, r2, r0 - add.l r0, r63, r0 - blink tr0, r63 -#elif 0 /* ! 0 */ - // inputs: r4,r5 - // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 - // result in r0 -GLOBAL(sdivsi3): - // can create absolute value without extra latency, - // but dependent on proper sign extension of inputs: - // shari.l r5,31,r2 - // xor r5,r2,r20 - // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. - shari.l r5,31,r2 - ori r2,1,r2 - muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. - movi 0xffffffffffffbb0c,r19 // shift count eqiv 76 - shari.l r4,31,r3 - nsb r20,r0 - shlld r20,r0,r25 - shlri r25,48,r25 - sub r19,r25,r1 - mmulfx.w r1,r1,r2 - mshflo.w r1,r63,r1 - // If r4 was to be used in-place instead of r21, could use this sequence - // to compute absolute: - // sub r63,r4,r19 // compute absolute value of r4 - // shlri r4,32,r3 // into lower 32 bit of r4, keeping - // mcmv r19,r3,r4 // the sign in the upper 32 bits intact. - ori r3,1,r3 - mmulfx.w r25,r2,r2 - sub r19,r0,r0 - muls.l r4,r3,r21 - msub.w r1,r2,r2 - addi r2,-2,r1 - mulu.l r21,r1,r19 - mmulfx.w r2,r2,r2 - shlli r1,15,r1 - shlrd r19,r0,r19 - mulu.l r19,r20,r3 - mmacnfx.wl r25,r2,r1 - ptabs r18,tr0 - sub r21,r3,r25 - - mulu.l r25,r1,r2 - addi r0,14,r0 - xor r4,r5,r18 - shlrd r2,r0,r2 - mulu.l r2,r20,r3 - add r19,r2,r19 - shari.l r18,31,r18 - sub r25,r3,r25 - - mulu.l r25,r1,r2 - sub r25,r20,r25 - add r19,r18,r19 - shlrd r2,r0,r2 - mulu.l r2,r20,r3 - addi r25,1,r25 - add r19,r2,r19 - - cmpgt r25,r3,r25 - add.l r19,r25,r0 - xor r0,r18,r0 - blink tr0,r63 -#else /* ! 0 && ! 0 */ - - // inputs: r4,r5 - // clobbered: r1,r18,r19,r20,r21,r25,tr0 - // result in r0 - HIDDEN_FUNC(GLOBAL(sdivsi3_2)) -#ifndef __pic__ - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): /* this is the shcompact entry point */ - // The special SHmedia entry point sdivsi3_1 prevents accidental linking - // with the SHcompact implementation, which clobbers tr1 / tr2. 
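The mulsi3 comment above (r4 = aabb, r5 = ccdd) relies on the usual 16x16 partial-product identity: with a = (ah<<16)|al and b = (bh<<16)|bl, the low 32 bits of a*b are al*bl + ((ah*bl + bh*al) << 16), since the ah*bh term only contributes to bits 32 and up. Spelled out in C (a sketch, not the helper itself; the early-out mirrors the `tst r3,r3` test on the combined upper halves):

#include <stdint.h>

uint32_t mulsi3_ref(uint32_t a, uint32_t b)
{
    uint32_t al = a & 0xffff, ah = a >> 16;    /* bb, aa */
    uint32_t bl = b & 0xffff, bh = b >> 16;    /* dd, cc */
    uint32_t low = al * bl;                    /* mulu.w r4,r5 */
    if (ah == 0 && bh == 0)
        return low;                            /* both upper halves zero */
    return low + ((ah * bl + bh * al) << 16);  /* two more mulu.w, shll16, add */
}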
- .global GLOBAL(sdivsi3_1) -GLOBAL(sdivsi3_1): - .global GLOBAL(div_table_internal) - movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 - shori GLOBAL(div_table_internal) & 65535, r20 -#endif - .global GLOBAL(sdivsi3_2) - // div_table in r20 - // clobbered: r1,r18,r19,r21,r25,tr0 -GLOBAL(sdivsi3_2): - nsb r5, r1 - shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 - shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) - ldx.ub r20, r21, r19 // u0.8 - shari r25, 32, r25 // normalize to s2.30 - shlli r21, 1, r21 - muls.l r25, r19, r19 // s2.38 - ldx.w r20, r21, r21 // s2.14 - ptabs r18, tr0 - shari r19, 24, r19 // truncate to s2.14 - sub r21, r19, r19 // some 11 bit inverse in s1.14 - muls.l r19, r19, r21 // u0.28 - sub r63, r1, r1 - addi r1, 92, r1 - muls.l r25, r21, r18 // s2.58 - shlli r19, 45, r19 // multiply by two and convert to s2.58 - /* bubble */ - sub r19, r18, r18 - shari r18, 28, r18 // some 22 bit inverse in s1.30 - muls.l r18, r25, r0 // s2.60 - muls.l r18, r4, r25 // s32.30 - /* bubble */ - shari r0, 16, r19 // s-16.44 - muls.l r19, r18, r19 // s-16.74 - shari r25, 63, r0 - shari r4, 14, r18 // s19.-14 - shari r19, 30, r19 // s-16.44 - muls.l r19, r18, r19 // s15.30 - xor r21, r0, r21 // You could also use the constant 1 << 27. - add r21, r25, r21 - sub r21, r19, r21 - shard r21, r1, r21 - sub r21, r0, r0 - blink tr0, r63 -#ifndef __pic__ - ENDFUNC(GLOBAL(sdivsi3)) -#endif - ENDFUNC(GLOBAL(sdivsi3_2)) -#endif -#elif defined __SHMEDIA__ -/* m5compact-nofpu */ - // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): - pt/l LOCAL(sdivsi3_dontsub), tr0 - pt/l LOCAL(sdivsi3_loop), tr1 - ptabs/l r18,tr2 - shari.l r4,31,r18 - shari.l r5,31,r19 - xor r4,r18,r20 - xor r5,r19,r21 - sub.l r20,r18,r20 - sub.l r21,r19,r21 - xor r18,r19,r19 - shlli r21,32,r25 - addi r25,-1,r21 - addz.l r20,r63,r20 -LOCAL(sdivsi3_loop): - shlli r20,1,r20 - bgeu/u r21,r20,tr0 - sub r20,r21,r20 -LOCAL(sdivsi3_dontsub): - addi.l r25,-1,r25 - bnei r25,-32,tr1 - xor r20,r19,r20 - sub.l r20,r19,r0 - blink tr2,r63 - ENDFUNC(GLOBAL(sdivsi3)) -#else /* ! __SHMEDIA__ */ - FUNC(GLOBAL(sdivsi3)) -GLOBAL(sdivsi3): - mov r4,r1 - mov r5,r0 - - tst r0,r0 - bt div0 - mov #0,r2 - div0s r2,r1 - subc r3,r3 - subc r2,r1 - div0s r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - div1 r0,r3 - rotcl r1 - addc r2,r1 - rts - mov r1,r0 - - -div0: rts - mov #0,r0 - - ENDFUNC(GLOBAL(sdivsi3)) -#endif /* ! __SHMEDIA__ */ -#endif /* ! __SH4__ */ -#endif -#ifdef L_udivsi3_i4 - - .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH -#ifdef __SH4__ -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, -!! 
and t bit - - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - rotr r1 - xor r1,r4 - lds r4,fpul - mova L1,r0 -#ifdef FMOVD_WORKS - fmov.d @r0+,dr4 -#else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 -#endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 - rts - ftrc dr0,fpul - -trivial: - rts - lds r4,fpul - - .align 2 -#ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. -#endif -L1: - .double 2147483648 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) -#if ! __SH5__ || __SH5__ == 32 -!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 - .mode SHmedia - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - addz.l r4,r63,r20 - addz.l r5,r63,r21 - fmov.qd r20,dr0 - fmov.qd r21,dr32 - ptabs r18,tr0 - float.qd dr0,dr0 - float.qd dr32,dr32 - fdiv.d dr0,dr32,dr0 - ftrc.dq dr0,dr32 - fmov.s fr33,fr32 - blink tr0,r63 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#endif /* ! __SH5__ || __SH5__ == 32 */ -#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 - - .global GLOBAL(udivsi3_i4) - HIDDEN_FUNC(GLOBAL(udivsi3_i4)) -GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - sts.l fpscr,@-r15 - mova L1,r0 - lds.l @r0+,fpscr - rotr r1 - xor r1,r4 - lds r4,fpul -#ifdef FMOVD_WORKS - fmov.d @r0+,dr4 -#else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 -#endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul - rts - lds.l @r15+,fpscr - -#ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. -#endif -trivial: - rts - lds r4,fpul - - .align 2 -L1: -#ifndef FMOVD_WORKS - .long 0x80000 -#else - .long 0x180000 -#endif - .double 2147483648 - - ENDFUNC(GLOBAL(udivsi3_i4)) -#endif /* ! __SH4__ */ -#endif - -#ifdef L_udivsi3 -/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with - sh2e/sh3e code. */ -#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) - -!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit - .global GLOBAL(udivsi3) - HIDDEN_FUNC(GLOBAL(udivsi3)) - -#if __SHMEDIA__ -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 2 -#if 0 -/* The assembly code that follows is a hand-optimized version of the C - code that follows. Note that the registers that are modified are - exactly those listed as clobbered in the patterns udivsi3_i1 and - udivsi3_i1_media. - -unsigned -__udivsi3 (i, j) - unsigned i, j; -{ - register unsigned long long r0 asm ("r0") = 0; - register unsigned long long r18 asm ("r18") = 1; - register unsigned long long r4 asm ("r4") = i; - register unsigned long long r19 asm ("r19") = j; - - r19 <<= 31; - r18 <<= 31; - do - if (r4 >= r19) - r0 |= r18, r4 -= r19; - while (r19 >>= 1, r18 >>= 1); - - return r0; -} -*/ -GLOBAL(udivsi3): - pt/l LOCAL(udivsi3_dontadd), tr2 - pt/l LOCAL(udivsi3_loop), tr1 - ptabs/l r18, tr0 - movi 0, r0 - movi 1, r18 - addz.l r5, r63, r19 - addz.l r4, r63, r4 - shlli r19, 31, r19 - shlli r18, 31, r18 -LOCAL(udivsi3_loop): - bgtu r19, r4, tr2 - or r0, r18, r0 - sub r4, r19, r4 -LOCAL(udivsi3_dontadd): - shlri r18, 1, r18 - shlri r19, 1, r19 - bnei r18, 0, tr1 - blink tr0, r63 -#else -GLOBAL(udivsi3): - // inputs: r4,r5 - // clobbered: r18,r19,r20,r21,r22,r25,tr0 - // result in r0. 
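The SH4 udivsi3_i4 variants above do unsigned 32-bit division in the FPU: each operand is xor-ed with 0x80000000, run through the signed int-to-double conversion, and then re-biased by adding 2147483648.0 (the constant at L1), which reconstructs the exact unsigned value as a double; a double-precision divide and a truncating ftrc then produce the quotient. A rough C model of that path (a sketch; it assumes the usual two's-complement wraparound on the unsigned-to-int cast):

unsigned udivsi3_i4_ref(unsigned i, unsigned j)
{
    if (j <= 1)                    /* trivial path: cmp/hi #1 fails */
        return i;                  /* j == 0 is undefined behaviour anyway */
    double di = (double)(int)(i ^ 0x80000000u) + 2147483648.0;   /* == (double)i */
    double dj = (double)(int)(j ^ 0x80000000u) + 2147483648.0;   /* == (double)j */
    return (unsigned)(di / dj);    /* ftrc truncates toward zero, like the cast */
}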
- addz.l r5,r63,r22 - nsb r22,r0 - shlld r22,r0,r25 - shlri r25,48,r25 - movi 0xffffffffffffbb0c,r20 // shift count eqiv 76 - sub r20,r25,r21 - mmulfx.w r21,r21,r19 - mshflo.w r21,r63,r21 - ptabs r18,tr0 - mmulfx.w r25,r19,r19 - sub r20,r0,r0 - /* bubble */ - msub.w r21,r19,r19 - addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21 - before the msub.w, but we need a different value for - r19 to keep errors under control. */ - mulu.l r4,r21,r18 - mmulfx.w r19,r19,r19 - shlli r21,15,r21 - shlrd r18,r0,r18 - mulu.l r18,r22,r20 - mmacnfx.wl r25,r19,r21 - /* bubble */ - sub r4,r20,r25 - - mulu.l r25,r21,r19 - addi r0,14,r0 - /* bubble */ - shlrd r19,r0,r19 - mulu.l r19,r22,r20 - add r18,r19,r18 - /* bubble */ - sub.l r25,r20,r25 - - mulu.l r25,r21,r19 - addz.l r25,r63,r25 - sub r25,r22,r25 - shlrd r19,r0,r19 - mulu.l r19,r22,r20 - addi r25,1,r25 - add r18,r19,r18 - - cmpgt r25,r20,r25 - add.l r18,r25,r0 - blink tr0,r63 -#endif -#elif defined (__SHMEDIA__) -/* m5compact-nofpu - more emphasis on code size than on speed, but don't - ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4. - So use a short shmedia loop. */ - // clobbered: r20,r21,r25,tr0,tr1,tr2 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 -GLOBAL(udivsi3): - pt/l LOCAL(udivsi3_dontsub), tr0 - pt/l LOCAL(udivsi3_loop), tr1 - ptabs/l r18,tr2 - shlli r5,32,r25 - addi r25,-1,r21 - addz.l r4,r63,r20 -LOCAL(udivsi3_loop): - shlli r20,1,r20 - bgeu/u r21,r20,tr0 - sub r20,r21,r20 -LOCAL(udivsi3_dontsub): - addi.l r25,-1,r25 - bnei r25,-32,tr1 - add.l r20,r63,r0 - blink tr2,r63 -#else /* ! defined (__SHMEDIA__) */ -LOCAL(div8): - div1 r5,r4 -LOCAL(div7): - div1 r5,r4; div1 r5,r4; div1 r5,r4 - div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 - -LOCAL(divx4): - div1 r5,r4; rotcl r0 - div1 r5,r4; rotcl r0 - div1 r5,r4; rotcl r0 - rts; div1 r5,r4 - -GLOBAL(udivsi3): - sts.l pr,@-r15 - extu.w r5,r0 - cmp/eq r5,r0 -#ifdef __sh1__ - bf LOCAL(large_divisor) -#else - bf/s LOCAL(large_divisor) -#endif - div0u - swap.w r4,r0 - shlr16 r4 - bsr LOCAL(div8) - shll16 r5 - bsr LOCAL(div7) - div1 r5,r4 - xtrct r4,r0 - xtrct r0,r4 - bsr LOCAL(div8) - swap.w r4,r4 - bsr LOCAL(div7) - div1 r5,r4 - lds.l @r15+,pr - xtrct r4,r0 - swap.w r0,r0 - rotcl r0 - rts - shlr16 r5 - -LOCAL(large_divisor): -#ifdef __sh1__ - div0u -#endif - mov #0,r0 - xtrct r4,r0 - xtrct r0,r4 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - bsr LOCAL(divx4) - rotcl r0 - lds.l @r15+,pr - rts - rotcl r0 - - ENDFUNC(GLOBAL(udivsi3)) -#endif /* ! __SHMEDIA__ */ -#endif /* __SH4__ */ -#endif /* L_udivsi3 */ - -#ifdef L_udivdi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(udivdi3) - FUNC(GLOBAL(udivdi3)) -GLOBAL(udivdi3): - HIDDEN_ALIAS(udivdi3_internal,udivdi3) - shlri r3,1,r4 - nsb r4,r22 - shlld r3,r22,r6 - shlri r6,49,r5 - movi 0xffffffffffffbaf1,r21 /* .l shift count 17. 
*/ - sub r21,r5,r1 - mmulfx.w r1,r1,r4 - mshflo.w r1,r63,r1 - sub r63,r22,r20 // r63 == 64 % 64 - mmulfx.w r5,r4,r4 - pta LOCAL(large_divisor),tr0 - addi r20,32,r9 - msub.w r1,r4,r1 - madd.w r1,r1,r1 - mmulfx.w r1,r1,r4 - shlri r6,32,r7 - bgt/u r9,r63,tr0 // large_divisor - mmulfx.w r5,r4,r4 - shlri r2,32+14,r19 - addi r22,-31,r0 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r19,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - mulu.l r5,r3,r8 - mshalds.l r1,r21,r1 - shari r4,26,r4 - shlld r8,r0,r8 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r2,r8,r2 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ - - shlri r2,22,r21 - mulu.l r21,r1,r21 - shlld r5,r0,r8 - addi r20,30-22,r0 - shlrd r21,r0,r21 - mulu.l r21,r3,r5 - add r8,r21,r8 - mcmpgt.l r21,r63,r21 // See Note 1 - addi r20,30,r0 - mshfhi.l r63,r21,r21 - sub r2,r5,r2 - andc r2,r21,r2 - - /* small divisor: need a third divide step */ - mulu.l r2,r1,r7 - ptabs r18,tr0 - addi r2,1,r2 - shlrd r7,r0,r7 - mulu.l r7,r3,r5 - add r8,r7,r8 - sub r2,r3,r2 - cmpgt r2,r5,r5 - add r8,r5,r2 - /* could test r3 here to check for divide by zero. */ - blink tr0,r63 - -LOCAL(large_divisor): - mmulfx.w r5,r4,r4 - shlrd r2,r9,r25 - shlri r25,32,r8 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r8,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - shlri r5,14-1,r8 - mulu.l r8,r7,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r25,r5,r25 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ - - shlri r25,22,r21 - mulu.l r21,r1,r21 - pta LOCAL(no_lo_adj),tr0 - addi r22,32,r0 - shlri r21,40,r21 - mulu.l r21,r7,r5 - add r8,r21,r8 - shlld r2,r0,r2 - sub r25,r5,r25 - bgtu/u r7,r25,tr0 // no_lo_adj - addi r8,1,r8 - sub r25,r7,r25 -LOCAL(no_lo_adj): - mextr4 r2,r25,r2 - - /* large_divisor: only needs a few adjustments. */ - mulu.l r8,r6,r5 - ptabs r18,tr0 - /* bubble */ - cmpgtu r5,r2,r5 - sub r8,r5,r2 - blink tr0,r63 - ENDFUNC(GLOBAL(udivdi3)) -/* Note 1: To shift the result of the second divide stage so that the result - always fits into 32 bits, yet we still reduce the rest sufficiently - would require a lot of instructions to do the shifts just right. Using - the full 64 bit shift result to multiply with the divisor would require - four extra instructions for the upper 32 bits (shift / mulu / shift / sub). - Fortunately, if the upper 32 bits of the shift result are nonzero, we - know that the rest after taking this partial result into account will - fit into 32 bits. So we just clear the upper 32 bits of the rest if the - upper 32 bits of the partial result are nonzero. 
*/ -#endif /* __SHMEDIA__ */ -#endif /* L_udivdi3 */ - -#ifdef L_divdi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(divdi3) - FUNC(GLOBAL(divdi3)) -GLOBAL(divdi3): - pta GLOBAL(udivdi3_internal),tr0 - shari r2,63,r22 - shari r3,63,r23 - xor r2,r22,r2 - xor r3,r23,r3 - sub r2,r22,r2 - sub r3,r23,r3 - beq/u r22,r23,tr0 - ptabs r18,tr1 - blink tr0,r18 - sub r63,r2,r2 - blink tr1,r63 - ENDFUNC(GLOBAL(divdi3)) -#endif /* __SHMEDIA__ */ -#endif /* L_divdi3 */ - -#ifdef L_umoddi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(umoddi3) - FUNC(GLOBAL(umoddi3)) -GLOBAL(umoddi3): - HIDDEN_ALIAS(umoddi3_internal,umoddi3) - shlri r3,1,r4 - nsb r4,r22 - shlld r3,r22,r6 - shlri r6,49,r5 - movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ - sub r21,r5,r1 - mmulfx.w r1,r1,r4 - mshflo.w r1,r63,r1 - sub r63,r22,r20 // r63 == 64 % 64 - mmulfx.w r5,r4,r4 - pta LOCAL(large_divisor),tr0 - addi r20,32,r9 - msub.w r1,r4,r1 - madd.w r1,r1,r1 - mmulfx.w r1,r1,r4 - shlri r6,32,r7 - bgt/u r9,r63,tr0 // large_divisor - mmulfx.w r5,r4,r4 - shlri r2,32+14,r19 - addi r22,-31,r0 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r19,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - mulu.l r5,r3,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - shlld r5,r0,r5 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r2,r5,r2 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ - - shlri r2,22,r21 - mulu.l r21,r1,r21 - addi r20,30-22,r0 - /* bubble */ /* could test r3 here to check for divide by zero. */ - shlrd r21,r0,r21 - mulu.l r21,r3,r5 - mcmpgt.l r21,r63,r21 // See Note 1 - addi r20,30,r0 - mshfhi.l r63,r21,r21 - sub r2,r5,r2 - andc r2,r21,r2 - - /* small divisor: need a third divide step */ - mulu.l r2,r1,r7 - ptabs r18,tr0 - sub r2,r3,r8 /* re-use r8 here for rest - r3 */ - shlrd r7,r0,r7 - mulu.l r7,r3,r5 - /* bubble */ - addi r8,1,r7 - cmpgt r7,r5,r7 - cmvne r7,r8,r2 - sub r2,r5,r2 - blink tr0,r63 - -LOCAL(large_divisor): - mmulfx.w r5,r4,r4 - shlrd r2,r9,r25 - shlri r25,32,r8 - msub.w r1,r4,r1 - - mulu.l r1,r7,r4 - addi r1,-3,r5 - mulu.l r5,r8,r5 - sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 - shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as - the case may be, %0000000000000000 000.11111111111, still */ - muls.l r1,r4,r4 /* leaving at least one sign bit. */ - shlri r5,14-1,r8 - mulu.l r8,r7,r5 - mshalds.l r1,r21,r1 - shari r4,26,r4 - add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) - sub r25,r5,r25 - /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ - - shlri r25,22,r21 - mulu.l r21,r1,r21 - pta LOCAL(no_lo_adj),tr0 - addi r22,32,r0 - shlri r21,40,r21 - mulu.l r21,r7,r5 - add r8,r21,r8 - shlld r2,r0,r2 - sub r25,r5,r25 - bgtu/u r7,r25,tr0 // no_lo_adj - addi r8,1,r8 - sub r25,r7,r25 -LOCAL(no_lo_adj): - mextr4 r2,r25,r2 - - /* large_divisor: only needs a few adjustments. 
*/ - mulu.l r8,r6,r5 - ptabs r18,tr0 - add r2,r6,r7 - cmpgtu r5,r2,r8 - cmvne r8,r7,r2 - sub r2,r5,r2 - shlrd r2,r22,r2 - blink tr0,r63 - ENDFUNC(GLOBAL(umoddi3)) -/* Note 1: To shift the result of the second divide stage so that the result - always fits into 32 bits, yet we still reduce the rest sufficiently - would require a lot of instructions to do the shifts just right. Using - the full 64 bit shift result to multiply with the divisor would require - four extra instructions for the upper 32 bits (shift / mulu / shift / sub). - Fortunately, if the upper 32 bits of the shift result are nonzero, we - know that the rest after taking this partial result into account will - fit into 32 bits. So we just clear the upper 32 bits of the rest if the - upper 32 bits of the partial result are nonzero. */ -#endif /* __SHMEDIA__ */ -#endif /* L_umoddi3 */ - -#ifdef L_moddi3 -#ifdef __SHMEDIA__ - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(moddi3) - FUNC(GLOBAL(moddi3)) -GLOBAL(moddi3): - pta GLOBAL(umoddi3_internal),tr0 - shari r2,63,r22 - shari r3,63,r23 - xor r2,r22,r2 - xor r3,r23,r3 - sub r2,r22,r2 - sub r3,r23,r3 - beq/u r22,r63,tr0 - ptabs r18,tr1 - blink tr0,r18 - sub r63,r2,r2 - blink tr1,r63 - ENDFUNC(GLOBAL(moddi3)) -#endif /* __SHMEDIA__ */ -#endif /* L_moddi3 */ - -#ifdef L_set_fpscr -#if !defined (__SH2A_NOFPU__) -#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32 -#ifdef __SH5__ - .mode SHcompact -#endif - .global GLOBAL(set_fpscr) - HIDDEN_FUNC(GLOBAL(set_fpscr)) -GLOBAL(set_fpscr): - lds r4,fpscr -#ifdef __PIC__ - mov.l r12,@-r15 -#ifdef __vxworks - mov.l LOCAL(set_fpscr_L0_base),r12 - mov.l LOCAL(set_fpscr_L0_index),r0 - mov.l @r12,r12 - mov.l @(r0,r12),r12 -#else - mova LOCAL(set_fpscr_L0),r0 - mov.l LOCAL(set_fpscr_L0),r12 - add r0,r12 -#endif - mov.l LOCAL(set_fpscr_L1),r0 - mov.l @(r0,r12),r1 - mov.l @r15+,r12 -#else - mov.l LOCAL(set_fpscr_L1),r1 -#endif - swap.w r4,r0 - or #24,r0 -#ifndef FMOVD_WORKS - xor #16,r0 -#endif -#if defined(__SH4__) || defined (__SH2A_DOUBLE__) - swap.w r0,r3 - mov.l r3,@(4,r1) -#else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ - swap.w r0,r2 - mov.l r2,@r1 -#endif -#ifndef FMOVD_WORKS - xor #8,r0 -#else - xor #24,r0 -#endif -#if defined(__SH4__) || defined (__SH2A_DOUBLE__) - swap.w r0,r2 - rts - mov.l r2,@r1 -#else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ - swap.w r0,r3 - rts - mov.l r3,@(4,r1) -#endif - .align 2 -#ifdef __PIC__ -#ifdef __vxworks -LOCAL(set_fpscr_L0_base): - .long ___GOTT_BASE__ -LOCAL(set_fpscr_L0_index): - .long ___GOTT_INDEX__ -#else -LOCAL(set_fpscr_L0): - .long _GLOBAL_OFFSET_TABLE_ -#endif -LOCAL(set_fpscr_L1): - .long GLOBAL(fpscr_values@GOT) -#else -LOCAL(set_fpscr_L1): - .long GLOBAL(fpscr_values) -#endif - - ENDFUNC(GLOBAL(set_fpscr)) -#ifndef NO_FPSCR_VALUES -#ifdef __ELF__ - .comm GLOBAL(fpscr_values),8,4 -#else - .comm GLOBAL(fpscr_values),8 -#endif /* ELF */ -#endif /* NO_FPSCR_VALUES */ -#endif /* SH2E / SH3E / SH4 */ -#endif /* __SH2A_NOFPU__ */ -#endif /* L_set_fpscr */ -#ifdef L_ic_invalidate -#if __SH5__ == 32 - .mode SHmedia - .section .text..SHmedia32,"ax" - .align 2 - .global GLOBAL(init_trampoline) - HIDDEN_FUNC(GLOBAL(init_trampoline)) -GLOBAL(init_trampoline): - st.l r0,8,r2 -#ifdef __LITTLE_ENDIAN__ - movi 9,r20 - shori 0x402b,r20 - shori 0xd101,r20 - shori 0xd002,r20 -#else - movi 0xffffffffffffd002,r20 
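Both signed 64-bit wrappers in this hunk, divdi3 and moddi3, follow the same pattern: take absolute values with shari/xor/sub, hand the work to the unsigned SHmedia routine (udivdi3_internal or umoddi3_internal), and fix the sign afterwards; the quotient is negated when the operand signs differ, while the remainder takes the sign of the dividend. In C, with the built-in operators standing in for the unsigned helpers (a sketch, ignoring the INT64_MIN corner case):

long long divdi3_ref(long long a, long long b)
{
    unsigned long long ua = a < 0 ? 0ULL - (unsigned long long)a : (unsigned long long)a;
    unsigned long long ub = b < 0 ? 0ULL - (unsigned long long)b : (unsigned long long)b;
    unsigned long long q = ua / ub;                        /* udivdi3_internal */
    return ((a < 0) != (b < 0)) ? -(long long)q : (long long)q;
}

long long moddi3_ref(long long a, long long b)
{
    unsigned long long ua = a < 0 ? 0ULL - (unsigned long long)a : (unsigned long long)a;
    unsigned long long ub = b < 0 ? 0ULL - (unsigned long long)b : (unsigned long long)b;
    unsigned long long r = ua % ub;                        /* umoddi3_internal */
    return a < 0 ? -(long long)r : (long long)r;
}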
- shori 0xd101,r20 - shori 0x402b,r20 - shori 9,r20 -#endif - st.q r0,0,r20 - st.l r0,12,r3 - ENDFUNC(GLOBAL(init_trampoline)) - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): - ocbwb r0,0 - synco - icbi r0, 0 - ptabs r18, tr0 - synci - blink tr0, r63 - ENDFUNC(GLOBAL(ic_invalidate)) -#elif defined(__SH4A__) - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): - ocbwb @r4 - synco - icbi @r4 - rts - nop - ENDFUNC(GLOBAL(ic_invalidate)) -#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) - /* For system code, we use ic_invalidate_line_i, but user code - needs a different mechanism. A kernel call is generally not - available, and it would also be slow. Different SH4 variants use - different sizes and associativities of the Icache. We use a small - bit of dispatch code that can be put hidden in every shared object, - which calls the actual processor-specific invalidation code in a - separate module. - Or if you have operating system support, the OS could mmap the - procesor-specific code from a single page, since it is highly - repetitive. */ - .global GLOBAL(ic_invalidate) - HIDDEN_FUNC(GLOBAL(ic_invalidate)) -GLOBAL(ic_invalidate): -#ifdef __pic__ -#ifdef __vxworks - mov.l 1f,r1 - mov.l 2f,r0 - mov.l @r1,r1 - mov.l 0f,r2 - mov.l @(r0,r1),r0 -#else - mov.l 1f,r1 - mova 1f,r0 - mov.l 0f,r2 - add r1,r0 -#endif - mov.l @(r0,r2),r1 -#else - mov.l 0f,r1 -#endif - ocbwb @r4 - mov.l @(8,r1),r0 - sub r1,r4 - and r4,r0 - add r1,r0 - jmp @r0 - mov.l @(4,r1),r0 - .align 2 -#ifndef __pic__ -0: .long GLOBAL(ic_invalidate_array) -#else /* __pic__ */ - .global GLOBAL(ic_invalidate_array) -0: .long GLOBAL(ic_invalidate_array)@GOT -#ifdef __vxworks -1: .long ___GOTT_BASE__ -2: .long ___GOTT_INDEX__ -#else -1: .long _GLOBAL_OFFSET_TABLE_ -#endif - ENDFUNC(GLOBAL(ic_invalidate)) -#endif /* __pic__ */ -#endif /* SH4 */ -#endif /* L_ic_invalidate */ - -#ifdef L_ic_invalidate_array -#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)))) - .global GLOBAL(ic_invalidate_array) - /* This is needed when an SH4 dso with trampolines is used on SH4A. */ - .global GLOBAL(ic_invalidate_array) - FUNC(GLOBAL(ic_invalidate_array)) -GLOBAL(ic_invalidate_array): - add r1,r4 - synco - icbi @r4 - rts - nop - .align 2 - .long 0 - ENDFUNC(GLOBAL(ic_invalidate_array)) -#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) - .global GLOBAL(ic_invalidate_array) - .p2align 5 - FUNC(GLOBAL(ic_invalidate_array)) -/* This must be aligned to the beginning of a cache line. */ -GLOBAL(ic_invalidate_array): -#ifndef WAYS -#define WAYS 4 -#define WAY_SIZE 0x4000 -#endif -#if WAYS == 1 - .rept WAY_SIZE * WAYS / 32 - rts - nop - .rept 7 - .long WAY_SIZE - 32 - .endr - .endr -#elif WAYS <= 6 - .rept WAY_SIZE * WAYS / 32 - braf r0 - add #-8,r0 - .long WAY_SIZE + 8 - .long WAY_SIZE - 32 - .rept WAYS-2 - braf r0 - nop - .endr - .rept 7 - WAYS - rts - nop - .endr - .endr -#else /* WAYS > 6 */ - /* This variant needs two different pages for mmap-ing. 
*/ - .rept WAYS-1 - .rept WAY_SIZE / 32 - braf r0 - nop - .long WAY_SIZE - .rept 6 - .long WAY_SIZE - 32 - .endr - .endr - .endr - .rept WAY_SIZE / 32 - rts - .rept 15 - nop - .endr - .endr -#endif /* WAYS */ - ENDFUNC(GLOBAL(ic_invalidate_array)) -#endif /* SH4 */ -#endif /* L_ic_invalidate_array */ - -#if defined (__SH5__) && __SH5__ == 32 -#ifdef L_shcompact_call_trampoline - .section .rodata - .align 1 -LOCAL(ct_main_table): -.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) -.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - - /* This function loads 64-bit general-purpose registers from the - stack, from a memory address contained in them or from an FP - register, according to a cookie passed in r1. Its execution - time is linear on the number of registers that actually have - to be copied. See sh.h for details on the actual bit pattern. - - The function to be called is passed in r0. If a 32-bit return - value is expected, the actual function will be tail-called, - otherwise the return address will be stored in r10 (that the - caller should expect to be clobbered) and the return value - will be expanded into r2/r3 upon return. */ - - .global GLOBAL(GCC_shcompact_call_trampoline) - FUNC(GLOBAL(GCC_shcompact_call_trampoline)) -GLOBAL(GCC_shcompact_call_trampoline): - ptabs/l r0, tr0 /* Prepare to call the actual function. */ - movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 - pt/l LOCAL(ct_loop), tr1 - addz.l r1, r63, r1 - shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 -LOCAL(ct_loop): - nsb r1, r28 - shlli r28, 1, r29 - ldx.w r0, r29, r30 -LOCAL(ct_main_label): - ptrel/l r30, tr2 - blink tr2, r63 -LOCAL(ct_r2_fp): /* Copy r2 from an FP register. 
*/ - /* It must be dr0, so just do it. */ - fmov.dq dr0, r2 - movi 7, r30 - shlli r30, 29, r31 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ - /* It is either dr0 or dr2. */ - movi 7, r30 - shlri r1, 26, r32 - shlli r30, 26, r31 - andc r1, r31, r1 - fmov.dq dr0, r3 - beqi/l r32, 4, tr1 - fmov.dq dr2, r3 - blink tr1, r63 -LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ - shlri r1, 23 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 -LOCAL(ct_r4_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 23, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r4_fp_copy): - fmov.dq dr0, r4 - blink tr1, r63 - fmov.dq dr2, r4 - blink tr1, r63 - fmov.dq dr4, r4 - blink tr1, r63 -LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ - shlri r1, 20 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 -LOCAL(ct_r5_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 20, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r5_fp_copy): - fmov.dq dr0, r5 - blink tr1, r63 - fmov.dq dr2, r5 - blink tr1, r63 - fmov.dq dr4, r5 - blink tr1, r63 - fmov.dq dr6, r5 - blink tr1, r63 -LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ - /* It must be dr8. */ - fmov.dq dr8, r6 - movi 15, r30 - shlli r30, 16, r31 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */ - shlri r1, 16 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 -LOCAL(ct_r6_fp_base): - ptrel/l r32, tr2 - movi 7, r30 - shlli r30, 16, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r6_fp_copy): - fmov.dq dr0, r6 - blink tr1, r63 - fmov.dq dr2, r6 - blink tr1, r63 - fmov.dq dr4, r6 - blink tr1, r63 - fmov.dq dr6, r6 - blink tr1, r63 -LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 12, r31 - shlri r1, 12, r32 - andc r1, r31, r1 - fmov.dq dr8, r7 - beqi/l r32, 8, tr1 - fmov.dq dr10, r7 - blink tr1, r63 -LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ - shlri r1, 12 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 -LOCAL(ct_r7_fp_base): - ptrel/l r32, tr2 - movi 7 << 12, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r7_fp_copy): - fmov.dq dr0, r7 - blink tr1, r63 - fmov.dq dr2, r7 - blink tr1, r63 - fmov.dq dr4, r7 - blink tr1, r63 - fmov.dq dr6, r7 - blink tr1, r63 -LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 8, r31 - andi r1, 1 << 8, r32 - andc r1, r31, r1 - fmov.dq dr8, r8 - beq/l r32, r63, tr1 - fmov.dq dr10, r8 - blink tr1, r63 -LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */ - shlri r1, 8 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 -LOCAL(ct_r8_fp_base): - ptrel/l r32, tr2 - movi 7 << 8, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r8_fp_copy): - fmov.dq dr0, r8 - blink tr1, r63 - fmov.dq dr2, r8 - blink tr1, r63 - fmov.dq dr4, r8 - blink tr1, r63 - fmov.dq dr6, r8 - blink tr1, r63 -LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ - /* It is either dr8 or dr10. */ - movi 15 << 4, r31 - andi r1, 1 << 4, r32 - andc r1, r31, r1 - fmov.dq dr8, r9 - beq/l r32, r63, tr1 - fmov.dq dr10, r9 - blink tr1, r63 -LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. 
*/ - shlri r1, 4 - 3, r34 - andi r34, 3 << 3, r33 - addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 -LOCAL(ct_r9_fp_base): - ptrel/l r32, tr2 - movi 7 << 4, r31 - andc r1, r31, r1 - blink tr2, r63 -LOCAL(ct_r9_fp_copy): - fmov.dq dr0, r9 - blink tr1, r63 - fmov.dq dr2, r9 - blink tr1, r63 - fmov.dq dr4, r9 - blink tr1, r63 - fmov.dq dr6, r9 - blink tr1, r63 -LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ - pt/l LOCAL(ct_r2_load), tr2 - movi 3, r30 - shlli r30, 29, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r2, 8, r3 - ldx.q r2, r63, r2 - /* Fall through. */ -LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ - pt/l LOCAL(ct_r3_load), tr2 - movi 3, r30 - shlli r30, 26, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r3, 8, r4 - ldx.q r3, r63, r3 -LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ - pt/l LOCAL(ct_r4_load), tr2 - movi 3, r30 - shlli r30, 23, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r4, 8, r5 - ldx.q r4, r63, r4 -LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */ - pt/l LOCAL(ct_r5_load), tr2 - movi 3, r30 - shlli r30, 20, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r5, 8, r6 - ldx.q r5, r63, r5 -LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ - pt/l LOCAL(ct_r6_load), tr2 - movi 3 << 16, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r6, 8, r7 - ldx.q r6, r63, r6 -LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ - pt/l LOCAL(ct_r7_load), tr2 - movi 3 << 12, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r7, 8, r8 - ldx.q r7, r63, r7 -LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */ - pt/l LOCAL(ct_r8_load), tr2 - movi 3 << 8, r31 - and r1, r31, r32 - andc r1, r31, r1 - beq/l r31, r32, tr2 - addi.l r8, 8, r9 - ldx.q r8, r63, r8 -LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ - pt/l LOCAL(ct_check_tramp), tr2 - ldx.q r9, r63, r9 - blink tr2, r63 -LOCAL(ct_r2_load): - ldx.q r2, r63, r2 - blink tr1, r63 -LOCAL(ct_r3_load): - ldx.q r3, r63, r3 - blink tr1, r63 -LOCAL(ct_r4_load): - ldx.q r4, r63, r4 - blink tr1, r63 -LOCAL(ct_r5_load): - ldx.q r5, r63, r5 - blink tr1, r63 -LOCAL(ct_r6_load): - ldx.q r6, r63, r6 - blink tr1, r63 -LOCAL(ct_r7_load): - ldx.q r7, r63, r7 - blink tr1, r63 -LOCAL(ct_r8_load): - ldx.q r8, r63, r8 - blink tr1, r63 -LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r2 - shlli r30, 29, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r3 - shlli r30, 26, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r4_pop): /* Pop r4 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r4 - shlli r30, 23, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r5 - shlli r30, 20, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ - movi 1, r30 - ldx.q r15, r63, r6 - shlli r30, 16, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ - ldx.q r15, r63, r7 - movi 1 << 12, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_r8_pop): /* Pop r8 from the stack. 
*/ - ldx.q r15, r63, r8 - movi 1 << 8, r31 - addi.l r15, 8, r15 - andc r1, r31, r1 - blink tr1, r63 -LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ - andi r1, 7 << 1, r30 - movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 - shlli r30, 2, r31 - shori LOCAL(ct_end_of_pop_seq) & 65535, r32 - sub.l r32, r31, r33 - ptabs/l r33, tr2 - blink tr2, r63 -LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ - ldx.q r15, r63, r3 - addi.l r15, 8, r15 - ldx.q r15, r63, r4 - addi.l r15, 8, r15 - ldx.q r15, r63, r5 - addi.l r15, 8, r15 - ldx.q r15, r63, r6 - addi.l r15, 8, r15 - ldx.q r15, r63, r7 - addi.l r15, 8, r15 - ldx.q r15, r63, r8 - addi.l r15, 8, r15 -LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ - ldx.q r15, r63, r9 - addi.l r15, 8, r15 -LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ -LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ - pt/u LOCAL(ct_ret_wide), tr2 - andi r1, 1, r1 - bne/u r1, r63, tr2 -LOCAL(ct_call_func): /* Just branch to the function. */ - blink tr0, r63 -LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its - 64-bit return value. */ - add.l r18, r63, r10 - blink tr0, r18 - ptabs r10, tr0 -#if __LITTLE_ENDIAN__ - shari r2, 32, r3 - add.l r2, r63, r2 -#else - add.l r2, r63, r3 - shari r2, 32, r2 -#endif - blink tr0, r63 - - ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) -#endif /* L_shcompact_call_trampoline */ - -#ifdef L_shcompact_return_trampoline - /* This function does the converse of the code in `ret_wide' - above. It is tail-called by SHcompact functions returning - 64-bit non-floating-point values, to pack the 32-bit values in - r2 and r3 into r2. */ - - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - .global GLOBAL(GCC_shcompact_return_trampoline) - HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) -GLOBAL(GCC_shcompact_return_trampoline): - ptabs/l r18, tr0 -#if __LITTLE_ENDIAN__ - addz.l r2, r63, r2 - shlli r3, 32, r3 -#else - addz.l r3, r63, r3 - shlli r2, 32, r2 -#endif - or r3, r2, r2 - blink tr0, r63 - - ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) -#endif /* L_shcompact_return_trampoline */ - -#ifdef L_shcompact_incoming_args - .section .rodata - .align 1 -LOCAL(ia_main_table): -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) -.word 1 /* Invalid, just loop */ -.word 1 /* Invalid, just loop */ -.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r9_push) - datalabel 
LOCAL(ia_main_label) -.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) -.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) - .mode SHmedia - .section .text..SHmedia32, "ax" - .align 2 - - /* This function stores 64-bit general-purpose registers back in - the stack, and loads the address in which each register - was stored into itself. The lower 32 bits of r17 hold the address - to begin storing, and the upper 32 bits of r17 hold the cookie. - Its execution time is linear on the - number of registers that actually have to be copied, and it is - optimized for structures larger than 64 bits, as opposed to - individual `long long' arguments. See sh.h for details on the - actual bit pattern. */ - - .global GLOBAL(GCC_shcompact_incoming_args) - FUNC(GLOBAL(GCC_shcompact_incoming_args)) -GLOBAL(GCC_shcompact_incoming_args): - ptabs/l r18, tr0 /* Prepare to return. */ - shlri r17, 32, r0 /* Load the cookie. */ - movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 - pt/l LOCAL(ia_loop), tr1 - add.l r17, r63, r17 - shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 -LOCAL(ia_loop): - nsb r0, r36 - shlli r36, 1, r37 - ldx.w r43, r37, r38 -LOCAL(ia_main_label): - ptrel/l r38, tr2 - blink tr2, r63 -LOCAL(ia_r2_ld): /* Store r2 and load its address. */ - movi 3, r38 - shlli r38, 29, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r2 - add.l r17, r63, r2 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r3_ld): /* Store r3 and load its address. */ - movi 3, r38 - shlli r38, 26, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r3 - add.l r17, r63, r3 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r4_ld): /* Store r4 and load its address. */ - movi 3, r38 - shlli r38, 23, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r4 - add.l r17, r63, r4 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r5_ld): /* Store r5 and load its address. */ - movi 3, r38 - shlli r38, 20, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r5 - add.l r17, r63, r5 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r6_ld): /* Store r6 and load its address. */ - movi 3, r38 - shlli r38, 16, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r6 - add.l r17, r63, r6 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r7_ld): /* Store r7 and load its address. */ - movi 3 << 12, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r7 - add.l r17, r63, r7 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r8_ld): /* Store r8 and load its address. */ - movi 3 << 8, r39 - and r0, r39, r40 - andc r0, r39, r0 - stx.q r17, r63, r8 - add.l r17, r63, r8 - addi.l r17, 8, r17 - beq/u r39, r40, tr1 -LOCAL(ia_r9_ld): /* Store r9 and load its address. */ - stx.q r17, r63, r9 - add.l r17, r63, r9 - blink tr0, r63 -LOCAL(ia_r2_push): /* Push r2 onto the stack. */ - movi 1, r38 - shlli r38, 29, r39 - andc r0, r39, r0 - stx.q r17, r63, r2 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r3_push): /* Push r3 onto the stack. */ - movi 1, r38 - shlli r38, 26, r39 - andc r0, r39, r0 - stx.q r17, r63, r3 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r4_push): /* Push r4 onto the stack. */ - movi 1, r38 - shlli r38, 23, r39 - andc r0, r39, r0 - stx.q r17, r63, r4 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r5_push): /* Push r5 onto the stack. 
*/ - movi 1, r38 - shlli r38, 20, r39 - andc r0, r39, r0 - stx.q r17, r63, r5 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r6_push): /* Push r6 onto the stack. */ - movi 1, r38 - shlli r38, 16, r39 - andc r0, r39, r0 - stx.q r17, r63, r6 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r7_push): /* Push r7 onto the stack. */ - movi 1 << 12, r39 - andc r0, r39, r0 - stx.q r17, r63, r7 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_r8_push): /* Push r8 onto the stack. */ - movi 1 << 8, r39 - andc r0, r39, r0 - stx.q r17, r63, r8 - addi.l r17, 8, r17 - blink tr1, r63 -LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ - andi r0, 7 << 1, r38 - movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 - shlli r38, 2, r39 - shori LOCAL(ia_end_of_push_seq) & 65535, r40 - sub.l r40, r39, r41 - ptabs/l r41, tr2 - blink tr2, r63 -LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */ - stx.q r17, r63, r3 - addi.l r17, 8, r17 - stx.q r17, r63, r4 - addi.l r17, 8, r17 - stx.q r17, r63, r5 - addi.l r17, 8, r17 - stx.q r17, r63, r6 - addi.l r17, 8, r17 - stx.q r17, r63, r7 - addi.l r17, 8, r17 - stx.q r17, r63, r8 - addi.l r17, 8, r17 -LOCAL(ia_r9_push): /* Push r9 onto the stack. */ - stx.q r17, r63, r9 -LOCAL(ia_return): /* Return. */ - blink tr0, r63 -LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ - ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) -#endif /* L_shcompact_incoming_args */ -#endif -#if __SH5__ -#ifdef L_nested_trampoline -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif - .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ - .global GLOBAL(GCC_nested_trampoline) - HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) -GLOBAL(GCC_nested_trampoline): - .mode SHmedia - ptrel/u r63, tr0 - gettr tr0, r0 -#if __SH5__ == 64 - ld.q r0, 24, r1 -#else - ld.l r0, 24, r1 -#endif - ptabs/l r1, tr1 -#if __SH5__ == 64 - ld.q r0, 32, r1 -#else - ld.l r0, 28, r1 -#endif - blink tr1, r63 - - ENDFUNC(GLOBAL(GCC_nested_trampoline)) -#endif /* L_nested_trampoline */ -#endif /* __SH5__ */ -#if __SH5__ == 32 -#ifdef L_push_pop_shmedia_regs - .section .text..SHmedia32,"ax" - .mode SHmedia - .align 2 -#ifndef __SH4_NOFPU__ - .global GLOBAL(GCC_push_shmedia_regs) - FUNC(GLOBAL(GCC_push_shmedia_regs)) -GLOBAL(GCC_push_shmedia_regs): - addi.l r15, -14*8, r15 - fst.d r15, 13*8, dr62 - fst.d r15, 12*8, dr60 - fst.d r15, 11*8, dr58 - fst.d r15, 10*8, dr56 - fst.d r15, 9*8, dr54 - fst.d r15, 8*8, dr52 - fst.d r15, 7*8, dr50 - fst.d r15, 6*8, dr48 - fst.d r15, 5*8, dr46 - fst.d r15, 4*8, dr44 - fst.d r15, 3*8, dr42 - fst.d r15, 2*8, dr40 - fst.d r15, 1*8, dr38 - fst.d r15, 0*8, dr36 -#else /* ! __SH4_NOFPU__ */ - .global GLOBAL(GCC_push_shmedia_regs_nofpu) - FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) -GLOBAL(GCC_push_shmedia_regs_nofpu): -#endif /* ! 
__SH4_NOFPU__ */ - ptabs/l r18, tr0 - addi.l r15, -27*8, r15 - gettr tr7, r62 - gettr tr6, r61 - gettr tr5, r60 - st.q r15, 26*8, r62 - st.q r15, 25*8, r61 - st.q r15, 24*8, r60 - st.q r15, 23*8, r59 - st.q r15, 22*8, r58 - st.q r15, 21*8, r57 - st.q r15, 20*8, r56 - st.q r15, 19*8, r55 - st.q r15, 18*8, r54 - st.q r15, 17*8, r53 - st.q r15, 16*8, r52 - st.q r15, 15*8, r51 - st.q r15, 14*8, r50 - st.q r15, 13*8, r49 - st.q r15, 12*8, r48 - st.q r15, 11*8, r47 - st.q r15, 10*8, r46 - st.q r15, 9*8, r45 - st.q r15, 8*8, r44 - st.q r15, 7*8, r35 - st.q r15, 6*8, r34 - st.q r15, 5*8, r33 - st.q r15, 4*8, r32 - st.q r15, 3*8, r31 - st.q r15, 2*8, r30 - st.q r15, 1*8, r29 - st.q r15, 0*8, r28 - blink tr0, r63 -#ifndef __SH4_NOFPU__ - ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) -#else - ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) -#endif -#ifndef __SH4_NOFPU__ - .global GLOBAL(GCC_pop_shmedia_regs) - FUNC(GLOBAL(GCC_pop_shmedia_regs)) -GLOBAL(GCC_pop_shmedia_regs): - pt .L0, tr1 - movi 41*8, r0 - fld.d r15, 40*8, dr62 - fld.d r15, 39*8, dr60 - fld.d r15, 38*8, dr58 - fld.d r15, 37*8, dr56 - fld.d r15, 36*8, dr54 - fld.d r15, 35*8, dr52 - fld.d r15, 34*8, dr50 - fld.d r15, 33*8, dr48 - fld.d r15, 32*8, dr46 - fld.d r15, 31*8, dr44 - fld.d r15, 30*8, dr42 - fld.d r15, 29*8, dr40 - fld.d r15, 28*8, dr38 - fld.d r15, 27*8, dr36 - blink tr1, r63 -#else /* ! __SH4_NOFPU__ */ - .global GLOBAL(GCC_pop_shmedia_regs_nofpu) - FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) -GLOBAL(GCC_pop_shmedia_regs_nofpu): -#endif /* ! __SH4_NOFPU__ */ - movi 27*8, r0 -.L0: - ptabs r18, tr0 - ld.q r15, 26*8, r62 - ld.q r15, 25*8, r61 - ld.q r15, 24*8, r60 - ptabs r62, tr7 - ptabs r61, tr6 - ptabs r60, tr5 - ld.q r15, 23*8, r59 - ld.q r15, 22*8, r58 - ld.q r15, 21*8, r57 - ld.q r15, 20*8, r56 - ld.q r15, 19*8, r55 - ld.q r15, 18*8, r54 - ld.q r15, 17*8, r53 - ld.q r15, 16*8, r52 - ld.q r15, 15*8, r51 - ld.q r15, 14*8, r50 - ld.q r15, 13*8, r49 - ld.q r15, 12*8, r48 - ld.q r15, 11*8, r47 - ld.q r15, 10*8, r46 - ld.q r15, 9*8, r45 - ld.q r15, 8*8, r44 - ld.q r15, 7*8, r35 - ld.q r15, 6*8, r34 - ld.q r15, 5*8, r33 - ld.q r15, 4*8, r32 - ld.q r15, 3*8, r31 - ld.q r15, 2*8, r30 - ld.q r15, 1*8, r29 - ld.q r15, 0*8, r28 - add.l r15, r0, r15 - blink tr0, r63 - -#ifndef __SH4_NOFPU__ - ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) -#else - ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) -#endif -#endif /* __SH5__ == 32 */ -#endif /* L_push_pop_shmedia_regs */ - -#ifdef L_div_table -#if __SH5__ -#if defined(__pic__) && defined(__SHMEDIA__) - .global GLOBAL(sdivsi3) - FUNC(GLOBAL(sdivsi3)) -#if __SH5__ == 32 - .section .text..SHmedia32,"ax" -#else - .text -#endif -#if 0 -/* ??? FIXME: Presumably due to a linker bug, exporting data symbols - in a text section does not work (at least for shared libraries): - the linker sets the LSB of the address as if this was SHmedia code. 
*/ -#define TEXT_DATA_BUG -#endif - .align 2 - // inputs: r4,r5 - // clobbered: r1,r18,r19,r20,r21,r25,tr0 - // result in r0 - .global GLOBAL(sdivsi3) -GLOBAL(sdivsi3): -#ifdef TEXT_DATA_BUG - ptb datalabel Local_div_table,tr0 -#else - ptb GLOBAL(div_table_internal),tr0 -#endif - nsb r5, r1 - shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 - shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) - /* bubble */ - gettr tr0,r20 - ldx.ub r20, r21, r19 // u0.8 - shari r25, 32, r25 // normalize to s2.30 - shlli r21, 1, r21 - muls.l r25, r19, r19 // s2.38 - ldx.w r20, r21, r21 // s2.14 - ptabs r18, tr0 - shari r19, 24, r19 // truncate to s2.14 - sub r21, r19, r19 // some 11 bit inverse in s1.14 - muls.l r19, r19, r21 // u0.28 - sub r63, r1, r1 - addi r1, 92, r1 - muls.l r25, r21, r18 // s2.58 - shlli r19, 45, r19 // multiply by two and convert to s2.58 - /* bubble */ - sub r19, r18, r18 - shari r18, 28, r18 // some 22 bit inverse in s1.30 - muls.l r18, r25, r0 // s2.60 - muls.l r18, r4, r25 // s32.30 - /* bubble */ - shari r0, 16, r19 // s-16.44 - muls.l r19, r18, r19 // s-16.74 - shari r25, 63, r0 - shari r4, 14, r18 // s19.-14 - shari r19, 30, r19 // s-16.44 - muls.l r19, r18, r19 // s15.30 - xor r21, r0, r21 // You could also use the constant 1 << 27. - add r21, r25, r21 - sub r21, r19, r21 - shard r21, r1, r21 - sub r21, r0, r0 - blink tr0, r63 - ENDFUNC(GLOBAL(sdivsi3)) -/* This table has been generated by divtab.c . -Defects for bias -330: - Max defect: 6.081536e-07 at -1.000000e+00 - Min defect: 2.849516e-08 at 1.030651e+00 - Max 2nd step defect: 9.606539e-12 at -1.000000e+00 - Min 2nd step defect: 0.000000e+00 at 0.000000e+00 - Defect at 1: 1.238659e-07 - Defect at -2: 1.061708e-07 */ -#else /* ! __pic__ || ! __SHMEDIA__ */ - .section .rodata -#endif /* __pic__ */ -#if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__) - .balign 2 - .type Local_div_table,@object - .size Local_div_table,128 -/* negative division constants */ - .word -16638 - .word -17135 - .word -17737 - .word -18433 - .word -19103 - .word -19751 - .word -20583 - .word -21383 - .word -22343 - .word -23353 - .word -24407 - .word -25582 - .word -26863 - .word -28382 - .word -29965 - .word -31800 -/* negative division factors */ - .byte 66 - .byte 70 - .byte 75 - .byte 81 - .byte 87 - .byte 93 - .byte 101 - .byte 109 - .byte 119 - .byte 130 - .byte 142 - .byte 156 - .byte 172 - .byte 192 - .byte 214 - .byte 241 - .skip 16 -Local_div_table: - .skip 16 -/* positive division factors */ - .byte 241 - .byte 214 - .byte 192 - .byte 172 - .byte 156 - .byte 142 - .byte 130 - .byte 119 - .byte 109 - .byte 101 - .byte 93 - .byte 87 - .byte 81 - .byte 75 - .byte 70 - .byte 66 -/* positive division constants */ - .word 31801 - .word 29966 - .word 28383 - .word 26864 - .word 25583 - .word 24408 - .word 23354 - .word 22344 - .word 21384 - .word 20584 - .word 19752 - .word 19104 - .word 18434 - .word 17738 - .word 17136 - .word 16639 - .section .rodata -#endif /* TEXT_DATA_BUG */ - .balign 2 - .type GLOBAL(div_table),@object - .size GLOBAL(div_table),128 -/* negative division constants */ - .word -16638 - .word -17135 - .word -17737 - .word -18433 - .word -19103 - .word -19751 - .word -20583 - .word -21383 - .word -22343 - .word -23353 - .word -24407 - .word -25582 - .word -26863 - .word -28382 - .word -29965 - .word -31800 -/* negative division factors */ - .byte 66 - .byte 70 - .byte 75 - .byte 81 - .byte 87 - .byte 93 - .byte 101 - .byte 109 - .byte 119 - .byte 130 - .byte 142 - .byte 156 - .byte 172 
- .byte 192 - .byte 214 - .byte 241 - .skip 16 - .global GLOBAL(div_table) -GLOBAL(div_table): - HIDDEN_ALIAS(div_table_internal,div_table) - .skip 16 -/* positive division factors */ - .byte 241 - .byte 214 - .byte 192 - .byte 172 - .byte 156 - .byte 142 - .byte 130 - .byte 119 - .byte 109 - .byte 101 - .byte 93 - .byte 87 - .byte 81 - .byte 75 - .byte 70 - .byte 66 -/* positive division constants */ - .word 31801 - .word 29966 - .word 28383 - .word 26864 - .word 25583 - .word 24408 - .word 23354 - .word 22344 - .word 21384 - .word 20584 - .word 19752 - .word 19104 - .word 18434 - .word 17738 - .word 17136 - .word 16639 - -#elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) -/* This code used shld, thus is not suitable for SH1 / SH2. */ - -/* Signed / unsigned division without use of FPU, optimized for SH4. - Uses a lookup table for divisors in the range -128 .. +128, and - div1 with case distinction for larger divisors in three more ranges. - The code is lumped together with the table to allow the use of mova. */ -#ifdef __LITTLE_ENDIAN__ -#define L_LSB 0 -#define L_LSWMSB 1 -#define L_MSWLSB 2 -#else -#define L_LSB 3 -#define L_LSWMSB 2 -#define L_MSWLSB 1 -#endif - - .balign 4 - .global GLOBAL(udivsi3_i4i) - FUNC(GLOBAL(udivsi3_i4i)) -GLOBAL(udivsi3_i4i): - mov.w LOCAL(c128_w), r1 - div0u - mov r4,r0 - shlr8 r0 - cmp/hi r1,r5 - extu.w r5,r1 - bf LOCAL(udiv_le128) - cmp/eq r5,r1 - bf LOCAL(udiv_ge64k) - shlr r0 - mov r5,r1 - shll16 r5 - mov.l r4,@-r15 - div1 r5,r0 - mov.l r1,@-r15 - div1 r5,r0 - div1 r5,r0 - bra LOCAL(udiv_25) - div1 r5,r0 - -LOCAL(div_le128): - mova LOCAL(div_table_ix),r0 - bra LOCAL(div_le128_2) - mov.b @(r0,r5),r1 -LOCAL(udiv_le128): - mov.l r4,@-r15 - mova LOCAL(div_table_ix),r0 - mov.b @(r0,r5),r1 - mov.l r5,@-r15 -LOCAL(div_le128_2): - mova LOCAL(div_table_inv),r0 - mov.l @(r0,r1),r1 - mov r5,r0 - tst #0xfe,r0 - mova LOCAL(div_table_clz),r0 - dmulu.l r1,r4 - mov.b @(r0,r5),r1 - bt/s LOCAL(div_by_1) - mov r4,r0 - mov.l @r15+,r5 - sts mach,r0 - /* clrt */ - addc r4,r0 - mov.l @r15+,r4 - rotcr r0 - rts - shld r1,r0 - -LOCAL(div_by_1_neg): - neg r4,r0 -LOCAL(div_by_1): - mov.l @r15+,r5 - rts - mov.l @r15+,r4 - -LOCAL(div_ge64k): - bt/s LOCAL(div_r8) - div0u - shll8 r5 - bra LOCAL(div_ge64k_2) - div1 r5,r0 -LOCAL(udiv_ge64k): - cmp/hi r0,r5 - mov r5,r1 - bt LOCAL(udiv_r8) - shll8 r5 - mov.l r4,@-r15 - div1 r5,r0 - mov.l r1,@-r15 -LOCAL(div_ge64k_2): - div1 r5,r0 - mov.l LOCAL(zero_l),r1 - .rept 4 - div1 r5,r0 - .endr - mov.l r1,@-r15 - div1 r5,r0 - mov.w LOCAL(m256_w),r1 - div1 r5,r0 - mov.b r0,@(L_LSWMSB,r15) - xor r4,r0 - and r1,r0 - bra LOCAL(div_ge64k_end) - xor r4,r0 - -LOCAL(div_r8): - shll16 r4 - bra LOCAL(div_r8_2) - shll8 r4 -LOCAL(udiv_r8): - mov.l r4,@-r15 - shll16 r4 - clrt - shll8 r4 - mov.l r5,@-r15 -LOCAL(div_r8_2): - rotcl r4 - mov r0,r1 - div1 r5,r1 - mov r4,r0 - rotcl r0 - mov r5,r4 - div1 r5,r1 - .rept 5 - rotcl r0; div1 r5,r1 - .endr - rotcl r0 - mov.l @r15+,r5 - div1 r4,r1 - mov.l @r15+,r4 - rts - rotcl r0 - - ENDFUNC(GLOBAL(udivsi3_i4i)) - - .global GLOBAL(sdivsi3_i4i) - FUNC(GLOBAL(sdivsi3_i4i)) - /* This is link-compatible with a GLOBAL(sdivsi3) call, - but we effectively clobber only r1. 
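The SHmedia sdivsi3 earlier in this hunk divides by refining a table-seeded reciprocal approximation of the normalized divisor; the SH4 udivsi3_i4i just above does something similar for divisors up to 128, using the tables generated by divtab-sh4.c, and falls back to div1 steps for larger divisors. The core trick, multiply by a scaled reciprocal and then correct the estimate, in rough C; the table contents and fixed-point formats here are not the ones in the file, the reciprocal is simply computed on the fly for illustration:

#include <stdint.h>

/* Illustrative only: the real routines use hand-generated tables
   (divtab.c / divtab-sh4.c) and fixed-point refinement instead of a
   64-bit multiply.  */
static uint32_t
udiv_by_reciprocal (uint32_t n, uint32_t d)   /* assumes 1 <= d <= 128 */
{
  /* recip = ceil(2^32 / d); in the real code this comes from the table.  */
  uint64_t recip = (((uint64_t) 1 << 32) + d - 1) / d;

  /* Quotient estimate; it can overshoot floor(n / d) by at most 1.  */
  uint32_t q = (uint32_t) (((uint64_t) n * recip) >> 32);

  if ((uint64_t) q * d > n)   /* one correction step makes it exact */
    q--;
  return q;
}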
*/ -GLOBAL(sdivsi3_i4i): - mov.l r4,@-r15 - cmp/pz r5 - mov.w LOCAL(c128_w), r1 - bt/s LOCAL(pos_divisor) - cmp/pz r4 - mov.l r5,@-r15 - neg r5,r5 - bt/s LOCAL(neg_result) - cmp/hi r1,r5 - neg r4,r4 -LOCAL(pos_result): - extu.w r5,r0 - bf LOCAL(div_le128) - cmp/eq r5,r0 - mov r4,r0 - shlr8 r0 - bf/s LOCAL(div_ge64k) - cmp/hi r0,r5 - div0u - shll16 r5 - div1 r5,r0 - div1 r5,r0 - div1 r5,r0 -LOCAL(udiv_25): - mov.l LOCAL(zero_l),r1 - div1 r5,r0 - div1 r5,r0 - mov.l r1,@-r15 - .rept 3 - div1 r5,r0 - .endr - mov.b r0,@(L_MSWLSB,r15) - xtrct r4,r0 - swap.w r0,r0 - .rept 8 - div1 r5,r0 - .endr - mov.b r0,@(L_LSWMSB,r15) -LOCAL(div_ge64k_end): - .rept 8 - div1 r5,r0 - .endr - mov.l @r15+,r4 ! zero-extension and swap using LS unit. - extu.b r0,r0 - mov.l @r15+,r5 - or r4,r0 - mov.l @r15+,r4 - rts - rotcl r0 - -LOCAL(div_le128_neg): - tst #0xfe,r0 - mova LOCAL(div_table_ix),r0 - mov.b @(r0,r5),r1 - mova LOCAL(div_table_inv),r0 - bt/s LOCAL(div_by_1_neg) - mov.l @(r0,r1),r1 - mova LOCAL(div_table_clz),r0 - dmulu.l r1,r4 - mov.b @(r0,r5),r1 - mov.l @r15+,r5 - sts mach,r0 - /* clrt */ - addc r4,r0 - mov.l @r15+,r4 - rotcr r0 - shld r1,r0 - rts - neg r0,r0 - -LOCAL(pos_divisor): - mov.l r5,@-r15 - bt/s LOCAL(pos_result) - cmp/hi r1,r5 - neg r4,r4 -LOCAL(neg_result): - extu.w r5,r0 - bf LOCAL(div_le128_neg) - cmp/eq r5,r0 - mov r4,r0 - shlr8 r0 - bf/s LOCAL(div_ge64k_neg) - cmp/hi r0,r5 - div0u - mov.l LOCAL(zero_l),r1 - shll16 r5 - div1 r5,r0 - mov.l r1,@-r15 - .rept 7 - div1 r5,r0 - .endr - mov.b r0,@(L_MSWLSB,r15) - xtrct r4,r0 - swap.w r0,r0 - .rept 8 - div1 r5,r0 - .endr - mov.b r0,@(L_LSWMSB,r15) -LOCAL(div_ge64k_neg_end): - .rept 8 - div1 r5,r0 - .endr - mov.l @r15+,r4 ! zero-extension and swap using LS unit. - extu.b r0,r1 - mov.l @r15+,r5 - or r4,r1 -LOCAL(div_r8_neg_end): - mov.l @r15+,r4 - rotcl r1 - rts - neg r1,r0 - -LOCAL(div_ge64k_neg): - bt/s LOCAL(div_r8_neg) - div0u - shll8 r5 - mov.l LOCAL(zero_l),r1 - .rept 6 - div1 r5,r0 - .endr - mov.l r1,@-r15 - div1 r5,r0 - mov.w LOCAL(m256_w),r1 - div1 r5,r0 - mov.b r0,@(L_LSWMSB,r15) - xor r4,r0 - and r1,r0 - bra LOCAL(div_ge64k_neg_end) - xor r4,r0 - -LOCAL(c128_w): - .word 128 - -LOCAL(div_r8_neg): - clrt - shll16 r4 - mov r4,r1 - shll8 r1 - mov r5,r4 - .rept 7 - rotcl r1; div1 r5,r0 - .endr - mov.l @r15+,r5 - rotcl r1 - bra LOCAL(div_r8_neg_end) - div1 r4,r0 - -LOCAL(m256_w): - .word 0xff00 -/* This table has been generated by divtab-sh4.c. 
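The signed entry point above (sdivsi3_i4i, like the SHmedia sdivsi3 before it) deals with signs by negating negative operands, dividing as unsigned, and negating the quotient when exactly one operand was negative. The same structure in C, with udiv32 as a placeholder name for whichever unsigned path is taken:

#include <stdint.h>

extern uint32_t udiv32 (uint32_t n, uint32_t d);  /* placeholder name */

int32_t
sdiv32_model (int32_t n, int32_t d)
{
  int negate = (n < 0) != (d < 0);                 /* sign of the result */
  uint32_t un = n < 0 ? 0u - (uint32_t) n : (uint32_t) n;
  uint32_t ud = d < 0 ? 0u - (uint32_t) d : (uint32_t) d;
  uint32_t q = udiv32 (un, ud);

  /* Negate in unsigned arithmetic; like libgcc itself, this leans on
     two's-complement behaviour for the INT_MIN corner case.  */
  return (int32_t) (negate ? 0u - q : q);
}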
*/ - .balign 4 -LOCAL(div_table_clz): - .byte 0 - .byte 1 - .byte 0 - .byte -1 - .byte -1 - .byte -2 - .byte -2 - .byte -2 - .byte -2 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -3 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -4 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -5 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 - .byte -6 -/* Lookup table translating positive divisor to index into table of - normalized inverse. N.B. the '0' entry is also the last entry of the - previous table, and causes an unaligned access for division by zero. */ -LOCAL(div_table_ix): - .byte -6 - .byte -128 - .byte -128 - .byte 0 - .byte -128 - .byte -64 - .byte 0 - .byte 64 - .byte -128 - .byte -96 - .byte -64 - .byte -32 - .byte 0 - .byte 32 - .byte 64 - .byte 96 - .byte -128 - .byte -112 - .byte -96 - .byte -80 - .byte -64 - .byte -48 - .byte -32 - .byte -16 - .byte 0 - .byte 16 - .byte 32 - .byte 48 - .byte 64 - .byte 80 - .byte 96 - .byte 112 - .byte -128 - .byte -120 - .byte -112 - .byte -104 - .byte -96 - .byte -88 - .byte -80 - .byte -72 - .byte -64 - .byte -56 - .byte -48 - .byte -40 - .byte -32 - .byte -24 - .byte -16 - .byte -8 - .byte 0 - .byte 8 - .byte 16 - .byte 24 - .byte 32 - .byte 40 - .byte 48 - .byte 56 - .byte 64 - .byte 72 - .byte 80 - .byte 88 - .byte 96 - .byte 104 - .byte 112 - .byte 120 - .byte -128 - .byte -124 - .byte -120 - .byte -116 - .byte -112 - .byte -108 - .byte -104 - .byte -100 - .byte -96 - .byte -92 - .byte -88 - .byte -84 - .byte -80 - .byte -76 - .byte -72 - .byte -68 - .byte -64 - .byte -60 - .byte -56 - .byte -52 - .byte -48 - .byte -44 - .byte -40 - .byte -36 - .byte -32 - .byte -28 - .byte -24 - .byte -20 - .byte -16 - .byte -12 - .byte -8 - .byte -4 - .byte 0 - .byte 4 - .byte 8 - .byte 12 - .byte 16 - .byte 20 - .byte 24 - .byte 28 - .byte 32 - .byte 36 - .byte 40 - .byte 44 - .byte 48 - .byte 52 - .byte 56 - .byte 60 - .byte 64 - .byte 68 - .byte 72 - .byte 76 - .byte 80 - .byte 84 - .byte 88 - .byte 92 - .byte 96 - .byte 100 - .byte 104 - .byte 108 - .byte 112 - .byte 116 - .byte 120 - .byte 124 - .byte -128 -/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. 
*/ - .balign 4 -LOCAL(zero_l): - .long 0x0 - .long 0xF81F81F9 - .long 0xF07C1F08 - .long 0xE9131AC0 - .long 0xE1E1E1E2 - .long 0xDAE6076C - .long 0xD41D41D5 - .long 0xCD856891 - .long 0xC71C71C8 - .long 0xC0E07039 - .long 0xBACF914D - .long 0xB4E81B4F - .long 0xAF286BCB - .long 0xA98EF607 - .long 0xA41A41A5 - .long 0x9EC8E952 - .long 0x9999999A - .long 0x948B0FCE - .long 0x8F9C18FA - .long 0x8ACB90F7 - .long 0x86186187 - .long 0x81818182 - .long 0x7D05F418 - .long 0x78A4C818 - .long 0x745D1746 - .long 0x702E05C1 - .long 0x6C16C16D - .long 0x68168169 - .long 0x642C8591 - .long 0x60581606 - .long 0x5C9882BA - .long 0x58ED2309 -LOCAL(div_table_inv): - .long 0x55555556 - .long 0x51D07EAF - .long 0x4E5E0A73 - .long 0x4AFD6A06 - .long 0x47AE147B - .long 0x446F8657 - .long 0x41414142 - .long 0x3E22CBCF - .long 0x3B13B13C - .long 0x38138139 - .long 0x3521CFB3 - .long 0x323E34A3 - .long 0x2F684BDB - .long 0x2C9FB4D9 - .long 0x29E4129F - .long 0x27350B89 - .long 0x24924925 - .long 0x21FB7813 - .long 0x1F7047DD - .long 0x1CF06ADB - .long 0x1A7B9612 - .long 0x18118119 - .long 0x15B1E5F8 - .long 0x135C8114 - .long 0x11111112 - .long 0xECF56BF - .long 0xC9714FC - .long 0xA6810A7 - .long 0x8421085 - .long 0x624DD30 - .long 0x4104105 - .long 0x2040811 - /* maximum error: 0.987342 scaled: 0.921875*/ - - ENDFUNC(GLOBAL(sdivsi3_i4i)) -#endif /* SH3 / SH4 */ - -#endif /* L_div_table */ - -#ifdef L_udiv_qrnnd_16 -#if !__SHMEDIA__ - HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) - /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ - /* n1 < d, but n1 might be larger than d1. */ - .global GLOBAL(udiv_qrnnd_16) - .balign 8 -GLOBAL(udiv_qrnnd_16): - div0u - cmp/hi r6,r0 - bt .Lots - .rept 16 - div1 r6,r0 - .endr - extu.w r0,r1 - bt 0f - add r6,r0 -0: rotcl r1 - mulu.w r1,r5 - xtrct r4,r0 - swap.w r0,r0 - sts macl,r2 - cmp/hs r2,r0 - sub r2,r0 - bt 0f - addc r5,r0 - add #-1,r1 - bt 0f -1: add #-1,r1 - rts - add r5,r0 - .balign 8 -.Lots: - sub r5,r0 - swap.w r4,r1 - xtrct r0,r1 - clrt - mov r1,r0 - addc r5,r0 - mov #-1,r1 - SL1(bf, 1b, - shlr16 r1) -0: rts - nop - ENDFUNC(GLOBAL(udiv_qrnnd_16)) -#endif /* !__SHMEDIA__ */ -#endif /* L_udiv_qrnnd_16 */ diff --git a/gcc/config/sh/lib1funcs.h b/gcc/config/sh/lib1funcs.h deleted file mode 100644 index af4b41cc314..00000000000 --- a/gcc/config/sh/lib1funcs.h +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - 2004, 2005, 2006, 2009 - Free Software Foundation, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. 
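The udiv_qrnnd_16 routine near the end of the deleted lib1funcs.asm (just before the lib1funcs.h diff above begins) serves as a building block for the two-word-by-one-word division that longlong.h's udiv_qrnnd expects: the high word of the numerator is already smaller than the divisor, so the quotient fits in one word. What that operation computes, as plain C, is a model of the semantics only, not of the 16-bits-at-a-time implementation:

#include <stdint.h>

/* q = (n1:n0) / d, r = (n1:n0) % d, assuming n1 < d so that the
   quotient fits in 32 bits.  */
static void
udiv_qrnnd_model (uint32_t *q, uint32_t *r,
                  uint32_t n1, uint32_t n0, uint32_t d)
{
  uint64_t n = ((uint64_t) n1 << 32) | n0;
  *q = (uint32_t) (n / d);
  *r = (uint32_t) (n % d);
}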
*/ - -#ifdef __ELF__ -#define LOCAL(X) .L_##X -#define FUNC(X) .type X,@function -#define HIDDEN_FUNC(X) FUNC(X); .hidden X -#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y); .hidden GLOBAL(X) -#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X -#define ENDFUNC(X) ENDFUNC0(X) -#else -#define LOCAL(X) L_##X -#define FUNC(X) -#define HIDDEN_FUNC(X) -#define HIDDEN_ALIAS(X,Y) ALIAS (X,Y) -#define ENDFUNC(X) -#endif - -#define CONCAT(A,B) A##B -#define GLOBAL0(U,X) CONCAT(U,__##X) -#define GLOBAL(X) GLOBAL0(__USER_LABEL_PREFIX__,X) - -#define ALIAS(X,Y) .global GLOBAL(X); .set GLOBAL(X),GLOBAL(Y) - -#if defined __SH2A__ && defined __FMOVD_ENABLED__ -#undef FMOVD_WORKS -#define FMOVD_WORKS -#endif - -#ifdef __LITTLE_ENDIAN__ -#define DR00 fr1 -#define DR01 fr0 -#define DR20 fr3 -#define DR21 fr2 -#define DR40 fr5 -#define DR41 fr4 -#else /* !__LITTLE_ENDIAN__ */ -#define DR00 fr0 -#define DR01 fr1 -#define DR20 fr2 -#define DR21 fr3 -#define DR40 fr4 -#define DR41 fr5 -#endif /* !__LITTLE_ENDIAN__ */ - -#ifdef __sh1__ -#define SL(branch, dest, in_slot, in_slot_arg2) \ - in_slot, in_slot_arg2; branch dest -#define SL1(branch, dest, in_slot) \ - in_slot; branch dest -#else /* ! __sh1__ */ -#define SL(branch, dest, in_slot, in_slot_arg2) \ - branch##.s dest; in_slot, in_slot_arg2 -#define SL1(branch, dest, in_slot) \ - branch##/s dest; in_slot -#endif /* !__sh1__ */ diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index 1e654801334..cc26e05a764 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -1983,7 +1983,7 @@ struct sh_args { that the native compiler puts too large (> 32) immediate shift counts into a register and shifts by the register, letting the SH decide what to do instead of doing that itself. */ -/* ??? The library routines in lib1funcs.asm truncate the shift count. +/* ??? The library routines in lib1funcs.S truncate the shift count. However, the SH3 has hardware shifts that do not truncate exactly as gcc expects - the sign bit is significant - so it appears that we need to leave this zero for correct SH3 code. */ diff --git a/gcc/config/sh/t-linux b/gcc/config/sh/t-linux index a5c711618c6..2304fb176cb 100644 --- a/gcc/config/sh/t-linux +++ b/gcc/config/sh/t-linux @@ -1,5 +1,3 @@ -LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array - LIB2FUNCS_EXTRA= $(srcdir)/config/sh/linux-atomic.asm MULTILIB_DIRNAMES= diff --git a/gcc/config/sh/t-netbsd b/gcc/config/sh/t-netbsd index de172d3f73f..dea1c478cb5 100644 --- a/gcc/config/sh/t-netbsd +++ b/gcc/config/sh/t-netbsd @@ -17,6 +17,5 @@ # <http://www.gnu.org/licenses/>. TARGET_LIBGCC2_CFLAGS = -fpic -mieee -LIB1ASMFUNCS_CACHE = _ic_invalidate LIB2FUNCS_EXTRA= diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh index 6eaf784e8ae..56ea83e0697 100644 --- a/gcc/config/sh/t-sh +++ b/gcc/config/sh/t-sh @@ -22,13 +22,6 @@ sh-c.o: $(srcdir)/config/sh/sh-c.c \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/sh/sh-c.c -LIB1ASMSRC = sh/lib1funcs.asm -LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movmem \ - _movmem_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ - _div_table _udiv_qrnnd_16 \ - $(LIB1ASMFUNCS_CACHE) -LIB1ASMFUNCS_CACHE = _ic_invalidate _ic_invalidate_array - TARGET_LIBGCC2_CFLAGS = -mieee DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG)) diff --git a/gcc/config/sh/t-sh64 b/gcc/config/sh/t-sh64 index d88f929fd7a..3bd9205079b 100644 --- a/gcc/config/sh/t-sh64 +++ b/gcc/config/sh/t-sh64 @@ -1,4 +1,4 @@ -# Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. 
+# Copyright (C) 2002, 2004, 2005, 2011 Free Software Foundation, Inc. # # This file is part of GCC. # @@ -16,13 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMFUNCS = \ - _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \ - _shcompact_call_trampoline _shcompact_return_trampoline \ - _shcompact_incoming_args _ic_invalidate _nested_trampoline \ - _push_pop_shmedia_regs \ - _udivdi3 _divdi3 _umoddi3 _moddi3 _div_table - MULTILIB_CPU_DIRS= $(ML_sh1) $(ML_sh2e) $(ML_sh2) $(ML_sh3e) $(ML_sh3) $(ML_sh4_nofpu) $(ML_sh4_single_only) $(ML_sh4_single) $(ML_sh4) $(ML_sh5_32media:m5-32media/=media32) $(ML_sh5_32media_nofpu:m5-32media-nofpu/=nofpu/media32) $(ML_sh5_compact:m5-compact/=compact) $(ML_sh5_compact_nofpu:m5-compact-nofpu/=nofpu/compact) $(ML_sh5_64media:m5-64media/=media64) $(ML_sh5_64media_nofpu:m5-64media-nofpu/=nofpu/media64) MULTILIB_RAW_DIRNAMES= $(MULTILIB_ENDIAN:/mb= mb) $(MULTILIB_CPU_DIRS:/=) diff --git a/gcc/config/sparc/lb1spc.asm b/gcc/config/sparc/lb1spc.asm deleted file mode 100644 index b60bd5740e7..00000000000 --- a/gcc/config/sparc/lb1spc.asm +++ /dev/null @@ -1,784 +0,0 @@ -/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 - for the sparc processor. - - These routines are derived from the SPARC Architecture Manual, version 8, - slightly edited to match the desired calling convention, and also to - optimize them for our purposes. */ - -#ifdef L_mulsi3 -.text - .align 4 - .global .umul - .proc 4 -.umul: - or %o0, %o1, %o4 ! logical or of multiplier and multiplicand - mov %o0, %y ! multiplier to Y register - andncc %o4, 0xfff, %o5 ! mask out lower 12 bits - be mul_shortway ! can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear NV cc - ! - ! long multiply - ! - mulscc %o4, %o1, %o4 ! first iteration of 33 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 ! 32nd iteration - mulscc %o4, %g0, %o4 ! last iteration only shifts - ! the upper 32 bits of product are wrong, but we do not care - retl - rd %y, %o0 - ! - ! short multiply - ! -mul_shortway: - mulscc %o4, %o1, %o4 ! first iteration of 13 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 ! 12th iteration - mulscc %o4, %g0, %o4 ! last iteration only shifts - rd %y, %o5 - sll %o4, 12, %o4 ! left shift partial product by 12 bits - srl %o5, 20, %o5 ! right shift partial product by 20 bits - retl - or %o5, %o4, %o0 ! merge for true product -#endif - -#ifdef L_divsi3 -/* - * Division and remainder, from Appendix E of the SPARC Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. 
- * - * m4 parameters: - * .div name of function to generate - * div div=div => %o0 / %o1; div=rem => %o0 % %o1 - * true true=true => signed; true=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - .global .udiv - .align 4 - .proc 4 - .text -.udiv: - b ready_to_divide - mov 0, %g3 ! result is always positive - - .global .div - .align 4 - .proc 4 - .text -.div: - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge ready_to_divide ! no, go do the divide - xor %o1, %o0, %g3 ! compute sign in any case - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge ready_to_divide ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative - - -ready_to_divide: - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta 0x2 ! ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu got_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu not_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g2 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g2. - 2: addcc %o5, %o5, %o5 - bcc not_too_big - add %g2, 1, %g2 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b do_single_div - sub %g2, 1, %g2 - - not_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be do_single_div - nop - /* NB: these are commented out in the V8-SPARC manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g2 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! 
So we unroll slightly... - do_single_div: - subcc %g2, 1, %g2 - bl end_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b end_single_divloop - nop - single_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - end_single_divloop: - subcc %g2, 1, %g2 - bge single_divloop - tst %o3 - b,a end_regular_divide - -not_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be got_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -divloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - - -L3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L4.9: - ! 
remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -end_regular_divide: - subcc %o4, 1, %o4 - bge divloop - tst %o3 - bl,a got_result - ! non-restoring fixup here (one instruction only!) - sub %o2, 1, %o2 - - -got_result: - ! check to see if answer should be < 0 - tst %g3 - bl,a 1f - sub %g0, %o2, %o2 -1: - retl - mov %o2, %o0 -#endif - -#ifdef L_modsi3 -/* This implementation was taken from glibc: - * - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ -.text - .align 4 - .global .urem - .proc 4 -.urem: - b divide - mov 0, %g3 ! result always positive - - .align 4 - .global .rem - .proc 4 -.rem: - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge 2f ! no, go do the divide - mov %o0, %g3 ! sign of remainder matches %o0 - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge 2f ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative -2: - - ! Ready to divide. Compute size of quotient; scale comparand. -divide: - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta 0x2 !ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu got_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu not_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g2 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g2. - 2: addcc %o5, %o5, %o5 - bcc not_too_big - add %g2, 1, %g2 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b do_single_div - sub %g2, 1, %g2 - - not_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be do_single_div - nop - /* NB: these are commented out in the V8-SPARC manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g2 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! 
first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - do_single_div: - subcc %g2, 1, %g2 - bl end_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b end_single_divloop - nop - single_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - end_single_divloop: - subcc %g2, 1, %g2 - bge single_divloop - tst %o3 - b,a end_regular_divide - -not_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be got_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -divloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 -L4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L3.13: - ! 
remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -end_regular_divide: - subcc %o4, 1, %o4 - bge divloop - tst %o3 - bl,a got_result - ! non-restoring fixup here (one instruction only!) - add %o3, %o1, %o3 - -got_result: - ! check to see if answer should be < 0 - tst %g3 - bl,a 1f - sub %g0, %o3, %o3 -1: - retl - mov %o3, %o0 - -#endif - diff --git a/gcc/config/sparc/lb1spl.asm b/gcc/config/sparc/lb1spl.asm deleted file mode 100644 index 973401f8018..00000000000 --- a/gcc/config/sparc/lb1spl.asm +++ /dev/null @@ -1,246 +0,0 @@ -/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 - for the sparclite processor. - - These routines are all from the SPARClite User's Guide, slightly edited - to match the desired calling convention, and also to optimize them. */ - -#ifdef L_udivsi3 -.text - .align 4 - .global .udiv - .proc 04 -.udiv: - wr %g0,%g0,%y ! Not a delayed write for sparclite - tst %g0 - divscc %o0,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - retl - divscc %g1,%o1,%o0 -#endif - -#ifdef L_umodsi3 -.text - .align 4 - .global .urem - .proc 04 -.urem: - wr %g0,%g0,%y ! Not a delayed write for sparclite - tst %g0 - divscc %o0,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - bl 1f - rd %y,%o0 - retl - nop -1: retl - add %o0,%o1,%o0 -#endif - -#ifdef L_divsi3 -.text - .align 4 - .global .div - .proc 04 -! ??? This routine could be made faster if was optimized, and if it was -! rewritten to only calculate the quotient. -.div: - wr %g0,%g0,%y ! 
Not a delayed write for sparclite - mov %o1,%o4 - tst %o1 - bl,a 1f - sub %g0,%o4,%o4 -1: tst %o0 - bl,a 2f - mov -1,%y -2: divscc %o0,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - be 6f - mov %y,%o3 - bg 4f - addcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bl 5f - tst %g1 - ba 5f - add %o3,%o4,%o3 -4: subcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bge 5f - tst %g1 - sub %o3,%o4,%o3 -5: bl,a 6f - add %g1,1,%g1 -6: tst %o1 - bl,a 7f - sub %g0,%g1,%g1 -7: retl - mov %g1,%o0 ! Quotient is in %g1. -#endif - -#ifdef L_modsi3 -.text - .align 4 - .global .rem - .proc 04 -! ??? This routine could be made faster if was optimized, and if it was -! rewritten to only calculate the remainder. -.rem: - wr %g0,%g0,%y ! Not a delayed write for sparclite - mov %o1,%o4 - tst %o1 - bl,a 1f - sub %g0,%o4,%o4 -1: tst %o0 - bl,a 2f - mov -1,%y -2: divscc %o0,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - be 6f - mov %y,%o3 - bg 4f - addcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bl 5f - tst %g1 - ba 5f - add %o3,%o4,%o3 -4: subcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bge 5f - tst %g1 - sub %o3,%o4,%o3 -5: bl,a 6f - add %g1,1,%g1 -6: tst %o1 - bl,a 7f - sub %g0,%g1,%g1 -7: retl - mov %o3,%o0 ! Remainder is in %o3. -#endif diff --git a/gcc/config/sparc/t-elf b/gcc/config/sparc/t-elf index 7073bcb7721..be926585481 100644 --- a/gcc/config/sparc/t-elf +++ b/gcc/config/sparc/t-elf @@ -17,9 +17,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = sparc/lb1spc.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 - MULTILIB_OPTIONS = msoft-float mcpu=v8 mflat MULTILIB_DIRNAMES = soft v8 flat MULTILIB_MATCHES = msoft-float=mno-fpu diff --git a/gcc/config/sparc/t-leon b/gcc/config/sparc/t-leon index 4f9d0a9e797..8e5e30f7ff7 100644 --- a/gcc/config/sparc/t-leon +++ b/gcc/config/sparc/t-leon @@ -16,9 +16,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = sparc/lb1spc.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 - # Multilibs for LEON # LEON is a SPARC-V8, but the AT697 implementation has a bug in the # V8-specific instructions. 
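The deleted SPARC lb1spc.asm above implements .udiv/.div/.rem as non-restoring long division that develops four quotient bits per loop iteration (the unrolled L1.16 ... L4.9 ladder), with a single fixup instruction at the end when the running remainder ended up negative; the SPARClite lb1spl.asm versions instead retire one quotient bit per divscc instruction. For orientation, here is the plain restoring, one-bit-per-step form of the same long division in C; it is deliberately the simple variant, not the radix-16 non-restoring scheme the assembly uses:

#include <stdint.h>

/* Restoring binary long division: one quotient bit per step.
   Assumes d != 0 (the assembly raises the ST_DIV0 trap for that case).  */
static void
udivmod32 (uint32_t n, uint32_t d, uint32_t *q, uint32_t *r)
{
  uint64_t rem = 0;
  uint32_t quot = 0;

  for (int i = 31; i >= 0; i--)
    {
      rem = (rem << 1) | ((n >> i) & 1);   /* bring in the next dividend bit */
      quot <<= 1;
      if (rem >= d)                        /* trial subtraction succeeds */
        {
          rem -= d;
          quot |= 1;
        }
    }
  *q = quot;
  *r = (uint32_t) rem;
}

Signed .div and .rem are layered on top of this the same way as in the SH routines: record the signs, divide the magnitudes, then give the quotient the XOR of the operand signs and the remainder the sign of the dividend.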
diff --git a/gcc/config/spu/t-spu-elf b/gcc/config/spu/t-spu-elf index b1660353ee6..45802499525 100644 --- a/gcc/config/spu/t-spu-elf +++ b/gcc/config/spu/t-spu-elf @@ -15,10 +15,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -# Suppress building libgcc1.a -LIBGCC1 = -CROSS_LIBGCC1 = - TARGET_LIBGCC2_CFLAGS = -fPIC -mwarn-reloc -D__IN_LIBGCC2 # We exclude those because the libgcc2.c default versions do not support diff --git a/gcc/config/v850/lib1funcs.asm b/gcc/config/v850/lib1funcs.asm deleted file mode 100644 index 04e9b1e0ad4..00000000000 --- a/gcc/config/v850/lib1funcs.asm +++ /dev/null @@ -1,2330 +0,0 @@ -/* libgcc routines for NEC V850. - Copyright (C) 1996, 1997, 2002, 2005, 2009, 2010 - Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -#ifdef L_mulsi3 - .text - .globl ___mulsi3 - .type ___mulsi3,@function -___mulsi3: -#ifdef __v850__ -/* - #define SHIFT 12 - #define MASK ((1 << SHIFT) - 1) - - #define STEP(i, j) \ - ({ \ - short a_part = (a >> (i)) & MASK; \ - short b_part = (b >> (j)) & MASK; \ - int res = (((int) a_part) * ((int) b_part)); \ - res; \ - }) - - int - __mulsi3 (unsigned a, unsigned b) - { - return STEP (0, 0) + - ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) + - ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0)) - << (2 * SHIFT)); - } -*/ - mov r6, r14 - movea lo(32767), r0, r10 - and r10, r14 - mov r7, r15 - and r10, r15 - shr 15, r6 - mov r6, r13 - and r10, r13 - shr 15, r7 - mov r7, r12 - and r10, r12 - shr 15, r6 - shr 15, r7 - mov r14, r10 - mulh r15, r10 - mov r14, r11 - mulh r12, r11 - mov r13, r16 - mulh r15, r16 - mulh r14, r7 - mulh r15, r6 - add r16, r11 - mulh r13, r12 - shl 15, r11 - add r11, r10 - add r12, r7 - add r6, r7 - shl 30, r7 - add r7, r10 - jmp [r31] -#endif /* __v850__ */ -#if defined(__v850e__) || defined(__v850ea__) || defined(__v850e2__) || defined(__v850e2v3__) - /* This routine is almost unneccesarry because gcc - generates the MUL instruction for the RTX mulsi3. - But if someone wants to link his application with - previsously compiled v850 objects then they will - need this function. */ - - /* It isn't good to put the inst sequence as below; - mul r7, r6, - mov r6, r10, r0 - In this case, there is a RAW hazard between them. - MUL inst takes 2 cycle in EX stage, then MOV inst - must wait 1cycle. 
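The comment block above gives ___mulsi3 for the plain V850 as C pseudocode: split each operand into 12-bit pieces and add up only the partial products that survive modulo 2^32, so every piece fits the 16x16 mulh multiply (the assembly itself appears to use 15-bit pieces, but the idea is the same). The same pseudocode made self-contained and compilable, with the name changed so it does not collide with libgcc's real __mulsi3:

#include <stdint.h>

#define SHIFT 12
#define MASK  ((1u << SHIFT) - 1)

/* One 12-bit x 12-bit partial product; fits comfortably in 32 bits.  */
static int32_t
step (uint32_t a, uint32_t b, int i, int j)
{
  int16_t a_part = (int16_t) ((a >> i) & MASK);
  int16_t b_part = (int16_t) ((b >> j) & MASK);
  return (int32_t) a_part * (int32_t) b_part;
}

/* 32-bit product from partial products; higher cross terms vanish mod 2^32.  */
uint32_t
mulsi3_model (uint32_t a, uint32_t b)
{
  return (uint32_t) step (a, b, 0, 0)
    + ((uint32_t) (step (a, b, SHIFT, 0) + step (a, b, 0, SHIFT)) << SHIFT)
    + ((uint32_t) (step (a, b, 0, 2 * SHIFT) + step (a, b, SHIFT, SHIFT)
                   + step (a, b, 2 * SHIFT, 0)) << (2 * SHIFT));
}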
*/ - mov r7, r10 - mul r6, r10, r0 - jmp [r31] -#endif /* __v850e__ */ - .size ___mulsi3,.-___mulsi3 -#endif /* L_mulsi3 */ - - -#ifdef L_udivsi3 - .text - .global ___udivsi3 - .type ___udivsi3,@function -___udivsi3: -#ifdef __v850__ - mov 1,r12 - mov 0,r10 - cmp r6,r7 - bnl .L12 - movhi hi(-2147483648),r0,r13 - cmp r0,r7 - blt .L12 -.L4: - shl 1,r7 - shl 1,r12 - cmp r6,r7 - bnl .L12 - cmp r0,r12 - be .L8 - mov r7,r19 - and r13,r19 - be .L4 - br .L12 -.L9: - cmp r7,r6 - bl .L10 - sub r7,r6 - or r12,r10 -.L10: - shr 1,r12 - shr 1,r7 -.L12: - cmp r0,r12 - bne .L9 -.L8: - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - mov r6, r10 - divu r7, r10, r0 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___udivsi3,.-___udivsi3 -#endif - -#ifdef L_divsi3 - .text - .globl ___divsi3 - .type ___divsi3,@function -___divsi3: -#ifdef __v850__ - add -8,sp - st.w r31,4[sp] - st.w r22,0[sp] - mov 1,r22 - tst r7,r7 - bp .L3 - subr r0,r7 - subr r0,r22 -.L3: - tst r6,r6 - bp .L4 - subr r0,r6 - subr r0,r22 -.L4: - jarl ___udivsi3,r31 - cmp r0,r22 - bp .L7 - subr r0,r10 -.L7: - ld.w 0[sp],r22 - ld.w 4[sp],r31 - add 8,sp - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - mov r6, r10 - div r7, r10, r0 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___divsi3,.-___divsi3 -#endif - -#ifdef L_umodsi3 - .text - .globl ___umodsi3 - .type ___umodsi3,@function -___umodsi3: -#ifdef __v850__ - add -12,sp - st.w r31,8[sp] - st.w r7,4[sp] - st.w r6,0[sp] - jarl ___udivsi3,r31 - ld.w 4[sp],r7 - mov r10,r6 - jarl ___mulsi3,r31 - ld.w 0[sp],r6 - subr r6,r10 - ld.w 8[sp],r31 - add 12,sp - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - divu r7, r6, r10 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___umodsi3,.-___umodsi3 -#endif /* L_umodsi3 */ - -#ifdef L_modsi3 - .text - .globl ___modsi3 - .type ___modsi3,@function -___modsi3: -#ifdef __v850__ - add -12,sp - st.w r31,8[sp] - st.w r7,4[sp] - st.w r6,0[sp] - jarl ___divsi3,r31 - ld.w 4[sp],r7 - mov r10,r6 - jarl ___mulsi3,r31 - ld.w 0[sp],r6 - subr r6,r10 - ld.w 8[sp],r31 - add 12,sp - jmp [r31] - -#else /* defined(__v850e__) */ - - /* See comments at end of __mulsi3. */ - div r7, r6, r10 - jmp [r31] - -#endif /* __v850e__ */ - - .size ___modsi3,.-___modsi3 -#endif /* L_modsi3 */ - -#ifdef L_save_2 - .text - .align 2 - .globl __save_r2_r29 - .type __save_r2_r29,@function - /* Allocate space and save registers 2, 20 .. 29 on the stack. */ - /* Called via: jalr __save_r2_r29,r10. */ -__save_r2_r29: -#ifdef __EP__ - mov ep,r1 - addi -44,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - sst.w r2,40[ep] - mov r1,ep -#else - addi -44,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] - st.w r2,40[sp] -#endif - jmp [r10] - .size __save_r2_r29,.-__save_r2_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r2_r29. 
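The V850 ___umodsi3 and ___modsi3 above obtain the remainder indirectly: they call the corresponding division routine, multiply the quotient back by the divisor with ___mulsi3, and subtract the product from the saved dividend. In C terms, with udiv32 again a placeholder name for the division helper:

#include <stdint.h>

extern uint32_t udiv32 (uint32_t n, uint32_t d);  /* placeholder name */

uint32_t
umod32_model (uint32_t n, uint32_t d)
{
  /* remainder = dividend - quotient * divisor */
  return n - udiv32 (n, d) * d;
}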
*/ - .align 2 - .globl __return_r2_r29 - .type __return_r2_r29,@function -__return_r2_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - sld.w 40[ep],r2 - addi 44,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r20 - ld.w 40[sp],r2 - addi 44,sp,sp -#endif - jmp [r31] - .size __return_r2_r29,.-__return_r2_r29 -#endif /* L_save_2 */ - -#ifdef L_save_20 - .text - .align 2 - .globl __save_r20_r29 - .type __save_r20_r29,@function - /* Allocate space and save registers 20 .. 29 on the stack. */ - /* Called via: jalr __save_r20_r29,r10. */ -__save_r20_r29: -#ifdef __EP__ - mov ep,r1 - addi -40,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - mov r1,ep -#else - addi -40,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] -#endif - jmp [r10] - .size __save_r20_r29,.-__save_r20_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r20_r29. */ - .align 2 - .globl __return_r20_r29 - .type __return_r20_r29,@function -__return_r20_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - addi 40,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r20 - addi 40,sp,sp -#endif - jmp [r31] - .size __return_r20_r29,.-__return_r20_r29 -#endif /* L_save_20 */ - -#ifdef L_save_21 - .text - .align 2 - .globl __save_r21_r29 - .type __save_r21_r29,@function - /* Allocate space and save registers 21 .. 29 on the stack. */ - /* Called via: jalr __save_r21_r29,r10. */ -__save_r21_r29: -#ifdef __EP__ - mov ep,r1 - addi -36,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - mov r1,ep -#else - addi -36,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] -#endif - jmp [r10] - .size __save_r21_r29,.-__save_r21_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r21_r29. 
*/ - .align 2 - .globl __return_r21_r29 - .type __return_r21_r29,@function -__return_r21_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - addi 36,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - addi 36,sp,sp -#endif - jmp [r31] - .size __return_r21_r29,.-__return_r21_r29 -#endif /* L_save_21 */ - -#ifdef L_save_22 - .text - .align 2 - .globl __save_r22_r29 - .type __save_r22_r29,@function - /* Allocate space and save registers 22 .. 29 on the stack. */ - /* Called via: jalr __save_r22_r29,r10. */ -__save_r22_r29: -#ifdef __EP__ - mov ep,r1 - addi -32,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - mov r1,ep -#else - addi -32,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] -#endif - jmp [r10] - .size __save_r22_r29,.-__save_r22_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r22_r29. */ - .align 2 - .globl __return_r22_r29 - .type __return_r22_r29,@function -__return_r22_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - addi 32,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - addi 32,sp,sp -#endif - jmp [r31] - .size __return_r22_r29,.-__return_r22_r29 -#endif /* L_save_22 */ - -#ifdef L_save_23 - .text - .align 2 - .globl __save_r23_r29 - .type __save_r23_r29,@function - /* Allocate space and save registers 23 .. 29 on the stack. */ - /* Called via: jalr __save_r23_r29,r10. */ -__save_r23_r29: -#ifdef __EP__ - mov ep,r1 - addi -28,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - mov r1,ep -#else - addi -28,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] -#endif - jmp [r10] - .size __save_r23_r29,.-__save_r23_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r23_r29. */ - .align 2 - .globl __return_r23_r29 - .type __return_r23_r29,@function -__return_r23_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - addi 28,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - addi 28,sp,sp -#endif - jmp [r31] - .size __return_r23_r29,.-__return_r23_r29 -#endif /* L_save_23 */ - -#ifdef L_save_24 - .text - .align 2 - .globl __save_r24_r29 - .type __save_r24_r29,@function - /* Allocate space and save registers 24 .. 29 on the stack. */ - /* Called via: jalr __save_r24_r29,r10. 
*/ -__save_r24_r29: -#ifdef __EP__ - mov ep,r1 - addi -24,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - mov r1,ep -#else - addi -24,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] -#endif - jmp [r10] - .size __save_r24_r29,.-__save_r24_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r24_r29. */ - .align 2 - .globl __return_r24_r29 - .type __return_r24_r29,@function -__return_r24_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - addi 24,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - addi 24,sp,sp -#endif - jmp [r31] - .size __return_r24_r29,.-__return_r24_r29 -#endif /* L_save_24 */ - -#ifdef L_save_25 - .text - .align 2 - .globl __save_r25_r29 - .type __save_r25_r29,@function - /* Allocate space and save registers 25 .. 29 on the stack. */ - /* Called via: jalr __save_r25_r29,r10. */ -__save_r25_r29: -#ifdef __EP__ - mov ep,r1 - addi -20,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - mov r1,ep -#else - addi -20,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] -#endif - jmp [r10] - .size __save_r25_r29,.-__save_r25_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r25_r29. */ - .align 2 - .globl __return_r25_r29 - .type __return_r25_r29,@function -__return_r25_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - addi 20,sp,sp - mov r1,ep -#else - ld.w 0[ep],r29 - ld.w 4[ep],r28 - ld.w 8[ep],r27 - ld.w 12[ep],r26 - ld.w 16[ep],r25 - addi 20,sp,sp -#endif - jmp [r31] - .size __return_r25_r29,.-__return_r25_r29 -#endif /* L_save_25 */ - -#ifdef L_save_26 - .text - .align 2 - .globl __save_r26_r29 - .type __save_r26_r29,@function - /* Allocate space and save registers 26 .. 29 on the stack. */ - /* Called via: jalr __save_r26_r29,r10. */ -__save_r26_r29: -#ifdef __EP__ - mov ep,r1 - add -16,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - mov r1,ep -#else - add -16,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] -#endif - jmp [r10] - .size __save_r26_r29,.-__save_r26_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r26_r29. */ - .align 2 - .globl __return_r26_r29 - .type __return_r26_r29,@function -__return_r26_r29: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - addi 16,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - addi 16,sp,sp -#endif - jmp [r31] - .size __return_r26_r29,.-__return_r26_r29 -#endif /* L_save_26 */ - -#ifdef L_save_27 - .text - .align 2 - .globl __save_r27_r29 - .type __save_r27_r29,@function - /* Allocate space and save registers 27 .. 29 on the stack. */ - /* Called via: jalr __save_r27_r29,r10. 
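These __save_rNN_r29/__return_rNN_r29 pairs are out-of-line prologue and epilogue helpers: instead of emitting the stores inline, the compiler issues "jalr __save_rNN_r29,r10" on entry (return address in r10) and ends the function with "jr __return_rNN_r29", which reloads the registers and returns through r31. A rough C model of that stack discipline follows; the register file, word-indexed stack and function names below are illustrative stand-ins, not the real ABI.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t regs[32];            /* pretend V850 register file             */
static uint32_t stack[64];
static int sp = 64;                  /* word-indexed stack pointer, grows down */

static void save_rn_r29(int n)       /* models __save_rNN_r29 */
{
    int words = 30 - n;              /* r29 down to rNN */
    sp -= words;                     /* like "addi -4*words,sp,sp" */
    for (int i = 0; i < words; i++)
        stack[sp + i] = regs[29 - i];   /* r29 at 0[sp], r28 at 4[sp], ... */
}

static void return_rn_r29(int n)     /* models __return_rNN_r29 */
{
    int words = 30 - n;
    for (int i = 0; i < words; i++)
        regs[29 - i] = stack[sp + i];
    sp += words;                     /* like "addi 4*words,sp,sp", then "jmp [r31]" */
}

int main(void)
{
    for (int r = 20; r < 30; r++) regs[r] = 1000u + r;
    save_rn_r29(25);                            /* like "jalr __save_r25_r29,r10" */
    for (int r = 25; r < 30; r++) regs[r] = 0;  /* the function body clobbers them */
    return_rn_r29(25);                          /* like "jr __return_r25_r29" */
    for (int r = 25; r < 30; r++) assert(regs[r] == 1000u + r);
    puts("callee-saved registers restored");
    return 0;
}

The _r31 ("c") variants further down follow the same pattern but also store the link pointer r31, presumably so the function body is free to make calls of its own.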
*/ -__save_r27_r29: - add -12,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - jmp [r10] - .size __save_r27_r29,.-__save_r27_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r27_r29. */ - .align 2 - .globl __return_r27_r29 - .type __return_r27_r29,@function -__return_r27_r29: - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - add 12,sp - jmp [r31] - .size __return_r27_r29,.-__return_r27_r29 -#endif /* L_save_27 */ - -#ifdef L_save_28 - .text - .align 2 - .globl __save_r28_r29 - .type __save_r28_r29,@function - /* Allocate space and save registers 28,29 on the stack. */ - /* Called via: jalr __save_r28_r29,r10. */ -__save_r28_r29: - add -8,sp - st.w r29,0[sp] - st.w r28,4[sp] - jmp [r10] - .size __save_r28_r29,.-__save_r28_r29 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r28_r29. */ - .align 2 - .globl __return_r28_r29 - .type __return_r28_r29,@function -__return_r28_r29: - ld.w 0[sp],r29 - ld.w 4[sp],r28 - add 8,sp - jmp [r31] - .size __return_r28_r29,.-__return_r28_r29 -#endif /* L_save_28 */ - -#ifdef L_save_29 - .text - .align 2 - .globl __save_r29 - .type __save_r29,@function - /* Allocate space and save register 29 on the stack. */ - /* Called via: jalr __save_r29,r10. */ -__save_r29: - add -4,sp - st.w r29,0[sp] - jmp [r10] - .size __save_r29,.-__save_r29 - - /* Restore saved register 29, deallocate stack and return to the user. */ - /* Called via: jr __return_r29. */ - .align 2 - .globl __return_r29 - .type __return_r29,@function -__return_r29: - ld.w 0[sp],r29 - add 4,sp - jmp [r31] - .size __return_r29,.-__return_r29 -#endif /* L_save_28 */ - -#ifdef L_save_2c - .text - .align 2 - .globl __save_r2_r31 - .type __save_r2_r31,@function - /* Allocate space and save registers 20 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r2_r31,r10. */ -__save_r2_r31: -#ifdef __EP__ - mov ep,r1 - addi -48,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - sst.w r2,40[ep] - sst.w r31,44[ep] - mov r1,ep -#else - addi -48,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] - st.w r2,40[sp] - st.w r31,44[sp] -#endif - jmp [r10] - .size __save_r2_r31,.-__save_r2_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r20_r31. */ - .align 2 - .globl __return_r2_r31 - .type __return_r2_r31,@function -__return_r2_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - sld.w 40[ep],r2 - sld.w 44[ep],r31 - addi 48,sp,sp - mov r1,ep -#else - ld.w 44[sp],r29 - ld.w 40[sp],r28 - ld.w 36[sp],r27 - ld.w 32[sp],r26 - ld.w 28[sp],r25 - ld.w 24[sp],r24 - ld.w 20[sp],r23 - ld.w 16[sp],r22 - ld.w 12[sp],r21 - ld.w 8[sp],r20 - ld.w 4[sp],r2 - ld.w 0[sp],r31 - addi 48,sp,sp -#endif - jmp [r31] - .size __return_r2_r31,.-__return_r2_r31 -#endif /* L_save_2c */ - -#ifdef L_save_20c - .text - .align 2 - .globl __save_r20_r31 - .type __save_r20_r31,@function - /* Allocate space and save registers 20 .. 29, 31 on the stack. 
*/ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r20_r31,r10. */ -__save_r20_r31: -#ifdef __EP__ - mov ep,r1 - addi -44,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r20,36[ep] - sst.w r31,40[ep] - mov r1,ep -#else - addi -44,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r20,36[sp] - st.w r31,40[sp] -#endif - jmp [r10] - .size __save_r20_r31,.-__save_r20_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r20_r31. */ - .align 2 - .globl __return_r20_r31 - .type __return_r20_r31,@function -__return_r20_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r20 - sld.w 40[ep],r31 - addi 44,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r20 - ld.w 40[sp],r31 - addi 44,sp,sp -#endif - jmp [r31] - .size __return_r20_r31,.-__return_r20_r31 -#endif /* L_save_20c */ - -#ifdef L_save_21c - .text - .align 2 - .globl __save_r21_r31 - .type __save_r21_r31,@function - /* Allocate space and save registers 21 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r21_r31,r10. */ -__save_r21_r31: -#ifdef __EP__ - mov ep,r1 - addi -40,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r21,32[ep] - sst.w r31,36[ep] - mov r1,ep - jmp [r10] -#else - addi -40,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r21,32[sp] - st.w r31,36[sp] - jmp [r10] -#endif - .size __save_r21_r31,.-__save_r21_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r21_r31. */ - .align 2 - .globl __return_r21_r31 - .type __return_r21_r31,@function -__return_r21_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r21 - sld.w 36[ep],r31 - addi 40,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r21 - ld.w 36[sp],r31 - addi 40,sp,sp -#endif - jmp [r31] - .size __return_r21_r31,.-__return_r21_r31 -#endif /* L_save_21c */ - -#ifdef L_save_22c - .text - .align 2 - .globl __save_r22_r31 - .type __save_r22_r31,@function - /* Allocate space and save registers 22 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r22_r31,r10. 
*/ -__save_r22_r31: -#ifdef __EP__ - mov ep,r1 - addi -36,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r22,28[ep] - sst.w r31,32[ep] - mov r1,ep -#else - addi -36,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r22,28[sp] - st.w r31,32[sp] -#endif - jmp [r10] - .size __save_r22_r31,.-__save_r22_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r22_r31. */ - .align 2 - .globl __return_r22_r31 - .type __return_r22_r31,@function -__return_r22_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r22 - sld.w 32[ep],r31 - addi 36,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r22 - ld.w 32[sp],r31 - addi 36,sp,sp -#endif - jmp [r31] - .size __return_r22_r31,.-__return_r22_r31 -#endif /* L_save_22c */ - -#ifdef L_save_23c - .text - .align 2 - .globl __save_r23_r31 - .type __save_r23_r31,@function - /* Allocate space and save registers 23 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r23_r31,r10. */ -__save_r23_r31: -#ifdef __EP__ - mov ep,r1 - addi -32,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r23,24[ep] - sst.w r31,28[ep] - mov r1,ep -#else - addi -32,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r23,24[sp] - st.w r31,28[sp] -#endif - jmp [r10] - .size __save_r23_r31,.-__save_r23_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r23_r31. */ - .align 2 - .globl __return_r23_r31 - .type __return_r23_r31,@function -__return_r23_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r23 - sld.w 28[ep],r31 - addi 32,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r23 - ld.w 28[sp],r31 - addi 32,sp,sp -#endif - jmp [r31] - .size __return_r23_r31,.-__return_r23_r31 -#endif /* L_save_23c */ - -#ifdef L_save_24c - .text - .align 2 - .globl __save_r24_r31 - .type __save_r24_r31,@function - /* Allocate space and save registers 24 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r24_r31,r10. */ -__save_r24_r31: -#ifdef __EP__ - mov ep,r1 - addi -28,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r24,20[ep] - sst.w r31,24[ep] - mov r1,ep -#else - addi -28,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r24,20[sp] - st.w r31,24[sp] -#endif - jmp [r10] - .size __save_r24_r31,.-__save_r24_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r24_r31. 
*/ - .align 2 - .globl __return_r24_r31 - .type __return_r24_r31,@function -__return_r24_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r24 - sld.w 24[ep],r31 - addi 28,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r24 - ld.w 24[sp],r31 - addi 28,sp,sp -#endif - jmp [r31] - .size __return_r24_r31,.-__return_r24_r31 -#endif /* L_save_24c */ - -#ifdef L_save_25c - .text - .align 2 - .globl __save_r25_r31 - .type __save_r25_r31,@function - /* Allocate space and save registers 25 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r25_r31,r10. */ -__save_r25_r31: -#ifdef __EP__ - mov ep,r1 - addi -24,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r25,16[ep] - sst.w r31,20[ep] - mov r1,ep -#else - addi -24,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r25,16[sp] - st.w r31,20[sp] -#endif - jmp [r10] - .size __save_r25_r31,.-__save_r25_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r25_r31. */ - .align 2 - .globl __return_r25_r31 - .type __return_r25_r31,@function -__return_r25_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r25 - sld.w 20[ep],r31 - addi 24,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r25 - ld.w 20[sp],r31 - addi 24,sp,sp -#endif - jmp [r31] - .size __return_r25_r31,.-__return_r25_r31 -#endif /* L_save_25c */ - -#ifdef L_save_26c - .text - .align 2 - .globl __save_r26_r31 - .type __save_r26_r31,@function - /* Allocate space and save registers 26 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r26_r31,r10. */ -__save_r26_r31: -#ifdef __EP__ - mov ep,r1 - addi -20,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r26,12[ep] - sst.w r31,16[ep] - mov r1,ep -#else - addi -20,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r26,12[sp] - st.w r31,16[sp] -#endif - jmp [r10] - .size __save_r26_r31,.-__save_r26_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r26_r31. */ - .align 2 - .globl __return_r26_r31 - .type __return_r26_r31,@function -__return_r26_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r26 - sld.w 16[ep],r31 - addi 20,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r26 - ld.w 16[sp],r31 - addi 20,sp,sp -#endif - jmp [r31] - .size __return_r26_r31,.-__return_r26_r31 -#endif /* L_save_26c */ - -#ifdef L_save_27c - .text - .align 2 - .globl __save_r27_r31 - .type __save_r27_r31,@function - /* Allocate space and save registers 27 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r27_r31,r10. 
*/ -__save_r27_r31: -#ifdef __EP__ - mov ep,r1 - addi -16,sp,sp - mov sp,ep - sst.w r29,0[ep] - sst.w r28,4[ep] - sst.w r27,8[ep] - sst.w r31,12[ep] - mov r1,ep -#else - addi -16,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r27,8[sp] - st.w r31,12[sp] -#endif - jmp [r10] - .size __save_r27_r31,.-__save_r27_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r27_r31. */ - .align 2 - .globl __return_r27_r31 - .type __return_r27_r31,@function -__return_r27_r31: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 0[ep],r29 - sld.w 4[ep],r28 - sld.w 8[ep],r27 - sld.w 12[ep],r31 - addi 16,sp,sp - mov r1,ep -#else - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r27 - ld.w 12[sp],r31 - addi 16,sp,sp -#endif - jmp [r31] - .size __return_r27_r31,.-__return_r27_r31 -#endif /* L_save_27c */ - -#ifdef L_save_28c - .text - .align 2 - .globl __save_r28_r31 - .type __save_r28_r31,@function - /* Allocate space and save registers 28 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r28_r31,r10. */ -__save_r28_r31: - addi -12,sp,sp - st.w r29,0[sp] - st.w r28,4[sp] - st.w r31,8[sp] - jmp [r10] - .size __save_r28_r31,.-__save_r28_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r28_r31. */ - .align 2 - .globl __return_r28_r31 - .type __return_r28_r31,@function -__return_r28_r31: - ld.w 0[sp],r29 - ld.w 4[sp],r28 - ld.w 8[sp],r31 - addi 12,sp,sp - jmp [r31] - .size __return_r28_r31,.-__return_r28_r31 -#endif /* L_save_28c */ - -#ifdef L_save_29c - .text - .align 2 - .globl __save_r29_r31 - .type __save_r29_r31,@function - /* Allocate space and save registers 29 & 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r29_r31,r10. */ -__save_r29_r31: - addi -8,sp,sp - st.w r29,0[sp] - st.w r31,4[sp] - jmp [r10] - .size __save_r29_r31,.-__save_r29_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r29_r31. */ - .align 2 - .globl __return_r29_r31 - .type __return_r29_r31,@function -__return_r29_r31: - ld.w 0[sp],r29 - ld.w 4[sp],r31 - addi 8,sp,sp - jmp [r31] - .size __return_r29_r31,.-__return_r29_r31 -#endif /* L_save_29c */ - -#ifdef L_save_31c - .text - .align 2 - .globl __save_r31 - .type __save_r31,@function - /* Allocate space and save register 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: jalr __save_r31,r10. */ -__save_r31: - addi -4,sp,sp - st.w r31,0[sp] - jmp [r10] - .size __save_r31,.-__save_r31 - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: jr __return_r31. */ - .align 2 - .globl __return_r31 - .type __return_r31,@function -__return_r31: - ld.w 0[sp],r31 - addi 4,sp,sp - jmp [r31] - .size __return_r31,.-__return_r31 -#endif /* L_save_31c */ - -#ifdef L_save_interrupt - .text - .align 2 - .globl __save_interrupt - .type __save_interrupt,@function - /* Save registers r1, r4 on stack and load up with expected values. */ - /* Note, 20 bytes of stack have already been allocated. */ - /* Called via: jalr __save_interrupt,r10. 
*/ -__save_interrupt: - /* add -20,sp ; st.w r11,16[sp] ; st.w r10,12[sp] ; */ - st.w ep,0[sp] - st.w gp,4[sp] - st.w r1,8[sp] - movhi hi(__ep),r0,ep - movea lo(__ep),ep,ep - movhi hi(__gp),r0,gp - movea lo(__gp),gp,gp - jmp [r10] - .size __save_interrupt,.-__save_interrupt - - /* Restore saved registers, deallocate stack and return from the interrupt. */ - /* Called via: jr __return_interrupt. */ - .align 2 - .globl __return_interrupt - .type __return_interrupt,@function -__return_interrupt: - ld.w 0[sp],ep - ld.w 4[sp],gp - ld.w 8[sp],r1 - ld.w 12[sp],r10 - ld.w 16[sp],r11 - addi 20,sp,sp - reti - .size __return_interrupt,.-__return_interrupt -#endif /* L_save_interrupt */ - -#ifdef L_save_all_interrupt - .text - .align 2 - .globl __save_all_interrupt - .type __save_all_interrupt,@function - /* Save all registers except for those saved in __save_interrupt. */ - /* Allocate enough stack for all of the registers & 16 bytes of space. */ - /* Called via: jalr __save_all_interrupt,r10. */ -__save_all_interrupt: - addi -104,sp,sp -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sst.w r31,100[ep] - sst.w r2,96[ep] - sst.w gp,92[ep] - sst.w r6,88[ep] - sst.w r7,84[ep] - sst.w r8,80[ep] - sst.w r9,76[ep] - sst.w r11,72[ep] - sst.w r12,68[ep] - sst.w r13,64[ep] - sst.w r14,60[ep] - sst.w r15,56[ep] - sst.w r16,52[ep] - sst.w r17,48[ep] - sst.w r18,44[ep] - sst.w r19,40[ep] - sst.w r20,36[ep] - sst.w r21,32[ep] - sst.w r22,28[ep] - sst.w r23,24[ep] - sst.w r24,20[ep] - sst.w r25,16[ep] - sst.w r26,12[ep] - sst.w r27,8[ep] - sst.w r28,4[ep] - sst.w r29,0[ep] - mov r1,ep -#else - st.w r31,100[sp] - st.w r2,96[sp] - st.w gp,92[sp] - st.w r6,88[sp] - st.w r7,84[sp] - st.w r8,80[sp] - st.w r9,76[sp] - st.w r11,72[sp] - st.w r12,68[sp] - st.w r13,64[sp] - st.w r14,60[sp] - st.w r15,56[sp] - st.w r16,52[sp] - st.w r17,48[sp] - st.w r18,44[sp] - st.w r19,40[sp] - st.w r20,36[sp] - st.w r21,32[sp] - st.w r22,28[sp] - st.w r23,24[sp] - st.w r24,20[sp] - st.w r25,16[sp] - st.w r26,12[sp] - st.w r27,8[sp] - st.w r28,4[sp] - st.w r29,0[sp] -#endif - jmp [r10] - .size __save_all_interrupt,.-__save_all_interrupt - - .globl __restore_all_interrupt - .type __restore_all_interrupt,@function - /* Restore all registers saved in __save_all_interrupt and - deallocate the stack space. */ - /* Called via: jalr __restore_all_interrupt,r10. 
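As the comments above spell out, __save_interrupt runs after compiler-generated interrupt code has already dropped the stack by 20 bytes and stored r10 and r11; the helper fills in ep, gp and r1 and then loads ep and gp with __ep and __gp. A C picture of that 20-byte frame, in the order __return_interrupt unwinds it (the struct name is purely illustrative):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct v850_interrupt_frame {        /* offsets are from sp after "add -20,sp" */
    uint32_t ep;                     /*  0[sp]  stored by __save_interrupt     */
    uint32_t gp;                     /*  4[sp]  stored by __save_interrupt     */
    uint32_t r1;                     /*  8[sp]  stored by __save_interrupt     */
    uint32_t r10;                    /* 12[sp]  stored before the helper runs  */
    uint32_t r11;                    /* 16[sp]  stored before the helper runs  */
};

int main(void)
{
    printf("frame size: %zu bytes\n", sizeof(struct v850_interrupt_frame));
    printf("r10 saved at offset %zu\n", offsetof(struct v850_interrupt_frame, r10));
    return 0;
}

__save_all_interrupt then pushes everything else (another 104 bytes here, or 60 bytes plus a prepare in the callt version below) so that the interrupt body may safely call ordinary code.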
*/ -__restore_all_interrupt: -#ifdef __EP__ - mov ep,r1 - mov sp,ep - sld.w 100[ep],r31 - sld.w 96[ep],r2 - sld.w 92[ep],gp - sld.w 88[ep],r6 - sld.w 84[ep],r7 - sld.w 80[ep],r8 - sld.w 76[ep],r9 - sld.w 72[ep],r11 - sld.w 68[ep],r12 - sld.w 64[ep],r13 - sld.w 60[ep],r14 - sld.w 56[ep],r15 - sld.w 52[ep],r16 - sld.w 48[ep],r17 - sld.w 44[ep],r18 - sld.w 40[ep],r19 - sld.w 36[ep],r20 - sld.w 32[ep],r21 - sld.w 28[ep],r22 - sld.w 24[ep],r23 - sld.w 20[ep],r24 - sld.w 16[ep],r25 - sld.w 12[ep],r26 - sld.w 8[ep],r27 - sld.w 4[ep],r28 - sld.w 0[ep],r29 - mov r1,ep -#else - ld.w 100[sp],r31 - ld.w 96[sp],r2 - ld.w 92[sp],gp - ld.w 88[sp],r6 - ld.w 84[sp],r7 - ld.w 80[sp],r8 - ld.w 76[sp],r9 - ld.w 72[sp],r11 - ld.w 68[sp],r12 - ld.w 64[sp],r13 - ld.w 60[sp],r14 - ld.w 56[sp],r15 - ld.w 52[sp],r16 - ld.w 48[sp],r17 - ld.w 44[sp],r18 - ld.w 40[sp],r19 - ld.w 36[sp],r20 - ld.w 32[sp],r21 - ld.w 28[sp],r22 - ld.w 24[sp],r23 - ld.w 20[sp],r24 - ld.w 16[sp],r25 - ld.w 12[sp],r26 - ld.w 8[sp],r27 - ld.w 4[sp],r28 - ld.w 0[sp],r29 -#endif - addi 104,sp,sp - jmp [r10] - .size __restore_all_interrupt,.-__restore_all_interrupt -#endif /* L_save_all_interrupt */ - -#if defined(__v850e__) || defined(__v850e1__) || defined(__v850e2__) || defined(__v850e2v3__) -#ifdef L_callt_save_r2_r29 - /* Put these functions into the call table area. */ - .call_table_text - - /* Allocate space and save registers 2, 20 .. 29 on the stack. */ - /* Called via: callt ctoff(__callt_save_r2_r29). */ - .align 2 -.L_save_r2_r29: - add -4, sp - st.w r2, 0[sp] - prepare {r20 - r29}, 0 - ctret - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: callt ctoff(__callt_return_r2_r29). */ - .align 2 -.L_return_r2_r29: - dispose 0, {r20-r29} - ld.w 0[sp], r2 - add 4, sp - jmp [r31] - - /* Place the offsets of the start of these routines into the call table. */ - .call_table_data - - .global __callt_save_r2_r29 - .type __callt_save_r2_r29,@function -__callt_save_r2_r29: .short ctoff(.L_save_r2_r29) - - .global __callt_return_r2_r29 - .type __callt_return_r2_r29,@function -__callt_return_r2_r29: .short ctoff(.L_return_r2_r29) - -#endif /* L_callt_save_r2_r29. */ - -#ifdef L_callt_save_r2_r31 - /* Put these functions into the call table area. */ - .call_table_text - - /* Allocate space and save registers 2 and 20 .. 29, 31 on the stack. */ - /* Also allocate space for the argument save area. */ - /* Called via: callt ctoff(__callt_save_r2_r31). */ - .align 2 -.L_save_r2_r31: - add -4, sp - st.w r2, 0[sp] - prepare {r20 - r29, r31}, 0 - ctret - - /* Restore saved registers, deallocate stack and return to the user. */ - /* Called via: callt ctoff(__callt_return_r2_r31). */ - .align 2 -.L_return_r2_r31: - dispose 0, {r20 - r29, r31} - ld.w 0[sp], r2 - addi 4, sp, sp - jmp [r31] - - /* Place the offsets of the start of these routines into the call table. */ - .call_table_data - - .global __callt_save_r2_r31 - .type __callt_save_r2_r31,@function -__callt_save_r2_r31: .short ctoff(.L_save_r2_r31) - - .global __callt_return_r2_r31 - .type __callt_return_r2_r31,@function -__callt_return_r2_r31: .short ctoff(.L_return_r2_r31) - -#endif /* L_callt_save_r2_r31 */ - -#ifdef L_callt_save_interrupt - /* Put these functions into the call table area. */ - .call_table_text - - /* Save registers r1, ep, gp, r10 on stack and load up with expected values. */ - /* Called via: callt ctoff(__callt_save_interrupt). */ - .align 2 -.L_save_interrupt: - /* SP has already been moved before callt ctoff(_save_interrupt). 
*/ - /* R1,R10,R11,ctpc,ctpsw has alread been saved bofore callt ctoff(_save_interrupt). */ - /* addi -28, sp, sp */ - /* st.w r1, 24[sp] */ - /* st.w r10, 12[sp] */ - /* st.w r11, 16[sp] */ - /* stsr ctpc, r10 */ - /* st.w r10, 20[sp] */ - /* stsr ctpsw, r10 */ - /* st.w r10, 24[sp] */ - st.w ep, 0[sp] - st.w gp, 4[sp] - st.w r1, 8[sp] - mov hilo(__ep),ep - mov hilo(__gp),gp - ctret - - .call_table_text - /* Restore saved registers, deallocate stack and return from the interrupt. */ - /* Called via: callt ctoff(__callt_restore_interrupt). */ - .align 2 - .globl __return_interrupt - .type __return_interrupt,@function -.L_return_interrupt: - ld.w 24[sp], r1 - ldsr r1, ctpsw - ld.w 20[sp], r1 - ldsr r1, ctpc - ld.w 16[sp], r11 - ld.w 12[sp], r10 - ld.w 8[sp], r1 - ld.w 4[sp], gp - ld.w 0[sp], ep - addi 28, sp, sp - reti - - /* Place the offsets of the start of these routines into the call table. */ - .call_table_data - - .global __callt_save_interrupt - .type __callt_save_interrupt,@function -__callt_save_interrupt: .short ctoff(.L_save_interrupt) - - .global __callt_return_interrupt - .type __callt_return_interrupt,@function -__callt_return_interrupt: .short ctoff(.L_return_interrupt) - -#endif /* L_callt_save_interrupt */ - -#ifdef L_callt_save_all_interrupt - /* Put these functions into the call table area. */ - .call_table_text - - /* Save all registers except for those saved in __save_interrupt. */ - /* Allocate enough stack for all of the registers & 16 bytes of space. */ - /* Called via: callt ctoff(__callt_save_all_interrupt). */ - .align 2 -.L_save_all_interrupt: - addi -60, sp, sp -#ifdef __EP__ - mov ep, r1 - mov sp, ep - sst.w r2, 56[ep] - sst.w r5, 52[ep] - sst.w r6, 48[ep] - sst.w r7, 44[ep] - sst.w r8, 40[ep] - sst.w r9, 36[ep] - sst.w r11, 32[ep] - sst.w r12, 28[ep] - sst.w r13, 24[ep] - sst.w r14, 20[ep] - sst.w r15, 16[ep] - sst.w r16, 12[ep] - sst.w r17, 8[ep] - sst.w r18, 4[ep] - sst.w r19, 0[ep] - mov r1, ep -#else - st.w r2, 56[sp] - st.w r5, 52[sp] - st.w r6, 48[sp] - st.w r7, 44[sp] - st.w r8, 40[sp] - st.w r9, 36[sp] - st.w r11, 32[sp] - st.w r12, 28[sp] - st.w r13, 24[sp] - st.w r14, 20[sp] - st.w r15, 16[sp] - st.w r16, 12[sp] - st.w r17, 8[sp] - st.w r18, 4[sp] - st.w r19, 0[sp] -#endif - prepare {r20 - r29, r31}, 0 - ctret - - /* Restore all registers saved in __save_all_interrupt - deallocate the stack space. */ - /* Called via: callt ctoff(__callt_restore_all_interrupt). */ - .align 2 -.L_restore_all_interrupt: - dispose 0, {r20 - r29, r31} -#ifdef __EP__ - mov ep, r1 - mov sp, ep - sld.w 0 [ep], r19 - sld.w 4 [ep], r18 - sld.w 8 [ep], r17 - sld.w 12[ep], r16 - sld.w 16[ep], r15 - sld.w 20[ep], r14 - sld.w 24[ep], r13 - sld.w 28[ep], r12 - sld.w 32[ep], r11 - sld.w 36[ep], r9 - sld.w 40[ep], r8 - sld.w 44[ep], r7 - sld.w 48[ep], r6 - sld.w 52[ep], r5 - sld.w 56[ep], r2 - mov r1, ep -#else - ld.w 0 [sp], r19 - ld.w 4 [sp], r18 - ld.w 8 [sp], r17 - ld.w 12[sp], r16 - ld.w 16[sp], r15 - ld.w 20[sp], r14 - ld.w 24[sp], r13 - ld.w 28[sp], r12 - ld.w 32[sp], r11 - ld.w 36[sp], r9 - ld.w 40[sp], r8 - ld.w 44[sp], r7 - ld.w 48[sp], r6 - ld.w 52[sp], r5 - ld.w 56[sp], r2 -#endif - addi 60, sp, sp - ctret - - /* Place the offsets of the start of these routines into the call table. 
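On the V850E variants these helpers are reached through the CALLT mechanism instead: .call_table_data holds 16-bit ctoff() offsets, a callt instruction indexes that table relative to the CTBP base register and branches to the routine, and ctret returns. A loose C analogue of the dispatch, with made-up names:

#include <stdio.h>

typedef void (*callt_entry)(void);

static void save_r2_r29(void) { puts("prepare {r20-r29}, plus r2"); }
static void save_r2_r31(void) { puts("prepare {r20-r29,r31}, plus r2"); }

/* Stands in for the .short ctoff(...) entries in .call_table_data; the real
   table stores 16-bit offsets, not full pointers. */
static callt_entry call_table[] = { save_r2_r29, save_r2_r31 };

static void callt(unsigned n)        /* models "callt n" followed by ctret */
{
    call_table[n]();
}

int main(void)
{
    callt(0);
    callt(1);
    return 0;
}

The bodies stay tiny because prepare and dispose are the V850E push-multiple and pop-multiple instructions, so each table entry is only a few instructions long.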
*/ - .call_table_data - - .global __callt_save_all_interrupt - .type __callt_save_all_interrupt,@function -__callt_save_all_interrupt: .short ctoff(.L_save_all_interrupt) - - .global __callt_restore_all_interrupt - .type __callt_restore_all_interrupt,@function -__callt_restore_all_interrupt: .short ctoff(.L_restore_all_interrupt) - -#endif /* L_callt_save_all_interrupt */ - - -#define MAKE_CALLT_FUNCS( START ) \ - .call_table_text ;\ - .align 2 ;\ - /* Allocate space and save registers START .. r29 on the stack. */ ;\ - /* Called via: callt ctoff(__callt_save_START_r29). */ ;\ -.L_save_##START##_r29: ;\ - prepare { START - r29 }, 0 ;\ - ctret ;\ - ;\ - /* Restore saved registers, deallocate stack and return. */ ;\ - /* Called via: callt ctoff(__return_START_r29). */ ;\ - .align 2 ;\ -.L_return_##START##_r29: ;\ - dispose 0, { START - r29 }, r31 ;\ - ;\ - /* Place the offsets of the start of these funcs into the call table. */;\ - .call_table_data ;\ - ;\ - .global __callt_save_##START##_r29 ;\ - .type __callt_save_##START##_r29,@function ;\ -__callt_save_##START##_r29: .short ctoff(.L_save_##START##_r29 ) ;\ - ;\ - .global __callt_return_##START##_r29 ;\ - .type __callt_return_##START##_r29,@function ;\ -__callt_return_##START##_r29: .short ctoff(.L_return_##START##_r29 ) - - -#define MAKE_CALLT_CFUNCS( START ) \ - .call_table_text ;\ - .align 2 ;\ - /* Allocate space and save registers START .. r31 on the stack. */ ;\ - /* Called via: callt ctoff(__callt_save_START_r31c). */ ;\ -.L_save_##START##_r31c: ;\ - prepare { START - r29, r31}, 0 ;\ - ctret ;\ - ;\ - /* Restore saved registers, deallocate stack and return. */ ;\ - /* Called via: callt ctoff(__return_START_r31c). */ ;\ - .align 2 ;\ -.L_return_##START##_r31c: ;\ - dispose 0, { START - r29, r31}, r31 ;\ - ;\ - /* Place the offsets of the start of these funcs into the call table. */;\ - .call_table_data ;\ - ;\ - .global __callt_save_##START##_r31c ;\ - .type __callt_save_##START##_r31c,@function ;\ -__callt_save_##START##_r31c: .short ctoff(.L_save_##START##_r31c ) ;\ - ;\ - .global __callt_return_##START##_r31c ;\ - .type __callt_return_##START##_r31c,@function ;\ -__callt_return_##START##_r31c: .short ctoff(.L_return_##START##_r31c ) - - -#ifdef L_callt_save_20 - MAKE_CALLT_FUNCS (r20) -#endif -#ifdef L_callt_save_21 - MAKE_CALLT_FUNCS (r21) -#endif -#ifdef L_callt_save_22 - MAKE_CALLT_FUNCS (r22) -#endif -#ifdef L_callt_save_23 - MAKE_CALLT_FUNCS (r23) -#endif -#ifdef L_callt_save_24 - MAKE_CALLT_FUNCS (r24) -#endif -#ifdef L_callt_save_25 - MAKE_CALLT_FUNCS (r25) -#endif -#ifdef L_callt_save_26 - MAKE_CALLT_FUNCS (r26) -#endif -#ifdef L_callt_save_27 - MAKE_CALLT_FUNCS (r27) -#endif -#ifdef L_callt_save_28 - MAKE_CALLT_FUNCS (r28) -#endif -#ifdef L_callt_save_29 - MAKE_CALLT_FUNCS (r29) -#endif - -#ifdef L_callt_save_20c - MAKE_CALLT_CFUNCS (r20) -#endif -#ifdef L_callt_save_21c - MAKE_CALLT_CFUNCS (r21) -#endif -#ifdef L_callt_save_22c - MAKE_CALLT_CFUNCS (r22) -#endif -#ifdef L_callt_save_23c - MAKE_CALLT_CFUNCS (r23) -#endif -#ifdef L_callt_save_24c - MAKE_CALLT_CFUNCS (r24) -#endif -#ifdef L_callt_save_25c - MAKE_CALLT_CFUNCS (r25) -#endif -#ifdef L_callt_save_26c - MAKE_CALLT_CFUNCS (r26) -#endif -#ifdef L_callt_save_27c - MAKE_CALLT_CFUNCS (r27) -#endif -#ifdef L_callt_save_28c - MAKE_CALLT_CFUNCS (r28) -#endif -#ifdef L_callt_save_29c - MAKE_CALLT_CFUNCS (r29) -#endif - - -#ifdef L_callt_save_31c - .call_table_text - .align 2 - /* Allocate space and save register r31 on the stack. 
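The MAKE_CALLT_FUNCS and MAKE_CALLT_CFUNCS macros above stamp out one save/return pair per starting register by pasting the register name into the generated labels and symbols with the preprocessor's ## operator. The same idiom in plain C, purely for illustration:

#include <stdio.h>

#define MAKE_SAVER(START)                              \
    static void save_##START##_r29(void)               \
    {                                                  \
        printf("saving %s .. r29\n", #START);          \
    }

MAKE_SAVER(r20)      /* defines save_r20_r29() */
MAKE_SAVER(r25)      /* defines save_r25_r29() */

int main(void)
{
    save_r20_r29();
    save_r25_r29();
    return 0;
}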
*/ - /* Called via: callt ctoff(__callt_save_r31c). */ -.L_callt_save_r31c: - prepare {r31}, 0 - ctret - - /* Restore saved registers, deallocate stack and return. */ - /* Called via: callt ctoff(__return_r31c). */ - .align 2 -.L_callt_return_r31c: - dispose 0, {r31}, r31 - - /* Place the offsets of the start of these funcs into the call table. */ - .call_table_data - - .global __callt_save_r31c - .type __callt_save_r31c,@function -__callt_save_r31c: .short ctoff(.L_callt_save_r31c) - - .global __callt_return_r31c - .type __callt_return_r31c,@function -__callt_return_r31c: .short ctoff(.L_callt_return_r31c) -#endif - -#endif /* __v850e__ */ - -/* libgcc2 routines for NEC V850. */ -/* Double Integer Arithmetical Operation. */ - -#ifdef L_negdi2 - .text - .global ___negdi2 - .type ___negdi2, @function -___negdi2: - not r6, r10 - add 1, r10 - setf l, r6 - not r7, r11 - add r6, r11 - jmp [lp] - - .size ___negdi2,.-___negdi2 -#endif - -#ifdef L_cmpdi2 - .text - .global ___cmpdi2 - .type ___cmpdi2,@function -___cmpdi2: - # Signed comparison bitween each high word. - cmp r9, r7 - be .L_cmpdi_cmp_low - setf ge, r10 - setf gt, r6 - add r6, r10 - jmp [lp] -.L_cmpdi_cmp_low: - # Unsigned comparigon bitween each low word. - cmp r8, r6 - setf nl, r10 - setf h, r6 - add r6, r10 - jmp [lp] - .size ___cmpdi2, . - ___cmpdi2 -#endif - -#ifdef L_ucmpdi2 - .text - .global ___ucmpdi2 - .type ___ucmpdi2,@function -___ucmpdi2: - cmp r9, r7 # Check if each high word are same. - bne .L_ucmpdi_check_psw - cmp r8, r6 # Compare the word. -.L_ucmpdi_check_psw: - setf nl, r10 # - setf h, r6 # - add r6, r10 # Add the result of comparison NL and comparison H. - jmp [lp] - .size ___ucmpdi2, . - ___ucmpdi2 -#endif - -#ifdef L_muldi3 - .text - .global ___muldi3 - .type ___muldi3,@function -___muldi3: -#ifdef __v850__ - jarl __save_r26_r31, r10 - addi 16, sp, sp - mov r6, r28 - shr 15, r28 - movea lo(32767), r0, r14 - and r14, r28 - mov r8, r10 - shr 15, r10 - and r14, r10 - mov r6, r19 - shr 30, r19 - mov r7, r12 - shl 2, r12 - or r12, r19 - and r14, r19 - mov r8, r13 - shr 30, r13 - mov r9, r12 - shl 2, r12 - or r12, r13 - and r14, r13 - mov r7, r11 - shr 13, r11 - and r14, r11 - mov r9, r31 - shr 13, r31 - and r14, r31 - mov r7, r29 - shr 28, r29 - and r14, r29 - mov r9, r12 - shr 28, r12 - and r14, r12 - and r14, r6 - and r14, r8 - mov r6, r14 - mulh r8, r14 - mov r6, r16 - mulh r10, r16 - mov r6, r18 - mulh r13, r18 - mov r6, r15 - mulh r31, r15 - mulh r12, r6 - mov r28, r17 - mulh r10, r17 - add -16, sp - mov r28, r12 - mulh r8, r12 - add r17, r18 - mov r28, r17 - mulh r31, r17 - add r12, r16 - mov r28, r12 - mulh r13, r12 - add r17, r6 - mov r19, r17 - add r12, r15 - mov r19, r12 - mulh r8, r12 - mulh r10, r17 - add r12, r18 - mov r19, r12 - mulh r13, r12 - add r17, r15 - mov r11, r13 - mulh r8, r13 - add r12, r6 - mov r11, r12 - mulh r10, r12 - add r13, r15 - mulh r29, r8 - add r12, r6 - mov r16, r13 - shl 15, r13 - add r14, r13 - mov r18, r12 - shl 30, r12 - mov r13, r26 - add r12, r26 - shr 15, r14 - movhi hi(131071), r0, r12 - movea lo(131071), r12, r13 - and r13, r14 - mov r16, r12 - and r13, r12 - add r12, r14 - mov r18, r12 - shl 15, r12 - and r13, r12 - add r12, r14 - shr 17, r14 - shr 17, r16 - add r14, r16 - shl 13, r15 - shr 2, r18 - add r18, r15 - add r15, r16 - mov r16, r27 - add r8, r6 - shl 28, r6 - add r6, r27 - mov r26, r10 - mov r27, r11 - jr __return_r26_r31 -#else /* defined(__v850e__) */ - /* (Ahi << 32 + Alo) * (Bhi << 32 + Blo) */ - /* r7 r6 r9 r8 */ - mov r8, r10 - mulu r7, r8, r0 /* Ahi * Blo 
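This is the usual double-word recipe: only Alo * Blo needs a widening multiply, the cross terms Ahi * Blo and Alo * Bhi contribute only to the high word, and Ahi * Bhi overflows out of a 64-bit result entirely. A C model of ___muldi3, together with the 0/1/2 result convention of ___cmpdi2 above (the model names are illustrative):

#include <assert.h>
#include <stdint.h>

/* ___cmpdi2 returns 0, 1 or 2 for "less", "equal", "greater": a signed
   compare of the high words, then an unsigned compare of the low words. */
static int cmpdi2_model(int64_t a, int64_t b)
{
    int32_t  ahi = (int32_t)(a >> 32), bhi = (int32_t)(b >> 32);
    uint32_t alo = (uint32_t)a,        blo = (uint32_t)b;
    if (ahi != bhi) return ahi > bhi ? 2 : 0;
    if (alo != blo) return alo > blo ? 2 : 0;
    return 1;
}

/* ___muldi3: the low word is the low half of Alo * Blo; the high word adds
   the top half of Alo * Blo and the two cross products, all mod 2^32. */
static uint64_t muldi3_model(uint64_t a, uint64_t b)
{
    uint32_t alo = (uint32_t)a, ahi = (uint32_t)(a >> 32);
    uint32_t blo = (uint32_t)b, bhi = (uint32_t)(b >> 32);
    uint64_t low  = (uint64_t)alo * blo;                /* the one widening multiply */
    uint32_t high = (uint32_t)(low >> 32) + ahi * blo + alo * bhi;
    return ((uint64_t)high << 32) | (uint32_t)low;
}

int main(void)
{
    assert(cmpdi2_model(-5, 3) == 0);
    assert(cmpdi2_model(7, 7) == 1);
    assert(muldi3_model(0x123456789ULL, 1000003ULL) == 0x123456789ULL * 1000003ULL);
    return 0;
}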
*/ - mulu r6, r9, r0 /* Alo * Bhi */ - mulu r6, r10, r11 /* Alo * Blo */ - add r8, r11 - add r9, r11 - jmp [r31] -#endif /* defined(__v850e__) */ - .size ___muldi3, . - ___muldi3 -#endif - diff --git a/gcc/config/v850/t-v850 b/gcc/config/v850/t-v850 index fcd3b841e30..7885229e631 100644 --- a/gcc/config/v850/t-v850 +++ b/gcc/config/v850/t-v850 @@ -17,67 +17,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = v850/lib1funcs.asm -LIB1ASMFUNCS = _mulsi3 \ - _divsi3 \ - _udivsi3 \ - _modsi3 \ - _umodsi3 \ - _save_2 \ - _save_20 \ - _save_21 \ - _save_22 \ - _save_23 \ - _save_24 \ - _save_25 \ - _save_26 \ - _save_27 \ - _save_28 \ - _save_29 \ - _save_2c \ - _save_20c \ - _save_21c \ - _save_22c \ - _save_23c \ - _save_24c \ - _save_25c \ - _save_26c \ - _save_27c \ - _save_28c \ - _save_29c \ - _save_31c \ - _save_interrupt \ - _save_all_interrupt \ - _callt_save_20 \ - _callt_save_21 \ - _callt_save_22 \ - _callt_save_23 \ - _callt_save_24 \ - _callt_save_25 \ - _callt_save_26 \ - _callt_save_27 \ - _callt_save_28 \ - _callt_save_29 \ - _callt_save_20c \ - _callt_save_21c \ - _callt_save_22c \ - _callt_save_23c \ - _callt_save_24c \ - _callt_save_25c \ - _callt_save_26c \ - _callt_save_27c \ - _callt_save_28c \ - _callt_save_29c \ - _callt_save_31c \ - _callt_save_interrupt \ - _callt_save_all_interrupt \ - _callt_save_r2_r29 \ - _callt_save_r2_r31 \ - _negdi2 \ - _cmpdi2 \ - _ucmpdi2 \ - _muldi3 - # Create target-specific versions of the libraries MULTILIB_OPTIONS = mv850/mv850e/mv850e2/mv850e2v3 MULTILIB_DIRNAMES = v850 v850e v850e2 v850e2v3 diff --git a/gcc/config/vax/lib1funcs.asm b/gcc/config/vax/lib1funcs.asm deleted file mode 100644 index 1d57b56dad9..00000000000 --- a/gcc/config/vax/lib1funcs.asm +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (C) 2009 Free Software Foundation, Inc. - This file is part of GCC. - Contributed by Maciej W. Rozycki <macro@linux-mips.org>. - - This file is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3, or (at your option) any - later version. - - This file is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifdef L_udivsi3 - .text - .globl __udivsi3 - .type __udivsi3, @function -__udivsi3: - .word 0 - movl 8(%ap), %r1 - blss 0f /* Check bit #31 of divisor. */ - movl 4(%ap), %r2 - blss 1f /* Check bit #31 of dividend. */ - - /* Both zero, do a standard division. */ - - divl3 %r1, %r2, %r0 - ret - - /* MSB of divisor set, only 1 or 0 may result. */ -0: - decl %r1 - clrl %r0 - cmpl %r1, 4(%ap) - adwc $0, %r0 - ret - - /* MSB of dividend set, do an extended division. */ -1: - clrl %r3 - ediv %r1, %r2, %r0, %r3 - ret - .size __udivsi3, . 
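The VAX __udivsi3 above gets an unsigned 32-bit divide out of the machine's signed divide instructions by peeling off two easy cases: when the divisor has its top bit set the quotient can only be 0 or 1, and when only the dividend has its top bit set the 64/32-bit ediv instruction is used with a zero-extended dividend. Roughly, in C (a sketch of the idea, not the exact instruction sequence):

#include <assert.h>
#include <stdint.h>

static uint32_t udivsi3_model(uint32_t num, uint32_t den)
{
    if (den & 0x80000000u)             /* divisor >= 2^31: quotient is 0 or 1 */
        return num >= den;
    if (num & 0x80000000u)             /* dividend >= 2^31: widen, like ediv  */
        return (uint32_t)((uint64_t)num / den);
    return num / den;                  /* both fit a signed divide (divl3)    */
}

int main(void)
{
    assert(udivsi3_model(0xffffffffu, 0x80000001u) == 1);
    assert(udivsi3_model(0x80000000u, 3u) == 0x80000000u / 3u);
    assert(udivsi3_model(100u, 7u) == 14u);
    return 0;
}

__umodsi3 below uses the same case split and recovers the remainder instead of the quotient.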
- __udivsi3 - .previous -#endif - -#ifdef L_umodsi3 - .text - .globl __umodsi3 - .type __umodsi3, @function -__umodsi3: - .word 0 - movl 8(%ap), %r1 - blss 0f /* Check bit #31 of divisor. */ - movl 4(%ap), %r2 - blss 1f /* Check bit #31 of dividend. */ - - /* Both zero, do a standard division. */ - - divl3 %r1, %r2, %r0 - mull2 %r0, %r1 - subl3 %r1, %r2, %r0 - ret - - /* MSB of divisor set, subtract the divisor at most once. */ -0: - movl 4(%ap), %r2 - clrl %r0 - cmpl %r2, %r1 - sbwc $0, %r0 - bicl2 %r0, %r1 - subl3 %r1, %r2, %r0 - ret - - /* MSB of dividend set, do an extended division. */ -1: - clrl %r3 - ediv %r1, %r2, %r3, %r0 - ret - .size __umodsi3, . - __umodsi3 - .previous -#endif diff --git a/gcc/config/vax/t-linux b/gcc/config/vax/t-linux deleted file mode 100644 index 9af1edb0fab..00000000000 --- a/gcc/config/vax/t-linux +++ /dev/null @@ -1,2 +0,0 @@ -LIB1ASMSRC = vax/lib1funcs.asm -LIB1ASMFUNCS = _udivsi3 _umodsi3 diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S deleted file mode 100644 index 9b46889bdc2..00000000000 --- a/gcc/config/xtensa/ieee754-df.S +++ /dev/null @@ -1,2388 +0,0 @@ -/* IEEE-754 double-precision functions for Xtensa - Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifdef __XTENSA_EB__ -#define xh a2 -#define xl a3 -#define yh a4 -#define yl a5 -#else -#define xh a3 -#define xl a2 -#define yh a5 -#define yl a4 -#endif - -/* Warning! The branch displacements for some Xtensa branch instructions - are quite small, and this code has been carefully laid out to keep - branch targets in range. If you change anything, be sure to check that - the assembler is not relaxing anything to branch over a jump. */ - -#ifdef L_negdf2 - - .align 4 - .global __negdf2 - .type __negdf2, @function -__negdf2: - leaf_entry sp, 16 - movi a4, 0x80000000 - xor xh, xh, a4 - leaf_return - -#endif /* L_negdf2 */ - -#ifdef L_addsubdf3 - - /* Addition */ -__adddf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Ladd_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall yh, a6, 1f - /* If x is a NaN, return it. Otherwise, return y. */ - slli a7, xh, 12 - or a7, a7, xl - beqz a7, .Ladd_ynan_or_inf -1: leaf_return - -.Ladd_ynan_or_inf: - /* Return y. */ - mov xh, yh - mov xl, yl - leaf_return - -.Ladd_opposite_signs: - /* Operand signs differ. Do a subtraction. 
*/ - slli a7, a6, 11 - xor yh, yh, a7 - j .Lsub_same_sign - - .align 4 - .global __adddf3 - .type __adddf3, @function -__adddf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Check if the two operands have the same sign. */ - xor a7, xh, yh - bltz a7, .Ladd_opposite_signs - -.Ladd_same_sign: - /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ - ball xh, a6, .Ladd_xnan_or_inf - ball yh, a6, .Ladd_ynan_or_inf - - /* Compare the exponents. The smaller operand will be shifted - right by the exponent difference and added to the larger - one. */ - extui a7, xh, 20, 12 - extui a8, yh, 20, 12 - bltu a7, a8, .Ladd_shiftx - -.Ladd_shifty: - /* Check if the smaller (or equal) exponent is zero. */ - bnone yh, a6, .Ladd_yexpzero - - /* Replace yh sign/exponent with 0x001. */ - or yh, yh, a6 - slli yh, yh, 11 - srli yh, yh, 11 - -.Ladd_yexpdiff: - /* Compute the exponent difference. Optimize for difference < 32. */ - sub a10, a7, a8 - bgeui a10, 32, .Ladd_bigshifty - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out of yl are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, yl, a9 - src yl, yh, yl - srl yh, yh - -.Ladd_addy: - /* Do the 64-bit addition. */ - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: - /* Check if the add overflowed into the exponent. */ - extui a10, xh, 20, 12 - beq a10, a7, .Ladd_round - mov a8, a7 - j .Ladd_carry - -.Ladd_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0", and increment the apparent exponent - because subnormals behave as if they had the minimum (nonzero) - exponent. Test for the case when both exponents are zero. */ - slli yh, yh, 12 - srli yh, yh, 12 - bnone xh, a6, .Ladd_bothexpzero - addi a8, a8, 1 - j .Ladd_yexpdiff - -.Ladd_bothexpzero: - /* Both exponents are zero. Handle this as a special case. There - is no need to shift or round, and the normal code for handling - a carry into the exponent field will not work because it - assumes there is an implicit "1.0" that needs to be added. */ - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: leaf_return - -.Ladd_bigshifty: - /* Exponent difference > 64 -- just return the bigger value. */ - bgeui a10, 64, 1b - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out are saved in a9 for rounding the result. */ - ssr a10 - sll a11, yl /* lost bits shifted out of yl */ - src a9, yh, yl - srl yl, yh - movi yh, 0 - beqz a11, .Ladd_addy - or a9, a9, a10 /* any positive, nonzero value will work */ - j .Ladd_addy - -.Ladd_xexpzero: - /* Same as "yexpzero" except skip handling the case when both - exponents are zero. */ - slli xh, xh, 12 - srli xh, xh, 12 - addi a7, a7, 1 - j .Ladd_xexpdiff - -.Ladd_shiftx: - /* Same thing as the "shifty" code, but with x and y swapped. Also, - because the exponent difference is always nonzero in this version, - the shift sequence can use SLL and skip loading a constant zero. */ - bnone xh, a6, .Ladd_xexpzero - - or xh, xh, a6 - slli xh, xh, 11 - srli xh, xh, 11 - -.Ladd_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Ladd_bigshiftx - - ssr a10 - sll a9, xl - src xl, xh, xl - srl xh, xh - -.Ladd_addx: - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: - /* Check if the add overflowed into the exponent. */ - extui a10, xh, 20, 12 - bne a10, a8, .Ladd_carry - -.Ladd_round: - /* Round up if the leftover fraction is >= 1/2. 
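The leftover fraction lives in a9: its top bit is the first bit shifted out, and the remaining bits record whether anything further was lost. So a negative a9 means the discarded part is at least half a unit in the last place, and a9 becoming zero after a shift left by one means it is exactly half, in which case the result is forced to an even value. A small C sketch of that round-to-nearest-even rule (the carry into the exponent is handled separately, as in the code below):

#include <stdint.h>
#include <stdio.h>

static uint64_t round_nearest_even(uint64_t mantissa, uint32_t lost)
{
    if (lost & 0x80000000u) {             /* discarded part >= 1/2 ulp */
        mantissa += 1;
        if ((uint32_t)(lost << 1) == 0)   /* exactly 1/2: a tie */
            mantissa &= ~(uint64_t)1;     /* round to even: clear the lsb */
    }
    return mantissa;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)round_nearest_even(10, 0xC0000000u)); /* 11 */
    printf("%llu\n", (unsigned long long)round_nearest_even(10, 0x80000000u)); /* 10, tie, already even */
    printf("%llu\n", (unsigned long long)round_nearest_even(11, 0x80000000u)); /* 12, tie, rounds to even */
    return 0;
}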
*/ - bgez a9, 1f - addi xl, xl, 1 - beqz xl, .Ladd_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_bigshiftx: - /* Mostly the same thing as "bigshifty".... */ - bgeui a10, 64, .Ladd_returny - - ssr a10 - sll a11, xl - src a9, xh, xl - srl xl, xh - movi xh, 0 - beqz a11, .Ladd_addx - or a9, a9, a10 - j .Ladd_addx - -.Ladd_returny: - mov xh, yh - mov xl, yl - leaf_return - -.Ladd_carry: - /* The addition has overflowed into the exponent field, so the - value needs to be renormalized. The mantissa of the result - can be recovered by subtracting the original exponent and - adding 0x100000 (which is the explicit "1.0" for the - mantissa of the non-shifted operand -- the "1.0" for the - shifted operand was already added). The mantissa can then - be shifted right by one bit. The explicit "1.0" of the - shifted mantissa then needs to be replaced by the exponent, - incremented by one to account for the normalizing shift. - It is faster to combine these operations: do the shift first - and combine the additions and subtractions. If x is the - original exponent, the result is: - shifted mantissa - (x << 19) + (1 << 19) + (x << 20) - or: - shifted mantissa + ((x + 1) << 19) - Note that the exponent is incremented here by leaving the - explicit "1.0" of the mantissa in the exponent field. */ - - /* Shift xh/xl right by one bit. Save the lsb of xl. */ - mov a10, xl - ssai 1 - src xl, xh, xl - srl xh, xh - - /* See explanation above. The original exponent is in a8. */ - addi a8, a8, 1 - slli a8, a8, 19 - add xh, xh, a8 - - /* Return an Infinity if the exponent overflowed. */ - ball xh, a6, .Ladd_infinity - - /* Same thing as the "round" code except the msb of the leftover - fraction is bit 0 of a10, with the rest of the fraction in a9. */ - bbci.l a10, 0, 1f - addi xl, xl, 1 - beqz xl, .Ladd_roundcarry - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_infinity: - /* Clear the mantissa. */ - movi xl, 0 - srli xh, xh, 20 - slli xh, xh, 20 - - /* The sign bit may have been lost in a carry-out. Put it back. */ - slli a8, a8, 1 - or xh, xh, a8 - leaf_return - -.Ladd_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Ladd_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - - - /* Subtraction */ -__subdf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Lsub_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall yh, a6, 1f - /* Both x and y are either NaN or Inf, so the result is NaN. */ - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: leaf_return - -.Lsub_ynan_or_inf: - /* Negate y and return it. */ - slli a7, a6, 11 - xor xh, yh, a7 - mov xl, yl - leaf_return - -.Lsub_opposite_signs: - /* Operand signs differ. Do an addition. */ - slli a7, a6, 11 - xor yh, yh, a7 - j .Ladd_same_sign - - .align 4 - .global __subdf3 - .type __subdf3, @function -__subdf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Check if the two operands have the same sign. */ - xor a7, xh, yh - bltz a7, .Lsub_opposite_signs - -.Lsub_same_sign: - /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). 
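The "ball xh, a6" test with a6 = 0x7ff00000 branches when every bit of the exponent field is set, which is exactly how IEEE-754 encodes Infinity (zero fraction) and NaN (nonzero fraction). The equivalent C test on the high word of a double, for reference:

#include <assert.h>
#include <stdint.h>

static int high_word_is_nan_or_inf(uint32_t xh)
{
    return (xh & 0x7ff00000u) == 0x7ff00000u;   /* exponent bits all ones */
}

int main(void)
{
    assert(high_word_is_nan_or_inf(0x7ff00000u));    /* +Infinity   */
    assert(high_word_is_nan_or_inf(0xfff80000u));    /* a quiet NaN */
    assert(!high_word_is_nan_or_inf(0x3ff00000u));   /* 1.0         */
    return 0;
}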
*/ - ball xh, a6, .Lsub_xnan_or_inf - ball yh, a6, .Lsub_ynan_or_inf - - /* Compare the operands. In contrast to addition, the entire - value matters here. */ - extui a7, xh, 20, 11 - extui a8, yh, 20, 11 - bltu xh, yh, .Lsub_xsmaller - beq xh, yh, .Lsub_compare_low - -.Lsub_ysmaller: - /* Check if the smaller (or equal) exponent is zero. */ - bnone yh, a6, .Lsub_yexpzero - - /* Replace yh sign/exponent with 0x001. */ - or yh, yh, a6 - slli yh, yh, 11 - srli yh, yh, 11 - -.Lsub_yexpdiff: - /* Compute the exponent difference. Optimize for difference < 32. */ - sub a10, a7, a8 - bgeui a10, 32, .Lsub_bigshifty - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out of yl are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, yl, a9 - src yl, yh, yl - srl yh, yh - -.Lsub_suby: - /* Do the 64-bit subtraction. */ - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from xh/xl. */ - neg a9, a9 - beqz a9, 1f - addi a5, xh, -1 - moveqz xh, a5, xl - addi xl, xl, -1 -1: - /* Check if the subtract underflowed into the exponent. */ - extui a10, xh, 20, 11 - beq a10, a7, .Lsub_round - j .Lsub_borrow - -.Lsub_compare_low: - /* The high words are equal. Compare the low words. */ - bltu xl, yl, .Lsub_xsmaller - bltu yl, xl, .Lsub_ysmaller - /* The operands are equal. Return 0.0. */ - movi xh, 0 - movi xl, 0 -1: leaf_return - -.Lsub_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0". Unless x is also a subnormal, increment - y's apparent exponent because subnormals behave as if they had - the minimum (nonzero) exponent. */ - slli yh, yh, 12 - srli yh, yh, 12 - bnone xh, a6, .Lsub_yexpdiff - addi a8, a8, 1 - j .Lsub_yexpdiff - -.Lsub_bigshifty: - /* Exponent difference > 64 -- just return the bigger value. */ - bgeui a10, 64, 1b - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out are saved in a9 for rounding the result. */ - ssr a10 - sll a11, yl /* lost bits shifted out of yl */ - src a9, yh, yl - srl yl, yh - movi yh, 0 - beqz a11, .Lsub_suby - or a9, a9, a10 /* any positive, nonzero value will work */ - j .Lsub_suby - -.Lsub_xsmaller: - /* Same thing as the "ysmaller" code, but with x and y swapped and - with y negated. */ - bnone xh, a6, .Lsub_xexpzero - - or xh, xh, a6 - slli xh, xh, 11 - srli xh, xh, 11 - -.Lsub_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Lsub_bigshiftx - - ssr a10 - movi a9, 0 - src a9, xl, a9 - src xl, xh, xl - srl xh, xh - - /* Negate y. */ - slli a11, a6, 11 - xor yh, yh, a11 - -.Lsub_subx: - sub xl, yl, xl - sub xh, yh, xh - bgeu yl, xl, 1f - addi xh, xh, -1 -1: - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from xh/xl. */ - neg a9, a9 - beqz a9, 1f - addi a5, xh, -1 - moveqz xh, a5, xl - addi xl, xl, -1 -1: - /* Check if the subtract underflowed into the exponent. */ - extui a10, xh, 20, 11 - bne a10, a8, .Lsub_borrow - -.Lsub_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi xl, xl, 1 - beqz xl, .Lsub_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Lsub_exactlyhalf -1: leaf_return - -.Lsub_xexpzero: - /* Same as "yexpzero". 
*/ - slli xh, xh, 12 - srli xh, xh, 12 - bnone yh, a6, .Lsub_xexpdiff - addi a7, a7, 1 - j .Lsub_xexpdiff - -.Lsub_bigshiftx: - /* Mostly the same thing as "bigshifty", but with the sign bit of the - shifted value set so that the subsequent subtraction flips the - sign of y. */ - bgeui a10, 64, .Lsub_returny - - ssr a10 - sll a11, xl - src a9, xh, xl - srl xl, xh - slli xh, a6, 11 /* set sign bit of xh */ - beqz a11, .Lsub_subx - or a9, a9, a10 - j .Lsub_subx - -.Lsub_returny: - /* Negate and return y. */ - slli a7, a6, 11 - xor xh, yh, a7 - mov xl, yl - leaf_return - -.Lsub_borrow: - /* The subtraction has underflowed into the exponent field, so the - value needs to be renormalized. Shift the mantissa left as - needed to remove any leading zeros and adjust the exponent - accordingly. If the exponent is not large enough to remove - all the leading zeros, the result will be a subnormal value. */ - - slli a8, xh, 12 - beqz a8, .Lsub_xhzero - do_nsau a6, a8, a7, a11 - srli a8, a8, 12 - bge a6, a10, .Lsub_subnormal - addi a6, a6, 1 - -.Lsub_shift_lt32: - /* Shift the mantissa (a8/xl/a9) left by a6. */ - ssl a6 - src a8, a8, xl - src xl, xl, a9 - sll a9, a9 - - /* Combine the shifted mantissa with the sign and exponent, - decrementing the exponent by a6. (The exponent has already - been decremented by one due to the borrow from the subtraction, - but adding the mantissa will increment the exponent by one.) */ - srli xh, xh, 20 - sub xh, xh, a6 - slli xh, xh, 20 - add xh, xh, a8 - j .Lsub_round - -.Lsub_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Lsub_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - -.Lsub_xhzero: - /* When normalizing the result, all the mantissa bits in the high - word are zero. Shift by "20 + (leading zero count of xl) + 1". */ - do_nsau a6, xl, a7, a11 - addi a6, a6, 21 - blt a10, a6, .Lsub_subnormal - -.Lsub_normalize_shift: - bltui a6, 32, .Lsub_shift_lt32 - - ssl a6 - src a8, xl, a9 - sll xl, a9 - movi a9, 0 - - srli xh, xh, 20 - sub xh, xh, a6 - slli xh, xh, 20 - add xh, xh, a8 - j .Lsub_round - -.Lsub_subnormal: - /* The exponent is too small to shift away all the leading zeros. - Set a6 to the current exponent (which has already been - decremented by the borrow) so that the exponent of the result - will be zero. Do not add 1 to a6 in this case, because: (1) - adding the mantissa will not increment the exponent, so there is - no need to subtract anything extra from the exponent to - compensate, and (2) the effective exponent of a subnormal is 1 - not 0 so the shift amount must be 1 smaller than normal. */ - mov a6, a10 - j .Lsub_normalize_shift - -#endif /* L_addsubdf3 */ - -#ifdef L_muldf3 - - /* Multiplication */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - -__muldf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Lmul_xexpzero: - /* Clear the sign bit of x. */ - slli xh, xh, 1 - srli xh, xh, 1 - - /* If x is zero, return zero. */ - or a10, xh, xl - beqz a10, .Lmul_return_zero - - /* Normalize x. Adjust the exponent in a8. 
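A subnormal input has a zero exponent field and no implicit leading one, so before multiplying, the code counts leading zeros with the do_nsau helper, shifts the fraction up until its leading one reaches the hidden-bit position, and compensates by driving the working exponent negative. The same adjustment in C, treating the 52-bit fraction as a single 64-bit word and using GCC's __builtin_clzll in place of NSAU (a simplification of the two-word shuffle done here):

#include <assert.h>
#include <stdint.h>

static void normalize_subnormal(uint64_t *frac52, int *biased_exp)
{
    int shift = __builtin_clzll(*frac52) - 11;   /* bit 52 sits 11 below bit 63 */
    *frac52 <<= shift;                           /* leading one now at bit 52 */
    *biased_exp = 1 - shift;                     /* subnormals act as if the exponent were 1 */
}

int main(void)
{
    uint64_t f = 1;                  /* fraction of the smallest subnormal, 2^-1074 */
    int e;
    normalize_subnormal(&f, &e);
    assert(f == (uint64_t)1 << 52);
    assert(e == -51);                /* well below the normal exponent range, as expected */
    return 0;
}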
*/ - beqz xh, .Lmul_xh_zero - do_nsau a10, xh, a11, a12 - addi a10, a10, -11 - ssl a10 - src xh, xh, xl - sll xl, xl - movi a8, 1 - sub a8, a8, a10 - j .Lmul_xnormalized -.Lmul_xh_zero: - do_nsau a10, xl, a11, a12 - addi a10, a10, -11 - movi a8, -31 - sub a8, a8, a10 - ssl a10 - bltz a10, .Lmul_xl_srl - sll xh, xl - movi xl, 0 - j .Lmul_xnormalized -.Lmul_xl_srl: - srl xh, xl - sll xl, xl - j .Lmul_xnormalized - -.Lmul_yexpzero: - /* Clear the sign bit of y. */ - slli yh, yh, 1 - srli yh, yh, 1 - - /* If y is zero, return zero. */ - or a10, yh, yl - beqz a10, .Lmul_return_zero - - /* Normalize y. Adjust the exponent in a9. */ - beqz yh, .Lmul_yh_zero - do_nsau a10, yh, a11, a12 - addi a10, a10, -11 - ssl a10 - src yh, yh, yl - sll yl, yl - movi a9, 1 - sub a9, a9, a10 - j .Lmul_ynormalized -.Lmul_yh_zero: - do_nsau a10, yl, a11, a12 - addi a10, a10, -11 - movi a9, -31 - sub a9, a9, a10 - ssl a10 - bltz a10, .Lmul_yl_srl - sll yh, yl - movi yl, 0 - j .Lmul_ynormalized -.Lmul_yl_srl: - srl yh, yl - sll yl, yl - j .Lmul_ynormalized - -.Lmul_return_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - j .Lmul_done - -.Lmul_xnan_or_inf: - /* If y is zero, return NaN. */ - bnez yl, 1f - slli a8, yh, 1 - bnez a8, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 - j .Lmul_done -1: - /* If y is NaN, return y. */ - bnall yh, a6, .Lmul_returnx - slli a8, yh, 12 - or a8, a8, yl - beqz a8, .Lmul_returnx - -.Lmul_returny: - mov xh, yh - mov xl, yl - -.Lmul_returnx: - /* Set the sign bit and return. */ - extui a7, a7, 31, 1 - slli xh, xh, 1 - ssai 1 - src xh, a7, xh - j .Lmul_done - -.Lmul_ynan_or_inf: - /* If x is zero, return NaN. */ - bnez xl, .Lmul_returny - slli a8, xh, 1 - bnez a8, .Lmul_returny - movi a7, 0x80000 /* make it a quiet NaN */ - or xh, yh, a7 - j .Lmul_done - - .align 4 - .global __muldf3 - .type __muldf3, @function -__muldf3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 64 -#else - leaf_entry sp, 32 -#endif - movi a6, 0x7ff00000 - - /* Get the sign of the result. */ - xor a7, xh, yh - - /* Check for NaN and infinity. */ - ball xh, a6, .Lmul_xnan_or_inf - ball yh, a6, .Lmul_ynan_or_inf - - /* Extract the exponents. */ - extui a8, xh, 20, 11 - extui a9, yh, 20, 11 - - beqz a8, .Lmul_xexpzero -.Lmul_xnormalized: - beqz a9, .Lmul_yexpzero -.Lmul_ynormalized: - - /* Add the exponents. */ - add a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0x1fffff - or xh, xh, a6 - and xh, xh, a10 - or yh, yh, a6 - and yh, yh, a10 - - /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. - The least-significant word of the result is thrown away except - that if it is nonzero, the lsb of a6 is set to 1. */ -#if XCHAL_HAVE_MUL32_HIGH - - /* Compute a6 with any carry-outs in a10. */ - movi a10, 0 - mull a6, xl, yh - mull a11, xh, yl - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - muluh a11, xl, yl - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - /* If the low word of the result is nonzero, set the lsb of a6. */ - mull a11, xl, yl - beqz a11, 1f - movi a9, 1 - or a6, a6, a9 -1: - /* Compute xl with any carry-outs in a9. 
*/ - movi a9, 0 - mull a11, xh, yh - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - muluh a11, xh, yl - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - muluh xl, xl, yh - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - /* Compute xh. */ - muluh xh, xh, yh - add xh, xh, a9 - -#else /* ! XCHAL_HAVE_MUL32_HIGH */ - - /* Break the inputs into 16-bit chunks and compute 16 32-bit partial - products. These partial products are: - - 0 xll * yll - - 1 xll * ylh - 2 xlh * yll - - 3 xll * yhl - 4 xlh * ylh - 5 xhl * yll - - 6 xll * yhh - 7 xlh * yhl - 8 xhl * ylh - 9 xhh * yll - - 10 xlh * yhh - 11 xhl * yhl - 12 xhh * ylh - - 13 xhl * yhh - 14 xhh * yhl - - 15 xhh * yhh - - where the input chunks are (hh, hl, lh, ll). If using the Mul16 - or Mul32 multiplier options, these input chunks must be stored in - separate registers. For Mac16, the UMUL.AA.* opcodes can specify - that the inputs come from either half of the registers, so there - is no need to shift them out ahead of time. If there is no - multiply hardware, the 16-bit chunks can be extracted when setting - up the arguments to the separate multiply function. */ - - /* Save a7 since it is needed to hold a temporary value. */ - s32i a7, sp, 4 -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Calling a separate multiply function will clobber a0 and requires - use of a8 as a temporary, so save those values now. (The function - uses a custom ABI so nothing else needs to be saved.) */ - s32i a0, sp, 0 - s32i a8, sp, 8 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define xlh a12 -#define ylh a13 -#define xhh a14 -#define yhh a15 - - /* Get the high halves of the inputs into registers. */ - srli xlh, xl, 16 - srli ylh, yl, 16 - srli xhh, xh, 16 - srli yhh, yh, 16 - -#define xll xl -#define yll yl -#define xhl xh -#define yhl yh - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui xl, xl, 0, 16 - extui xh, xh, 0, 16 - extui yl, yl, 0, 16 - extui yh, yh, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. */ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a10 with carry-out in a9. 
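The partial-product table above is schoolbook multiplication in base 2^16. A portable C version of the same 64x64 -> 128-bit product is sketched below; it uses a small array of 16-bit digits instead of the hand-scheduled carry chains, so it is illustrative rather than equivalent register-for-register:

#include <stdint.h>

/* 64x64 -> 128-bit multiply from sixteen 16x16 partial products, the same
   decomposition listed above.  Each product is added into an array of
   16-bit "digits"; the real code instead folds the carries into specific
   registers as it goes.  Sketch only.  */
static void mul64x64_128 (uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo)
{
  uint32_t digit[8] = { 0 };

  for (int i = 0; i < 4; i++)
    {
      uint32_t xi = (x >> (16 * i)) & 0xffff;
      uint64_t carry = 0;
      for (int j = 0; j < 4; j++)
        {
          uint32_t yj = (y >> (16 * j)) & 0xffff;
          uint64_t t = (uint64_t) xi * yj + digit[i + j] + carry;
          digit[i + j] = (uint32_t) (t & 0xffff);
          carry = t >> 16;
        }
      digit[i + 4] += (uint32_t) carry;
    }

  *lo = (uint64_t) digit[0] | (uint64_t) digit[1] << 16
        | (uint64_t) digit[2] << 32 | (uint64_t) digit[3] << 48;
  *hi = (uint64_t) digit[4] | (uint64_t) digit[5] << 16
        | (uint64_t) digit[6] << 32 | (uint64_t) digit[7] << 48;
}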
*/ - do_mul(a10, xl, l, yl, h) /* pp 1 */ - do_mul(a11, xl, h, yl, l) /* pp 2 */ - movi a9, 0 - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - /* Initialize a6 with a9/a10 shifted into position. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a6, a9, a10 - - /* Compute the low word into a10. */ - do_mul(a11, xl, l, yl, l) /* pp 0 */ - sll a10, a10 - add a10, a10, a11 - bgeu a10, a11, 1f - addi a6, a6, 1 -1: - /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. - This is good enough to determine the low half of a6, so that any - nonzero bits from the low word of the result can be collapsed - into a6, freeing up a register. */ - movi a9, 0 - do_mul(a11, xl, l, yh, l) /* pp 3 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - do_mul(a11, xl, h, yl, h) /* pp 4 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - do_mul(a11, xh, l, yl, l) /* pp 5 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Collapse any nonzero bits from the low word into a6. */ - beqz a10, 1f - movi a11, 1 - or a6, a6, a11 -1: - /* Add pp6-9 into a11 with carry-outs in a10. */ - do_mul(a7, xl, l, yh, h) /* pp 6 */ - do_mul(a11, xh, h, yl, l) /* pp 9 */ - movi a10, 0 - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - do_mul(a7, xl, h, yh, l) /* pp 7 */ - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - do_mul(a7, xh, l, yl, h) /* pp 8 */ - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - /* Shift a10/a11 into position, and add low half of a11 to a6. */ - src a10, a10, a11 - add a10, a10, a9 - sll a11, a11 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - /* Add pp10-12 into xl with carry-outs in a9. */ - movi a9, 0 - do_mul(xl, xl, h, yh, h) /* pp 10 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - do_mul(a10, xh, l, yh, l) /* pp 11 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - do_mul(a10, xh, h, yl, h) /* pp 12 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - /* Add pp13-14 into a11 with carry-outs in a10. */ - do_mul(a11, xh, l, yh, h) /* pp 13 */ - do_mul(a7, xh, h, yh, l) /* pp 14 */ - movi a10, 0 - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - /* Shift a10/a11 into position, and add low half of a11 to a6. */ - src a10, a10, a11 - add a10, a10, a9 - sll a11, a11 - add xl, xl, a11 - bgeu xl, a11, 1f - addi a10, a10, 1 -1: - /* Compute xh. */ - do_mul(xh, xh, h, yh, h) /* pp 15 */ - add xh, xh, a10 - - /* Restore values saved on the stack during the multiplication. */ - l32i a7, sp, 4 -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - l32i a0, sp, 0 - l32i a8, sp, 8 -#endif -#endif /* ! XCHAL_HAVE_MUL32_HIGH */ - - /* Shift left by 12 bits, unless there was a carry-out from the - multiply, in which case, shift by 11 bits and increment the - exponent. Note: It is convenient to use the constant 0x3ff - instead of 0x400 when removing the extra exponent bias (so that - it is easy to construct 0x7fe for the overflow check). Reverse - the logic here to decrement the exponent sum by one unless there - was a carry-out. */ - movi a4, 11 - srli a5, xh, 21 - 12 - bnez a5, 1f - addi a4, a4, 1 - addi a8, a8, -1 -1: ssl a4 - src xh, xh, xl - src xl, xl, a6 - sll a6, a6 - - /* Subtract the extra bias from the exponent sum (plus one to account - for the explicit "1.0" of the mantissa that will be added to the - exponent in the final result). */ - movi a4, 0x3ff - sub a8, a8, a4 - - /* Check for over/underflow. 
The value in a8 is one less than the - final exponent, so values in the range 0..7fd are OK here. */ - slli a4, a4, 1 /* 0x7fe */ - bgeu a8, a4, .Lmul_overflow - -.Lmul_round: - /* Round. */ - bgez a6, .Lmul_rounded - addi xl, xl, 1 - beqz xl, .Lmul_roundcarry - slli a6, a6, 1 - beqz a6, .Lmul_exactlyhalf - -.Lmul_rounded: - /* Add the exponent to the mantissa. */ - slli a8, a8, 20 - add xh, xh, a8 - -.Lmul_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or xh, xh, a7 - -.Lmul_done: -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -.Lmul_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - j .Lmul_rounded - -.Lmul_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow is OK -- it will be added to the exponent. */ - j .Lmul_rounded - -.Lmul_overflow: - bltz a8, .Lmul_underflow - /* Return +/- Infinity. */ - addi a8, a4, 1 /* 0x7ff */ - slli xh, a8, 20 - movi xl, 0 - j .Lmul_addsign - -.Lmul_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - mov a9, a6 - ssr a8 - bgeui a8, 32, .Lmul_bigshift - - /* Shift xh/xl right. Any bits that are shifted out of xl are saved - in a6 (combined with the shifted-out bits currently in a6) for - rounding the result. */ - sll a6, xl - src xl, xh, xl - srl xh, xh - j 1f - -.Lmul_bigshift: - bgeui a8, 64, .Lmul_flush_to_zero - sll a10, xl /* lost bits shifted out of xl */ - src a6, xh, xl - srl xl, xh - movi xh, 0 - or a9, a9, a10 - - /* Set the exponent to zero. */ -1: movi a8, 0 - - /* Pack any nonzero bits shifted out into a6. */ - beqz a9, .Lmul_round - movi a9, 1 - or a6, a6, a9 - j .Lmul_round - -.Lmul_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - j .Lmul_done - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ -#endif /* L_muldf3 */ - -#ifdef L_divdf3 - - /* Division */ -__divdf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) 
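The .Lmul_mulsi3 helper a few lines above is a shift-and-add multiply that retires four bits of the first operand per iteration, using MOVNEZ plus ADDX2/ADDX4/ADDX8 instead of branches. The same loop in C, as an illustrative sketch that ignores the custom register-passing ABI described in the comment:

#include <stdint.h>

/* Shift-and-add multiply, four bits of 'a' per iteration, as in
   .Lmul_mulsi3 above.  Sketch only.  */
static uint32_t mulsi3_sketch (uint32_t a, uint32_t b)
{
  uint32_t acc = 0;

  while (a != 0)
    {
      if (a & 1) acc += b;           /* add 1*b */
      if (a & 2) acc += b << 1;      /* add 2*b */
      if (a & 4) acc += b << 2;      /* add 4*b */
      if (a & 8) acc += b << 3;      /* add 8*b */
      a >>= 4;
      b <<= 4;
    }
  return acc;
}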
*/ - -.Ldiv_yexpzero: - /* Clear the sign bit of y. */ - slli yh, yh, 1 - srli yh, yh, 1 - - /* Check for division by zero. */ - or a10, yh, yl - beqz a10, .Ldiv_yzero - - /* Normalize y. Adjust the exponent in a9. */ - beqz yh, .Ldiv_yh_zero - do_nsau a10, yh, a11, a9 - addi a10, a10, -11 - ssl a10 - src yh, yh, yl - sll yl, yl - movi a9, 1 - sub a9, a9, a10 - j .Ldiv_ynormalized -.Ldiv_yh_zero: - do_nsau a10, yl, a11, a9 - addi a10, a10, -11 - movi a9, -31 - sub a9, a9, a10 - ssl a10 - bltz a10, .Ldiv_yl_srl - sll yh, yl - movi yl, 0 - j .Ldiv_ynormalized -.Ldiv_yl_srl: - srl yh, yl - sll yl, yl - j .Ldiv_ynormalized - -.Ldiv_yzero: - /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ - slli xh, xh, 1 - srli xh, xh, 1 - or xl, xl, xh - srli xh, a7, 31 - slli xh, xh, 31 - or xh, xh, a6 - bnez xl, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: movi xl, 0 - leaf_return - -.Ldiv_xexpzero: - /* Clear the sign bit of x. */ - slli xh, xh, 1 - srli xh, xh, 1 - - /* If x is zero, return zero. */ - or a10, xh, xl - beqz a10, .Ldiv_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - beqz xh, .Ldiv_xh_zero - do_nsau a10, xh, a11, a8 - addi a10, a10, -11 - ssl a10 - src xh, xh, xl - sll xl, xl - movi a8, 1 - sub a8, a8, a10 - j .Ldiv_xnormalized -.Ldiv_xh_zero: - do_nsau a10, xl, a11, a8 - addi a10, a10, -11 - movi a8, -31 - sub a8, a8, a10 - ssl a10 - bltz a10, .Ldiv_xl_srl - sll xh, xl - movi xl, 0 - j .Ldiv_xnormalized -.Ldiv_xl_srl: - srl xh, xl - sll xl, xl - j .Ldiv_xnormalized - -.Ldiv_return_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - leaf_return - -.Ldiv_xnan_or_inf: - /* Set the sign bit of the result. */ - srli a7, yh, 31 - slli a7, a7, 31 - xor xh, xh, a7 - /* If y is NaN or Inf, return NaN. */ - bnall yh, a6, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: leaf_return - -.Ldiv_ynan_or_inf: - /* If y is Infinity, return zero. */ - slli a8, yh, 12 - or a8, a8, yl - beqz a8, .Ldiv_return_zero - /* y is NaN; return it. */ - mov xh, yh - mov xl, yl - leaf_return - -.Ldiv_highequal1: - bltu xl, yl, 2f - j 3f - - .align 4 - .global __divdf3 - .type __divdf3, @function -__divdf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Get the sign of the result. */ - xor a7, xh, yh - - /* Check for NaN and infinity. */ - ball xh, a6, .Ldiv_xnan_or_inf - ball yh, a6, .Ldiv_ynan_or_inf - - /* Extract the exponents. */ - extui a8, xh, 20, 11 - extui a9, yh, 20, 11 - - beqz a9, .Ldiv_yexpzero -.Ldiv_ynormalized: - beqz a8, .Ldiv_xexpzero -.Ldiv_xnormalized: - - /* Subtract the exponents. */ - sub a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0x1fffff - or xh, xh, a6 - and xh, xh, a10 - or yh, yh, a6 - and yh, yh, a10 - - /* Set SAR for left shift by one. */ - ssai (32 - 1) - - /* The first digit of the mantissa division must be a one. - Shift x (and adjust the exponent) as needed to make this true. */ - bltu yh, xh, 3f - beq yh, xh, .Ldiv_highequal1 -2: src xh, xh, xl - sll xl, xl - addi a8, a8, -1 -3: - /* Do the first subtraction and shift. */ - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - src xh, xh, xl - sll xl, xl - - /* Put the quotient into a10/a11. */ - movi a10, 0 - movi a11, 1 - - /* Divide one bit at a time for 52 bits. */ - movi a9, 52 -#if XCHAL_HAVE_LOOPS - loop a9, .Ldiv_loopend -#endif -.Ldiv_loop: - /* Shift the quotient << 1. */ - src a10, a10, a11 - sll a11, a11 - - /* Is this digit a 0 or 1? 
*/ - bltu xh, yh, 3f - beq xh, yh, .Ldiv_highequal2 - - /* Output a 1 and subtract. */ -2: addi a11, a11, 1 - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - - /* Shift the dividend << 1. */ -3: src xh, xh, xl - sll xl, xl - -#if !XCHAL_HAVE_LOOPS - addi a9, a9, -1 - bnez a9, .Ldiv_loop -#endif -.Ldiv_loopend: - - /* Add the exponent bias (less one to account for the explicit "1.0" - of the mantissa that will be added to the exponent in the final - result). */ - movi a9, 0x3fe - add a8, a8, a9 - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..7fd are OK here. */ - addmi a9, a9, 0x400 /* 0x7fe */ - bgeu a8, a9, .Ldiv_overflow - -.Ldiv_round: - /* Round. The remainder (<< 1) is in xh/xl. */ - bltu xh, yh, .Ldiv_rounded - beq xh, yh, .Ldiv_highequal3 -.Ldiv_roundup: - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - -.Ldiv_rounded: - mov xl, a11 - /* Add the exponent to the mantissa. */ - slli a8, a8, 20 - add xh, a10, a8 - -.Ldiv_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or xh, xh, a7 - leaf_return - -.Ldiv_highequal2: - bgeu xl, yl, 2b - j 3b - -.Ldiv_highequal3: - bltu xl, yl, .Ldiv_rounded - bne xl, yl, .Ldiv_roundup - - /* Remainder is exactly half the divisor. Round even. */ - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - srli a11, a11, 1 - slli a11, a11, 1 - j .Ldiv_rounded - -.Ldiv_overflow: - bltz a8, .Ldiv_underflow - /* Return +/- Infinity. */ - addi a8, a9, 1 /* 0x7ff */ - slli xh, a8, 20 - movi xl, 0 - j .Ldiv_addsign - -.Ldiv_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - ssr a8 - bgeui a8, 32, .Ldiv_bigshift - - /* Shift a10/a11 right. Any bits that are shifted out of a11 are - saved in a6 for rounding the result. */ - sll a6, a11 - src a11, a10, a11 - srl a10, a10 - j 1f - -.Ldiv_bigshift: - bgeui a8, 64, .Ldiv_flush_to_zero - sll a9, a11 /* lost bits shifted out of a11 */ - src a6, a10, a11 - srl a11, a10 - movi a10, 0 - or xl, xl, a9 - - /* Set the exponent to zero. */ -1: movi a8, 0 - - /* Pack any nonzero remainder (in xh/xl) into a6. */ - or xh, xh, xl - beqz xh, 1f - movi a9, 1 - or a6, a6, a9 - - /* Round a10/a11 based on the bits shifted out into a6. */ -1: bgez a6, .Ldiv_rounded - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - slli a6, a6, 1 - bnez a6, .Ldiv_rounded - srli a11, a11, 1 - slli a11, a11, 1 - j .Ldiv_rounded - -.Ldiv_roundcarry: - /* a11 is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi a10, a10, 1 - /* Overflow to the exponent field is OK. */ - j .Ldiv_rounded - -.Ldiv_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - leaf_return - -#endif /* L_divdf3 */ - -#ifdef L_cmpdf2 - - /* Equal and Not Equal */ - - .align 4 - .global __eqdf2 - .global __nedf2 - .set __nedf2, __eqdf2 - .type __eqdf2, @function -__eqdf2: - leaf_entry sp, 16 - bne xl, yl, 2f - bne xh, yh, 4f - - /* The values are equal but NaN != NaN. Check the exponent. */ - movi a6, 0x7ff00000 - ball xh, a6, 3f - - /* Equal. */ - movi a2, 0 - leaf_return - - /* Not equal. */ -2: movi a2, 1 - leaf_return - - /* Check if the mantissas are nonzero. */ -3: slli a7, xh, 12 - or a7, a7, xl - j 5f - - /* Check if x and y are zero with different signs. 
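The quotient loop above (.Ldiv_loop) is plain restoring division: one quotient bit per step, subtracting the divisor whenever the running remainder is at least as large. A C sketch of the loop, assuming 64-bit significands with the implicit "1.0" already in place and the dividend pre-shifted so that the first quotient bit is 1, as the code arranges before entering the loop (names are illustrative):

#include <stdint.h>

/* Restoring division of two normalized significands, producing 'bits'
   quotient bits and the final (already doubled) remainder for rounding.
   Mirrors .Ldiv_loop above.  */
static uint64_t div_sig (uint64_t num, uint64_t den, int bits, uint64_t *rem)
{
  uint64_t quo = 0;

  for (int i = 0; i < bits; i++)
    {
      quo <<= 1;                     /* shift the quotient << 1 */
      if (num >= den)                /* is this digit a 0 or 1? */
        {
          quo |= 1;                  /* output a 1 and subtract */
          num -= den;
        }
      num <<= 1;                     /* shift the dividend << 1 */
    }
  *rem = num;                        /* remainder << 1, used for rounding */
  return quo;
}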
*/ -4: or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl /* xl == yl here */ - - /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa - or x when exponent(x) = 0x7ff and x == y. */ -5: movi a2, 0 - movi a3, 1 - movnez a2, a3, a7 - leaf_return - - - /* Greater Than */ - - .align 4 - .global __gtdf2 - .type __gtdf2, @function -__gtdf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Lle_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 0 - leaf_return - - - /* Less Than or Equal */ - - .align 4 - .global __ledf2 - .type __ledf2, @function -__ledf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Lle_cmp - movi a2, 1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 1 - leaf_return - -.Lle_cmp: - /* Check if x and y have different signs. */ - xor a7, xh, yh - bltz a7, .Lle_diff_signs - - /* Check if x is negative. */ - bltz xh, .Lle_xneg - - /* Check if x <= y. */ - bltu xh, yh, 4f - bne xh, yh, 5f - bltu yl, xl, 5f -4: movi a2, 0 - leaf_return - -.Lle_xneg: - /* Check if y <= x. */ - bltu yh, xh, 4b - bne yh, xh, 5f - bgeu xl, yl, 4b -5: movi a2, 1 - leaf_return - -.Lle_diff_signs: - bltz xh, 4b - - /* Check if both x and y are zero. */ - or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl - or a7, a7, yl - movi a2, 1 - movi a3, 0 - moveqz a2, a3, a7 - leaf_return - - - /* Greater Than or Equal */ - - .align 4 - .global __gedf2 - .type __gedf2, @function -__gedf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Llt_cmp - movi a2, -1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, -1 - leaf_return - - - /* Less Than */ - - .align 4 - .global __ltdf2 - .type __ltdf2, @function -__ltdf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Llt_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 0 - leaf_return - -.Llt_cmp: - /* Check if x and y have different signs. */ - xor a7, xh, yh - bltz a7, .Llt_diff_signs - - /* Check if x is negative. */ - bltz xh, .Llt_xneg - - /* Check if x < y. */ - bltu xh, yh, 4f - bne xh, yh, 5f - bgeu xl, yl, 5f -4: movi a2, -1 - leaf_return - -.Llt_xneg: - /* Check if y < x. */ - bltu yh, xh, 4b - bne yh, xh, 5f - bltu yl, xl, 4b -5: movi a2, 0 - leaf_return - -.Llt_diff_signs: - bgez xh, 5b - - /* Check if both x and y are nonzero. 
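For reference, the comparison helpers above follow the usual libgcc convention: the result is an integer whose relation to zero encodes the predicate, and the value returned for NaN operands is chosen so that the predicate comes out false for unordered inputs. A sketch of the callers' view (prototypes only; the bodies are the routines above, and __nedf2 is an alias for __eqdf2):

int __eqdf2 (double, double);   /* == 0 iff x == y; nonzero if either is NaN */
int __ledf2 (double, double);   /* <= 0 iff x <= y; returns 1 on NaN */
int __ltdf2 (double, double);   /*  < 0 iff x <  y; returns 0 on NaN */
int __gedf2 (double, double);   /* >= 0 iff x >= y; returns -1 on NaN */
int __gtdf2 (double, double);   /*  > 0 iff x >  y; returns 0 on NaN */

/* Example: how compiled code would use __ledf2 for "a <= b"; the result
   is false whenever a or b is NaN.  */
static int less_or_equal (double a, double b)
{
  return __ledf2 (a, b) <= 0;
}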
*/ - or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl - or a7, a7, yl - movi a2, 0 - movi a3, -1 - movnez a2, a3, a7 - leaf_return - - - /* Unordered */ - - .align 4 - .global __unorddf2 - .type __unorddf2, @function -__unorddf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 3f -1: ball yh, a6, 4f -2: movi a2, 0 - leaf_return - -3: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 1 - leaf_return - -4: slli a7, yh, 12 - or a7, a7, yl - beqz a7, 2b - movi a2, 1 - leaf_return - -#endif /* L_cmpdf2 */ - -#ifdef L_fixdfsi - - .align 4 - .global __fixdfsi - .type __fixdfsi, @function -__fixdfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixdfsi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */ - extui a4, xh, 20, 11 - extui a5, a6, 19, 10 /* 0x3fe */ - sub a4, a4, a5 - bgei a4, 32, .Lfixdfsi_maxint - blti a4, 1, .Lfixdfsi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src a5, a7, xl - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixdfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixdfsi_maxint - - /* Translate NaN to +maxint. */ - movi xh, 0 - -.Lfixdfsi_maxint: - slli a4, a6, 11 /* 0x80000000 */ - addi a5, a4, -1 /* 0x7fffffff */ - movgez a4, a5, xh - mov a2, a4 - leaf_return - -.Lfixdfsi_zero: - movi a2, 0 - leaf_return - -#endif /* L_fixdfsi */ - -#ifdef L_fixdfdi - - .align 4 - .global __fixdfdi - .type __fixdfdi, @function -__fixdfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixdfdi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */ - extui a4, xh, 20, 11 - extui a5, a6, 19, 10 /* 0x3fe */ - sub a4, a4, a5 - bgei a4, 64, .Lfixdfdi_maxint - blti a4, 1, .Lfixdfdi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src xh, a7, xl - sll xl, xl - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixdfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixdfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixdfdi_smallshift: - src xl, xh, xl - srl xh, xh - j .Lfixdfdi_shifted - -.Lfixdfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixdfdi_maxint - - /* Translate NaN to +maxint. */ - movi xh, 0 - -.Lfixdfdi_maxint: - slli a7, a6, 11 /* 0x80000000 */ - bgez xh, 1f - mov xh, a7 - movi xl, 0 - leaf_return - -1: addi xh, a7, -1 /* 0x7fffffff */ - movi xl, -1 - leaf_return - -.Lfixdfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -#endif /* L_fixdfdi */ - -#ifdef L_fixunsdfsi - - .align 4 - .global __fixunsdfsi - .type __fixunsdfsi, @function -__fixunsdfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixunsdfsi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */ - extui a4, xh, 20, 11 - extui a5, a6, 20, 10 /* 0x3ff */ - sub a4, a4, a5 - bgei a4, 32, .Lfixunsdfsi_maxint - bltz a4, .Lfixunsdfsi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src a5, a7, xl - - /* Shift back to the right, based on the exponent. 
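__fixdfsi above is the double to int32 conversion: pull out the biased exponent, shift the significand into place, truncate toward zero, and saturate out-of-range values, with NaN mapped to +maxint as .Lfixdfsi_nan_or_inf notes. A portable sketch operating on the raw bit pattern (illustrative names, not the libgcc entry point):

#include <stdint.h>

/* double -> int32 conversion, as __fixdfsi above.  Sketch only.  */
static int32_t fixdfsi_sketch (uint64_t bits)   /* raw IEEE-754 double bits */
{
  uint32_t exp  = (bits >> 52) & 0x7ff;
  uint64_t frac = bits & 0x000fffffffffffffULL;
  int      neg  = (int) (bits >> 63);

  if (exp == 0x7ff)                      /* Infinity or NaN */
    return (frac || !neg) ? INT32_MAX : INT32_MIN;

  int shift = (int) exp - 0x3ff;         /* unbiased exponent */
  if (shift < 0)
    return 0;                            /* |x| < 1 truncates to 0 */
  if (shift >= 31)
    return neg ? INT32_MIN : INT32_MAX;  /* saturate */

  uint64_t sig = frac | (1ULL << 52);    /* add the implicit "1.0" */
  int32_t val = (int32_t) (sig >> (52 - shift));
  return neg ? -val : val;
}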
*/ - addi a4, a4, 1 - beqi a4, 32, .Lfixunsdfsi_bigexp - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixunsdfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixunsdfsi_maxint - - /* Translate NaN to 0xffffffff. */ - movi a2, -1 - leaf_return - -.Lfixunsdfsi_maxint: - slli a4, a6, 11 /* 0x80000000 */ - movi a5, -1 /* 0xffffffff */ - movgez a4, a5, xh - mov a2, a4 - leaf_return - -.Lfixunsdfsi_zero: - movi a2, 0 - leaf_return - -.Lfixunsdfsi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz xh, 1f - mov a2, a5 /* no shift needed */ - leaf_return - - /* Return 0x80000000 if negative. */ -1: slli a2, a6, 11 - leaf_return - -#endif /* L_fixunsdfsi */ - -#ifdef L_fixunsdfdi - - .align 4 - .global __fixunsdfdi - .type __fixunsdfdi, @function -__fixunsdfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixunsdfdi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */ - extui a4, xh, 20, 11 - extui a5, a6, 20, 10 /* 0x3ff */ - sub a4, a4, a5 - bgei a4, 64, .Lfixunsdfdi_maxint - bltz a4, .Lfixunsdfdi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src xh, a7, xl - sll xl, xl - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 64, .Lfixunsdfdi_bigexp - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixunsdfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixunsdfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixunsdfdi_smallshift: - src xl, xh, xl - srl xh, xh - j .Lfixunsdfdi_shifted - -.Lfixunsdfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixunsdfdi_maxint - - /* Translate NaN to 0xffffffff.... */ -1: movi xh, -1 - movi xl, -1 - leaf_return - -.Lfixunsdfdi_maxint: - bgez xh, 1b -2: slli xh, a6, 11 /* 0x80000000 */ - movi xl, 0 - leaf_return - -.Lfixunsdfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -.Lfixunsdfdi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a7, 2b - leaf_return /* no shift needed */ - -#endif /* L_fixunsdfdi */ - -#ifdef L_floatsidf - - .align 4 - .global __floatunsidf - .type __floatunsidf, @function -__floatunsidf: - leaf_entry sp, 16 - beqz a2, .Lfloatsidf_return_zero - - /* Set the sign to zero and jump to the floatsidf code. */ - movi a7, 0 - j .Lfloatsidf_normalize - - .align 4 - .global __floatsidf - .type __floatsidf, @function -__floatsidf: - leaf_entry sp, 16 - - /* Check for zero. */ - beqz a2, .Lfloatsidf_return_zero - - /* Save the sign. */ - extui a7, a2, 31, 1 - - /* Get the absolute value. */ -#if XCHAL_HAVE_ABS - abs a2, a2 -#else - neg a4, a2 - movltz a2, a4, a2 -#endif - -.Lfloatsidf_normalize: - /* Normalize with the first 1 bit in the msb. */ - do_nsau a4, a2, a5, a6 - ssl a4 - sll a5, a2 - - /* Shift the mantissa into position. */ - srli xh, a5, 11 - slli xl, a5, (32 - 11) - - /* Set the exponent. */ - movi a5, 0x41d /* 0x3fe + 31 */ - sub a5, a5, a4 - slli a5, a5, 20 - add xh, xh, a5 - - /* Add the sign and return. */ - slli a7, a7, 31 - or xh, xh, a7 - leaf_return - -.Lfloatsidf_return_zero: - movi a3, 0 - leaf_return - -#endif /* L_floatsidf */ - -#ifdef L_floatdidf - - .align 4 - .global __floatundidf - .type __floatundidf, @function -__floatundidf: - leaf_entry sp, 16 - - /* Check for zero. 
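__floatsidf and __floatunsidf above go the other way: take the absolute value, locate the leading one with do_nsau, shift it up into the implicit-1 position, and derive the exponent from the shift count. A C sketch producing the raw double bits, with __builtin_clz standing in for do_nsau:

#include <stdint.h>

/* int32 -> double conversion, as __floatsidf above.  The result is exact,
   so no rounding is needed.  Sketch only.  */
static uint64_t floatsidf_sketch (int32_t i)    /* returns raw double bits */
{
  if (i == 0)
    return 0;

  uint64_t sign = (i < 0) ? (1ULL << 63) : 0;
  uint32_t mag  = (i < 0) ? -(uint32_t) i : (uint32_t) i;

  int clz = __builtin_clz (mag);                /* leading-zero count, 0..31 */
  uint64_t frac = ((uint64_t) mag << (clz + 21)) & 0x000fffffffffffffULL;
  uint64_t exp  = (uint64_t) (0x3ff + 31 - clz) << 52;

  return sign | exp | frac;
}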
*/ - or a4, xh, xl - beqz a4, 2f - - /* Set the sign to zero and jump to the floatdidf code. */ - movi a7, 0 - j .Lfloatdidf_normalize - - .align 4 - .global __floatdidf - .type __floatdidf, @function -__floatdidf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Save the sign. */ - extui a7, xh, 31, 1 - - /* Get the absolute value. */ - bgez xh, .Lfloatdidf_normalize - neg xl, xl - neg xh, xh - beqz xl, .Lfloatdidf_normalize - addi xh, xh, -1 - -.Lfloatdidf_normalize: - /* Normalize with the first 1 bit in the msb of xh. */ - beqz xh, .Lfloatdidf_bigshift - do_nsau a4, xh, a5, a6 - ssl a4 - src xh, xh, xl - sll xl, xl - -.Lfloatdidf_shifted: - /* Shift the mantissa into position, with rounding bits in a6. */ - ssai 11 - sll a6, xl - src xl, xh, xl - srl xh, xh - - /* Set the exponent. */ - movi a5, 0x43d /* 0x3fe + 63 */ - sub a5, a5, a4 - slli a5, a5, 20 - add xh, xh, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or xh, xh, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, 2f - addi xl, xl, 1 - beqz xl, .Lfloatdidf_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatdidf_exactlyhalf -2: leaf_return - -.Lfloatdidf_bigshift: - /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ - do_nsau a4, xl, a5, a6 - ssl a4 - sll xh, xl - movi xl, 0 - addi a4, a4, 32 - j .Lfloatdidf_shifted - -.Lfloatdidf_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Lfloatdidf_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - -#endif /* L_floatdidf */ - -#ifdef L_truncdfsf2 - - .align 4 - .global __truncdfsf2 - .type __truncdfsf2, @function -__truncdfsf2: - leaf_entry sp, 16 - - /* Adjust the exponent bias. */ - movi a4, (0x3ff - 0x7f) << 20 - sub a5, xh, a4 - - /* Check for underflow. */ - xor a6, xh, a5 - bltz a6, .Ltrunc_underflow - extui a6, a5, 20, 11 - beqz a6, .Ltrunc_underflow - - /* Check for overflow. */ - movi a4, 255 - bge a6, a4, .Ltrunc_overflow - - /* Shift a5/xl << 3 into a5/a4. */ - ssai (32 - 3) - src a5, a5, xl - sll a4, xl - -.Ltrunc_addsign: - /* Add the sign bit. */ - extui a6, xh, 31, 1 - slli a6, a6, 31 - or a2, a6, a5 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a4, 1f - addi a2, a2, 1 - /* Overflow to the exponent is OK. The answer will be correct. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a4, a4, 1 - beqz a4, .Ltrunc_exactlyhalf -1: leaf_return - -.Ltrunc_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -.Ltrunc_overflow: - /* Check if exponent == 0x7ff. */ - movi a4, 0x7ff00000 - bnall xh, a4, 1f - - /* Check if mantissa is nonzero. */ - slli a5, xh, 12 - or a5, a5, xl - beqz a5, 1f - - /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */ - srli a4, a4, 1 - -1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */ - /* Add the sign bit. */ - extui a6, xh, 31, 1 - ssai 1 - src a2, a6, a4 - leaf_return - -.Ltrunc_underflow: - /* Find shift count for a subnormal. Flush to zero if >= 32. */ - extui a6, xh, 20, 11 - movi a5, 0x3ff - 0x7f - sub a6, a5, a6 - addi a6, a6, 1 - bgeui a6, 32, 1f - - /* Replace the exponent with an explicit "1.0". 
*/ - slli a5, a5, 13 /* 0x700000 */ - or a5, a5, xh - slli a5, a5, 11 - srli a5, a5, 11 - - /* Shift the mantissa left by 3 bits (into a5/a4). */ - ssai (32 - 3) - src a5, a5, xl - sll a4, xl - - /* Shift right by a6. */ - ssr a6 - sll a7, a4 - src a4, a5, a4 - srl a5, a5 - beqz a7, .Ltrunc_addsign - or a4, a4, a6 /* any positive, nonzero value will work */ - j .Ltrunc_addsign - - /* Return +/- zero. */ -1: extui a2, xh, 31, 1 - slli a2, a2, 31 - leaf_return - -#endif /* L_truncdfsf2 */ - -#ifdef L_extendsfdf2 - - .align 4 - .global __extendsfdf2 - .type __extendsfdf2, @function -__extendsfdf2: - leaf_entry sp, 16 - - /* Save the sign bit and then shift it off. */ - extui a5, a2, 31, 1 - slli a5, a5, 31 - slli a4, a2, 1 - - /* Extract and check the exponent. */ - extui a6, a2, 23, 8 - beqz a6, .Lextend_expzero - addi a6, a6, 1 - beqi a6, 256, .Lextend_nan_or_inf - - /* Shift >> 3 into a4/xl. */ - srli a4, a4, 4 - slli xl, a2, (32 - 3) - - /* Adjust the exponent bias. */ - movi a6, (0x3ff - 0x7f) << 20 - add a4, a4, a6 - - /* Add the sign bit. */ - or xh, a4, a5 - leaf_return - -.Lextend_nan_or_inf: - movi a4, 0x7ff00000 - - /* Check for NaN. */ - slli a7, a2, 9 - beqz a7, 1f - - slli a6, a6, 11 /* 0x80000 */ - or a4, a4, a6 - - /* Add the sign and return. */ -1: or xh, a4, a5 - movi xl, 0 - leaf_return - -.Lextend_expzero: - beqz a4, 1b - - /* Normalize it to have 8 zero bits before the first 1 bit. */ - do_nsau a7, a4, a2, a3 - addi a7, a7, -8 - ssl a7 - sll a4, a4 - - /* Shift >> 3 into a4/xl. */ - slli xl, a4, (32 - 3) - srli a4, a4, 3 - - /* Set the exponent. */ - movi a6, 0x3fe - 0x7f - sub a6, a6, a7 - slli a6, a6, 20 - add a4, a4, a6 - - /* Add the sign and return. */ - or xh, a4, a5 - leaf_return - -#endif /* L_extendsfdf2 */ - - diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S deleted file mode 100644 index d75be0e5ae5..00000000000 --- a/gcc/config/xtensa/ieee754-sf.S +++ /dev/null @@ -1,1757 +0,0 @@ -/* IEEE-754 single-precision functions for Xtensa - Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifdef __XTENSA_EB__ -#define xh a2 -#define xl a3 -#define yh a4 -#define yl a5 -#else -#define xh a3 -#define xl a2 -#define yh a5 -#define yl a4 -#endif - -/* Warning! The branch displacements for some Xtensa branch instructions - are quite small, and this code has been carefully laid out to keep - branch targets in range. If you change anything, be sure to check that - the assembler is not relaxing anything to branch over a jump. 
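The bias adjustment used by __truncdfsf2 and __extendsfdf2 above is the constant 0x3ff - 0x7f, the difference between the double and float exponent biases. For a normal number the widening direction reduces to a shift and an add; a sketch of that case only (zero, subnormals, Infinity and NaN take the special paths shown in the code, and the name is illustrative):

#include <stdint.h>

/* float -> double widening for a normal number, as __extendsfdf2 above:
   the fraction grows from 23 to 52 bits (<< 29) and the exponent is
   rebiased by 0x3ff - 0x7f.  Sketch only.  */
static uint64_t extend_normal (uint32_t f)      /* raw float bits in */
{
  uint64_t sign = (uint64_t) (f >> 31) << 63;
  uint64_t exp  = ((f >> 23) & 0xff) + (0x3ff - 0x7f);
  uint64_t frac = (uint64_t) (f & 0x7fffff) << 29;

  return sign | (exp << 52) | frac;
}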
*/ - -#ifdef L_negsf2 - - .align 4 - .global __negsf2 - .type __negsf2, @function -__negsf2: - leaf_entry sp, 16 - movi a4, 0x80000000 - xor a2, a2, a4 - leaf_return - -#endif /* L_negsf2 */ - -#ifdef L_addsubsf3 - - /* Addition */ -__addsf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Ladd_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall a3, a6, 1f - /* If x is a NaN, return it. Otherwise, return y. */ - slli a7, a2, 9 - beqz a7, .Ladd_ynan_or_inf -1: leaf_return - -.Ladd_ynan_or_inf: - /* Return y. */ - mov a2, a3 - leaf_return - -.Ladd_opposite_signs: - /* Operand signs differ. Do a subtraction. */ - slli a7, a6, 8 - xor a3, a3, a7 - j .Lsub_same_sign - - .align 4 - .global __addsf3 - .type __addsf3, @function -__addsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Check if the two operands have the same sign. */ - xor a7, a2, a3 - bltz a7, .Ladd_opposite_signs - -.Ladd_same_sign: - /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ - ball a2, a6, .Ladd_xnan_or_inf - ball a3, a6, .Ladd_ynan_or_inf - - /* Compare the exponents. The smaller operand will be shifted - right by the exponent difference and added to the larger - one. */ - extui a7, a2, 23, 9 - extui a8, a3, 23, 9 - bltu a7, a8, .Ladd_shiftx - -.Ladd_shifty: - /* Check if the smaller (or equal) exponent is zero. */ - bnone a3, a6, .Ladd_yexpzero - - /* Replace y sign/exponent with 0x008. */ - or a3, a3, a6 - slli a3, a3, 8 - srli a3, a3, 8 - -.Ladd_yexpdiff: - /* Compute the exponent difference. */ - sub a10, a7, a8 - - /* Exponent difference > 32 -- just return the bigger value. */ - bgeui a10, 32, 1f - - /* Shift y right by the exponent difference. Any bits that are - shifted out of y are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, a3, a9 - srl a3, a3 - - /* Do the addition. */ - add a2, a2, a3 - - /* Check if the add overflowed into the exponent. */ - extui a10, a2, 23, 9 - beq a10, a7, .Ladd_round - mov a8, a7 - j .Ladd_carry - -.Ladd_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0", and increment the apparent exponent - because subnormals behave as if they had the minimum (nonzero) - exponent. Test for the case when both exponents are zero. */ - slli a3, a3, 9 - srli a3, a3, 9 - bnone a2, a6, .Ladd_bothexpzero - addi a8, a8, 1 - j .Ladd_yexpdiff - -.Ladd_bothexpzero: - /* Both exponents are zero. Handle this as a special case. There - is no need to shift or round, and the normal code for handling - a carry into the exponent field will not work because it - assumes there is an implicit "1.0" that needs to be added. */ - add a2, a2, a3 -1: leaf_return - -.Ladd_xexpzero: - /* Same as "yexpzero" except skip handling the case when both - exponents are zero. */ - slli a2, a2, 9 - srli a2, a2, 9 - addi a7, a7, 1 - j .Ladd_xexpdiff - -.Ladd_shiftx: - /* Same thing as the "shifty" code, but with x and y swapped. Also, - because the exponent difference is always nonzero in this version, - the shift sequence can use SLL and skip loading a constant zero. */ - bnone a2, a6, .Ladd_xexpzero - - or a2, a2, a6 - slli a2, a2, 8 - srli a2, a2, 8 - -.Ladd_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Ladd_returny - - ssr a10 - sll a9, a2 - srl a2, a2 - - add a2, a2, a3 - - /* Check if the add overflowed into the exponent. 
*/ - extui a10, a2, 23, 9 - bne a10, a8, .Ladd_carry - -.Ladd_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi a2, a2, 1 - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_returny: - mov a2, a3 - leaf_return - -.Ladd_carry: - /* The addition has overflowed into the exponent field, so the - value needs to be renormalized. The mantissa of the result - can be recovered by subtracting the original exponent and - adding 0x800000 (which is the explicit "1.0" for the - mantissa of the non-shifted operand -- the "1.0" for the - shifted operand was already added). The mantissa can then - be shifted right by one bit. The explicit "1.0" of the - shifted mantissa then needs to be replaced by the exponent, - incremented by one to account for the normalizing shift. - It is faster to combine these operations: do the shift first - and combine the additions and subtractions. If x is the - original exponent, the result is: - shifted mantissa - (x << 22) + (1 << 22) + (x << 23) - or: - shifted mantissa + ((x + 1) << 22) - Note that the exponent is incremented here by leaving the - explicit "1.0" of the mantissa in the exponent field. */ - - /* Shift x right by one bit. Save the lsb. */ - mov a10, a2 - srli a2, a2, 1 - - /* See explanation above. The original exponent is in a8. */ - addi a8, a8, 1 - slli a8, a8, 22 - add a2, a2, a8 - - /* Return an Infinity if the exponent overflowed. */ - ball a2, a6, .Ladd_infinity - - /* Same thing as the "round" code except the msb of the leftover - fraction is bit 0 of a10, with the rest of the fraction in a9. */ - bbci.l a10, 0, 1f - addi a2, a2, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_infinity: - /* Clear the mantissa. */ - srli a2, a2, 23 - slli a2, a2, 23 - - /* The sign bit may have been lost in a carry-out. Put it back. */ - slli a8, a8, 1 - or a2, a2, a8 - leaf_return - -.Ladd_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - - - /* Subtraction */ -__subsf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Lsub_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall a3, a6, 1f - /* Both x and y are either NaN or Inf, so the result is NaN. */ - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Lsub_ynan_or_inf: - /* Negate y and return it. */ - slli a7, a6, 8 - xor a2, a3, a7 - leaf_return - -.Lsub_opposite_signs: - /* Operand signs differ. Do an addition. */ - slli a7, a6, 8 - xor a3, a3, a7 - j .Ladd_same_sign - - .align 4 - .global __subsf3 - .type __subsf3, @function -__subsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Check if the two operands have the same sign. */ - xor a7, a2, a3 - bltz a7, .Lsub_opposite_signs - -.Lsub_same_sign: - /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ - ball a2, a6, .Lsub_xnan_or_inf - ball a3, a6, .Lsub_ynan_or_inf - - /* Compare the operands. In contrast to addition, the entire - value matters here. */ - extui a7, a2, 23, 8 - extui a8, a3, 23, 8 - bltu a2, a3, .Lsub_xsmaller - -.Lsub_ysmaller: - /* Check if the smaller (or equal) exponent is zero. */ - bnone a3, a6, .Lsub_yexpzero - - /* Replace y sign/exponent with 0x008. */ - or a3, a3, a6 - slli a3, a3, 8 - srli a3, a3, 8 - -.Lsub_yexpdiff: - /* Compute the exponent difference. 
*/ - sub a10, a7, a8 - - /* Exponent difference > 32 -- just return the bigger value. */ - bgeui a10, 32, 1f - - /* Shift y right by the exponent difference. Any bits that are - shifted out of y are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, a3, a9 - srl a3, a3 - - sub a2, a2, a3 - - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from a2. */ - neg a9, a9 - addi a10, a2, -1 - movnez a2, a10, a9 - - /* Check if the subtract underflowed into the exponent. */ - extui a10, a2, 23, 8 - beq a10, a7, .Lsub_round - j .Lsub_borrow - -.Lsub_yexpzero: - /* Return zero if the inputs are equal. (For the non-subnormal - case, subtracting the "1.0" will cause a borrow from the exponent - and this case can be detected when handling the borrow.) */ - beq a2, a3, .Lsub_return_zero - - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0". Unless x is also a subnormal, increment - y's apparent exponent because subnormals behave as if they had - the minimum (nonzero) exponent. */ - slli a3, a3, 9 - srli a3, a3, 9 - bnone a2, a6, .Lsub_yexpdiff - addi a8, a8, 1 - j .Lsub_yexpdiff - -.Lsub_returny: - /* Negate and return y. */ - slli a7, a6, 8 - xor a2, a3, a7 -1: leaf_return - -.Lsub_xsmaller: - /* Same thing as the "ysmaller" code, but with x and y swapped and - with y negated. */ - bnone a2, a6, .Lsub_xexpzero - - or a2, a2, a6 - slli a2, a2, 8 - srli a2, a2, 8 - -.Lsub_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Lsub_returny - - ssr a10 - movi a9, 0 - src a9, a2, a9 - srl a2, a2 - - /* Negate y. */ - slli a11, a6, 8 - xor a3, a3, a11 - - sub a2, a3, a2 - - neg a9, a9 - addi a10, a2, -1 - movnez a2, a10, a9 - - /* Check if the subtract underflowed into the exponent. */ - extui a10, a2, 23, 8 - bne a10, a8, .Lsub_borrow - -.Lsub_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi a2, a2, 1 - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Lsub_exactlyhalf -1: leaf_return - -.Lsub_xexpzero: - /* Same as "yexpzero". */ - beq a2, a3, .Lsub_return_zero - slli a2, a2, 9 - srli a2, a2, 9 - bnone a3, a6, .Lsub_xexpdiff - addi a7, a7, 1 - j .Lsub_xexpdiff - -.Lsub_return_zero: - movi a2, 0 - leaf_return - -.Lsub_borrow: - /* The subtraction has underflowed into the exponent field, so the - value needs to be renormalized. Shift the mantissa left as - needed to remove any leading zeros and adjust the exponent - accordingly. If the exponent is not large enough to remove - all the leading zeros, the result will be a subnormal value. */ - - slli a8, a2, 9 - beqz a8, .Lsub_xzero - do_nsau a6, a8, a7, a11 - srli a8, a8, 9 - bge a6, a10, .Lsub_subnormal - addi a6, a6, 1 - -.Lsub_normalize_shift: - /* Shift the mantissa (a8/a9) left by a6. */ - ssl a6 - src a8, a8, a9 - sll a9, a9 - - /* Combine the shifted mantissa with the sign and exponent, - decrementing the exponent by a6. (The exponent has already - been decremented by one due to the borrow from the subtraction, - but adding the mantissa will increment the exponent by one.) */ - srli a2, a2, 23 - sub a2, a2, a6 - slli a2, a2, 23 - add a2, a2, a8 - j .Lsub_round - -.Lsub_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -.Lsub_xzero: - /* If there was a borrow from the exponent, and the mantissa and - guard digits are all zero, then the inputs were equal and the - result should be zero. */ - beqz a9, .Lsub_return_zero - - /* Only the guard digit is nonzero. 
Shift by min(24, a10). */ - addi a11, a10, -24 - movi a6, 24 - movltz a6, a10, a11 - j .Lsub_normalize_shift - -.Lsub_subnormal: - /* The exponent is too small to shift away all the leading zeros. - Set a6 to the current exponent (which has already been - decremented by the borrow) so that the exponent of the result - will be zero. Do not add 1 to a6 in this case, because: (1) - adding the mantissa will not increment the exponent, so there is - no need to subtract anything extra from the exponent to - compensate, and (2) the effective exponent of a subnormal is 1 - not 0 so the shift amount must be 1 smaller than normal. */ - mov a6, a10 - j .Lsub_normalize_shift - -#endif /* L_addsubsf3 */ - -#ifdef L_mulsf3 - - /* Multiplication */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - -__mulsf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Lmul_xexpzero: - /* Clear the sign bit of x. */ - slli a2, a2, 1 - srli a2, a2, 1 - - /* If x is zero, return zero. */ - beqz a2, .Lmul_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - do_nsau a10, a2, a11, a12 - addi a10, a10, -8 - ssl a10 - sll a2, a2 - movi a8, 1 - sub a8, a8, a10 - j .Lmul_xnormalized - -.Lmul_yexpzero: - /* Clear the sign bit of y. */ - slli a3, a3, 1 - srli a3, a3, 1 - - /* If y is zero, return zero. */ - beqz a3, .Lmul_return_zero - - /* Normalize y. Adjust the exponent in a9. */ - do_nsau a10, a3, a11, a12 - addi a10, a10, -8 - ssl a10 - sll a3, a3 - movi a9, 1 - sub a9, a9, a10 - j .Lmul_ynormalized - -.Lmul_return_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - j .Lmul_done - -.Lmul_xnan_or_inf: - /* If y is zero, return NaN. */ - slli a8, a3, 1 - bnez a8, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 - j .Lmul_done -1: - /* If y is NaN, return y. */ - bnall a3, a6, .Lmul_returnx - slli a8, a3, 9 - beqz a8, .Lmul_returnx - -.Lmul_returny: - mov a2, a3 - -.Lmul_returnx: - /* Set the sign bit and return. */ - extui a7, a7, 31, 1 - slli a2, a2, 1 - ssai 1 - src a2, a7, a2 - j .Lmul_done - -.Lmul_ynan_or_inf: - /* If x is zero, return NaN. */ - slli a8, a2, 1 - bnez a8, .Lmul_returny - movi a7, 0x400000 /* make it a quiet NaN */ - or a2, a3, a7 - j .Lmul_done - - .align 4 - .global __mulsf3 - .type __mulsf3, @function -__mulsf3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 64 -#else - leaf_entry sp, 32 -#endif - movi a6, 0x7f800000 - - /* Get the sign of the result. */ - xor a7, a2, a3 - - /* Check for NaN and infinity. */ - ball a2, a6, .Lmul_xnan_or_inf - ball a3, a6, .Lmul_ynan_or_inf - - /* Extract the exponents. */ - extui a8, a2, 23, 8 - extui a9, a3, 23, 8 - - beqz a8, .Lmul_xexpzero -.Lmul_xnormalized: - beqz a9, .Lmul_yexpzero -.Lmul_ynormalized: - - /* Add the exponents. */ - add a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0xffffff - or a2, a2, a6 - and a2, a2, a10 - or a3, a3, a6 - and a3, a3, a10 - - /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. 
*/ - -#if XCHAL_HAVE_MUL32_HIGH - - mull a6, a2, a3 - muluh a2, a2, a3 - -#else - - /* Break the inputs into 16-bit chunks and compute 4 32-bit partial - products. These partial products are: - - 0 xl * yl - - 1 xl * yh - 2 xh * yl - - 3 xh * yh - - If using the Mul16 or Mul32 multiplier options, these input - chunks must be stored in separate registers. For Mac16, the - UMUL.AA.* opcodes can specify that the inputs come from either - half of the registers, so there is no need to shift them out - ahead of time. If there is no multiply hardware, the 16-bit - chunks can be extracted when setting up the arguments to the - separate multiply function. */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Calling a separate multiply function will clobber a0 and requires - use of a8 as a temporary, so save those values now. (The function - uses a custom ABI so nothing else needs to be saved.) */ - s32i a0, sp, 0 - s32i a8, sp, 4 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define a2h a4 -#define a3h a5 - - /* Get the high halves of the inputs into registers. */ - srli a2h, a2, 16 - srli a3h, a3, 16 - -#define a2l a2 -#define a3l a3 - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui a2, a2, 0, 16 - extui a3, a3, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. */ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a6 with carry-out in a9. */ - do_mul(a6, a2, l, a3, h) /* pp 1 */ - do_mul(a11, a2, h, a3, l) /* pp 2 */ - movi a9, 0 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Shift the high half of a9/a6 into position in a9. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a9, a9, a6 - - /* Compute the low word into a6. */ - do_mul(a11, a2, l, a3, l) /* pp 0 */ - sll a6, a6 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Compute the high word into a2. */ - do_mul(a2, a2, h, a3, h) /* pp 3 */ - add a2, a2, a9 - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Restore values saved on the stack during the multiplication. */ - l32i a0, sp, 0 - l32i a8, sp, 4 -#endif -#endif /* ! 
XCHAL_HAVE_MUL32_HIGH */ - - /* Shift left by 9 bits, unless there was a carry-out from the - multiply, in which case, shift by 8 bits and increment the - exponent. */ - movi a4, 9 - srli a5, a2, 24 - 9 - beqz a5, 1f - addi a4, a4, -1 - addi a8, a8, 1 -1: ssl a4 - src a2, a2, a6 - sll a6, a6 - - /* Subtract the extra bias from the exponent sum (plus one to account - for the explicit "1.0" of the mantissa that will be added to the - exponent in the final result). */ - movi a4, 0x80 - sub a8, a8, a4 - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..fd are OK here. */ - movi a4, 0xfe - bgeu a8, a4, .Lmul_overflow - -.Lmul_round: - /* Round. */ - bgez a6, .Lmul_rounded - addi a2, a2, 1 - slli a6, a6, 1 - beqz a6, .Lmul_exactlyhalf - -.Lmul_rounded: - /* Add the exponent to the mantissa. */ - slli a8, a8, 23 - add a2, a2, a8 - -.Lmul_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or a2, a2, a7 - -.Lmul_done: -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -.Lmul_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - j .Lmul_rounded - -.Lmul_overflow: - bltz a8, .Lmul_underflow - /* Return +/- Infinity. */ - movi a8, 0xff - slli a2, a8, 23 - j .Lmul_addsign - -.Lmul_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - mov a9, a6 - ssr a8 - bgeui a8, 32, .Lmul_flush_to_zero - - /* Shift a2 right. Any bits that are shifted out of a2 are saved - in a6 (combined with the shifted-out bits currently in a6) for - rounding the result. */ - sll a6, a2 - srl a2, a2 - - /* Set the exponent to zero. */ - movi a8, 0 - - /* Pack any nonzero bits shifted out into a6. */ - beqz a9, .Lmul_round - movi a9, 1 - or a6, a6, a9 - j .Lmul_round - -.Lmul_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - j .Lmul_done - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ -#endif /* L_mulsf3 */ - -#ifdef L_divsf3 - - /* Division */ -__divsf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). 
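The single-precision multiply above needs only a 32x32 -> 64-bit product, so the chunking collapses to four 16x16 partial products (pp0..pp3). The same computation in C, with explicit carry bookkeeping similar to what the register sequence above does (illustrative names only):

#include <stdint.h>

/* 32x32 -> 64-bit multiply from four 16x16 partial products (pp0..pp3),
   as described for __mulsf3 above.  Sketch only.  */
static void mul32x32_64 (uint32_t x, uint32_t y, uint32_t *hi, uint32_t *lo)
{
  uint32_t x_lo = x & 0xffff, x_hi = x >> 16;
  uint32_t y_lo = y & 0xffff, y_hi = y >> 16;

  uint32_t pp0 = x_lo * y_lo;         /* bits  0..31 */
  uint32_t pp1 = x_lo * y_hi;         /* bits 16..47 */
  uint32_t pp2 = x_hi * y_lo;         /* bits 16..47 */
  uint32_t pp3 = x_hi * y_hi;         /* bits 32..63 */

  uint32_t mid = pp1 + pp2;
  uint32_t mid_carry = (mid < pp1);   /* carry out of the middle sum */

  uint32_t low = pp0 + (mid << 16);
  uint32_t low_carry = (low < pp0);

  *lo = low;
  *hi = pp3 + (mid >> 16) + (mid_carry << 16) + low_carry;
}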
- (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Ldiv_yexpzero: - /* Clear the sign bit of y. */ - slli a3, a3, 1 - srli a3, a3, 1 - - /* Check for division by zero. */ - beqz a3, .Ldiv_yzero - - /* Normalize y. Adjust the exponent in a9. */ - do_nsau a10, a3, a4, a5 - addi a10, a10, -8 - ssl a10 - sll a3, a3 - movi a9, 1 - sub a9, a9, a10 - j .Ldiv_ynormalized - -.Ldiv_yzero: - /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ - slli a4, a2, 1 - srli a4, a4, 1 - srli a2, a7, 31 - slli a2, a2, 31 - or a2, a2, a6 - bnez a4, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Ldiv_xexpzero: - /* Clear the sign bit of x. */ - slli a2, a2, 1 - srli a2, a2, 1 - - /* If x is zero, return zero. */ - beqz a2, .Ldiv_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - do_nsau a10, a2, a4, a5 - addi a10, a10, -8 - ssl a10 - sll a2, a2 - movi a8, 1 - sub a8, a8, a10 - j .Ldiv_xnormalized - -.Ldiv_return_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - leaf_return - -.Ldiv_xnan_or_inf: - /* Set the sign bit of the result. */ - srli a7, a3, 31 - slli a7, a7, 31 - xor a2, a2, a7 - /* If y is NaN or Inf, return NaN. */ - bnall a3, a6, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Ldiv_ynan_or_inf: - /* If y is Infinity, return zero. */ - slli a8, a3, 9 - beqz a8, .Ldiv_return_zero - /* y is NaN; return it. */ - mov a2, a3 - leaf_return - - .align 4 - .global __divsf3 - .type __divsf3, @function -__divsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Get the sign of the result. */ - xor a7, a2, a3 - - /* Check for NaN and infinity. */ - ball a2, a6, .Ldiv_xnan_or_inf - ball a3, a6, .Ldiv_ynan_or_inf - - /* Extract the exponents. */ - extui a8, a2, 23, 8 - extui a9, a3, 23, 8 - - beqz a9, .Ldiv_yexpzero -.Ldiv_ynormalized: - beqz a8, .Ldiv_xexpzero -.Ldiv_xnormalized: - - /* Subtract the exponents. */ - sub a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0xffffff - or a2, a2, a6 - and a2, a2, a10 - or a3, a3, a6 - and a3, a3, a10 - - /* The first digit of the mantissa division must be a one. - Shift x (and adjust the exponent) as needed to make this true. */ - bltu a3, a2, 1f - slli a2, a2, 1 - addi a8, a8, -1 -1: - /* Do the first subtraction and shift. */ - sub a2, a2, a3 - slli a2, a2, 1 - - /* Put the quotient into a10. */ - movi a10, 1 - - /* Divide one bit at a time for 23 bits. */ - movi a9, 23 -#if XCHAL_HAVE_LOOPS - loop a9, .Ldiv_loopend -#endif -.Ldiv_loop: - /* Shift the quotient << 1. */ - slli a10, a10, 1 - - /* Is this digit a 0 or 1? */ - bltu a2, a3, 1f - - /* Output a 1 and subtract. */ - addi a10, a10, 1 - sub a2, a2, a3 - - /* Shift the dividend << 1. */ -1: slli a2, a2, 1 - -#if !XCHAL_HAVE_LOOPS - addi a9, a9, -1 - bnez a9, .Ldiv_loop -#endif -.Ldiv_loopend: - - /* Add the exponent bias (less one to account for the explicit "1.0" - of the mantissa that will be added to the exponent in the final - result). */ - addi a8, a8, 0x7e - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..fd are OK here. */ - movi a4, 0xfe - bgeu a8, a4, .Ldiv_overflow - -.Ldiv_round: - /* Round. The remainder (<< 1) is in a2. */ - bltu a2, a3, .Ldiv_rounded - addi a10, a10, 1 - beq a2, a3, .Ldiv_exactlyhalf - -.Ldiv_rounded: - /* Add the exponent to the mantissa. 
*/ - slli a8, a8, 23 - add a2, a10, a8 - -.Ldiv_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or a2, a2, a7 - leaf_return - -.Ldiv_overflow: - bltz a8, .Ldiv_underflow - /* Return +/- Infinity. */ - addi a8, a4, 1 /* 0xff */ - slli a2, a8, 23 - j .Ldiv_addsign - -.Ldiv_exactlyhalf: - /* Remainder is exactly half the divisor. Round even. */ - srli a10, a10, 1 - slli a10, a10, 1 - j .Ldiv_rounded - -.Ldiv_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - ssr a8 - bgeui a8, 32, .Ldiv_flush_to_zero - - /* Shift a10 right. Any bits that are shifted out of a10 are - saved in a6 for rounding the result. */ - sll a6, a10 - srl a10, a10 - - /* Set the exponent to zero. */ - movi a8, 0 - - /* Pack any nonzero remainder (in a2) into a6. */ - beqz a2, 1f - movi a9, 1 - or a6, a6, a9 - - /* Round a10 based on the bits shifted out into a6. */ -1: bgez a6, .Ldiv_rounded - addi a10, a10, 1 - slli a6, a6, 1 - bnez a6, .Ldiv_rounded - srli a10, a10, 1 - slli a10, a10, 1 - j .Ldiv_rounded - -.Ldiv_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - leaf_return - -#endif /* L_divsf3 */ - -#ifdef L_cmpsf2 - - /* Equal and Not Equal */ - - .align 4 - .global __eqsf2 - .global __nesf2 - .set __nesf2, __eqsf2 - .type __eqsf2, @function -__eqsf2: - leaf_entry sp, 16 - bne a2, a3, 4f - - /* The values are equal but NaN != NaN. Check the exponent. */ - movi a6, 0x7f800000 - ball a2, a6, 3f - - /* Equal. */ - movi a2, 0 - leaf_return - - /* Not equal. */ -2: movi a2, 1 - leaf_return - - /* Check if the mantissas are nonzero. */ -3: slli a7, a2, 9 - j 5f - - /* Check if x and y are zero with different signs. */ -4: or a7, a2, a3 - slli a7, a7, 1 - - /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa - or x when exponent(x) = 0x7f8 and x == y. */ -5: movi a2, 0 - movi a3, 1 - movnez a2, a3, a7 - leaf_return - - - /* Greater Than */ - - .align 4 - .global __gtsf2 - .type __gtsf2, @function -__gtsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Lle_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 0 - leaf_return - - - /* Less Than or Equal */ - - .align 4 - .global __lesf2 - .type __lesf2, @function -__lesf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Lle_cmp - movi a2, 1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 1 - leaf_return - -.Lle_cmp: - /* Check if x and y have different signs. */ - xor a7, a2, a3 - bltz a7, .Lle_diff_signs - - /* Check if x is negative. */ - bltz a2, .Lle_xneg - - /* Check if x <= y. */ - bltu a3, a2, 5f -4: movi a2, 0 - leaf_return - -.Lle_xneg: - /* Check if y <= x. */ - bgeu a2, a3, 4b -5: movi a2, 1 - leaf_return - -.Lle_diff_signs: - bltz a2, 4b - - /* Check if both x and y are zero. */ - or a7, a2, a3 - slli a7, a7, 1 - movi a2, 1 - movi a3, 0 - moveqz a2, a3, a7 - leaf_return - - - /* Greater Than or Equal */ - - .align 4 - .global __gesf2 - .type __gesf2, @function -__gesf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Llt_cmp - - /* Check if y is a NaN. 
*/ - slli a7, a3, 9 - beqz a7, .Llt_cmp - movi a2, -1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, -1 - leaf_return - - - /* Less Than */ - - .align 4 - .global __ltsf2 - .type __ltsf2, @function -__ltsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Llt_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 0 - leaf_return - -.Llt_cmp: - /* Check if x and y have different signs. */ - xor a7, a2, a3 - bltz a7, .Llt_diff_signs - - /* Check if x is negative. */ - bltz a2, .Llt_xneg - - /* Check if x < y. */ - bgeu a2, a3, 5f -4: movi a2, -1 - leaf_return - -.Llt_xneg: - /* Check if y < x. */ - bltu a3, a2, 4b -5: movi a2, 0 - leaf_return - -.Llt_diff_signs: - bgez a2, 5b - - /* Check if both x and y are nonzero. */ - or a7, a2, a3 - slli a7, a7, 1 - movi a2, 0 - movi a3, -1 - movnez a2, a3, a7 - leaf_return - - - /* Unordered */ - - .align 4 - .global __unordsf2 - .type __unordsf2, @function -__unordsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 3f -1: ball a3, a6, 4f -2: movi a2, 0 - leaf_return - -3: slli a7, a2, 9 - beqz a7, 1b - movi a2, 1 - leaf_return - -4: slli a7, a3, 9 - beqz a7, 2b - movi a2, 1 - leaf_return - -#endif /* L_cmpsf2 */ - -#ifdef L_fixsfsi - - .align 4 - .global __fixsfsi - .type __fixsfsi, @function -__fixsfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixsfsi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7e - bgei a4, 32, .Lfixsfsi_maxint - blti a4, 1, .Lfixsfsi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli a5, a7, 8 - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixsfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixsfsi_maxint - - /* Translate NaN to +maxint. */ - movi a2, 0 - -.Lfixsfsi_maxint: - slli a4, a6, 8 /* 0x80000000 */ - addi a5, a4, -1 /* 0x7fffffff */ - movgez a4, a5, a2 - mov a2, a4 - leaf_return - -.Lfixsfsi_zero: - movi a2, 0 - leaf_return - -#endif /* L_fixsfsi */ - -#ifdef L_fixsfdi - - .align 4 - .global __fixsfdi - .type __fixsfdi, @function -__fixsfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixsfdi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7e - bgei a4, 64, .Lfixsfdi_maxint - blti a4, 1, .Lfixsfdi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli xh, a7, 8 - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixsfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixsfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixsfdi_smallshift: - movi xl, 0 - sll xl, xh - srl xh, xh - j .Lfixsfdi_shifted - -.Lfixsfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixsfdi_maxint - - /* Translate NaN to +maxint. 
*/ - movi a2, 0 - -.Lfixsfdi_maxint: - slli a7, a6, 8 /* 0x80000000 */ - bgez a2, 1f - mov xh, a7 - movi xl, 0 - leaf_return - -1: addi xh, a7, -1 /* 0x7fffffff */ - movi xl, -1 - leaf_return - -.Lfixsfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -#endif /* L_fixsfdi */ - -#ifdef L_fixunssfsi - - .align 4 - .global __fixunssfsi - .type __fixunssfsi, @function -__fixunssfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixunssfsi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7f - bgei a4, 32, .Lfixunssfsi_maxint - bltz a4, .Lfixunssfsi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli a5, a7, 8 - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 32, .Lfixunssfsi_bigexp - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixunssfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixunssfsi_maxint - - /* Translate NaN to 0xffffffff. */ - movi a2, -1 - leaf_return - -.Lfixunssfsi_maxint: - slli a4, a6, 8 /* 0x80000000 */ - movi a5, -1 /* 0xffffffff */ - movgez a4, a5, a2 - mov a2, a4 - leaf_return - -.Lfixunssfsi_zero: - movi a2, 0 - leaf_return - -.Lfixunssfsi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a2, 1f - mov a2, a5 /* no shift needed */ - leaf_return - - /* Return 0x80000000 if negative. */ -1: slli a2, a6, 8 - leaf_return - -#endif /* L_fixunssfsi */ - -#ifdef L_fixunssfdi - - .align 4 - .global __fixunssfdi - .type __fixunssfdi, @function -__fixunssfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixunssfdi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7f - bgei a4, 64, .Lfixunssfdi_maxint - bltz a4, .Lfixunssfdi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli xh, a7, 8 - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 64, .Lfixunssfdi_bigexp - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixunssfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixunssfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixunssfdi_smallshift: - movi xl, 0 - src xl, xh, xl - srl xh, xh - j .Lfixunssfdi_shifted - -.Lfixunssfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixunssfdi_maxint - - /* Translate NaN to 0xffffffff.... */ -1: movi xh, -1 - movi xl, -1 - leaf_return - -.Lfixunssfdi_maxint: - bgez a2, 1b -2: slli xh, a6, 8 /* 0x80000000 */ - movi xl, 0 - leaf_return - -.Lfixunssfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -.Lfixunssfdi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a7, 2b - movi xl, 0 - leaf_return /* no shift needed */ - -#endif /* L_fixunssfdi */ - -#ifdef L_floatsisf - - .align 4 - .global __floatunsisf - .type __floatunsisf, @function -__floatunsisf: - leaf_entry sp, 16 - beqz a2, .Lfloatsisf_return - - /* Set the sign to zero and jump to the floatsisf code. */ - movi a7, 0 - j .Lfloatsisf_normalize - - .align 4 - .global __floatsisf - .type __floatsisf, @function -__floatsisf: - leaf_entry sp, 16 - - /* Check for zero. */ - beqz a2, .Lfloatsisf_return - - /* Save the sign. */ - extui a7, a2, 31, 1 - - /* Get the absolute value. 
*/ -#if XCHAL_HAVE_ABS - abs a2, a2 -#else - neg a4, a2 - movltz a2, a4, a2 -#endif - -.Lfloatsisf_normalize: - /* Normalize with the first 1 bit in the msb. */ - do_nsau a4, a2, a5, a6 - ssl a4 - sll a5, a2 - - /* Shift the mantissa into position, with rounding bits in a6. */ - srli a2, a5, 8 - slli a6, a5, (32 - 8) - - /* Set the exponent. */ - movi a5, 0x9d /* 0x7e + 31 */ - sub a5, a5, a4 - slli a5, a5, 23 - add a2, a2, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or a2, a2, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, .Lfloatsisf_return - addi a2, a2, 1 /* Overflow to the exponent is OK. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatsisf_exactlyhalf - -.Lfloatsisf_return: - leaf_return - -.Lfloatsisf_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -#endif /* L_floatsisf */ - -#ifdef L_floatdisf - - .align 4 - .global __floatundisf - .type __floatundisf, @function -__floatundisf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Set the sign to zero and jump to the floatdisf code. */ - movi a7, 0 - j .Lfloatdisf_normalize - - .align 4 - .global __floatdisf - .type __floatdisf, @function -__floatdisf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Save the sign. */ - extui a7, xh, 31, 1 - - /* Get the absolute value. */ - bgez xh, .Lfloatdisf_normalize - neg xl, xl - neg xh, xh - beqz xl, .Lfloatdisf_normalize - addi xh, xh, -1 - -.Lfloatdisf_normalize: - /* Normalize with the first 1 bit in the msb of xh. */ - beqz xh, .Lfloatdisf_bigshift - do_nsau a4, xh, a5, a6 - ssl a4 - src xh, xh, xl - sll xl, xl - -.Lfloatdisf_shifted: - /* Shift the mantissa into position, with rounding bits in a6. */ - ssai 8 - sll a5, xl - src a6, xh, xl - srl xh, xh - beqz a5, 1f - movi a5, 1 - or a6, a6, a5 -1: - /* Set the exponent. */ - movi a5, 0xbd /* 0x7e + 63 */ - sub a5, a5, a4 - slli a5, a5, 23 - add a2, xh, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or a2, a2, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, 2f - addi a2, a2, 1 /* Overflow to the exponent is OK. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatdisf_exactlyhalf -2: leaf_return - -.Lfloatdisf_bigshift: - /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ - do_nsau a4, xl, a5, a6 - ssl a4 - sll xh, xl - movi xl, 0 - addi a4, a4, 32 - j .Lfloatdisf_shifted - -.Lfloatdisf_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -#endif /* L_floatdisf */ diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm deleted file mode 100644 index 071b9171177..00000000000 --- a/gcc/config/xtensa/lib1funcs.asm +++ /dev/null @@ -1,845 +0,0 @@ -/* Assembly functions for the Xtensa version of libgcc1. - Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 - Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -#include "xtensa-config.h" - -/* Define macros for the ABS and ADDX* instructions to handle cases - where they are not included in the Xtensa processor configuration. */ - - .macro do_abs dst, src, tmp -#if XCHAL_HAVE_ABS - abs \dst, \src -#else - neg \tmp, \src - movgez \tmp, \src, \src - mov \dst, \tmp -#endif - .endm - - .macro do_addx2 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx2 \dst, \as, \at -#else - slli \tmp, \as, 1 - add \dst, \tmp, \at -#endif - .endm - - .macro do_addx4 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx4 \dst, \as, \at -#else - slli \tmp, \as, 2 - add \dst, \tmp, \at -#endif - .endm - - .macro do_addx8 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx8 \dst, \as, \at -#else - slli \tmp, \as, 3 - add \dst, \tmp, \at -#endif - .endm - -/* Define macros for leaf function entry and return, supporting either the - standard register windowed ABI or the non-windowed call0 ABI. These - macros do not allocate any extra stack space, so they only work for - leaf functions that do not need to spill anything to the stack. */ - - .macro leaf_entry reg, size -#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ - entry \reg, \size -#else - /* do nothing */ -#endif - .endm - - .macro leaf_return -#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ - retw -#else - ret -#endif - .endm - - -#ifdef L_mulsi3 - .align 4 - .global __mulsi3 - .type __mulsi3, @function -__mulsi3: - leaf_entry sp, 16 - -#if XCHAL_HAVE_MUL32 - mull a2, a2, a3 - -#elif XCHAL_HAVE_MUL16 - or a4, a2, a3 - srai a4, a4, 16 - bnez a4, .LMUL16 - mul16u a2, a2, a3 - leaf_return -.LMUL16: - srai a4, a2, 16 - srai a5, a3, 16 - mul16u a7, a4, a3 - mul16u a6, a5, a2 - mul16u a4, a2, a3 - add a7, a7, a6 - slli a7, a7, 16 - add a2, a7, a4 - -#elif XCHAL_HAVE_MAC16 - mul.aa.hl a2, a3 - mula.aa.lh a2, a3 - rsr a5, ACCLO - umul.aa.ll a2, a3 - rsr a4, ACCLO - slli a5, a5, 16 - add a2, a4, a5 - -#else /* !MUL32 && !MUL16 && !MAC16 */ - - /* Multiply one bit at a time, but unroll the loop 4x to better - exploit the addx instructions and avoid overhead. - Peel the first iteration to save a cycle on init. */ - - /* Avoid negative numbers. */ - xor a5, a2, a3 /* Top bit is 1 if one input is negative. */ - do_abs a3, a3, a6 - do_abs a2, a2, a6 - - /* Swap so the second argument is smaller. 
*/ - sub a7, a2, a3 - mov a4, a3 - movgez a4, a2, a7 /* a4 = max (a2, a3) */ - movltz a3, a2, a7 /* a3 = min (a2, a3) */ - - movi a2, 0 - extui a6, a3, 0, 1 - movnez a2, a4, a6 - - do_addx2 a7, a4, a2, a7 - extui a6, a3, 1, 1 - movnez a2, a7, a6 - - do_addx4 a7, a4, a2, a7 - extui a6, a3, 2, 1 - movnez a2, a7, a6 - - do_addx8 a7, a4, a2, a7 - extui a6, a3, 3, 1 - movnez a2, a7, a6 - - bgeui a3, 16, .Lmult_main_loop - neg a3, a2 - movltz a2, a3, a5 - leaf_return - - .align 4 -.Lmult_main_loop: - srli a3, a3, 4 - slli a4, a4, 4 - - add a7, a4, a2 - extui a6, a3, 0, 1 - movnez a2, a7, a6 - - do_addx2 a7, a4, a2, a7 - extui a6, a3, 1, 1 - movnez a2, a7, a6 - - do_addx4 a7, a4, a2, a7 - extui a6, a3, 2, 1 - movnez a2, a7, a6 - - do_addx8 a7, a4, a2, a7 - extui a6, a3, 3, 1 - movnez a2, a7, a6 - - bgeui a3, 16, .Lmult_main_loop - - neg a3, a2 - movltz a2, a3, a5 - -#endif /* !MUL32 && !MUL16 && !MAC16 */ - - leaf_return - .size __mulsi3, . - __mulsi3 - -#endif /* L_mulsi3 */ - - -#ifdef L_umulsidi3 - -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - - .align 4 - .global __umulsidi3 - .type __umulsidi3, @function -__umulsidi3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 48 -#else - leaf_entry sp, 16 -#endif - -#ifdef __XTENSA_EB__ -#define wh a2 -#define wl a3 -#else -#define wh a3 -#define wl a2 -#endif /* __XTENSA_EB__ */ - - /* This code is taken from the mulsf3 routine in ieee754-sf.S. - See more comments there. */ - -#if XCHAL_HAVE_MUL32_HIGH - mull a6, a2, a3 - muluh wh, a2, a3 - mov wl, a6 - -#else /* ! MUL32_HIGH */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* a0 and a8 will be clobbered by calling the multiply function - but a8 is not used here and need not be saved. */ - s32i a0, sp, 0 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define a2h a4 -#define a3h a5 - - /* Get the high halves of the inputs into registers. */ - srli a2h, a2, 16 - srli a3h, a3, 16 - -#define a2l a2 -#define a3l a3 - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui a2, a2, 0, 16 - extui a3, a3, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. 
*/ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a6 with carry-out in a9. */ - do_mul(a6, a2, l, a3, h) /* pp 1 */ - do_mul(a11, a2, h, a3, l) /* pp 2 */ - movi a9, 0 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Shift the high half of a9/a6 into position in a9. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a9, a9, a6 - - /* Compute the low word into a6. */ - do_mul(a11, a2, l, a3, l) /* pp 0 */ - sll a6, a6 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Compute the high word into wh. */ - do_mul(wh, a2, h, a3, h) /* pp 3 */ - add wh, wh, a9 - mov wl, a6 - -#endif /* !MUL32_HIGH */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Restore the original return address. */ - l32i a0, sp, 0 -#endif -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ - - .size __umulsidi3, . - __umulsidi3 - -#endif /* L_umulsidi3 */ - - -/* Define a macro for the NSAU (unsigned normalize shift amount) - instruction, which computes the number of leading zero bits, - to handle cases where it is not included in the Xtensa processor - configuration. 
*/ - - .macro do_nsau cnt, val, tmp, a -#if XCHAL_HAVE_NSA - nsau \cnt, \val -#else - mov \a, \val - movi \cnt, 0 - extui \tmp, \a, 16, 16 - bnez \tmp, 0f - movi \cnt, 16 - slli \a, \a, 16 -0: - extui \tmp, \a, 24, 8 - bnez \tmp, 1f - addi \cnt, \cnt, 8 - slli \a, \a, 8 -1: - movi \tmp, __nsau_data - extui \a, \a, 24, 8 - add \tmp, \tmp, \a - l8ui \tmp, \tmp, 0 - add \cnt, \cnt, \tmp -#endif /* !XCHAL_HAVE_NSA */ - .endm - -#ifdef L_clz - .section .rodata - .align 4 - .global __nsau_data - .type __nsau_data, @object -__nsau_data: -#if !XCHAL_HAVE_NSA - .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4 - .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 - .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 - .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -#endif /* !XCHAL_HAVE_NSA */ - .size __nsau_data, . - __nsau_data - .hidden __nsau_data -#endif /* L_clz */ - - -#ifdef L_clzsi2 - .align 4 - .global __clzsi2 - .type __clzsi2, @function -__clzsi2: - leaf_entry sp, 16 - do_nsau a2, a2, a3, a4 - leaf_return - .size __clzsi2, . - __clzsi2 - -#endif /* L_clzsi2 */ - - -#ifdef L_ctzsi2 - .align 4 - .global __ctzsi2 - .type __ctzsi2, @function -__ctzsi2: - leaf_entry sp, 16 - neg a3, a2 - and a3, a3, a2 - do_nsau a2, a3, a4, a5 - neg a2, a2 - addi a2, a2, 31 - leaf_return - .size __ctzsi2, . - __ctzsi2 - -#endif /* L_ctzsi2 */ - - -#ifdef L_ffssi2 - .align 4 - .global __ffssi2 - .type __ffssi2, @function -__ffssi2: - leaf_entry sp, 16 - neg a3, a2 - and a3, a3, a2 - do_nsau a2, a3, a4, a5 - neg a2, a2 - addi a2, a2, 32 - leaf_return - .size __ffssi2, . 
- __ffssi2 - -#endif /* L_ffssi2 */ - - -#ifdef L_udivsi3 - .align 4 - .global __udivsi3 - .type __udivsi3, @function -__udivsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - quou a2, a2, a3 -#else - bltui a3, 2, .Lle_one /* check if the divisor <= 1 */ - - mov a6, a2 /* keep dividend in a6 */ - do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */ - do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ - ssl a4 - sll a3, a3 /* divisor <<= count */ - movi a2, 0 /* quotient = 0 */ - - /* test-subtract-and-shift loop; one quotient bit on each iteration */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a6, a3, .Lzerobit - sub a6, a6, a3 - addi a2, a2, 1 -.Lzerobit: - slli a2, a2, 1 - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - - bltu a6, a3, .Lreturn - addi a2, a2, 1 /* increment quotient if dividend >= divisor */ -.Lreturn: - leaf_return - -.Lle_one: - beqz a3, .Lerror /* if divisor == 1, return the dividend */ - leaf_return - -.Lspecial: - /* return dividend >= divisor */ - bltu a6, a3, .Lreturn0 - movi a2, 1 - leaf_return - -.Lerror: - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __udivsi3, . - __udivsi3 - -#endif /* L_udivsi3 */ - - -#ifdef L_divsi3 - .align 4 - .global __divsi3 - .type __divsi3, @function -__divsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - quos a2, a2, a3 -#else - xor a7, a2, a3 /* sign = dividend ^ divisor */ - do_abs a6, a2, a4 /* udividend = abs (dividend) */ - do_abs a3, a3, a4 /* udivisor = abs (divisor) */ - bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ - do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */ - do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ - ssl a4 - sll a3, a3 /* udivisor <<= count */ - movi a2, 0 /* quotient = 0 */ - - /* test-subtract-and-shift loop; one quotient bit on each iteration */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a6, a3, .Lzerobit - sub a6, a6, a3 - addi a2, a2, 1 -.Lzerobit: - slli a2, a2, 1 - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - - bltu a6, a3, .Lreturn - addi a2, a2, 1 /* increment if udividend >= udivisor */ -.Lreturn: - neg a5, a2 - movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */ - leaf_return - -.Lle_one: - beqz a3, .Lerror - neg a2, a6 /* if udivisor == 1, then return... */ - movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */ - leaf_return - -.Lspecial: - bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */ - movi a2, 1 - movi a4, -1 - movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */ - leaf_return - -.Lerror: - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __divsi3, . 
- __divsi3 - -#endif /* L_divsi3 */ - - -#ifdef L_umodsi3 - .align 4 - .global __umodsi3 - .type __umodsi3, @function -__umodsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - remu a2, a2, a3 -#else - bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */ - - do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */ - do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = divisor_shift - dividend_shift */ - ssl a4 - sll a3, a3 /* divisor <<= count */ - - /* test-subtract-and-shift loop */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a2, a3, .Lzerobit - sub a2, a2, a3 -.Lzerobit: - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - -.Lspecial: - bltu a2, a3, .Lreturn - sub a2, a2, a3 /* subtract once more if dividend >= divisor */ -.Lreturn: - leaf_return - -.Lle_one: - bnez a3, .Lreturn0 - - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __umodsi3, . - __umodsi3 - -#endif /* L_umodsi3 */ - - -#ifdef L_modsi3 - .align 4 - .global __modsi3 - .type __modsi3, @function -__modsi3: - leaf_entry sp, 16 -#if XCHAL_HAVE_DIV32 - rems a2, a2, a3 -#else - mov a7, a2 /* save original (signed) dividend */ - do_abs a2, a2, a4 /* udividend = abs (dividend) */ - do_abs a3, a3, a4 /* udivisor = abs (divisor) */ - bltui a3, 2, .Lle_one /* check if udivisor <= 1 */ - do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */ - do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */ - bgeu a5, a4, .Lspecial - - sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */ - ssl a4 - sll a3, a3 /* udivisor <<= count */ - - /* test-subtract-and-shift loop */ -#if XCHAL_HAVE_LOOPS - loopnez a4, .Lloopend -#endif /* XCHAL_HAVE_LOOPS */ -.Lloop: - bltu a2, a3, .Lzerobit - sub a2, a2, a3 -.Lzerobit: - srli a3, a3, 1 -#if !XCHAL_HAVE_LOOPS - addi a4, a4, -1 - bnez a4, .Lloop -#endif /* !XCHAL_HAVE_LOOPS */ -.Lloopend: - -.Lspecial: - bltu a2, a3, .Lreturn - sub a2, a2, a3 /* subtract again if udividend >= udivisor */ -.Lreturn: - bgez a7, .Lpositive - neg a2, a2 /* if (dividend < 0), return -udividend */ -.Lpositive: - leaf_return - -.Lle_one: - bnez a3, .Lreturn0 - - /* Divide by zero: Use an illegal instruction to force an exception. - The subsequent "DIV0" string can be recognized by the exception - handler to identify the real cause of the exception. */ - ill - .ascii "DIV0" - -.Lreturn0: - movi a2, 0 -#endif /* XCHAL_HAVE_DIV32 */ - leaf_return - .size __modsi3, . - __modsi3 - -#endif /* L_modsi3 */ - - -#ifdef __XTENSA_EB__ -#define uh a2 -#define ul a3 -#else -#define uh a3 -#define ul a2 -#endif /* __XTENSA_EB__ */ - - -#ifdef L_ashldi3 - .align 4 - .global __ashldi3 - .type __ashldi3, @function -__ashldi3: - leaf_entry sp, 16 - ssl a4 - bgei a4, 32, .Llow_only - src uh, uh, ul - sll ul, ul - leaf_return - -.Llow_only: - sll uh, ul - movi ul, 0 - leaf_return - .size __ashldi3, . 
- __ashldi3 - -#endif /* L_ashldi3 */ - - -#ifdef L_ashrdi3 - .align 4 - .global __ashrdi3 - .type __ashrdi3, @function -__ashrdi3: - leaf_entry sp, 16 - ssr a4 - bgei a4, 32, .Lhigh_only - src ul, uh, ul - sra uh, uh - leaf_return - -.Lhigh_only: - sra ul, uh - srai uh, uh, 31 - leaf_return - .size __ashrdi3, . - __ashrdi3 - -#endif /* L_ashrdi3 */ - - -#ifdef L_lshrdi3 - .align 4 - .global __lshrdi3 - .type __lshrdi3, @function -__lshrdi3: - leaf_entry sp, 16 - ssr a4 - bgei a4, 32, .Lhigh_only1 - src ul, uh, ul - srl uh, uh - leaf_return - -.Lhigh_only1: - srl ul, uh - movi uh, 0 - leaf_return - .size __lshrdi3, . - __lshrdi3 - -#endif /* L_lshrdi3 */ - - -#include "ieee754-df.S" -#include "ieee754-sf.S" diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa index c0a7cb5202f..31ac2ad2452 100644 --- a/gcc/config/xtensa/t-xtensa +++ b/gcc/config/xtensa/t-xtensa @@ -17,18 +17,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = xtensa/lib1funcs.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \ - _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \ - _ashldi3 _ashrdi3 _lshrdi3 \ - _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ - _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \ - _floatdisf _floatundisf \ - _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \ - _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \ - _floatdidf _floatundidf \ - _truncdfsf2 _extendsfdf2 - LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S $(out_object_file): gt-xtensa.h |
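The deleted ieee754-sf.S and lib1funcs.asm above explain their algorithms only in assembly comments. As a reading aid (not part of the patch), here is a small C model of the 16-bit partial-product scheme that the __umulsidi3 and __mulsf3 code falls back to when no MUL32_HIGH unit is configured: each operand is split into 16-bit halves, four 32-bit partial products are formed, and the middle pair is accumulated with an explicit carry, as the "pp 0".."pp 3" comments outline. The function name umulsidi3_model is invented for the illustration.

#include <stdint.h>

/* Model of the partial-product multiply:
   x * y == (pp3 << 32) + ((pp1 + pp2) << 16) + pp0.  */
uint64_t
umulsidi3_model (uint32_t x, uint32_t y)
{
  uint32_t xl = x & 0xffff, xh = x >> 16;
  uint32_t yl = y & 0xffff, yh = y >> 16;

  uint32_t pp0 = xl * yl;               /* low  x low   */
  uint32_t pp1 = xl * yh;               /* low  x high  */
  uint32_t pp2 = xh * yl;               /* high x low   */
  uint32_t pp3 = xh * yh;               /* high x high  */

  uint32_t mid = pp1 + pp2;             /* middle partial products...        */
  uint32_t midcarry = (mid < pp1);      /* ...with their carry into bit 32   */

  uint32_t lo = pp0 + (mid << 16);
  uint32_t hi = pp3 + (mid >> 16) + (midcarry << 16) + (lo < pp0);

  return ((uint64_t) hi << 32) | lo;
}

The assembly computes the same four products with mul16u, mull, umul.aa.* or the .Lmul_mulsi3 helper, depending on which multiplier option the processor configuration provides.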
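The __udivsi3, __divsi3, __umodsi3 and __modsi3 routines removed above share one structure when the processor has no divider: normalize with the NSAU (count leading zeros) operation, or with the __nsau_data table when NSA is absent, then run a test-subtract-and-shift loop that produces one quotient bit per iteration. The sketch below restates that structure in C under the same conditions; nsau_model and udivsi3_model are invented names, and division by zero simply returns 0 here, where the assembly deliberately executes an illegal instruction tagged "DIV0".

#include <stdint.h>

/* Model of do_nsau without the NSA instruction: narrow to the top byte
   in two steps, then count within that byte (the assembly finishes the
   last step with the 256-entry __nsau_data table).  nsau_model (0) == 32.  */
static uint32_t
nsau_model (uint32_t v)
{
  uint32_t cnt = 0;
  if ((v >> 16) == 0) { cnt += 16; v <<= 16; }
  if ((v >> 24) == 0) { cnt += 8;  v <<= 8;  }
  uint32_t byte = v >> 24, lz = 8;
  while (byte != 0) { byte >>= 1; lz--; }
  return cnt + lz;
}

/* Model of the __udivsi3 loop: align the divisor's leading one with the
   dividend's, then one quotient bit per test-subtract-and-shift step,
   with a final compare/subtract after the loop.  */
uint32_t
udivsi3_model (uint32_t dividend, uint32_t divisor)
{
  if (divisor == 0)
    return 0;                             /* the assembly raises "ill" / "DIV0" here */
  if (divisor == 1)
    return dividend;

  uint32_t dividend_shift = nsau_model (dividend);
  uint32_t divisor_shift  = nsau_model (divisor);
  if (dividend_shift >= divisor_shift)    /* .Lspecial: result is 0 or 1 */
    return dividend >= divisor;

  uint32_t count = divisor_shift - dividend_shift;
  divisor <<= count;

  uint32_t quotient = 0;
  for (uint32_t i = 0; i < count; i++)
    {
      if (dividend >= divisor)
        {
          dividend -= divisor;
          quotient += 1;
        }
      quotient <<= 1;
      divisor >>= 1;
    }
  if (dividend >= divisor)                /* last quotient bit */
    quotient += 1;
  return quotient;
}

The signed __divsi3 wraps the same loop with do_abs on both operands and restores the sign at the end, and __umodsi3/__modsi3 keep the running dividend (the remainder) instead of the quotient.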
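Finally, the rounding step repeated by the mulsf3, floatsisf and floatdisf paths above (the ".Lmul_exactlyhalf", ".Lfloatsisf_exactlyhalf" and related labels) can be stated compactly in C. The word of bits shifted out of the result keeps its most significant discarded bit in bit 31: round up when that bit is set, and when nothing else was shifted out (an exact tie) clear the low bit of the result so the tie goes to even. This is a sketch only; the helper name round_nearest_even_model is invented, and __divsf3 performs the equivalent test by comparing the remainder against the divisor rather than using a packed fraction word.

#include <stdint.h>

/* Model of the round-to-nearest-even step: 'result' is the (partially)
   packed result, 'frac' holds every discarded bit with the most
   significant one in bit 31.  */
uint32_t
round_nearest_even_model (uint32_t result, uint32_t frac)
{
  if (frac & 0x80000000u)        /* leftover fraction >= 1/2           */
    {
      result += 1;               /* carrying into the exponent is fine */
      if ((frac << 1) == 0)      /* exactly 1/2: break the tie...      */
        result &= ~1u;           /* ...by forcing the result even      */
    }
  return result;
}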