| author    | aldyh <aldyh@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-08 03:20:30 +0000 |
|-----------|----------------------------------------------------|---------------------------|
| committer | aldyh <aldyh@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-08 03:20:30 +0000 |
| commit    | 643df0593c630691fa6877cddeefdd4c3023d444 (patch)   |                           |
| tree      | 1eb48ad31d05a9ce117bedc17115de96dffa2f0b /libgcc/config/arm |                  |
| parent    | 54f3f029d816c6d1626310649adfda740e203f7b (diff)    |                           |
| parent    | d5d8f1ccc6d3972dc5cfc0949e85e0b1c9e24ee0 (diff)    |                           |
| download  | gcc-643df0593c630691fa6877cddeefdd4c3023d444.tar.gz |                          |
* Merge from mainline rev 181122.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/transactional-memory@181148 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgcc/config/arm')
| mode       | file                                      | lines |
|------------|-------------------------------------------|-------|
| -rw-r--r-- | libgcc/config/arm/bpabi-v6m.S             | 318   |
| -rw-r--r-- | libgcc/config/arm/bpabi.S                 | 163   |
| -rw-r--r-- | libgcc/config/arm/bpabi.c                 | 56    |
| -rw-r--r-- | libgcc/config/arm/crti.S                  | 86    |
| -rw-r--r-- | libgcc/config/arm/crtn.S                  | 83    |
| -rw-r--r-- | libgcc/config/arm/fp16.c                  | 145   |
| -rw-r--r-- | libgcc/config/arm/ieee754-df.S            | 1447  |
| -rw-r--r-- | libgcc/config/arm/ieee754-sf.S            | 1060  |
| -rw-r--r-- | libgcc/config/arm/lib1funcs.S             | 1829  |
| -rw-r--r-- | libgcc/config/arm/libgcc-bpabi.ver        | 108   |
| -rw-r--r-- | libgcc/config/arm/libunwind.S             | 2     |
| -rw-r--r-- | libgcc/config/arm/linux-atomic-64bit.c    | 166   |
| -rw-r--r-- | libgcc/config/arm/linux-atomic.c          | 279   |
| -rw-r--r-- | libgcc/config/arm/t-arm                   | 3     |
| -rw-r--r-- | libgcc/config/arm/t-bpabi                 | 12    |
| -rw-r--r-- | libgcc/config/arm/t-elf                   | 18    |
| -rw-r--r-- | libgcc/config/arm/t-linux                 | 7     |
| -rw-r--r-- | libgcc/config/arm/t-linux-eabi            | 5     |
| -rw-r--r-- | libgcc/config/arm/t-netbsd                | 7     |
| -rw-r--r-- | libgcc/config/arm/t-strongarm-elf         | 6     |
| -rw-r--r-- | libgcc/config/arm/t-symbian               | 17    |
| -rw-r--r-- | libgcc/config/arm/t-vxworks               | 1     |
| -rw-r--r-- | libgcc/config/arm/t-wince-pe              | 1     |
| -rw-r--r-- | libgcc/config/arm/unaligned-funcs.c       | 57    |
24 files changed, 5875 insertions, 1 deletion
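Among the new files, bpabi.c supplies the 64-bit division helpers that the `__aeabi_ldivmod`/`__aeabi_uldivmod` assembly shims added below wrap: the quotient comes from `__divdi3`/`__udivdi3`, and the remainder is derived as `a - b * quotient`, with the shims then returning the quotient in r0/r1 and the remainder in r2/r3. A minimal standalone sketch of that contract follows; `gnu_ldivmod_helper_model` and `main` are hypothetical stand-ins for illustration, not part of the patch:

```c
/* Model of the contract implemented by __gnu_ldivmod_helper in bpabi.c:
   compute the quotient, then derive the remainder as a - b * quotient. */
#include <stdio.h>

static long long
gnu_ldivmod_helper_model (long long a, long long b, long long *remainder)
{
  long long quotient = a / b;      /* stands in for __divdi3 */
  *remainder = a - b * quotient;   /* same identity bpabi.c uses */
  return quotient;
}

int
main (void)
{
  long long rem;
  long long quot = gnu_ldivmod_helper_model (-7, 2, &rem);
  /* C division truncates toward zero: -7 / 2 -> quotient -3, remainder -1. */
  printf ("%lld %lld\n", quot, rem);   /* prints: -3 -1 */
  return 0;
}
```

Because C division truncates toward zero, the remainder carries the dividend's sign, which is the convention the AEABI long-long divmod entry points expose.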
diff --git a/libgcc/config/arm/bpabi-v6m.S b/libgcc/config/arm/bpabi-v6m.S new file mode 100644 index 00000000000..4ecea6da5a6 --- /dev/null +++ b/libgcc/config/arm/bpabi-v6m.S @@ -0,0 +1,318 @@ +/* Miscellaneous BPABI functions. ARMv6M implementation + + Copyright (C) 2006, 2008, 2009, 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +#ifdef L_aeabi_lcmp + +FUNC_START aeabi_lcmp + cmp xxh, yyh + beq 1f + bgt 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 + RET +1: + sub r0, xxl, yyl + beq 1f + bhi 2f + mov r0, #1 + neg r0, r0 + RET +2: + mov r0, #1 +1: + RET + FUNC_END aeabi_lcmp + +#endif /* L_aeabi_lcmp */ + +#ifdef L_aeabi_ulcmp + +FUNC_START aeabi_ulcmp + cmp xxh, yyh + bne 1f + sub r0, xxl, yyl + beq 2f +1: + bcs 1f + mov r0, #1 + neg r0, r0 + RET +1: + mov r0, #1 +2: + RET + FUNC_END aeabi_ulcmp + +#endif /* L_aeabi_ulcmp */ + +.macro test_div_by_zero signed + cmp yyh, #0 + bne 7f + cmp yyl, #0 + bne 7f + cmp xxh, #0 + bne 2f + cmp xxl, #0 +2: + .ifc \signed, unsigned + beq 3f + mov xxh, #0 + mvn xxh, xxh @ 0xffffffff + mov xxl, xxh +3: + .else + beq 5f + blt 6f + mov xxl, #0 + mvn xxl, xxl @ 0xffffffff + lsr xxh, xxl, #1 @ 0x7fffffff + b 5f +6: mov xxh, #0x80 + lsl xxh, xxh, #24 @ 0x80000000 + mov xxl, #0 +5: + .endif + @ tailcalls are tricky on v6-m. + push {r0, r1, r2} + ldr r0, 1f + adr r1, 1f + add r0, r1 + str r0, [sp, #8] + @ We know we are not on armv4t, so pop pc is safe. 
+ pop {r0, r1, pc} + .align 2 +1: + .word __aeabi_ldiv0 - 1b +7: +.endm + +#ifdef L_aeabi_ldivmod + +FUNC_START aeabi_ldivmod + test_div_by_zero signed + + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_ldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_ldivmod + +#endif /* L_aeabi_ldivmod */ + +#ifdef L_aeabi_uldivmod + +FUNC_START aeabi_uldivmod + test_div_by_zero unsigned + + push {r0, r1} + mov r0, sp + push {r0, lr} + ldr r0, [sp, #8] + bl SYM(__gnu_uldivmod_helper) + ldr r3, [sp, #4] + mov lr, r3 + add sp, sp, #8 + pop {r2, r3} + RET + FUNC_END aeabi_uldivmod + +#endif /* L_aeabi_uldivmod */ + +#ifdef L_arm_addsubsf3 + +FUNC_START aeabi_frsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor r0, r0, r4 + bl __aeabi_fadd + pop {r4, pc} + + FUNC_END aeabi_frsub + +#endif /* L_arm_addsubsf3 */ + +#ifdef L_arm_cmpsf2 + +FUNC_START aeabi_cfrcmple + + mov ip, r0 + mov r0, r1 + mov r1, ip + b 6f + +FUNC_START aeabi_cfcmpeq +FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __lesf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cfcmple + FUNC_END aeabi_cfcmpeq + FUNC_END aeabi_cfrcmple + +FUNC_START aeabi_fcmpeq + + push {r4, lr} + bl __eqsf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmpeq + +.macro COMPARISON cond, helper, mode=sf2 +FUNC_START aeabi_fcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_fcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpsf2 */ + +#ifdef L_arm_addsubdf3 + +FUNC_START aeabi_drsub + + push {r4, lr} + mov r4, #1 + lsl r4, #31 + eor xxh, xxh, r4 + bl __aeabi_dadd + pop {r4, pc} + + FUNC_END aeabi_drsub + +#endif /* L_arm_addsubdf3 */ + +#ifdef L_arm_cmpdf2 + +FUNC_START aeabi_cdrcmple + + mov ip, r0 + mov r0, r2 + mov r2, ip + mov ip, r1 + mov r1, r3 + mov r3, ip + b 6f + +FUNC_START aeabi_cdcmpeq +FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: push {r0, r1, r2, r3, r4, lr} + bl __ledf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. 
+ bmi 1f + mov r1, #0 + cmn r0, r1 +1: + pop {r0, r1, r2, r3, r4, pc} + + FUNC_END aeabi_cdcmple + FUNC_END aeabi_cdcmpeq + FUNC_END aeabi_cdrcmple + +FUNC_START aeabi_dcmpeq + + push {r4, lr} + bl __eqdf2 + neg r0, r0 + add r0, r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmpeq + +.macro COMPARISON cond, helper, mode=df2 +FUNC_START aeabi_dcmp\cond + + push {r4, lr} + bl __\helper\mode + cmp r0, #0 + b\cond 1f + mov r0, #0 + pop {r4, pc} +1: + mov r0, #1 + pop {r4, pc} + + FUNC_END aeabi_dcmp\cond +.endm + +COMPARISON lt, le +COMPARISON le, le +COMPARISON gt, ge +COMPARISON ge, ge + +#endif /* L_arm_cmpdf2 */ diff --git a/libgcc/config/arm/bpabi.S b/libgcc/config/arm/bpabi.S new file mode 100644 index 00000000000..2ff338927fa --- /dev/null +++ b/libgcc/config/arm/bpabi.S @@ -0,0 +1,163 @@ +/* Miscellaneous BPABI functions. + + Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +#ifdef L_aeabi_lcmp + +ARM_FUNC_START aeabi_lcmp + cmp xxh, yyh + do_it lt + movlt r0, #-1 + do_it gt + movgt r0, #1 + do_it ne + RETc(ne) + subs r0, xxl, yyl + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + RET + FUNC_END aeabi_lcmp + +#endif /* L_aeabi_lcmp */ + +#ifdef L_aeabi_ulcmp + +ARM_FUNC_START aeabi_ulcmp + cmp xxh, yyh + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + do_it ne + RETc(ne) + cmp xxl, yyl + do_it lo + movlo r0, #-1 + do_it hi + movhi r0, #1 + do_it eq + moveq r0, #0 + RET + FUNC_END aeabi_ulcmp + +#endif /* L_aeabi_ulcmp */ + +.macro test_div_by_zero signed +/* Tail-call to divide-by-zero handlers which may be overridden by the user, + so unwinding works properly. */ +#if defined(__thumb2__) + cbnz yyh, 1f + cbnz yyl, 1f + cmp xxh, #0 + do_it eq + cmpeq xxl, #0 + .ifc \signed, unsigned + beq 2f + mov xxh, #0xffffffff + mov xxl, xxh +2: + .else + do_it lt, t + movlt xxl, #0 + movlt xxh, #0x80000000 + do_it gt, t + movgt xxh, #0x7fffffff + movgt xxl, #0xffffffff + .endif + b SYM (__aeabi_ldiv0) __PLT__ +1: +#else + /* Note: Thumb-1 code calls via an ARM shim on processors which + support ARM mode. 
*/ + cmp yyh, #0 + cmpeq yyl, #0 + bne 2f + cmp xxh, #0 + cmpeq xxl, #0 + .ifc \signed, unsigned + movne xxh, #0xffffffff + movne xxl, #0xffffffff + .else + movlt xxh, #0x80000000 + movlt xxl, #0 + movgt xxh, #0x7fffffff + movgt xxl, #0xffffffff + .endif + b SYM (__aeabi_ldiv0) __PLT__ +2: +#endif +.endm + +#ifdef L_aeabi_ldivmod + +ARM_FUNC_START aeabi_ldivmod + test_div_by_zero signed + + sub sp, sp, #8 +#if defined(__thumb2__) + mov ip, sp + push {ip, lr} +#else + do_push {sp, lr} +#endif + bl SYM(__gnu_ldivmod_helper) __PLT__ + ldr lr, [sp, #4] + add sp, sp, #8 + do_pop {r2, r3} + RET + +#endif /* L_aeabi_ldivmod */ + +#ifdef L_aeabi_uldivmod + +ARM_FUNC_START aeabi_uldivmod + test_div_by_zero unsigned + + sub sp, sp, #8 +#if defined(__thumb2__) + mov ip, sp + push {ip, lr} +#else + do_push {sp, lr} +#endif + bl SYM(__gnu_uldivmod_helper) __PLT__ + ldr lr, [sp, #4] + add sp, sp, #8 + do_pop {r2, r3} + RET + +#endif /* L_aeabi_divmod */ + diff --git a/libgcc/config/arm/bpabi.c b/libgcc/config/arm/bpabi.c new file mode 100644 index 00000000000..283bdc0acf0 --- /dev/null +++ b/libgcc/config/arm/bpabi.c @@ -0,0 +1,56 @@ +/* Miscellaneous BPABI functions. + + Copyright (C) 2003, 2004, 2009 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +extern long long __divdi3 (long long, long long); +extern unsigned long long __udivdi3 (unsigned long long, + unsigned long long); +extern long long __gnu_ldivmod_helper (long long, long long, long long *); +extern unsigned long long __gnu_uldivmod_helper (unsigned long long, + unsigned long long, + unsigned long long *); + + +long long +__gnu_ldivmod_helper (long long a, + long long b, + long long *remainder) +{ + long long quotient; + + quotient = __divdi3 (a, b); + *remainder = a - b * quotient; + return quotient; +} + +unsigned long long +__gnu_uldivmod_helper (unsigned long long a, + unsigned long long b, + unsigned long long *remainder) +{ + unsigned long long quotient; + + quotient = __udivdi3 (a, b); + *remainder = a - b * quotient; + return quotient; +} diff --git a/libgcc/config/arm/crti.S b/libgcc/config/arm/crti.S new file mode 100644 index 00000000000..50915f9e31f --- /dev/null +++ b/libgcc/config/arm/crti.S @@ -0,0 +1,86 @@ +# Copyright (C) 2001, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +# Written By Nick Clifton +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) any +# later version. 
+# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + +/* An executable stack is *not* required for these functions. */ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +# This file just make a stack frame for the contents of the .fini and +# .init sections. Users may put any desired instructions in those +# sections. + +#ifdef __ELF__ +#define TYPE(x) .type x,function +#else +#define TYPE(x) +#endif +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + + # Note - this macro is complemented by the FUNC_END macro + # in crtn.S. If you change this macro you must also change + # that macro match. +.macro FUNC_START +#ifdef __thumb__ + .thumb + + push {r3, r4, r5, r6, r7, lr} +#else + .arm + # Create a stack frame and save any call-preserved registers + mov ip, sp + stmdb sp!, {r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr, pc} + sub fp, ip, #4 +#endif +.endm + + .section ".init" + .align 2 + .global _init +#ifdef __thumb__ + .thumb_func +#endif + TYPE(_init) +_init: + FUNC_START + + + .section ".fini" + .align 2 + .global _fini +#ifdef __thumb__ + .thumb_func +#endif + TYPE(_fini) +_fini: + FUNC_START + +# end of crti.S diff --git a/libgcc/config/arm/crtn.S b/libgcc/config/arm/crtn.S new file mode 100644 index 00000000000..8c5f22572f7 --- /dev/null +++ b/libgcc/config/arm/crtn.S @@ -0,0 +1,83 @@ +# Copyright (C) 2001, 2004, 2008, 2009, 2010, 2011 +# Free Software Foundation, Inc. +# Written By Nick Clifton +# +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) any +# later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + +/* An executable stack is *not* required for these functions. 
*/ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ + +# This file just makes sure that the .fini and .init sections do in +# fact return. Users may put any desired instructions in those sections. +# This file is the last thing linked into any executable. + + # Note - this macro is complemented by the FUNC_START macro + # in crti.S. If you change this macro you must also change + # that macro match. + # + # Note - we do not try any fancy optimizations of the return + # sequences here, it is just not worth it. Instead keep things + # simple. Restore all the save resgisters, including the link + # register and then perform the correct function return instruction. + # We also save/restore r3 to ensure stack alignment. +.macro FUNC_END +#ifdef __thumb__ + .thumb + + pop {r3, r4, r5, r6, r7} + pop {r3} + mov lr, r3 +#else + .arm + + sub sp, fp, #40 + ldmfd sp, {r4, r5, r6, r7, r8, r9, sl, fp, sp, lr} +#endif + +#if defined __THUMB_INTERWORK__ || defined __thumb__ + bx lr +#else + mov pc, lr +#endif +.endm + + + .section ".init" + ;; + FUNC_END + + .section ".fini" + ;; + FUNC_END + +# end of crtn.S diff --git a/libgcc/config/arm/fp16.c b/libgcc/config/arm/fp16.c new file mode 100644 index 00000000000..936caeb78d0 --- /dev/null +++ b/libgcc/config/arm/fp16.c @@ -0,0 +1,145 @@ +/* Half-float conversion routines. + + Copyright (C) 2008, 2009 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +static inline unsigned short +__gnu_f2h_internal(unsigned int a, int ieee) +{ + unsigned short sign = (a >> 16) & 0x8000; + int aexp = (a >> 23) & 0xff; + unsigned int mantissa = a & 0x007fffff; + unsigned int mask; + unsigned int increment; + + if (aexp == 0xff) + { + if (!ieee) + return sign; + return sign | 0x7e00 | (mantissa >> 13); + } + + if (aexp == 0 && mantissa == 0) + return sign; + + aexp -= 127; + + /* Decimal point between bits 22 and 23. */ + mantissa |= 0x00800000; + if (aexp < -14) + { + mask = 0x007fffff; + if (aexp < -25) + aexp = -26; + else if (aexp != -25) + mask >>= 24 + aexp; + } + else + mask = 0x00001fff; + + /* Round. 
*/ + if (mantissa & mask) + { + increment = (mask + 1) >> 1; + if ((mantissa & mask) == increment) + increment = mantissa & (increment << 1); + mantissa += increment; + if (mantissa >= 0x01000000) + { + mantissa >>= 1; + aexp++; + } + } + + if (ieee) + { + if (aexp > 15) + return sign | 0x7c00; + } + else + { + if (aexp > 16) + return sign | 0x7fff; + } + + if (aexp < -24) + return sign; + + if (aexp < -14) + { + mantissa >>= -14 - aexp; + aexp = -14; + } + + /* We leave the leading 1 in the mantissa, and subtract one + from the exponent bias to compensate. */ + return sign | (((aexp + 14) << 10) + (mantissa >> 13)); +} + +unsigned int +__gnu_h2f_internal(unsigned short a, int ieee) +{ + unsigned int sign = (unsigned int)(a & 0x8000) << 16; + int aexp = (a >> 10) & 0x1f; + unsigned int mantissa = a & 0x3ff; + + if (aexp == 0x1f && ieee) + return sign | 0x7f800000 | (mantissa << 13); + + if (aexp == 0) + { + int shift; + + if (mantissa == 0) + return sign; + + shift = __builtin_clz(mantissa) - 21; + mantissa <<= shift; + aexp = -shift; + } + + return sign | (((aexp + 0x70) << 23) + (mantissa << 13)); +} + +unsigned short +__gnu_f2h_ieee(unsigned int a) +{ + return __gnu_f2h_internal(a, 1); +} + +unsigned int +__gnu_h2f_ieee(unsigned short a) +{ + return __gnu_h2f_internal(a, 1); +} + +unsigned short +__gnu_f2h_alternative(unsigned int x) +{ + return __gnu_f2h_internal(x, 0); +} + +unsigned int +__gnu_h2f_alternative(unsigned short a) +{ + return __gnu_h2f_internal(a, 0); +} diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S new file mode 100644 index 00000000000..eb0c38632d0 --- /dev/null +++ b/libgcc/config/arm/ieee754-df.S @@ -0,0 +1,1447 @@ +/* ieee754-df.S double-precision floating point support for ARM + + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. + Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. This is + * not meant to be easy to understand for the casual reader. + * For slightly simpler code please see the single precision version + * of this file. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + + +@ For FPA, float words are always big-endian. +@ For VFP, floats words follow the memory system mode. 
+#if defined(__VFP_FP__) && !defined(__ARMEB__) +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#else +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#endif + + +#ifdef L_arm_negdf2 + +ARM_FUNC_START negdf2 +ARM_FUNC_ALIAS aeabi_dneg negdf2 + + @ flip sign bit + eor xh, xh, #0x80000000 + RET + + FUNC_END aeabi_dneg + FUNC_END negdf2 + +#endif + +#ifdef L_arm_addsubdf3 + +ARM_FUNC_START aeabi_drsub + + eor xh, xh, #0x80000000 @ flip sign bit of first arg + b 1f + +ARM_FUNC_START subdf3 +ARM_FUNC_ALIAS aeabi_dsub subdf3 + + eor yh, yh, #0x80000000 @ flip sign bit of second arg +#if defined(__INTERWORKING_STUBS__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START adddf3 +ARM_FUNC_ALIAS aeabi_dadd adddf3 + +1: do_push {r4, r5, lr} + + @ Look for zeroes, equal values, INF, or NAN. + shift1 lsl, r4, xh, #1 + shift1 lsl, r5, yh, #1 + teq r4, r5 + do_it eq + teqeq xl, yl + do_it ne, ttt + COND(orr,s,ne) ip, r4, xl + COND(orr,s,ne) ip, r5, yl + COND(mvn,s,ne) ip, r4, asr #21 + COND(mvn,s,ne) ip, r5, asr #21 + beq LSYM(Lad_s) + + @ Compute exponent difference. Make largest exponent in r4, + @ corresponding arg in xh-xl, and positive exponent difference in r5. + shift1 lsr, r4, r4, #21 + rsbs r5, r4, r5, lsr #21 + do_it lt + rsblt r5, r5, #0 + ble 1f + add r4, r4, r5 + eor yl, xl, yl + eor yh, xh, yh + eor xl, yl, xl + eor xh, yh, xh + eor yl, xl, yl + eor yh, xh, yh +1: + @ If exponent difference is too large, return largest argument + @ already in xh-xl. We need up to 54 bit to handle proper rounding + @ of 0x1p54 - 1.1. + cmp r5, #54 + do_it hi + RETLDM "r4, r5" hi + + @ Convert mantissa to signed integer. + tst xh, #0x80000000 + mov xh, xh, lsl #12 + mov ip, #0x00100000 + orr xh, ip, xh, lsr #12 + beq 1f +#if defined(__thumb2__) + negs xl, xl + sbc xh, xh, xh, lsl #1 +#else + rsbs xl, xl, #0 + rsc xh, xh, #0 +#endif +1: + tst yh, #0x80000000 + mov yh, yh, lsl #12 + orr yh, ip, yh, lsr #12 + beq 1f +#if defined(__thumb2__) + negs yl, yl + sbc yh, yh, yh, lsl #1 +#else + rsbs yl, yl, #0 + rsc yh, yh, #0 +#endif +1: + @ If exponent == difference, one or both args were denormalized. + @ Since this is not common case, rescale them off line. + teq r4, r5 + beq LSYM(Lad_d) +LSYM(Lad_x): + + @ Compensate for the exponent overlapping the mantissa MSB added later + sub r4, r4, #1 + + @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip. + rsbs lr, r5, #32 + blt 1f + shift1 lsl, ip, yl, lr + shiftop adds xl xl yl lsr r5 yl + adc xh, xh, #0 + shiftop adds xl xl yh lsl lr yl + shiftop adcs xh xh yh asr r5 yh + b 2f +1: sub r5, r5, #32 + add lr, lr, #32 + cmp yl, #1 + shift1 lsl,ip, yh, lr + do_it cs + orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later + shiftop adds xl xl yh asr r5 yh + adcs xh, xh, yh, asr #31 +2: + @ We now have a result in xh-xl-ip. + @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above) + and r5, xh, #0x80000000 + bpl LSYM(Lad_p) +#if defined(__thumb2__) + mov lr, #0 + negs ip, ip + sbcs xl, lr, xl + sbc xh, lr, xh +#else + rsbs ip, ip, #0 + rscs xl, xl, #0 + rsc xh, xh, #0 +#endif + + @ Determine how to normalize the result. +LSYM(Lad_p): + cmp xh, #0x00100000 + bcc LSYM(Lad_a) + cmp xh, #0x00200000 + bcc LSYM(Lad_e) + + @ Result needs to be shifted right. + movs xh, xh, lsr #1 + movs xl, xl, rrx + mov ip, ip, rrx + add r4, r4, #1 + + @ Make sure we did not bust our exponent. + mov r2, r4, lsl #21 + cmn r2, #(2 << 21) + bcs LSYM(Lad_o) + + @ Our result is now properly aligned into xh-xl, remaining bits in ip. 
+ @ Round with MSB of ip. If halfway between two numbers, round towards + @ LSB of xl = 0. + @ Pack final result together. +LSYM(Lad_e): + cmp ip, #0x80000000 + do_it eq + COND(mov,s,eq) ip, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + orr xh, xh, r5 + RETLDM "r4, r5" + + @ Result must be shifted left and exponent adjusted. +LSYM(Lad_a): + movs ip, ip, lsl #1 + adcs xl, xl, xl + adc xh, xh, xh + tst xh, #0x00100000 + sub r4, r4, #1 + bne LSYM(Lad_e) + + @ No rounding necessary since ip will always be 0 at this point. +LSYM(Lad_l): + +#if __ARM_ARCH__ < 5 + + teq xh, #0 + movne r3, #20 + moveq r3, #52 + moveq xh, xl + moveq xl, #0 + mov r2, xh + cmp r2, #(1 << 16) + movhs r2, r2, lsr #16 + subhs r3, r3, #16 + cmp r2, #(1 << 8) + movhs r2, r2, lsr #8 + subhs r3, r3, #8 + cmp r2, #(1 << 4) + movhs r2, r2, lsr #4 + subhs r3, r3, #4 + cmp r2, #(1 << 2) + subhs r3, r3, #2 + sublo r3, r3, r2, lsr #1 + sub r3, r3, r2, lsr #3 + +#else + + teq xh, #0 + do_it eq, t + moveq xh, xl + moveq xl, #0 + clz r3, xh + do_it eq + addeq r3, r3, #32 + sub r3, r3, #11 + +#endif + + @ determine how to shift the value. + subs r2, r3, #32 + bge 2f + adds r2, r2, #12 + ble 1f + + @ shift value left 21 to 31 bits, or actually right 11 to 1 bits + @ since a register switch happened above. + add ip, r2, #20 + rsb r2, r2, #12 + shift1 lsl, xl, xh, ip + shift1 lsr, xh, xh, r2 + b 3f + + @ actually shift value left 1 to 20 bits, which might also represent + @ 32 to 52 bits if counting the register switch that happened earlier. +1: add r2, r2, #20 +2: do_it le + rsble ip, r2, #32 + shift1 lsl, xh, xh, r2 +#if defined(__thumb2__) + lsr ip, xl, ip + itt le + orrle xh, xh, ip + lslle xl, xl, r2 +#else + orrle xh, xh, xl, lsr ip + movle xl, xl, lsl r2 +#endif + + @ adjust exponent accordingly. +3: subs r4, r4, r3 + do_it ge, tt + addge xh, xh, r4, lsl #20 + orrge xh, xh, r5 + RETLDM "r4, r5" ge + + @ Exponent too small, denormalize result. + @ Find out proper shift value. + mvn r4, r4 + subs r4, r4, #31 + bge 2f + adds r4, r4, #12 + bgt 1f + + @ shift result right of 1 to 20 bits, sign is in r5. + add r4, r4, #20 + rsb r2, r4, #32 + shift1 lsr, xl, xl, r4 + shiftop orr xl xl xh lsl r2 yh + shiftop orr xh r5 xh lsr r4 yh + RETLDM "r4, r5" + + @ shift result right of 21 to 31 bits, or left 11 to 1 bits after + @ a register switch from xh to xl. +1: rsb r4, r4, #12 + rsb r2, r4, #32 + shift1 lsr, xl, xl, r2 + shiftop orr xl xl xh lsl r4 yh + mov xh, r5 + RETLDM "r4, r5" + + @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch + @ from xh to xl. +2: shift1 lsr, xl, xh, r4 + mov xh, r5 + RETLDM "r4, r5" + + @ Adjust exponents for denormalized arguments. + @ Note that r4 must not remain equal to 0. +LSYM(Lad_d): + teq r4, #0 + eor yh, yh, #0x00100000 + do_it eq, te + eoreq xh, xh, #0x00100000 + addeq r4, r4, #1 + subne r5, r5, #1 + b LSYM(Lad_x) + + +LSYM(Lad_s): + mvns ip, r4, asr #21 + do_it ne + COND(mvn,s,ne) ip, r5, asr #21 + beq LSYM(Lad_i) + + teq r4, r5 + do_it eq + teqeq xl, yl + beq 1f + + @ Result is x + 0.0 = x or 0.0 + y = y. + orrs ip, r4, xl + do_it eq, t + moveq xh, yh + moveq xl, yl + RETLDM "r4, r5" + +1: teq xh, yh + + @ Result is x - x = 0. + do_it ne, tt + movne xh, #0 + movne xl, #0 + RETLDM "r4, r5" ne + + @ Result is x + x = 2x. 
+ movs ip, r4, lsr #21 + bne 2f + movs xl, xl, lsl #1 + adcs xh, xh, xh + do_it cs + orrcs xh, xh, #0x80000000 + RETLDM "r4, r5" +2: adds r4, r4, #(2 << 21) + do_it cc, t + addcc xh, xh, #(1 << 20) + RETLDM "r4, r5" cc + and r5, xh, #0x80000000 + + @ Overflow: return INF. +LSYM(Lad_o): + orr xh, r5, #0x7f000000 + orr xh, xh, #0x00f00000 + mov xl, #0 + RETLDM "r4, r5" + + @ At least one of x or y is INF/NAN. + @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) + @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) + @ if either is NAN: return NAN + @ if opposite sign: return NAN + @ otherwise return xh-xl (which is INF or -INF) +LSYM(Lad_i): + mvns ip, r4, asr #21 + do_it ne, te + movne xh, yh + movne xl, yl + COND(mvn,s,eq) ip, r5, asr #21 + do_it ne, t + movne yh, xh + movne yl, xl + orrs r4, xl, xh, lsl #12 + do_it eq, te + COND(orr,s,eq) r5, yl, yh, lsl #12 + teqeq xh, yh + orrne xh, xh, #0x00080000 @ quiet NAN + RETLDM "r4, r5" + + FUNC_END aeabi_dsub + FUNC_END subdf3 + FUNC_END aeabi_dadd + FUNC_END adddf3 + +ARM_FUNC_START floatunsidf +ARM_FUNC_ALIAS aeabi_ui2d floatunsidf + + teq r0, #0 + do_it eq, t + moveq r1, #0 + RETc(eq) + do_push {r4, r5, lr} + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + mov r5, #0 @ sign bit is 0 + .ifnc xl, r0 + mov xl, r0 + .endif + mov xh, #0 + b LSYM(Lad_l) + + FUNC_END aeabi_ui2d + FUNC_END floatunsidf + +ARM_FUNC_START floatsidf +ARM_FUNC_ALIAS aeabi_i2d floatsidf + + teq r0, #0 + do_it eq, t + moveq r1, #0 + RETc(eq) + do_push {r4, r5, lr} + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + ands r5, r0, #0x80000000 @ sign bit in r5 + do_it mi + rsbmi r0, r0, #0 @ absolute value + .ifnc xl, r0 + mov xl, r0 + .endif + mov xh, #0 + b LSYM(Lad_l) + + FUNC_END aeabi_i2d + FUNC_END floatsidf + +ARM_FUNC_START extendsfdf2 +ARM_FUNC_ALIAS aeabi_f2d extendsfdf2 + + movs r2, r0, lsl #1 @ toss sign bit + mov xh, r2, asr #3 @ stretch exponent + mov xh, xh, rrx @ retrieve sign bit + mov xl, r2, lsl #28 @ retrieve remaining bits + do_it ne, ttt + COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent + teqne r3, #0xff000000 @ if not 0, check if INF or NAN + eorne xh, xh, #0x38000000 @ fixup exponent otherwise. + RETc(ne) @ and return it. + + teq r2, #0 @ if actually 0 + do_it ne, e + teqne r3, #0xff000000 @ or INF or NAN + RETc(eq) @ we are done already. + + @ value was denormalized. We can normalize it now. + do_push {r4, r5, lr} + mov r4, #0x380 @ setup corresponding exponent + and r5, xh, #0x80000000 @ move sign bit in r5 + bic xh, xh, #0x80000000 + b LSYM(Lad_l) + + FUNC_END aeabi_f2d + FUNC_END extendsfdf2 + +ARM_FUNC_START floatundidf +ARM_FUNC_ALIAS aeabi_ul2d floatundidf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqd f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0/r1 for backwards + @ compatibility. + adr ip, LSYM(f0_ret) + @ Push pc as well so that RETLDM works correctly. 
+ do_push {r4, r5, ip, lr, pc} +#else + do_push {r4, r5, lr} +#endif + + mov r5, #0 + b 2f + +ARM_FUNC_START floatdidf +ARM_FUNC_ALIAS aeabi_l2d floatdidf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqd f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0/r1 for backwards + @ compatibility. + adr ip, LSYM(f0_ret) + @ Push pc as well so that RETLDM works correctly. + do_push {r4, r5, ip, lr, pc} +#else + do_push {r4, r5, lr} +#endif + + ands r5, ah, #0x80000000 @ sign bit in r5 + bpl 2f +#if defined(__thumb2__) + negs al, al + sbc ah, ah, ah, lsl #1 +#else + rsbs al, al, #0 + rsc ah, ah, #0 +#endif +2: + mov r4, #0x400 @ initial exponent + add r4, r4, #(52-1 - 1) + + @ FPA little-endian: must swap the word order. + .ifnc xh, ah + mov ip, al + mov xh, ah + mov xl, ip + .endif + + movs ip, xh, lsr #22 + beq LSYM(Lad_p) + + @ The value is too big. Scale it down a bit... + mov r2, #3 + movs ip, ip, lsr #3 + do_it ne + addne r2, r2, #3 + movs ip, ip, lsr #3 + do_it ne + addne r2, r2, #3 + add r2, r2, ip, lsr #3 + + rsb r3, r2, #32 + shift1 lsl, ip, xl, r3 + shift1 lsr, xl, xl, r2 + shiftop orr xl xl xh lsl r3 lr + shift1 lsr, xh, xh, r2 + add r4, r4, r2 + b LSYM(Lad_p) + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + + @ Legacy code expects the result to be returned in f0. Copy it + @ there as well. +LSYM(f0_ret): + do_push {r0, r1} + ldfd f0, [sp], #8 + RETLDM + +#endif + + FUNC_END floatdidf + FUNC_END aeabi_l2d + FUNC_END floatundidf + FUNC_END aeabi_ul2d + +#endif /* L_addsubdf3 */ + +#ifdef L_arm_muldivdf3 + +ARM_FUNC_START muldf3 +ARM_FUNC_ALIAS aeabi_dmul muldf3 + do_push {r4, r5, r6, lr} + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + orr ip, ip, #0x700 + ands r4, ip, xh, lsr #20 + do_it ne, tte + COND(and,s,ne) r5, ip, yh, lsr #20 + teqne r4, ip + teqne r5, ip + bleq LSYM(Lml_s) + + @ Add exponents together + add r4, r4, r5 + + @ Determine final sign. + eor r6, xh, yh + + @ Convert mantissa to unsigned integer. + @ If power of two, branch to a separate path. + bic xh, xh, ip, lsl #21 + bic yh, yh, ip, lsl #21 + orrs r5, xl, xh, lsl #12 + do_it ne + COND(orr,s,ne) r5, yl, yh, lsl #12 + orr xh, xh, #0x00100000 + orr yh, yh, #0x00100000 + beq LSYM(Lml_1) + +#if __ARM_ARCH__ < 4 + + @ Put sign bit in r6, which will be restored in yl later. + and r6, r6, #0x80000000 + + @ Well, no way to make it shorter without the umull instruction. 
+ stmfd sp!, {r6, r7, r8, r9, sl, fp} + mov r7, xl, lsr #16 + mov r8, yl, lsr #16 + mov r9, xh, lsr #16 + mov sl, yh, lsr #16 + bic xl, xl, r7, lsl #16 + bic yl, yl, r8, lsl #16 + bic xh, xh, r9, lsl #16 + bic yh, yh, sl, lsl #16 + mul ip, xl, yl + mul fp, xl, r8 + mov lr, #0 + adds ip, ip, fp, lsl #16 + adc lr, lr, fp, lsr #16 + mul fp, r7, yl + adds ip, ip, fp, lsl #16 + adc lr, lr, fp, lsr #16 + mul fp, xl, sl + mov r5, #0 + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, r7, yh + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, xh, r8 + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, r9, yl + adds lr, lr, fp, lsl #16 + adc r5, r5, fp, lsr #16 + mul fp, xh, sl + mul r6, r9, sl + adds r5, r5, fp, lsl #16 + adc r6, r6, fp, lsr #16 + mul fp, r9, yh + adds r5, r5, fp, lsl #16 + adc r6, r6, fp, lsr #16 + mul fp, xl, yh + adds lr, lr, fp + mul fp, r7, sl + adcs r5, r5, fp + mul fp, xh, yl + adc r6, r6, #0 + adds lr, lr, fp + mul fp, r9, r8 + adcs r5, r5, fp + mul fp, r7, r8 + adc r6, r6, #0 + adds lr, lr, fp + mul fp, xh, yh + adcs r5, r5, fp + adc r6, r6, #0 + ldmfd sp!, {yl, r7, r8, r9, sl, fp} + +#else + + @ Here is the actual multiplication. + umull ip, lr, xl, yl + mov r5, #0 + umlal lr, r5, xh, yl + and yl, r6, #0x80000000 + umlal lr, r5, xl, yh + mov r6, #0 + umlal r5, r6, xh, yh + +#endif + + @ The LSBs in ip are only significant for the final rounding. + @ Fold them into lr. + teq ip, #0 + do_it ne + orrne lr, lr, #1 + + @ Adjust result upon the MSB position. + sub r4, r4, #0xff + cmp r6, #(1 << (20-11)) + sbc r4, r4, #0x300 + bcs 1f + movs lr, lr, lsl #1 + adcs r5, r5, r5 + adc r6, r6, r6 +1: + @ Shift to final position, add sign to result. + orr xh, yl, r6, lsl #11 + orr xh, xh, r5, lsr #21 + mov xl, r5, lsl #11 + orr xl, xl, lr, lsr #21 + mov lr, lr, lsl #11 + + @ Check exponent range for under/overflow. + subs ip, r4, #(254 - 1) + do_it hi + cmphi ip, #0x700 + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp lr, #0x80000000 + do_it eq + COND(mov,s,eq) lr, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" + + @ Multiplication by 0x1p*: let''s shortcut a lot of code. +LSYM(Lml_1): + and r6, r6, #0x80000000 + orr xh, r6, xh + orr xl, xl, yl + eor xh, xh, yh + subs r4, r4, ip, lsr #1 + do_it gt, tt + COND(rsb,s,gt) r5, r4, ip + orrgt xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" gt + + @ Under/overflow: fix things up for the code below. + orr xh, xh, #0x00100000 + mov lr, #0 + subs r4, r4, #1 + +LSYM(Lml_u): + @ Overflow? + bgt LSYM(Lml_o) + + @ Check if denormalized result is possible, otherwise return signed 0. + cmn r4, #(53 + 1) + do_it le, tt + movle xl, #0 + bicle xh, xh, #0x7fffffff + RETLDM "r4, r5, r6" le + + @ Find out proper shift value. + rsb r4, r4, #0 + subs r4, r4, #32 + bge 2f + adds r4, r4, #12 + bgt 1f + + @ shift result right of 1 to 20 bits, preserve sign bit, round, etc. + add r4, r4, #20 + rsb r5, r4, #32 + shift1 lsl, r3, xl, r5 + shift1 lsr, xl, xl, r4 + shiftop orr xl xl xh lsl r5 r2 + and r2, xh, #0x80000000 + bic xh, xh, #0x80000000 + adds xl, xl, r3, lsr #31 + shiftop adc xh r2 xh lsr r4 r6 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ shift result right of 21 to 31 bits, or left 11 to 1 bits after + @ a register switch from xh to xl. Then round. 
+1: rsb r4, r4, #12 + rsb r5, r4, #32 + shift1 lsl, r3, xl, r4 + shift1 lsr, xl, xl, r5 + shiftop orr xl xl xh lsl r4 r2 + bic xh, xh, #0x7fffffff + adds xl, xl, r3, lsr #31 + adc xh, xh, #0 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch + @ from xh to xl. Leftover bits are in r3-r6-lr for rounding. +2: rsb r5, r4, #32 + shiftop orr lr lr xl lsl r5 r2 + shift1 lsr, r3, xl, r4 + shiftop orr r3 r3 xh lsl r5 r2 + shift1 lsr, xl, xh, r4 + bic xh, xh, #0x7fffffff + shiftop bic xl xl xh lsr r4 r2 + add xl, xl, r3, lsr #31 + orrs lr, lr, r3, lsl #1 + do_it eq + biceq xl, xl, r3, lsr #31 + RETLDM "r4, r5, r6" + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + teq r4, #0 + bne 2f + and r6, xh, #0x80000000 +1: movs xl, xl, lsl #1 + adc xh, xh, xh + tst xh, #0x00100000 + do_it eq + subeq r4, r4, #1 + beq 1b + orr xh, xh, r6 + teq r5, #0 + do_it ne + RETc(ne) +2: and r6, yh, #0x80000000 +3: movs yl, yl, lsl #1 + adc yh, yh, yh + tst yh, #0x00100000 + do_it eq + subeq r5, r5, #1 + beq 3b + orr yh, yh, r6 + RET + +LSYM(Lml_s): + @ Isolate the INF and NAN cases away + teq r4, ip + and r5, ip, yh, lsr #20 + do_it ne + teqne r5, ip + beq 1f + + @ Here, one or more arguments are either denormalized or zero. + orrs r6, xl, xh, lsl #1 + do_it ne + COND(orr,s,ne) r6, yl, yh, lsl #1 + bne LSYM(Lml_d) + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): + eor xh, xh, yh + and xh, xh, #0x80000000 + mov xl, #0 + RETLDM "r4, r5, r6" + +1: @ One or both args are INF or NAN. + orrs r6, xl, xh, lsl #1 + do_it eq, te + moveq xl, yl + moveq xh, yh + COND(orr,s,ne) r6, yl, yh, lsl #1 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r4, ip + bne 1f + orrs r6, xl, xh, lsl #12 + bne LSYM(Lml_n) @ NAN * <anything> -> NAN +1: teq r5, ip + bne LSYM(Lml_i) + orrs r6, yl, yh, lsl #12 + do_it ne, t + movne xl, yl + movne xh, yh + bne LSYM(Lml_n) @ <anything> * NAN -> NAN + + @ Result is INF, but we need to determine its sign. +LSYM(Lml_i): + eor xh, xh, yh + + @ Overflow: return INF (sign already in xh). +LSYM(Lml_o): + and xh, xh, #0x80000000 + orr xh, xh, #0x7f000000 + orr xh, xh, #0x00f00000 + mov xl, #0 + RETLDM "r4, r5, r6" + + @ Return a quiet NAN. +LSYM(Lml_n): + orr xh, xh, #0x7f000000 + orr xh, xh, #0x00f80000 + RETLDM "r4, r5, r6" + + FUNC_END aeabi_dmul + FUNC_END muldf3 + +ARM_FUNC_START divdf3 +ARM_FUNC_ALIAS aeabi_ddiv divdf3 + + do_push {r4, r5, r6, lr} + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + orr ip, ip, #0x700 + ands r4, ip, xh, lsr #20 + do_it ne, tte + COND(and,s,ne) r5, ip, yh, lsr #20 + teqne r4, ip + teqne r5, ip + bleq LSYM(Ldv_s) + + @ Substract divisor exponent from dividend''s. + sub r4, r4, r5 + + @ Preserve final sign into lr. + eor lr, xh, yh + + @ Convert mantissa to unsigned integer. + @ Dividend -> r5-r6, divisor -> yh-yl. + orrs r5, yl, yh, lsl #12 + mov xh, xh, lsl #12 + beq LSYM(Ldv_1) + mov yh, yh, lsl #12 + mov r5, #0x10000000 + orr yh, r5, yh, lsr #4 + orr yh, yh, yl, lsr #24 + mov yl, yl, lsl #8 + orr r5, r5, xh, lsr #4 + orr r5, r5, xl, lsr #24 + mov r6, xl, lsl #8 + + @ Initialize xh with final sign bit. + and xh, lr, #0x80000000 + + @ Ensure result will land to known bit position. + @ Apply exponent bias accordingly. 
+ cmp r5, yh + do_it eq + cmpeq r6, yl + adc r4, r4, #(255 - 2) + add r4, r4, #0x300 + bcs 1f + movs yh, yh, lsr #1 + mov yl, yl, rrx +1: + @ Perform first substraction to align result to a nibble. + subs r6, r6, yl + sbc r5, r5, yh + movs yh, yh, lsr #1 + mov yl, yl, rrx + mov xl, #0x00100000 + mov ip, #0x00080000 + + @ The actual division loop. +1: subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #1 + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #2 + movs yh, yh, lsr #1 + mov yl, yl, rrx + subs lr, r6, yl + sbcs lr, r5, yh + do_it cs, tt + subcs r6, r6, yl + movcs r5, lr + orrcs xl, xl, ip, lsr #3 + + orrs lr, r5, r6 + beq 2f + mov r5, r5, lsl #4 + orr r5, r5, r6, lsr #28 + mov r6, r6, lsl #4 + mov yh, yh, lsl #3 + orr yh, yh, yl, lsr #29 + mov yl, yl, lsl #3 + movs ip, ip, lsr #4 + bne 1b + + @ We are done with a word of the result. + @ Loop again for the low word if this pass was for the high word. + tst xh, #0x00100000 + bne 3f + orr xh, xh, xl + mov xl, #0 + mov ip, #0x80000000 + b 1b +2: + @ Be sure result starts in the high word. + tst xh, #0x00100000 + do_it eq, t + orreq xh, xh, xl + moveq xl, #0 +3: + @ Check exponent range for under/overflow. + subs ip, r4, #(254 - 1) + do_it hi + cmphi ip, #0x700 + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + subs ip, r5, yh + do_it eq, t + COND(sub,s,eq) ip, r6, yl + COND(mov,s,eq) ip, xl, lsr #1 + adcs xl, xl, #0 + adc xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" + + @ Division by 0x1p*: shortcut a lot of code. +LSYM(Ldv_1): + and lr, lr, #0x80000000 + orr xh, lr, xh, lsr #12 + adds r4, r4, ip, lsr #1 + do_it gt, tt + COND(rsb,s,gt) r5, r4, ip + orrgt xh, xh, r4, lsl #20 + RETLDM "r4, r5, r6" gt + + orr xh, xh, #0x00100000 + mov lr, #0 + subs r4, r4, #1 + b LSYM(Lml_u) + + @ Result mightt need to be denormalized: put remainder bits + @ in lr for rounding considerations. +LSYM(Ldv_u): + orr lr, r5, r6 + b LSYM(Lml_u) + + @ One or both arguments is either INF, NAN or zero. +LSYM(Ldv_s): + and r5, ip, yh, lsr #20 + teq r4, ip + do_it eq + teqeq r5, ip + beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN + teq r4, ip + bne 1f + orrs r4, xl, xh, lsl #12 + bne LSYM(Lml_n) @ NAN / <anything> -> NAN + teq r5, ip + bne LSYM(Lml_i) @ INF / <anything> -> INF + mov xl, yl + mov xh, yh + b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN +1: teq r5, ip + bne 2f + orrs r5, yl, yh, lsl #12 + beq LSYM(Lml_z) @ <anything> / INF -> 0 + mov xl, yl + mov xh, yh + b LSYM(Lml_n) @ <anything> / NAN -> NAN +2: @ If both are nonzero, we need to normalize and resume above. + orrs r6, xl, xh, lsl #1 + do_it ne + COND(orr,s,ne) r6, yl, yh, lsl #1 + bne LSYM(Lml_d) + @ One or both arguments are 0. + orrs r4, xl, xh, lsl #1 + bne LSYM(Lml_i) @ <non_zero> / 0 -> INF + orrs r5, yl, yh, lsl #1 + bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + FUNC_END aeabi_ddiv + FUNC_END divdf3 + +#endif /* L_muldivdf3 */ + +#ifdef L_arm_cmpdf2 + +@ Note: only r0 (return value) and ip are clobbered here. 
+ +ARM_FUNC_START gtdf2 +ARM_FUNC_ALIAS gedf2 gtdf2 + mov ip, #-1 + b 1f + +ARM_FUNC_START ltdf2 +ARM_FUNC_ALIAS ledf2 ltdf2 + mov ip, #1 + b 1f + +ARM_FUNC_START cmpdf2 +ARM_FUNC_ALIAS nedf2 cmpdf2 +ARM_FUNC_ALIAS eqdf2 cmpdf2 + mov ip, #1 @ how should we specify unordered here? + +1: str ip, [sp, #-4]! + + @ Trap any INF/NAN first. + mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + mov ip, yh, lsl #1 + do_it ne + COND(mvn,s,ne) ip, ip, asr #21 + beq 3f + + @ Test for equality. + @ Note that 0.0 is equal to -0.0. +2: add sp, sp, #4 + orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 + do_it eq, e + COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 + teqne xh, yh @ or xh == yh + do_it eq, tt + teqeq xl, yl @ and xl == yl + moveq r0, #0 @ then equal. + RETc(eq) + + @ Clear C flag + cmn r0, #0 + + @ Compare sign, + teq xh, yh + + @ Compare values if same sign + do_it pl + cmppl xh, yh + do_it eq + cmpeq xl, yl + + @ Result: + do_it cs, e + movcs r0, yh, asr #31 + mvncc r0, yh, asr #31 + orr r0, r0, #1 + RET + + @ Look for a NAN. +3: mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + bne 4f + orrs ip, xl, xh, lsl #12 + bne 5f @ x is NAN +4: mov ip, yh, lsl #1 + mvns ip, ip, asr #21 + bne 2b + orrs ip, yl, yh, lsl #12 + beq 2b @ y is not NAN +5: ldr r0, [sp], #4 @ unordered return code + RET + + FUNC_END gedf2 + FUNC_END gtdf2 + FUNC_END ledf2 + FUNC_END ltdf2 + FUNC_END nedf2 + FUNC_END eqdf2 + FUNC_END cmpdf2 + +ARM_FUNC_START aeabi_cdrcmple + + mov ip, r0 + mov r0, r2 + mov r2, ip + mov ip, r1 + mov r1, r3 + mov r3, ip + b 6f + +ARM_FUNC_START aeabi_cdcmpeq +ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: do_push {r0, lr} + ARM_CALL cmpdf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + do_it mi + cmnmi r0, #0 + RETLDM "r0" + + FUNC_END aeabi_cdcmple + FUNC_END aeabi_cdcmpeq + FUNC_END aeabi_cdrcmple + +ARM_FUNC_START aeabi_dcmpeq + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it eq, e + moveq r0, #1 @ Equal to. + movne r0, #0 @ Less than, greater than, or unordered. + RETLDM + + FUNC_END aeabi_dcmpeq + +ARM_FUNC_START aeabi_dcmplt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it cc, e + movcc r0, #1 @ Less than. + movcs r0, #0 @ Equal to, greater than, or unordered. + RETLDM + + FUNC_END aeabi_dcmplt + +ARM_FUNC_START aeabi_dcmple + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdcmple + do_it ls, e + movls r0, #1 @ Less than or equal to. + movhi r0, #0 @ Greater than or unordered. + RETLDM + + FUNC_END aeabi_dcmple + +ARM_FUNC_START aeabi_dcmpge + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdrcmple + do_it ls, e + movls r0, #1 @ Operand 2 is less than or equal to operand 1. + movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. + RETLDM + + FUNC_END aeabi_dcmpge + +ARM_FUNC_START aeabi_dcmpgt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cdrcmple + do_it cc, e + movcc r0, #1 @ Operand 2 is less than operand 1. + movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, + @ or they are unordered. 
+ RETLDM + + FUNC_END aeabi_dcmpgt + +#endif /* L_cmpdf2 */ + +#ifdef L_arm_unorddf2 + +ARM_FUNC_START unorddf2 +ARM_FUNC_ALIAS aeabi_dcmpun unorddf2 + + mov ip, xh, lsl #1 + mvns ip, ip, asr #21 + bne 1f + orrs ip, xl, xh, lsl #12 + bne 3f @ x is NAN +1: mov ip, yh, lsl #1 + mvns ip, ip, asr #21 + bne 2f + orrs ip, yl, yh, lsl #12 + bne 3f @ y is NAN +2: mov r0, #0 @ arguments are ordered. + RET + +3: mov r0, #1 @ arguments are unordered. + RET + + FUNC_END aeabi_dcmpun + FUNC_END unorddf2 + +#endif /* L_unorddf2 */ + +#ifdef L_arm_fixdfsi + +ARM_FUNC_START fixdfsi +ARM_FUNC_ALIAS aeabi_d2iz fixdfsi + + @ check exponent range. + mov r2, xh, lsl #1 + adds r2, r2, #(1 << 21) + bcs 2f @ value is INF or NAN + bpl 1f @ value is too small + mov r3, #(0xfffffc00 + 31) + subs r2, r3, r2, asr #21 + bls 3f @ value is too large + + @ scale value + mov r3, xh, lsl #11 + orr r3, r3, #0x80000000 + orr r3, r3, xl, lsr #21 + tst xh, #0x80000000 @ the sign bit + shift1 lsr, r0, r3, r2 + do_it ne + rsbne r0, r0, #0 + RET + +1: mov r0, #0 + RET + +2: orrs xl, xl, xh, lsl #12 + bne 4f @ x is NAN. +3: ands r0, xh, #0x80000000 @ the sign bit + do_it eq + moveq r0, #0x7fffffff @ maximum signed positive si + RET + +4: mov r0, #0 @ How should we convert NAN? + RET + + FUNC_END aeabi_d2iz + FUNC_END fixdfsi + +#endif /* L_fixdfsi */ + +#ifdef L_arm_fixunsdfsi + +ARM_FUNC_START fixunsdfsi +ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi + + @ check exponent range. + movs r2, xh, lsl #1 + bcs 1f @ value is negative + adds r2, r2, #(1 << 21) + bcs 2f @ value is INF or NAN + bpl 1f @ value is too small + mov r3, #(0xfffffc00 + 31) + subs r2, r3, r2, asr #21 + bmi 3f @ value is too large + + @ scale value + mov r3, xh, lsl #11 + orr r3, r3, #0x80000000 + orr r3, r3, xl, lsr #21 + shift1 lsr, r0, r3, r2 + RET + +1: mov r0, #0 + RET + +2: orrs xl, xl, xh, lsl #12 + bne 4f @ value is NAN. +3: mov r0, #0xffffffff @ maximum unsigned si + RET + +4: mov r0, #0 @ How should we convert NAN? + RET + + FUNC_END aeabi_d2uiz + FUNC_END fixunsdfsi + +#endif /* L_fixunsdfsi */ + +#ifdef L_arm_truncdfsf2 + +ARM_FUNC_START truncdfsf2 +ARM_FUNC_ALIAS aeabi_d2f truncdfsf2 + + @ check exponent range. + mov r2, xh, lsl #1 + subs r3, r2, #((1023 - 127) << 21) + do_it cs, t + COND(sub,s,cs) ip, r3, #(1 << 21) + COND(rsb,s,cs) ip, ip, #(254 << 21) + bls 2f @ value is out of range + +1: @ shift and round mantissa + and ip, xh, #0x80000000 + mov r2, xl, lsl #3 + orr xl, ip, xl, lsr #29 + cmp r2, #0x80000000 + adc r0, xl, r3, lsl #2 + do_it eq + biceq r0, r0, #1 + RET + +2: @ either overflow or underflow + tst xh, #0x40000000 + bne 3f @ overflow + + @ check if denormalized value is possible + adds r2, r3, #(23 << 21) + do_it lt, t + andlt r0, xh, #0x80000000 @ too small, return signed 0. + RETc(lt) + + @ denormalize value so we can resume with the code above afterwards. + orr xh, xh, #0x00100000 + mov r2, r2, lsr #21 + rsb r2, r2, #24 + rsb ip, r2, #32 +#if defined(__thumb2__) + lsls r3, xl, ip +#else + movs r3, xl, lsl ip +#endif + shift1 lsr, xl, xl, r2 + do_it ne + orrne xl, xl, #1 @ fold r3 for rounding considerations. 
+ mov r3, xh, lsl #11 + mov r3, r3, lsr #11 + shiftop orr xl xl r3 lsl ip ip + shift1 lsr, r3, r3, r2 + mov r3, r3, lsl #1 + b 1b + +3: @ chech for NAN + mvns r3, r2, asr #21 + bne 5f @ simple overflow + orrs r3, xl, xh, lsl #12 + do_it ne, tt + movne r0, #0x7f000000 + orrne r0, r0, #0x00c00000 + RETc(ne) @ return NAN + +5: @ return INF with sign + and r0, xh, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + FUNC_END aeabi_d2f + FUNC_END truncdfsf2 + +#endif /* L_truncdfsf2 */ diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S new file mode 100644 index 00000000000..c93f66d8ff8 --- /dev/null +++ b/libgcc/config/arm/ieee754-sf.S @@ -0,0 +1,1060 @@ +/* ieee754-sf.S single-precision floating point support for ARM + + Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. + Contributed by Nicolas Pitre (nico@cam.org) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* + * Notes: + * + * The goal of this code is to be as fast as possible. This is + * not meant to be easy to understand for the casual reader. + * + * Only the default rounding mode is intended for best performances. + * Exceptions aren't supported yet, but that can be added quite easily + * if necessary without impacting performances. + */ + +#ifdef L_arm_negsf2 + +ARM_FUNC_START negsf2 +ARM_FUNC_ALIAS aeabi_fneg negsf2 + + eor r0, r0, #0x80000000 @ flip sign bit + RET + + FUNC_END aeabi_fneg + FUNC_END negsf2 + +#endif + +#ifdef L_arm_addsubsf3 + +ARM_FUNC_START aeabi_frsub + + eor r0, r0, #0x80000000 @ flip sign bit of first arg + b 1f + +ARM_FUNC_START subsf3 +ARM_FUNC_ALIAS aeabi_fsub subsf3 + + eor r1, r1, #0x80000000 @ flip sign bit of second arg +#if defined(__INTERWORKING_STUBS__) + b 1f @ Skip Thumb-code prologue +#endif + +ARM_FUNC_START addsf3 +ARM_FUNC_ALIAS aeabi_fadd addsf3 + +1: @ Look for zeroes, equal values, INF, or NAN. + movs r2, r0, lsl #1 + do_it ne, ttt + COND(mov,s,ne) r3, r1, lsl #1 + teqne r2, r3 + COND(mvn,s,ne) ip, r2, asr #24 + COND(mvn,s,ne) ip, r3, asr #24 + beq LSYM(Lad_s) + + @ Compute exponent difference. Make largest exponent in r2, + @ corresponding arg in r0, and positive exponent difference in r3. + mov r2, r2, lsr #24 + rsbs r3, r2, r3, lsr #24 + do_it gt, ttt + addgt r2, r2, r3 + eorgt r1, r0, r1 + eorgt r0, r1, r0 + eorgt r1, r0, r1 + do_it lt + rsblt r3, r3, #0 + + @ If exponent difference is too large, return largest argument + @ already in r0. We need up to 25 bit to handle proper rounding + @ of 0x1p25 - 1.1. + cmp r3, #25 + do_it hi + RETc(hi) + + @ Convert mantissa to signed integer. 
+ tst r0, #0x80000000 + orr r0, r0, #0x00800000 + bic r0, r0, #0xff000000 + do_it ne + rsbne r0, r0, #0 + tst r1, #0x80000000 + orr r1, r1, #0x00800000 + bic r1, r1, #0xff000000 + do_it ne + rsbne r1, r1, #0 + + @ If exponent == difference, one or both args were denormalized. + @ Since this is not common case, rescale them off line. + teq r2, r3 + beq LSYM(Lad_d) +LSYM(Lad_x): + + @ Compensate for the exponent overlapping the mantissa MSB added later + sub r2, r2, #1 + + @ Shift and add second arg to first arg in r0. + @ Keep leftover bits into r1. + shiftop adds r0 r0 r1 asr r3 ip + rsb r3, r3, #32 + shift1 lsl, r1, r1, r3 + + @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above) + and r3, r0, #0x80000000 + bpl LSYM(Lad_p) +#if defined(__thumb2__) + negs r1, r1 + sbc r0, r0, r0, lsl #1 +#else + rsbs r1, r1, #0 + rsc r0, r0, #0 +#endif + + @ Determine how to normalize the result. +LSYM(Lad_p): + cmp r0, #0x00800000 + bcc LSYM(Lad_a) + cmp r0, #0x01000000 + bcc LSYM(Lad_e) + + @ Result needs to be shifted right. + movs r0, r0, lsr #1 + mov r1, r1, rrx + add r2, r2, #1 + + @ Make sure we did not bust our exponent. + cmp r2, #254 + bhs LSYM(Lad_o) + + @ Our result is now properly aligned into r0, remaining bits in r1. + @ Pack final result together. + @ Round with MSB of r1. If halfway between two numbers, round towards + @ LSB of r0 = 0. +LSYM(Lad_e): + cmp r1, #0x80000000 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + orr r0, r0, r3 + RET + + @ Result must be shifted left and exponent adjusted. +LSYM(Lad_a): + movs r1, r1, lsl #1 + adc r0, r0, r0 + tst r0, #0x00800000 + sub r2, r2, #1 + bne LSYM(Lad_e) + + @ No rounding necessary since r1 will always be 0 at this point. +LSYM(Lad_l): + +#if __ARM_ARCH__ < 5 + + movs ip, r0, lsr #12 + moveq r0, r0, lsl #12 + subeq r2, r2, #12 + tst r0, #0x00ff0000 + moveq r0, r0, lsl #8 + subeq r2, r2, #8 + tst r0, #0x00f00000 + moveq r0, r0, lsl #4 + subeq r2, r2, #4 + tst r0, #0x00c00000 + moveq r0, r0, lsl #2 + subeq r2, r2, #2 + cmp r0, #0x00800000 + movcc r0, r0, lsl #1 + sbcs r2, r2, #0 + +#else + + clz ip, r0 + sub ip, ip, #8 + subs r2, r2, ip + shift1 lsl, r0, r0, ip + +#endif + + @ Final result with sign + @ If exponent negative, denormalize result. + do_it ge, et + addge r0, r0, r2, lsl #23 + rsblt r2, r2, #0 + orrge r0, r0, r3 +#if defined(__thumb2__) + do_it lt, t + lsrlt r0, r0, r2 + orrlt r0, r3, r0 +#else + orrlt r0, r3, r0, lsr r2 +#endif + RET + + @ Fixup and adjust bit position for denormalized arguments. + @ Note that r2 must not remain equal to 0. +LSYM(Lad_d): + teq r2, #0 + eor r1, r1, #0x00800000 + do_it eq, te + eoreq r0, r0, #0x00800000 + addeq r2, r2, #1 + subne r3, r3, #1 + b LSYM(Lad_x) + +LSYM(Lad_s): + mov r3, r1, lsl #1 + + mvns ip, r2, asr #24 + do_it ne + COND(mvn,s,ne) ip, r3, asr #24 + beq LSYM(Lad_i) + + teq r2, r3 + beq 1f + + @ Result is x + 0.0 = x or 0.0 + y = y. + teq r2, #0 + do_it eq + moveq r0, r1 + RET + +1: teq r0, r1 + + @ Result is x - x = 0. + do_it ne, t + movne r0, #0 + RETc(ne) + + @ Result is x + x = 2x. + tst r2, #0xff000000 + bne 2f + movs r0, r0, lsl #1 + do_it cs + orrcs r0, r0, #0x80000000 + RET +2: adds r2, r2, #(2 << 24) + do_it cc, t + addcc r0, r0, #(1 << 23) + RETc(cc) + and r3, r0, #0x80000000 + + @ Overflow: return INF. +LSYM(Lad_o): + orr r0, r3, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ At least one of r0/r1 is INF/NAN. 
+ @ if r0 != INF/NAN: return r1 (which is INF/NAN) + @ if r1 != INF/NAN: return r0 (which is INF/NAN) + @ if r0 or r1 is NAN: return NAN + @ if opposite sign: return NAN + @ otherwise return r0 (which is INF or -INF) +LSYM(Lad_i): + mvns r2, r2, asr #24 + do_it ne, et + movne r0, r1 + COND(mvn,s,eq) r3, r3, asr #24 + movne r1, r0 + movs r2, r0, lsl #9 + do_it eq, te + COND(mov,s,eq) r3, r1, lsl #9 + teqeq r0, r1 + orrne r0, r0, #0x00400000 @ quiet NAN + RET + + FUNC_END aeabi_frsub + FUNC_END aeabi_fadd + FUNC_END addsf3 + FUNC_END aeabi_fsub + FUNC_END subsf3 + +ARM_FUNC_START floatunsisf +ARM_FUNC_ALIAS aeabi_ui2f floatunsisf + + mov r3, #0 + b 1f + +ARM_FUNC_START floatsisf +ARM_FUNC_ALIAS aeabi_i2f floatsisf + + ands r3, r0, #0x80000000 + do_it mi + rsbmi r0, r0, #0 + +1: movs ip, r0 + do_it eq + RETc(eq) + + @ Add initial exponent to sign + orr r3, r3, #((127 + 23) << 23) + + .ifnc ah, r0 + mov ah, r0 + .endif + mov al, #0 + b 2f + + FUNC_END aeabi_i2f + FUNC_END floatsisf + FUNC_END aeabi_ui2f + FUNC_END floatunsisf + +ARM_FUNC_START floatundisf +ARM_FUNC_ALIAS aeabi_ul2f floatundisf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqs f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + + mov r3, #0 + b 1f + +ARM_FUNC_START floatdisf +ARM_FUNC_ALIAS aeabi_l2f floatdisf + + orrs r2, r0, r1 +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + do_it eq, t + mvfeqs f0, #0.0 +#else + do_it eq +#endif + RETc(eq) + + ands r3, ah, #0x80000000 @ sign bit in r3 + bpl 1f +#if defined(__thumb2__) + negs al, al + sbc ah, ah, ah, lsl #1 +#else + rsbs al, al, #0 + rsc ah, ah, #0 +#endif +1: +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + @ For hard FPA code we want to return via the tail below so that + @ we can return the result in f0 as well as in r0 for backwards + @ compatibility. + str lr, [sp, #-8]! + adr lr, LSYM(f0_ret) +#endif + + movs ip, ah + do_it eq, tt + moveq ip, al + moveq ah, al + moveq al, #0 + + @ Add initial exponent to sign + orr r3, r3, #((127 + 23 + 32) << 23) + do_it eq + subeq r3, r3, #(32 << 23) +2: sub r3, r3, #(1 << 23) + +#if __ARM_ARCH__ < 5 + + mov r2, #23 + cmp ip, #(1 << 16) + do_it hs, t + movhs ip, ip, lsr #16 + subhs r2, r2, #16 + cmp ip, #(1 << 8) + do_it hs, t + movhs ip, ip, lsr #8 + subhs r2, r2, #8 + cmp ip, #(1 << 4) + do_it hs, t + movhs ip, ip, lsr #4 + subhs r2, r2, #4 + cmp ip, #(1 << 2) + do_it hs, e + subhs r2, r2, #2 + sublo r2, r2, ip, lsr #1 + subs r2, r2, ip, lsr #3 + +#else + + clz r2, ip + subs r2, r2, #8 + +#endif + + sub r3, r3, r2, lsl #23 + blt 3f + + shiftop add r3 r3 ah lsl r2 ip + shift1 lsl, ip, al, r2 + rsb r2, r2, #32 + cmp ip, #0x80000000 + shiftop adc r0 r3 al lsr r2 r2 + do_it eq + biceq r0, r0, #1 + RET + +3: add r2, r2, #32 + shift1 lsl, ip, ah, r2 + rsb r2, r2, #32 + orrs al, al, ip, lsl #1 + shiftop adc r0 r3 ah lsr r2 r2 + do_it eq + biceq r0, r0, ip, lsr #31 + RET + +#if !defined (__VFP_FP__) && !defined(__SOFTFP__) + +LSYM(f0_ret): + str r0, [sp, #-4]! + ldfs f0, [sp], #4 + RETLDM + +#endif + + FUNC_END floatdisf + FUNC_END aeabi_l2f + FUNC_END floatundisf + FUNC_END aeabi_ul2f + +#endif /* L_addsubsf3 */ + +#ifdef L_arm_muldivsf3 + +ARM_FUNC_START mulsf3 +ARM_FUNC_ALIAS aeabi_fmul mulsf3 + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + ands r2, ip, r0, lsr #23 + do_it ne, tt + COND(and,s,ne) r3, ip, r1, lsr #23 + teqne r2, ip + teqne r3, ip + beq LSYM(Lml_s) +LSYM(Lml_x): + + @ Add exponents together + add r2, r2, r3 + + @ Determine final sign. 
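/* The sign of a product is just the XOR of the operand signs, captured
   in ip just below.  On cores without the long multiply instructions,
   the 32x32->64 mantissa product further down is assembled from 16-bit
   halves, schoolbook style; numerically (illustrative sketch only):

     uint64_t mul32x32 (uint32_t a, uint32_t b)
     {
       uint32_t al = a & 0xffffu, ah = a >> 16;
       uint32_t bl = b & 0xffffu, bh = b >> 16;
       uint64_t mid = (uint64_t) al * bh + (uint64_t) ah * bl;
       return ((uint64_t) ah * bh << 32) + (mid << 16) + (uint64_t) al * bl;
     }
*/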
+ eor ip, r0, r1 + + @ Convert mantissa to unsigned integer. + @ If power of two, branch to a separate path. + @ Make up for final alignment. + movs r0, r0, lsl #9 + do_it ne + COND(mov,s,ne) r1, r1, lsl #9 + beq LSYM(Lml_1) + mov r3, #0x08000000 + orr r0, r3, r0, lsr #5 + orr r1, r3, r1, lsr #5 + +#if __ARM_ARCH__ < 4 + + @ Put sign bit in r3, which will be restored into r0 later. + and r3, ip, #0x80000000 + + @ Well, no way to make it shorter without the umull instruction. + do_push {r3, r4, r5} + mov r4, r0, lsr #16 + mov r5, r1, lsr #16 + bic r0, r0, r4, lsl #16 + bic r1, r1, r5, lsl #16 + mul ip, r4, r5 + mul r3, r0, r1 + mul r0, r5, r0 + mla r0, r4, r1, r0 + adds r3, r3, r0, lsl #16 + adc r1, ip, r0, lsr #16 + do_pop {r0, r4, r5} + +#else + + @ The actual multiplication. + umull r3, r1, r0, r1 + + @ Put final sign in r0. + and r0, ip, #0x80000000 + +#endif + + @ Adjust result upon the MSB position. + cmp r1, #(1 << 23) + do_it cc, tt + movcc r1, r1, lsl #1 + orrcc r1, r1, r3, lsr #31 + movcc r3, r3, lsl #1 + + @ Add sign to result. + orr r0, r0, r1 + + @ Apply exponent bias, check for under/overflow. + sbc r2, r2, #127 + cmp r2, #(254 - 1) + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp r3, #0x80000000 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + RET + + @ Multiplication by 0x1p*: let''s shortcut a lot of code. +LSYM(Lml_1): + teq r0, #0 + and ip, ip, #0x80000000 + do_it eq + moveq r1, r1, lsl #9 + orr r0, ip, r0, lsr #9 + orr r0, r0, r1, lsr #9 + subs r2, r2, #127 + do_it gt, tt + COND(rsb,s,gt) r3, r2, #255 + orrgt r0, r0, r2, lsl #23 + RETc(gt) + + @ Under/overflow: fix things up for the code below. + orr r0, r0, #0x00800000 + mov r3, #0 + subs r2, r2, #1 + +LSYM(Lml_u): + @ Overflow? + bgt LSYM(Lml_o) + + @ Check if denormalized result is possible, otherwise return signed 0. + cmn r2, #(24 + 1) + do_it le, t + bicle r0, r0, #0x7fffffff + RETc(le) + + @ Shift value right, round, etc. + rsb r2, r2, #0 + movs r1, r0, lsl #1 + shift1 lsr, r1, r1, r2 + rsb r2, r2, #32 + shift1 lsl, ip, r0, r2 + movs r0, r1, rrx + adc r0, r0, #0 + orrs r3, r3, ip, lsl #1 + do_it eq + biceq r0, r0, ip, lsr #31 + RET + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Lml_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: do_it eq, tt + moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #1 + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: do_it eq, tt + moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #1 + beq 2b + orr r1, r1, ip + b LSYM(Lml_x) + +LSYM(Lml_s): + @ Isolate the INF and NAN cases away + and r3, ip, r1, lsr #23 + teq r2, ip + do_it ne + teqne r3, ip + beq 1f + + @ Here, one or more arguments are either denormalized or zero. + bics ip, r0, #0x80000000 + do_it ne + COND(bic,s,ne) ip, r1, #0x80000000 + bne LSYM(Lml_d) + + @ Result is 0, but determine sign anyway. +LSYM(Lml_z): + eor r0, r0, r1 + bic r0, r0, #0x7fffffff + RET + +1: @ One or both args are INF or NAN. + teq r0, #0x0 + do_it ne, ett + teqne r0, #0x80000000 + moveq r0, r1 + teqne r1, #0x0 + teqne r1, #0x80000000 + beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN + teq r2, ip + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN * <anything> -> NAN +1: teq r3, ip + bne LSYM(Lml_i) + movs r3, r1, lsl #9 + do_it ne + movne r0, r1 + bne LSYM(Lml_n) @ <anything> * NAN -> NAN + + @ Result is INF, but we need to determine its sign. +LSYM(Lml_i): + eor r0, r0, r1 + + @ Overflow: return INF (sign already in r0). 
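/* For reference, the encodings materialized at Lml_o and Lml_n below:
   infinity is sign | 0x7f800000 (exponent all ones, mantissa zero), and
   the default quiet NaN additionally sets the top mantissa bit:

     #define SF_INF(sign)   ((sign) | 0x7f800000u)
     #define SF_QNAN(bits)  ((bits) | 0x7fc00000u)

   (macro names here are illustrative, not from this file).
*/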
+LSYM(Lml_o): + and r0, r0, #0x80000000 + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00800000 + RET + + @ Return a quiet NAN. +LSYM(Lml_n): + orr r0, r0, #0x7f000000 + orr r0, r0, #0x00c00000 + RET + + FUNC_END aeabi_fmul + FUNC_END mulsf3 + +ARM_FUNC_START divsf3 +ARM_FUNC_ALIAS aeabi_fdiv divsf3 + + @ Mask out exponents, trap any zero/denormal/INF/NAN. + mov ip, #0xff + ands r2, ip, r0, lsr #23 + do_it ne, tt + COND(and,s,ne) r3, ip, r1, lsr #23 + teqne r2, ip + teqne r3, ip + beq LSYM(Ldv_s) +LSYM(Ldv_x): + + @ Substract divisor exponent from dividend''s + sub r2, r2, r3 + + @ Preserve final sign into ip. + eor ip, r0, r1 + + @ Convert mantissa to unsigned integer. + @ Dividend -> r3, divisor -> r1. + movs r1, r1, lsl #9 + mov r0, r0, lsl #9 + beq LSYM(Ldv_1) + mov r3, #0x10000000 + orr r1, r3, r1, lsr #4 + orr r3, r3, r0, lsr #4 + + @ Initialize r0 (result) with final sign bit. + and r0, ip, #0x80000000 + + @ Ensure result will land to known bit position. + @ Apply exponent bias accordingly. + cmp r3, r1 + do_it cc + movcc r3, r3, lsl #1 + adc r2, r2, #(127 - 2) + + @ The actual division loop. + mov ip, #0x00800000 +1: cmp r3, r1 + do_it cs, t + subcs r3, r3, r1 + orrcs r0, r0, ip + cmp r3, r1, lsr #1 + do_it cs, t + subcs r3, r3, r1, lsr #1 + orrcs r0, r0, ip, lsr #1 + cmp r3, r1, lsr #2 + do_it cs, t + subcs r3, r3, r1, lsr #2 + orrcs r0, r0, ip, lsr #2 + cmp r3, r1, lsr #3 + do_it cs, t + subcs r3, r3, r1, lsr #3 + orrcs r0, r0, ip, lsr #3 + movs r3, r3, lsl #4 + do_it ne + COND(mov,s,ne) ip, ip, lsr #4 + bne 1b + + @ Check exponent for under/overflow. + cmp r2, #(254 - 1) + bhi LSYM(Lml_u) + + @ Round the result, merge final exponent. + cmp r3, r1 + adc r0, r0, r2, lsl #23 + do_it eq + biceq r0, r0, #1 + RET + + @ Division by 0x1p*: let''s shortcut a lot of code. +LSYM(Ldv_1): + and ip, ip, #0x80000000 + orr r0, ip, r0, lsr #9 + adds r2, r2, #127 + do_it gt, tt + COND(rsb,s,gt) r3, r2, #255 + orrgt r0, r0, r2, lsl #23 + RETc(gt) + + orr r0, r0, #0x00800000 + mov r3, #0 + subs r2, r2, #1 + b LSYM(Lml_u) + + @ One or both arguments are denormalized. + @ Scale them leftwards and preserve sign bit. +LSYM(Ldv_d): + teq r2, #0 + and ip, r0, #0x80000000 +1: do_it eq, tt + moveq r0, r0, lsl #1 + tsteq r0, #0x00800000 + subeq r2, r2, #1 + beq 1b + orr r0, r0, ip + teq r3, #0 + and ip, r1, #0x80000000 +2: do_it eq, tt + moveq r1, r1, lsl #1 + tsteq r1, #0x00800000 + subeq r3, r3, #1 + beq 2b + orr r1, r1, ip + b LSYM(Ldv_x) + + @ One or both arguments are either INF, NAN, zero or denormalized. +LSYM(Ldv_s): + and r3, ip, r1, lsr #23 + teq r2, ip + bne 1f + movs r2, r0, lsl #9 + bne LSYM(Lml_n) @ NAN / <anything> -> NAN + teq r3, ip + bne LSYM(Lml_i) @ INF / <anything> -> INF + mov r0, r1 + b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN +1: teq r3, ip + bne 2f + movs r3, r1, lsl #9 + beq LSYM(Lml_z) @ <anything> / INF -> 0 + mov r0, r1 + b LSYM(Lml_n) @ <anything> / NAN -> NAN +2: @ If both are nonzero, we need to normalize and resume above. + bics ip, r0, #0x80000000 + do_it ne + COND(bic,s,ne) ip, r1, #0x80000000 + bne LSYM(Ldv_d) + @ One or both arguments are zero. 
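/* Taken together, the special cases of division resolve as follows,
   NaNs first (illustrative C-ish sketch of the branches around here):

     if (isnan (x) || isnan (y))  return qnan;          // -> Lml_n
     if (isinf (x))  return isinf (y) ? qnan : inf;     // -> Lml_i
     if (isinf (y))  return zero;                       // -> Lml_z
     if (y == 0)     return x ? inf : qnan;             // 0 / 0 -> NaN
     if (x == 0)     return zero;
     // every result takes the XOR of the operand signs
*/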
+ bics r2, r0, #0x80000000 + bne LSYM(Lml_i) @ <non_zero> / 0 -> INF + bics r3, r1, #0x80000000 + bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 + b LSYM(Lml_n) @ 0 / 0 -> NAN + + FUNC_END aeabi_fdiv + FUNC_END divsf3 + +#endif /* L_muldivsf3 */ + +#ifdef L_arm_cmpsf2 + + @ The return value in r0 is + @ + @ 0 if the operands are equal + @ 1 if the first operand is greater than the second, or + @ the operands are unordered and the operation is + @ CMP, LT, LE, NE, or EQ. + @ -1 if the first operand is less than the second, or + @ the operands are unordered and the operation is GT + @ or GE. + @ + @ The Z flag will be set iff the operands are equal. + @ + @ The following registers are clobbered by this function: + @ ip, r0, r1, r2, r3 + +ARM_FUNC_START gtsf2 +ARM_FUNC_ALIAS gesf2 gtsf2 + mov ip, #-1 + b 1f + +ARM_FUNC_START ltsf2 +ARM_FUNC_ALIAS lesf2 ltsf2 + mov ip, #1 + b 1f + +ARM_FUNC_START cmpsf2 +ARM_FUNC_ALIAS nesf2 cmpsf2 +ARM_FUNC_ALIAS eqsf2 cmpsf2 + mov ip, #1 @ how should we specify unordered here? + +1: str ip, [sp, #-4]! + + @ Trap any INF/NAN first. + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + mvns ip, r2, asr #24 + do_it ne + COND(mvn,s,ne) ip, r3, asr #24 + beq 3f + + @ Compare values. + @ Note that 0.0 is equal to -0.0. +2: add sp, sp, #4 + orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag + do_it ne + teqne r0, r1 @ if not 0 compare sign + do_it pl + COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0 + + @ Result: + do_it hi + movhi r0, r1, asr #31 + do_it lo + mvnlo r0, r1, asr #31 + do_it ne + orrne r0, r0, #1 + RET + + @ Look for a NAN. +3: mvns ip, r2, asr #24 + bne 4f + movs ip, r0, lsl #9 + bne 5f @ r0 is NAN +4: mvns ip, r3, asr #24 + bne 2b + movs ip, r1, lsl #9 + beq 2b @ r1 is not NAN +5: ldr r0, [sp], #4 @ return unordered code. + RET + + FUNC_END gesf2 + FUNC_END gtsf2 + FUNC_END lesf2 + FUNC_END ltsf2 + FUNC_END nesf2 + FUNC_END eqsf2 + FUNC_END cmpsf2 + +ARM_FUNC_START aeabi_cfrcmple + + mov ip, r0 + mov r0, r1 + mov r1, ip + b 6f + +ARM_FUNC_START aeabi_cfcmpeq +ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq + + @ The status-returning routines are required to preserve all + @ registers except ip, lr, and cpsr. +6: do_push {r0, r1, r2, r3, lr} + ARM_CALL cmpsf2 + @ Set the Z flag correctly, and the C flag unconditionally. + cmp r0, #0 + @ Clear the C flag if the return value was -1, indicating + @ that the first operand was smaller than the second. + do_it mi + cmnmi r0, #0 + RETLDM "r0, r1, r2, r3" + + FUNC_END aeabi_cfcmple + FUNC_END aeabi_cfcmpeq + FUNC_END aeabi_cfrcmple + +ARM_FUNC_START aeabi_fcmpeq + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it eq, e + moveq r0, #1 @ Equal to. + movne r0, #0 @ Less than, greater than, or unordered. + RETLDM + + FUNC_END aeabi_fcmpeq + +ARM_FUNC_START aeabi_fcmplt + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it cc, e + movcc r0, #1 @ Less than. + movcs r0, #0 @ Equal to, greater than, or unordered. + RETLDM + + FUNC_END aeabi_fcmplt + +ARM_FUNC_START aeabi_fcmple + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfcmple + do_it ls, e + movls r0, #1 @ Less than or equal to. + movhi r0, #0 @ Greater than or unordered. + RETLDM + + FUNC_END aeabi_fcmple + +ARM_FUNC_START aeabi_fcmpge + + str lr, [sp, #-8]! + ARM_CALL aeabi_cfrcmple + do_it ls, e + movls r0, #1 @ Operand 2 is less than or equal to operand 1. + movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. + RETLDM + + FUNC_END aeabi_fcmpge + +ARM_FUNC_START aeabi_fcmpgt + + str lr, [sp, #-8]! 
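/* Like the wrappers above, this one reduces the three-way result
   documented at the top of this section to a boolean via the flags set
   by the cf-compare; in effect (illustrative sketch, with cmp3 standing
   in for the three-way compare, which yields 1 for unordered operands
   at this entry):

     int __aeabi_fcmpgt (float a, float b)
     {
       return cmp3 (b, a) < 0;   // 1 iff a > b; unordered gives 0
     }
*/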
+ ARM_CALL aeabi_cfrcmple + do_it cc, e + movcc r0, #1 @ Operand 2 is less than operand 1. + movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, + @ or they are unordered. + RETLDM + + FUNC_END aeabi_fcmpgt + +#endif /* L_cmpsf2 */ + +#ifdef L_arm_unordsf2 + +ARM_FUNC_START unordsf2 +ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 + + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 + mvns ip, r2, asr #24 + bne 1f + movs ip, r0, lsl #9 + bne 3f @ r0 is NAN +1: mvns ip, r3, asr #24 + bne 2f + movs ip, r1, lsl #9 + bne 3f @ r1 is NAN +2: mov r0, #0 @ arguments are ordered. + RET +3: mov r0, #1 @ arguments are unordered. + RET + + FUNC_END aeabi_fcmpun + FUNC_END unordsf2 + +#endif /* L_unordsf2 */ + +#ifdef L_arm_fixsfsi + +ARM_FUNC_START fixsfsi +ARM_FUNC_ALIAS aeabi_f2iz fixsfsi + + @ check exponent range. + mov r2, r0, lsl #1 + cmp r2, #(127 << 24) + bcc 1f @ value is too small + mov r3, #(127 + 31) + subs r2, r3, r2, lsr #24 + bls 2f @ value is too large + + @ scale value + mov r3, r0, lsl #8 + orr r3, r3, #0x80000000 + tst r0, #0x80000000 @ the sign bit + shift1 lsr, r0, r3, r2 + do_it ne + rsbne r0, r0, #0 + RET + +1: mov r0, #0 + RET + +2: cmp r2, #(127 + 31 - 0xff) + bne 3f + movs r2, r0, lsl #9 + bne 4f @ r0 is NAN. +3: ands r0, r0, #0x80000000 @ the sign bit + do_it eq + moveq r0, #0x7fffffff @ the maximum signed positive si + RET + +4: mov r0, #0 @ What should we convert NAN to? + RET + + FUNC_END aeabi_f2iz + FUNC_END fixsfsi + +#endif /* L_fixsfsi */ + +#ifdef L_arm_fixunssfsi + +ARM_FUNC_START fixunssfsi +ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi + + @ check exponent range. + movs r2, r0, lsl #1 + bcs 1f @ value is negative + cmp r2, #(127 << 24) + bcc 1f @ value is too small + mov r3, #(127 + 31) + subs r2, r3, r2, lsr #24 + bmi 2f @ value is too large + + @ scale the value + mov r3, r0, lsl #8 + orr r3, r3, #0x80000000 + shift1 lsr, r0, r3, r2 + RET + +1: mov r0, #0 + RET + +2: cmp r2, #(127 + 31 - 0xff) + bne 3f + movs r2, r0, lsl #9 + bne 4f @ r0 is NAN. +3: mov r0, #0xffffffff @ maximum unsigned si + RET + +4: mov r0, #0 @ What should we convert NAN to? + RET + + FUNC_END aeabi_f2uiz + FUNC_END fixunssfsi + +#endif /* L_fixunssfsi */ diff --git a/libgcc/config/arm/lib1funcs.S b/libgcc/config/arm/lib1funcs.S new file mode 100644 index 00000000000..2e76c01df4b --- /dev/null +++ b/libgcc/config/arm/lib1funcs.S @@ -0,0 +1,1829 @@ +@ libgcc routines for ARM cpu. +@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005, 2007, 2008, + 2009, 2010 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + +/* An executable stack is *not* required for these functions. */ +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",%progbits +.previous +#endif /* __ELF__ and __linux__ */ + +#ifdef __ARM_EABI__ +/* Some attributes that are common to all routines in this file. */ + /* Tag_ABI_align_needed: This code does not require 8-byte + alignment from the caller. */ + /* .eabi_attribute 24, 0 -- default setting. */ + /* Tag_ABI_align_preserved: This code preserves 8-byte + alignment in any callee. */ + .eabi_attribute 25, 1 +#endif /* __ARM_EABI__ */ +/* ------------------------------------------------------------------------ */ + +/* We need to know what prefix to add to function names. */ + +#ifndef __USER_LABEL_PREFIX__ +#error __USER_LABEL_PREFIX__ not defined +#endif + +/* ANSI concatenation macros. */ + +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ + +#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) + +#ifdef __ELF__ +#ifdef __thumb__ +#define __PLT__ /* Not supported in Thumb assembler (for now). */ +#elif defined __vxworks && !defined __PIC__ +#define __PLT__ /* Not supported by the kernel loader. */ +#else +#define __PLT__ (PLT) +#endif +#define TYPE(x) .type SYM(x),function +#define SIZE(x) .size SYM(x), . - SYM(x) +#define LSYM(x) .x +#else +#define __PLT__ +#define TYPE(x) +#define SIZE(x) +#define LSYM(x) x +#endif + +/* Function end macros. Variants for interworking. */ + +#if defined(__ARM_ARCH_2__) +# define __ARM_ARCH__ 2 +#endif + +#if defined(__ARM_ARCH_3__) +# define __ARM_ARCH__ 3 +#endif + +#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \ + || defined(__ARM_ARCH_4T__) +/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with + long multiply instructions. That includes v3M. */ +# define __ARM_ARCH__ 4 +#endif + +#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH__ 5 +#endif + +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ + || defined(__ARM_ARCH_6M__) +# define __ARM_ARCH__ 6 +#endif + +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH__ 7 +#endif + +#ifndef __ARM_ARCH__ +#error Unable to determine architecture. +#endif + +/* There are times when we might prefer Thumb1 code even if ARM code is + permitted, for example, the code might be smaller, or there might be + interworking problems with switching to ARM state if interworking is + disabled. */ +#if (defined(__thumb__) \ + && !defined(__thumb2__) \ + && (!defined(__THUMB_INTERWORK__) \ + || defined (__OPTIMIZE_SIZE__) \ + || defined(__ARM_ARCH_6M__))) +# define __prefer_thumb__ +#endif + +/* How to return from a function call depends on the architecture variant. */ + +#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__) + +# define RET bx lr +# define RETc(x) bx##x lr + +/* Special precautions for interworking on armv4t. */ +# if (__ARM_ARCH__ == 4) + +/* Always use bx, not ldr pc. */ +# if (defined(__thumb__) || defined(__THUMB_INTERWORK__)) +# define __INTERWORKING__ +# endif /* __THUMB__ || __THUMB_INTERWORK__ */ + +/* Include thumb stub before arm mode code. 
*/ +# if defined(__thumb__) && !defined(__THUMB_INTERWORK__) +# define __INTERWORKING_STUBS__ +# endif /* __thumb__ && !__THUMB_INTERWORK__ */ + +#endif /* __ARM_ARCH == 4 */ + +#else + +# define RET mov pc, lr +# define RETc(x) mov##x pc, lr + +#endif + +.macro cfi_pop advance, reg, cfa_offset +#ifdef __ELF__ + .pushsection .debug_frame + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte \advance + .byte (0xc0 | \reg) /* DW_CFA_restore */ + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 \cfa_offset + .popsection +#endif +.endm +.macro cfi_push advance, reg, offset, cfa_offset +#ifdef __ELF__ + .pushsection .debug_frame + .byte 0x4 /* DW_CFA_advance_loc4 */ + .4byte \advance + .byte (0x80 | \reg) /* DW_CFA_offset */ + .uleb128 (\offset / -4) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .uleb128 \cfa_offset + .popsection +#endif +.endm +.macro cfi_start start_label, end_label +#ifdef __ELF__ + .pushsection .debug_frame +LSYM(Lstart_frame): + .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE +LSYM(Lstart_cie): + .4byte 0xffffffff @ CIE Identifier Tag + .byte 0x1 @ CIE Version + .ascii "\0" @ CIE Augmentation + .uleb128 0x1 @ CIE Code Alignment Factor + .sleb128 -4 @ CIE Data Alignment Factor + .byte 0xe @ CIE RA Column + .byte 0xc @ DW_CFA_def_cfa + .uleb128 0xd + .uleb128 0x0 + + .align 2 +LSYM(Lend_cie): + .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length +LSYM(Lstart_fde): + .4byte LSYM(Lstart_frame) @ FDE CIE offset + .4byte \start_label @ FDE initial location + .4byte \end_label-\start_label @ FDE address range + .popsection +#endif +.endm +.macro cfi_end end_label +#ifdef __ELF__ + .pushsection .debug_frame + .align 2 +LSYM(Lend_fde): + .popsection +\end_label: +#endif +.endm + +/* Don't pass dirn, it's there just to get token pasting right. */ + +.macro RETLDM regs=, cond=, unwind=, dirn=ia +#if defined (__INTERWORKING__) + .ifc "\regs","" + ldr\cond lr, [sp], #8 + .else +# if defined(__thumb2__) + pop\cond {\regs, lr} +# else + ldm\cond\dirn sp!, {\regs, lr} +# endif + .endif + .ifnc "\unwind", "" + /* Mark LR as restored. */ +97: cfi_pop 97b - \unwind, 0xe, 0x0 + .endif + bx\cond lr +#else + /* Caller is responsible for providing IT instruction. */ + .ifc "\regs","" + ldr\cond pc, [sp], #8 + .else +# if defined(__thumb2__) + pop\cond {\regs, pc} +# else + ldm\cond\dirn sp!, {\regs, pc} +# endif + .endif +#endif +.endm + +/* The Unified assembly syntax allows the same code to be assembled for both + ARM and Thumb-2. However this is only supported by recent gas, so define + a set of macros to allow ARM code on older assemblers. */ +#if defined(__thumb2__) +.macro do_it cond, suffix="" + it\suffix \cond +.endm +.macro shift1 op, arg0, arg1, arg2 + \op \arg0, \arg1, \arg2 +.endm +#define do_push push +#define do_pop pop +#define COND(op1, op2, cond) op1 ## op2 ## cond +/* Perform an arithmetic operation with a variable shift operand. This + requires two instructions and a scratch register on Thumb-2. 
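   For example, the use "shiftop adds r0 r0 r1 asr r3 ip" seen in
   ieee754-sf.S computes, in C terms,

     r0 = r0 + ((int32_t) r1 >> r3);

   emitted as a single "adds r0, r0, r1, asr r3" on ARM, or as
   "asr ip, r1, r3" into the scratch register followed by
   "adds r0, r0, ip" in the Thumb-2 expansion below.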
*/ +.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp + \shiftop \tmp, \src2, \shiftreg + \name \dest, \src1, \tmp +.endm +#else +.macro do_it cond, suffix="" +.endm +.macro shift1 op, arg0, arg1, arg2 + mov \arg0, \arg1, \op \arg2 +.endm +#define do_push stmfd sp!, +#define do_pop ldmfd sp!, +#define COND(op1, op2, cond) op1 ## cond ## op2 +.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp + \name \dest, \src1, \src2, \shiftop \shiftreg +.endm +#endif + +#ifdef __ARM_EABI__ +.macro ARM_LDIV0 name signed + cmp r0, #0 + .ifc \signed, unsigned + movne r0, #0xffffffff + .else + movgt r0, #0x7fffffff + movlt r0, #0x80000000 + .endif + b SYM (__aeabi_idiv0) __PLT__ +.endm +#else +.macro ARM_LDIV0 name signed + str lr, [sp, #-8]! +98: cfi_push 98b - __\name, 0xe, -0x8, 0x8 + bl SYM (__div0) __PLT__ + mov r0, #0 @ About as wrong as it could be. + RETLDM unwind=98b +.endm +#endif + + +#ifdef __ARM_EABI__ +.macro THUMB_LDIV0 name signed +#if defined(__ARM_ARCH_6M__) + .ifc \signed, unsigned + cmp r0, #0 + beq 1f + mov r0, #0 + mvn r0, r0 @ 0xffffffff +1: + .else + cmp r0, #0 + beq 2f + blt 3f + mov r0, #0 + mvn r0, r0 + lsr r0, r0, #1 @ 0x7fffffff + b 2f +3: mov r0, #0x80 + lsl r0, r0, #24 @ 0x80000000 +2: + .endif + push {r0, r1, r2} + ldr r0, 4f + adr r1, 4f + add r0, r1 + str r0, [sp, #8] + @ We know we are not on armv4t, so pop pc is safe. + pop {r0, r1, pc} + .align 2 +4: + .word __aeabi_idiv0 - 4b +#elif defined(__thumb2__) + .syntax unified + .ifc \signed, unsigned + cbz r0, 1f + mov r0, #0xffffffff +1: + .else + cmp r0, #0 + do_it gt + movgt r0, #0x7fffffff + do_it lt + movlt r0, #0x80000000 + .endif + b.w SYM(__aeabi_idiv0) __PLT__ +#else + .align 2 + bx pc + nop + .arm + cmp r0, #0 + .ifc \signed, unsigned + movne r0, #0xffffffff + .else + movgt r0, #0x7fffffff + movlt r0, #0x80000000 + .endif + b SYM(__aeabi_idiv0) __PLT__ + .thumb +#endif +.endm +#else +.macro THUMB_LDIV0 name signed + push { r1, lr } +98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 + bl SYM (__div0) + mov r0, #0 @ About as wrong as it could be. +#if defined (__INTERWORKING__) + pop { r1, r2 } + bx r2 +#else + pop { r1, pc } +#endif +.endm +#endif + +.macro FUNC_END name + SIZE (__\name) +.endm + +.macro DIV_FUNC_END name signed + cfi_start __\name, LSYM(Lend_div0) +LSYM(Ldiv0): +#ifdef __thumb__ + THUMB_LDIV0 \name \signed +#else + ARM_LDIV0 \name \signed +#endif + cfi_end LSYM(Lend_div0) + FUNC_END \name +.endm + +.macro THUMB_FUNC_START name + .globl SYM (\name) + TYPE (\name) + .thumb_func +SYM (\name): +.endm + +/* Function start macros. Variants for ARM and Thumb. */ + +#ifdef __thumb__ +#define THUMB_FUNC .thumb_func +#define THUMB_CODE .force_thumb +# if defined(__thumb2__) +#define THUMB_SYNTAX .syntax divided +# else +#define THUMB_SYNTAX +# endif +#else +#define THUMB_FUNC +#define THUMB_CODE +#define THUMB_SYNTAX +#endif + +.macro FUNC_START name + .text + .globl SYM (__\name) + TYPE (__\name) + .align 0 + THUMB_CODE + THUMB_FUNC + THUMB_SYNTAX +SYM (__\name): +.endm + +/* Special function that will always be coded in ARM assembly, even if + in Thumb-only compilation. */ + +#if defined(__thumb2__) + +/* For Thumb-2 we build everything in thumb mode. */ +.macro ARM_FUNC_START name + FUNC_START \name + .syntax unified +.endm +#define EQUIV .thumb_set +.macro ARM_CALL name + bl __\name +.endm + +#elif defined(__INTERWORKING_STUBS__) + +.macro ARM_FUNC_START name + FUNC_START \name + bx pc + nop + .arm +/* A hook to tell gdb that we've switched to ARM mode. 
Also used to call + directly from other local arm routines. */ +_L__\name: +.endm +#define EQUIV .thumb_set +/* Branch directly to a function declared with ARM_FUNC_START. + Must be called in arm mode. */ +.macro ARM_CALL name + bl _L__\name +.endm + +#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */ + +#ifdef __ARM_ARCH_6M__ +#define EQUIV .thumb_set +#else +.macro ARM_FUNC_START name + .text + .globl SYM (__\name) + TYPE (__\name) + .align 0 + .arm +SYM (__\name): +.endm +#define EQUIV .set +.macro ARM_CALL name + bl __\name +.endm +#endif + +#endif + +.macro FUNC_ALIAS new old + .globl SYM (__\new) +#if defined (__thumb__) + .thumb_set SYM (__\new), SYM (__\old) +#else + .set SYM (__\new), SYM (__\old) +#endif +.endm + +#ifndef __ARM_ARCH_6M__ +.macro ARM_FUNC_ALIAS new old + .globl SYM (__\new) + EQUIV SYM (__\new), SYM (__\old) +#if defined(__INTERWORKING_STUBS__) + .set SYM (_L__\new), SYM (_L__\old) +#endif +.endm +#endif + +#ifdef __ARMEB__ +#define xxh r0 +#define xxl r1 +#define yyh r2 +#define yyl r3 +#else +#define xxh r1 +#define xxl r0 +#define yyh r3 +#define yyl r2 +#endif + +#ifdef __ARM_EABI__ +.macro WEAK name + .weak SYM (__\name) +.endm +#endif + +#ifdef __thumb__ +/* Register aliases. */ + +work .req r4 @ XXXX is this safe ? +dividend .req r0 +divisor .req r1 +overdone .req r2 +result .req r2 +curbit .req r3 +#endif +#if 0 +ip .req r12 +sp .req r13 +lr .req r14 +pc .req r15 +#endif + +/* ------------------------------------------------------------------------ */ +/* Bodies of the division and modulo routines. */ +/* ------------------------------------------------------------------------ */ +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) + +#if defined (__thumb2__) + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + rsb \curbit, \curbit, #31 + adr \result, 1f + add \curbit, \result, \curbit, lsl #4 + mov \result, #0 + mov pc, \curbit +.p2align 3 +1: + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp.w \dividend, \divisor, lsl #shift + nop.n + adc.w \result, \result, \result + it cs + subcs.w \dividend, \dividend, \divisor, lsl #shift + .endr +#else + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + rsbs \curbit, \curbit, #31 + addne \curbit, \curbit, \curbit, lsl #1 + mov \result, #0 + addne pc, pc, \curbit, lsl #2 + nop + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp \dividend, \divisor, lsl #shift + adc \result, \result, \result + subcs \dividend, \dividend, \divisor, lsl #shift + .endr +#endif + +#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ +#if __ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else /* __ARM_ARCH__ < 5 */ + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4-bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. 
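/* Stripped of the four-bit batching and early termination, the pre-v5
   shift-up and division loops below implement the classic binary
   restoring division; in C terms (illustrative sketch only):

     unsigned udiv (unsigned dividend, unsigned divisor)
     {
       unsigned curbit = 1, result = 0;
       while (divisor < dividend && !(divisor & 0x80000000u))
         {
           divisor <<= 1;
           curbit <<= 1;
         }
       do
         {
           if (dividend >= divisor)
             {
               dividend -= divisor;
               result |= curbit;
             }
           divisor >>= 1;
         }
       while ((curbit >>= 1) != 0);
       return result;
     }
*/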
+1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif /* __ARM_ARCH__ < 5 */ + + @ Division loop +1: cmp \dividend, \divisor + do_it hs, t + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + do_it hs, t + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + do_it ne, t + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ + +.endm +/* ------------------------------------------------------------------------ */ +.macro ARM_DIV2_ORDER divisor, order + +#if __ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm +/* ------------------------------------------------------------------------ */ +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + rsbs \order, \order, #31 + addne pc, pc, \order, lsl #3 + nop + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp \dividend, \divisor, lsl #shift + subcs \dividend, \dividend, \divisor, lsl #shift + .endr + +#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ +#if __ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else /* __ARM_ARCH__ < 5 */ + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif /* __ARM_ARCH__ < 5 */ + + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. 
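/* Again stripped of the four-comparison batching, the remainder loop
   below amounts to (illustrative sketch only):

     unsigned umod (unsigned dividend, unsigned divisor)
     {
       int order = 0;                 // how far the divisor was shifted up
       while (!(divisor & 0x80000000u) && divisor < dividend)
         {
           divisor <<= 1;
           order++;
         }
       for (; order >= 0; order--, divisor >>= 1)
         if (dividend >= divisor)
           dividend -= divisor;
       return dividend;
     }
*/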
+ subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/substractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: + +#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ + +.endm +/* ------------------------------------------------------------------------ */ +.macro THUMB_DIV_MOD_BODY modulo + @ Load the constant 0x10000000 into our work register. + mov work, #1 + lsl work, #28 +LSYM(Loop1): + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, work + bhs LSYM(Lbignum) + cmp divisor, dividend + bhs LSYM(Lbignum) + lsl divisor, #4 + lsl curbit, #4 + b LSYM(Loop1) +LSYM(Lbignum): + @ Set work to 0x80000000 + lsl work, #3 +LSYM(Loop2): + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, work + bhs LSYM(Loop3) + cmp divisor, dividend + bhs LSYM(Loop3) + lsl divisor, #1 + lsl curbit, #1 + b LSYM(Loop2) +LSYM(Loop3): + @ Test for possible subtractions ... + .if \modulo + @ ... On the final pass, this may subtract too much from the dividend, + @ so keep track of which subtractions are done, we can fix them up + @ afterwards. + mov overdone, #0 + cmp dividend, divisor + blo LSYM(Lover1) + sub dividend, dividend, divisor +LSYM(Lover1): + lsr work, divisor, #1 + cmp dividend, work + blo LSYM(Lover2) + sub dividend, dividend, work + mov ip, curbit + mov work, #1 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover2): + lsr work, divisor, #2 + cmp dividend, work + blo LSYM(Lover3) + sub dividend, dividend, work + mov ip, curbit + mov work, #2 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover3): + lsr work, divisor, #3 + cmp dividend, work + blo LSYM(Lover4) + sub dividend, dividend, work + mov ip, curbit + mov work, #3 + ror curbit, work + orr overdone, curbit + mov curbit, ip +LSYM(Lover4): + mov ip, curbit + .else + @ ... and note which bits are done in the result. On the final pass, + @ this may subtract too much from the dividend, but the result will be ok, + @ since the "bit" will have been shifted out at the bottom. 
+ cmp dividend, divisor + blo LSYM(Lover1) + sub dividend, dividend, divisor + orr result, result, curbit +LSYM(Lover1): + lsr work, divisor, #1 + cmp dividend, work + blo LSYM(Lover2) + sub dividend, dividend, work + lsr work, curbit, #1 + orr result, work +LSYM(Lover2): + lsr work, divisor, #2 + cmp dividend, work + blo LSYM(Lover3) + sub dividend, dividend, work + lsr work, curbit, #2 + orr result, work +LSYM(Lover3): + lsr work, divisor, #3 + cmp dividend, work + blo LSYM(Lover4) + sub dividend, dividend, work + lsr work, curbit, #3 + orr result, work +LSYM(Lover4): + .endif + + cmp dividend, #0 @ Early termination? + beq LSYM(Lover5) + lsr curbit, #4 @ No, any more bits to do? + beq LSYM(Lover5) + lsr divisor, #4 + b LSYM(Loop3) +LSYM(Lover5): + .if \modulo + @ Any subtractions that we should not have done will be recorded in + @ the top three bits of "overdone". Exactly which were not needed + @ are governed by the position of the bit, stored in ip. + mov work, #0xe + lsl work, #28 + and overdone, work + beq LSYM(Lgot_result) + + @ If we terminated early, because dividend became zero, then the + @ bit in ip will not be in the bottom nibble, and we should not + @ perform the additions below. We must test for this though + @ (rather relying upon the TSTs to prevent the additions) since + @ the bit in ip could be in the top two bits which might then match + @ with one of the smaller RORs. + mov curbit, ip + mov work, #0x7 + tst curbit, work + beq LSYM(Lgot_result) + + mov curbit, ip + mov work, #3 + ror curbit, work + tst overdone, curbit + beq LSYM(Lover6) + lsr work, divisor, #3 + add dividend, work +LSYM(Lover6): + mov curbit, ip + mov work, #2 + ror curbit, work + tst overdone, curbit + beq LSYM(Lover7) + lsr work, divisor, #2 + add dividend, work +LSYM(Lover7): + mov curbit, ip + mov work, #1 + ror curbit, work + tst overdone, curbit + beq LSYM(Lgot_result) + lsr work, divisor, #1 + add dividend, work + .endif +LSYM(Lgot_result): +.endm +/* ------------------------------------------------------------------------ */ +/* Start of the Real Functions */ +/* ------------------------------------------------------------------------ */ +#ifdef L_udivsi3 + +#if defined(__prefer_thumb__) + + FUNC_START udivsi3 + FUNC_ALIAS aeabi_uidiv udivsi3 + + cmp divisor, #0 + beq LSYM(Ldiv0) +LSYM(udivsi3_skip_div0_test): + mov curbit, #1 + mov result, #0 + + push { work } + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 0 + + mov r0, result + pop { work } + RET + +#else /* ARM version/Thumb-2. */ + + ARM_FUNC_START udivsi3 + ARM_FUNC_ALIAS aeabi_uidiv udivsi3 + + /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily + check for division-by-zero a second time. 
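   (The __aeabi_uidivmod entry point further down branches through this
   label and then recovers the remainder with one multiply, returning
   the quotient in r0 and the remainder in r1; in C terms, roughly:

     typedef struct { unsigned quot, rem; } uidivmod_ret;
     uidivmod_ret __aeabi_uidivmod (unsigned n, unsigned d)
     {
       unsigned q = __udivsi3 (n, d);
       uidivmod_ret r = { q, n - q * d };
       return r;
     }

   with the struct name illustrative only.)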
*/ +LSYM(udivsi3_skip_div0_test): + subs r2, r1, #1 + do_it eq + RETc(eq) + bcc LSYM(Ldiv0) + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + RET + +11: do_it eq, e + moveq r0, #1 + movne r0, #0 + RET + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + RET + +#endif /* ARM version */ + + DIV_FUNC_END udivsi3 unsigned + +#if defined(__prefer_thumb__) +FUNC_START aeabi_uidivmod + cmp r1, #0 + beq LSYM(Ldiv0) + push {r0, r1, lr} + bl LSYM(udivsi3_skip_div0_test) + POP {r1, r2, r3} + mul r2, r0 + sub r1, r1, r2 + bx r3 +#else +ARM_FUNC_START aeabi_uidivmod + cmp r1, #0 + beq LSYM(Ldiv0) + stmfd sp!, { r0, r1, lr } + bl LSYM(udivsi3_skip_div0_test) + ldmfd sp!, { r1, r2, lr } + mul r3, r2, r0 + sub r1, r1, r3 + RET +#endif + FUNC_END aeabi_uidivmod + +#endif /* L_udivsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_umodsi3 + + FUNC_START umodsi3 + +#ifdef __thumb__ + + cmp divisor, #0 + beq LSYM(Ldiv0) + mov curbit, #1 + cmp dividend, divisor + bhs LSYM(Lover10) + RET + +LSYM(Lover10): + push { work } + + THUMB_DIV_MOD_BODY 1 + + pop { work } + RET + +#else /* ARM version. */ + + subs r2, r1, #1 @ compare divisor with 1 + bcc LSYM(Ldiv0) + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + RETc(ls) + + ARM_MOD_BODY r0, r1, r2, r3 + + RET + +#endif /* ARM version. */ + + DIV_FUNC_END umodsi3 unsigned + +#endif /* L_umodsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_divsi3 + +#if defined(__prefer_thumb__) + + FUNC_START divsi3 + FUNC_ALIAS aeabi_idiv divsi3 + + cmp divisor, #0 + beq LSYM(Ldiv0) +LSYM(divsi3_skip_div0_test): + push { work } + mov work, dividend + eor work, divisor @ Save the sign of the result. + mov ip, work + mov curbit, #1 + mov result, #0 + cmp divisor, #0 + bpl LSYM(Lover10) + neg divisor, divisor @ Loops below use unsigned. +LSYM(Lover10): + cmp dividend, #0 + bpl LSYM(Lover11) + neg dividend, dividend +LSYM(Lover11): + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 0 + + mov r0, result + mov work, ip + cmp work, #0 + bpl LSYM(Lover12) + neg r0, r0 +LSYM(Lover12): + pop { work } + RET + +#else /* ARM/Thumb-2 version. */ + + ARM_FUNC_START divsi3 + ARM_FUNC_ALIAS aeabi_idiv divsi3 + + cmp r1, #0 + beq LSYM(Ldiv0) +LSYM(divsi3_skip_div0_test): + eor ip, r0, r1 @ save the sign of the result. + do_it mi + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + do_it mi + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + do_it mi + rsbmi r0, r0, #0 + RET + +10: teq ip, r0 @ same sign ? 
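/* Overall, the signed entry point reduces to the unsigned algorithm
   plus a sign fixup, which is what the ip tests here implement; the
   shape in C (sketch, glossing over the INT_MIN corner):

     int __divsi3 (int a, int b)
     {
       int neg = (a ^ b) < 0;              // sign of the quotient
       unsigned q = udiv (a < 0 ? -(unsigned) a : (unsigned) a,
                          b < 0 ? -(unsigned) b : (unsigned) b);
       return neg ? -(int) q : (int) q;
     }
*/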
+ do_it mi + rsbmi r0, r0, #0 + RET + +11: do_it lo + movlo r0, #0 + do_it eq,t + moveq r0, ip, asr #31 + orreq r0, r0, #1 + RET + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + do_it mi + rsbmi r0, r0, #0 + RET + +#endif /* ARM version */ + + DIV_FUNC_END divsi3 signed + +#if defined(__prefer_thumb__) +FUNC_START aeabi_idivmod + cmp r1, #0 + beq LSYM(Ldiv0) + push {r0, r1, lr} + bl LSYM(divsi3_skip_div0_test) + POP {r1, r2, r3} + mul r2, r0 + sub r1, r1, r2 + bx r3 +#else +ARM_FUNC_START aeabi_idivmod + cmp r1, #0 + beq LSYM(Ldiv0) + stmfd sp!, { r0, r1, lr } + bl LSYM(divsi3_skip_div0_test) + ldmfd sp!, { r1, r2, lr } + mul r3, r2, r0 + sub r1, r1, r3 + RET +#endif + FUNC_END aeabi_idivmod + +#endif /* L_divsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_modsi3 + + FUNC_START modsi3 + +#ifdef __thumb__ + + mov curbit, #1 + cmp divisor, #0 + beq LSYM(Ldiv0) + bpl LSYM(Lover10) + neg divisor, divisor @ Loops below use unsigned. +LSYM(Lover10): + push { work } + @ Need to save the sign of the dividend, unfortunately, we need + @ work later on. Must do this after saving the original value of + @ the work register, because we will pop this value off first. + push { dividend } + cmp dividend, #0 + bpl LSYM(Lover11) + neg dividend, dividend +LSYM(Lover11): + cmp dividend, divisor + blo LSYM(Lgot_result) + + THUMB_DIV_MOD_BODY 1 + + pop { work } + cmp work, #0 + bpl LSYM(Lover12) + neg dividend, dividend +LSYM(Lover12): + pop { work } + RET + +#else /* ARM version. */ + + cmp r1, #0 + beq LSYM(Ldiv0) + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + RET + +#endif /* ARM version */ + + DIV_FUNC_END modsi3 signed + +#endif /* L_modsi3 */ +/* ------------------------------------------------------------------------ */ +#ifdef L_dvmd_tls + +#ifdef __ARM_EABI__ + WEAK aeabi_idiv0 + WEAK aeabi_ldiv0 + FUNC_START aeabi_idiv0 + FUNC_START aeabi_ldiv0 + RET + FUNC_END aeabi_ldiv0 + FUNC_END aeabi_idiv0 +#else + FUNC_START div0 + RET + FUNC_END div0 +#endif + +#endif /* L_divmodsi_tools */ +/* ------------------------------------------------------------------------ */ +#ifdef L_dvmd_lnx +@ GNU/Linux division-by zero handler. Used in place of L_dvmd_tls + +/* Constant taken from <asm/signal.h>. */ +#define SIGFPE 8 + +#ifdef __ARM_EABI__ + WEAK aeabi_idiv0 + WEAK aeabi_ldiv0 + ARM_FUNC_START aeabi_idiv0 + ARM_FUNC_START aeabi_ldiv0 +#else + ARM_FUNC_START div0 +#endif + + do_push {r1, lr} + mov r0, #SIGFPE + bl SYM(raise) __PLT__ + RETLDM r1 + +#ifdef __ARM_EABI__ + FUNC_END aeabi_ldiv0 + FUNC_END aeabi_idiv0 +#else + FUNC_END div0 +#endif + +#endif /* L_dvmd_lnx */ +#ifdef L_clear_cache +#if defined __ARM_EABI__ && defined __linux__ +@ EABI GNU/Linux call to cacheflush syscall. + ARM_FUNC_START clear_cache + do_push {r7} +#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__) + movw r7, #2 + movt r7, #0xf +#else + mov r7, #0xf0000 + add r7, r7, #2 +#endif + mov r2, #0 + swi 0 + do_pop {r7} + RET + FUNC_END clear_cache +#else +#error "This is only for ARM EABI GNU/Linux" +#endif +#endif /* L_clear_cache */ +/* ------------------------------------------------------------------------ */ +/* Dword shift operations. 
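   Each variant combines the two 32-bit halves with complementary
   shifts; for the logical right shift the intent is, in C terms
   (sketch, for shift counts 1..63):

     al = (n < 32) ? (al >> n) | (ah << (32 - n))
                   : ah >> (n - 32);
     ah = (n < 32) ? (ah >> n) : 0;

   The note that follows explains why the out-of-range register shifts
   used for the n >= 32 half of this come out right in hardware.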
*/ +/* All the following Dword shift variants rely on the fact that + shft xxx, Reg + is in fact done as + shft xxx, (Reg & 255) + so for Reg value in (32...63) and (-1...-31) we will get zero (in the + case of logical shifts) or the sign (for asr). */ + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +/* Prevent __aeabi double-word shifts from being produced on SymbianOS. */ +#ifndef __symbian__ + +#ifdef L_lshrdi3 + + FUNC_START lshrdi3 + FUNC_ALIAS aeabi_llsr lshrdi3 + +#ifdef __thumb__ + lsr al, r2 + mov r3, ah + lsr ah, r2 + mov ip, r3 + sub r2, #32 + lsr r3, r2 + orr al, r3 + neg r2, r2 + mov r3, ip + lsl r3, r2 + orr al, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, lsr r3 + orrmi al, al, ah, lsl ip + mov ah, ah, lsr r2 + RET +#endif + FUNC_END aeabi_llsr + FUNC_END lshrdi3 + +#endif + +#ifdef L_ashrdi3 + + FUNC_START ashrdi3 + FUNC_ALIAS aeabi_lasr ashrdi3 + +#ifdef __thumb__ + lsr al, r2 + mov r3, ah + asr ah, r2 + sub r2, #32 + @ If r2 is negative at this point the following step would OR + @ the sign bit into all of AL. That's not what we want... + bmi 1f + mov ip, r3 + asr r3, r2 + orr al, r3 + mov r3, ip +1: + neg r2, r2 + lsl r3, r2 + orr al, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, asr r3 + orrmi al, al, ah, lsl ip + mov ah, ah, asr r2 + RET +#endif + + FUNC_END aeabi_lasr + FUNC_END ashrdi3 + +#endif + +#ifdef L_ashldi3 + + FUNC_START ashldi3 + FUNC_ALIAS aeabi_llsl ashldi3 + +#ifdef __thumb__ + lsl ah, r2 + mov r3, al + lsl al, r2 + mov ip, r3 + sub r2, #32 + lsl r3, r2 + orr ah, r3 + neg r2, r2 + mov r3, ip + lsr r3, r2 + orr ah, r3 + RET +#else + subs r3, r2, #32 + rsb ip, r2, #32 + movmi ah, ah, lsl r2 + movpl ah, al, lsl r3 + orrmi ah, ah, al, lsr ip + mov al, al, lsl r2 + RET +#endif + FUNC_END aeabi_llsl + FUNC_END ashldi3 + +#endif + +#endif /* __symbian__ */ + +#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \ + || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +#define HAVE_ARM_CLZ 1 +#endif + +#ifdef L_clzsi2 +#if defined(__ARM_ARCH_6M__) +FUNC_START clzsi2 + mov r1, #28 + mov r3, #1 + lsl r3, r3, #16 + cmp r0, r3 /* 0x10000 */ + bcc 2f + lsr r0, r0, #16 + sub r1, r1, #16 +2: lsr r3, r3, #8 + cmp r0, r3 /* #0x100 */ + bcc 2f + lsr r0, r0, #8 + sub r1, r1, #8 +2: lsr r3, r3, #4 + cmp r0, r3 /* #0x10 */ + bcc 2f + lsr r0, r0, #4 + sub r1, r1, #4 +2: adr r2, 1f + ldrb r0, [r2, r0] + add r0, r0, r1 + bx lr +.align 2 +1: +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 + FUNC_END clzsi2 +#else +ARM_FUNC_START clzsi2 +# if defined(HAVE_ARM_CLZ) + clz r0, r0 + RET +# else + mov r1, #28 + cmp r0, #0x10000 + do_it cs, t + movcs r0, r0, lsr #16 + subcs r1, r1, #16 + cmp r0, #0x100 + do_it cs, t + movcs r0, r0, lsr #8 + subcs r1, r1, #8 + cmp r0, #0x10 + do_it cs, t + movcs r0, r0, lsr #4 + subcs r1, r1, #4 + adr r2, 1f + ldrb r0, [r2, r0] + add r0, r0, r1 + RET +.align 2 +1: +.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 +# endif /* !HAVE_ARM_CLZ */ + FUNC_END clzsi2 +#endif +#endif /* L_clzsi2 */ + +#ifdef L_clzdi2 +#if !defined(HAVE_ARM_CLZ) + +# if defined(__ARM_ARCH_6M__) +FUNC_START clzdi2 + push {r4, lr} +# else +ARM_FUNC_START clzdi2 + do_push {r4, lr} +# endif + cmp xxh, #0 + bne 1f +# ifdef __ARMEB__ + mov r0, xxl + bl __clzsi2 + add r0, r0, #32 + b 2f +1: + bl __clzsi2 +# else + bl __clzsi2 + add r0, r0, #32 + b 2f +1: + mov r0, xxh + bl __clzsi2 +# endif +2: +# 
if defined(__ARM_ARCH_6M__) + pop {r4, pc} +# else + RETLDM r4 +# endif + FUNC_END clzdi2 + +#else /* HAVE_ARM_CLZ */ + +ARM_FUNC_START clzdi2 + cmp xxh, #0 + do_it eq, et + clzeq r0, xxl + clzne r0, xxh + addeq r0, r0, #32 + RET + FUNC_END clzdi2 + +#endif +#endif /* L_clzdi2 */ + +/* ------------------------------------------------------------------------ */ +/* These next two sections are here despite the fact that they contain Thumb + assembler because their presence allows interworked code to be linked even + when the GCC library is this one. */ + +/* Do not build the interworking functions when the target architecture does + not support Thumb instructions. (This can be a multilib option). */ +#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\ + || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \ + || __ARM_ARCH__ >= 6 + +#if defined L_call_via_rX + +/* These labels & instructions are used by the Arm/Thumb interworking code. + The address of function to be called is loaded into a register and then + one of these labels is called via a BL instruction. This puts the + return address into the link register with the bottom bit set, and the + code here switches to the correct mode before executing the function. */ + + .text + .align 0 + .force_thumb + +.macro call_via register + THUMB_FUNC_START _call_via_\register + + bx \register + nop + + SIZE (_call_via_\register) +.endm + + call_via r0 + call_via r1 + call_via r2 + call_via r3 + call_via r4 + call_via r5 + call_via r6 + call_via r7 + call_via r8 + call_via r9 + call_via sl + call_via fp + call_via ip + call_via sp + call_via lr + +#endif /* L_call_via_rX */ + +/* Don't bother with the old interworking routines for Thumb-2. */ +/* ??? Maybe only omit these on "m" variants. */ +#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__) + +#if defined L_interwork_call_via_rX + +/* These labels & instructions are used by the Arm/Thumb interworking code, + when the target address is in an unknown instruction set. The address + of function to be called is loaded into a register and then one of these + labels is called via a BL instruction. This puts the return address + into the link register with the bottom bit set, and the code here + switches to the correct mode before executing the function. Unfortunately + the target code cannot be relied upon to return via a BX instruction, so + instead we have to store the resturn address on the stack and allow the + called function to return here instead. Upon return we recover the real + return address and use a BX to get back to Thumb mode. + + There are three variations of this code. The first, + _interwork_call_via_rN(), will push the return address onto the + stack and pop it in _arm_return(). It should only be used if all + arguments are passed in registers. + + The second, _interwork_r7_call_via_rN(), instead stores the return + address at [r7, #-4]. It is the caller's responsibility to ensure + that this address is valid and contains no useful data. + + The third, _interwork_r11_call_via_rN(), works in the same way but + uses r11 instead of r7. It is useful if the caller does not really + need a frame pointer. */ + + .text + .align 0 + + .code 32 + .globl _arm_return +LSYM(Lstart_arm_return): + cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return) + cfi_push 0, 0xe, -0x8, 0x8 + nop @ This nop is for the benefit of debuggers, so that + @ backtraces will use the correct unwind information. 
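/* Conceptually, each _interwork_call_via_rN stub below performs
   (illustrative pseudo-C; push and branch_exchange are stand-ins for
   the str/BX instructions, not real helpers):

     if (((uintptr_t) target & 1) == 0)   // bit 0 clear: ARM-state target,
       {                                  // which may not return via BX,
         push (return_address);           // so route the return through
         lr = &_arm_return;               // _arm_return instead
       }
     branch_exchange (target);
*/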
+_arm_return: + RETLDM unwind=LSYM(Lstart_arm_return) + cfi_end LSYM(Lend_arm_return) + + .globl _arm_return_r7 +_arm_return_r7: + ldr lr, [r7, #-4] + bx lr + + .globl _arm_return_r11 +_arm_return_r11: + ldr lr, [r11, #-4] + bx lr + +.macro interwork_with_frame frame, register, name, return + .code 16 + + THUMB_FUNC_START \name + + bx pc + nop + + .code 32 + tst \register, #1 + streq lr, [\frame, #-4] + adreq lr, _arm_return_\frame + bx \register + + SIZE (\name) +.endm + +.macro interwork register + .code 16 + + THUMB_FUNC_START _interwork_call_via_\register + + bx pc + nop + + .code 32 + .globl LSYM(Lchange_\register) +LSYM(Lchange_\register): + tst \register, #1 + streq lr, [sp, #-8]! + adreq lr, _arm_return + bx \register + + SIZE (_interwork_call_via_\register) + + interwork_with_frame r7,\register,_interwork_r7_call_via_\register + interwork_with_frame r11,\register,_interwork_r11_call_via_\register +.endm + + interwork r0 + interwork r1 + interwork r2 + interwork r3 + interwork r4 + interwork r5 + interwork r6 + interwork r7 + interwork r8 + interwork r9 + interwork sl + interwork fp + interwork ip + interwork sp + + /* The LR case has to be handled a little differently... */ + .code 16 + + THUMB_FUNC_START _interwork_call_via_lr + + bx pc + nop + + .code 32 + .globl .Lchange_lr +.Lchange_lr: + tst lr, #1 + stmeqdb r13!, {lr, pc} + mov ip, lr + adreq lr, _arm_return + bx ip + + SIZE (_interwork_call_via_lr) + +#endif /* L_interwork_call_via_rX */ +#endif /* !__thumb2__ */ + +/* Functions to support compact pic switch tables in thumb1 state. + All these routines take an index into the table in r0. The + table is at LR & ~1 (but this must be rounded up in the case + of 32-bit entires). They are only permitted to clobber r12 + and r14 and r0 must be preserved on exit. */ +#ifdef L_thumb1_case_sqi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_sqi + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrsb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr + SIZE (__gnu_thumb1_case_sqi) +#endif + +#ifdef L_thumb1_case_uqi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_uqi + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr + SIZE (__gnu_thumb1_case_uqi) +#endif + +#ifdef L_thumb1_case_shi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_shi + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrsh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr + SIZE (__gnu_thumb1_case_shi) +#endif + +#ifdef L_thumb1_case_uhi + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_uhi + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr + SIZE (__gnu_thumb1_case_uhi) +#endif + +#ifdef L_thumb1_case_si + + .text + .align 0 + .force_thumb + .syntax unified + THUMB_FUNC_START __gnu_thumb1_case_si + push {r0, r1} + mov r1, lr + adds.n r1, r1, #2 /* Align to word. */ + lsrs r1, r1, #2 + lsls r0, r0, #2 + lsls r1, r1, #2 + ldr r0, [r1, r0] + adds r0, r0, r1 + mov lr, r0 + pop {r0, r1} + mov pc, lr /* We know we were called from thumb code. */ + SIZE (__gnu_thumb1_case_si) +#endif + +#endif /* Arch supports thumb. 
*/ + +#ifndef __symbian__ +#ifndef __ARM_ARCH_6M__ +#include "ieee754-df.S" +#include "ieee754-sf.S" +#include "bpabi.S" +#else /* __ARM_ARCH_6M__ */ +#include "bpabi-v6m.S" +#endif /* __ARM_ARCH_6M__ */ +#endif /* !__symbian__ */ diff --git a/libgcc/config/arm/libgcc-bpabi.ver b/libgcc/config/arm/libgcc-bpabi.ver new file mode 100644 index 00000000000..3ba8364dc8e --- /dev/null +++ b/libgcc/config/arm/libgcc-bpabi.ver @@ -0,0 +1,108 @@ +# Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +GCC_3.5 { + # BPABI symbols + __aeabi_cdcmpeq + __aeabi_cdcmple + __aeabi_cdrcmple + __aeabi_cfcmpeq + __aeabi_cfcmple + __aeabi_cfrcmple + __aeabi_d2f + __aeabi_d2iz + __aeabi_d2lz + __aeabi_d2uiz + __aeabi_d2ulz + __aeabi_dadd + __aeabi_dcmpeq + __aeabi_dcmpge + __aeabi_dcmpgt + __aeabi_dcmple + __aeabi_dcmplt + __aeabi_dcmpun + __aeabi_ddiv + __aeabi_dmul + __aeabi_dneg + __aeabi_drsub + __aeabi_dsub + __aeabi_f2d + __aeabi_f2iz + __aeabi_f2lz + __aeabi_f2uiz + __aeabi_f2ulz + __aeabi_fadd + __aeabi_fcmpeq + __aeabi_fcmpge + __aeabi_fcmpgt + __aeabi_fcmple + __aeabi_fcmplt + __aeabi_fcmpun + __aeabi_fdiv + __aeabi_fmul + __aeabi_fneg + __aeabi_frsub + __aeabi_fsub + __aeabi_i2d + __aeabi_i2f + __aeabi_idiv + __aeabi_idiv0 + __aeabi_idivmod + __aeabi_l2d + __aeabi_l2f + __aeabi_lasr + __aeabi_lcmp + __aeabi_ldiv0 + __aeabi_ldivmod + __aeabi_llsl + __aeabi_llsr + __aeabi_lmul + __aeabi_ui2d + __aeabi_ui2f + __aeabi_uidiv + __aeabi_uidivmod + __aeabi_uldivmod + __aeabi_ulcmp + __aeabi_ul2d + __aeabi_ul2f + __aeabi_uread4 + __aeabi_uread8 + __aeabi_uwrite4 + __aeabi_uwrite8 + + # Exception-Handling + # \S 7.5 + _Unwind_Complete + _Unwind_VRS_Get + _Unwind_VRS_Set + _Unwind_VRS_Pop + # \S 9.2 + __aeabi_unwind_cpp_pr0 + __aeabi_unwind_cpp_pr1 + __aeabi_unwind_cpp_pr2 + # The libstdc++ exception-handling personality routine uses this + # GNU-specific entry point. + __gnu_unwind_frame +} + +%exclude { + _Unwind_Backtrace +} +GCC_4.3.0 { + _Unwind_Backtrace +} diff --git a/libgcc/config/arm/libunwind.S b/libgcc/config/arm/libunwind.S index a3a19daab4b..8166cd86e47 100644 --- a/libgcc/config/arm/libunwind.S +++ b/libgcc/config/arm/libunwind.S @@ -40,7 +40,7 @@ #ifndef __symbian__ -#include "config/arm/lib1funcs.asm" +#include "lib1funcs.S" .macro UNPREFIX name .global SYM (\name) diff --git a/libgcc/config/arm/linux-atomic-64bit.c b/libgcc/config/arm/linux-atomic-64bit.c new file mode 100644 index 00000000000..af94c7f4ae5 --- /dev/null +++ b/libgcc/config/arm/linux-atomic-64bit.c @@ -0,0 +1,166 @@ +/* 64bit Linux-specific atomic operations for ARM EABI. + Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + Based on linux-atomic.c + + 64 bit additions david.gilbert@linaro.org + +This file is part of GCC. 
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* 64bit helper functions for atomic operations; the compiler will
+   call these when the code is compiled for a CPU without ldrexd/strexd.
+   (If the CPU has those instructions, the compiler inlines the
+   operation.)
+
+   These helpers require a kernel helper that's only present on newer
+   kernels; we check for that in an init section and bail out rather
+   unceremoniously.  */
+
+extern unsigned int __write (int fd, const void *buf, unsigned int count);
+extern void abort (void);
+
+/* Kernel helper for compare-and-exchange.  */
+typedef int (__kernel_cmpxchg64_t) (const long long* oldval,
+					const long long* newval,
+					long long *ptr);
+#define __kernel_cmpxchg64 (*(__kernel_cmpxchg64_t *) 0xffff0f60)
+
+/* Kernel helper page version number.  */
+#define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
+
+/* Check that the kernel has a new enough version at load.  */
+static void __check_for_sync8_kernelhelper (void)
+{
+  if (__kernel_helper_version < 5)
+    {
+      const char err[] = "A newer kernel is required to run this binary.  "
+				"(__kernel_cmpxchg64 helper)\n";
+      /* At this point we need a way to crash with some information
+	 for the user - I'm not sure I can rely on much else being
+	 available at this point, so do the same as generic-morestack.c
+	 write () and abort ().  */
+      __write (2 /* stderr.  */, err, sizeof (err));
+      abort ();
+    }
+}
+
+static void (*__sync8_kernelhelper_inithook[]) (void)
+  __attribute__ ((used, section (".init_array"))) = {
+  &__check_for_sync8_kernelhelper
+};
+
+#define HIDDEN __attribute__ ((visibility ("hidden")))
+
+#define FETCH_AND_OP_WORD64(OP, PFX_OP, INF_OP)			\
+  long long HIDDEN						\
+  __sync_fetch_and_##OP##_8 (long long *ptr, long long val)	\
+  {								\
+    int failure;						\
+    long long tmp,tmp2;						\
+								\
+    do {							\
+      tmp = *ptr;						\
+      tmp2 = PFX_OP (tmp INF_OP val);				\
+      failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr);		\
+    } while (failure != 0);					\
+								\
+    return tmp;							\
+  }
+
+FETCH_AND_OP_WORD64 (add,   , +)
+FETCH_AND_OP_WORD64 (sub,   , -)
+FETCH_AND_OP_WORD64 (or,    , |)
+FETCH_AND_OP_WORD64 (and,   , &)
+FETCH_AND_OP_WORD64 (xor,   , ^)
+FETCH_AND_OP_WORD64 (nand, ~, &)
+
+#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
+#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
+
+/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
+   64-bit quantities.
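(Editorial illustration, not in the commit: the two macro families differ
only in whether the old or the new value is returned.  After
preprocessing, FETCH_AND_OP_WORD64 (add, , +) above expands to roughly
the following:)

long long HIDDEN
__sync_fetch_and_add_8 (long long *ptr, long long val)
{
  int failure;
  long long tmp, tmp2;

  do {
    tmp = *ptr;                 // snapshot the current value
    tmp2 = tmp + val;           // compute the replacement
    // retry until no other thread changed *ptr in between
    failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr);
  } while (failure != 0);

  return tmp;                   // fetch-and-op returns the old value
}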
*/ + +#define OP_AND_FETCH_WORD64(OP, PFX_OP, INF_OP) \ + long long HIDDEN \ + __sync_##OP##_and_fetch_8 (long long *ptr, long long val) \ + { \ + int failure; \ + long long tmp,tmp2; \ + \ + do { \ + tmp = *ptr; \ + tmp2 = PFX_OP (tmp INF_OP val); \ + failure = __kernel_cmpxchg64 (&tmp, &tmp2, ptr); \ + } while (failure != 0); \ + \ + return tmp2; \ + } + +OP_AND_FETCH_WORD64 (add, , +) +OP_AND_FETCH_WORD64 (sub, , -) +OP_AND_FETCH_WORD64 (or, , |) +OP_AND_FETCH_WORD64 (and, , &) +OP_AND_FETCH_WORD64 (xor, , ^) +OP_AND_FETCH_WORD64 (nand, ~, &) + +long long HIDDEN +__sync_val_compare_and_swap_8 (long long *ptr, long long oldval, + long long newval) +{ + int failure; + long long actual_oldval; + + while (1) + { + actual_oldval = *ptr; + + if (__builtin_expect (oldval != actual_oldval, 0)) + return actual_oldval; + + failure = __kernel_cmpxchg64 (&actual_oldval, &newval, ptr); + + if (__builtin_expect (!failure, 1)) + return oldval; + } +} + +typedef unsigned char bool; + +bool HIDDEN +__sync_bool_compare_and_swap_8 (long long *ptr, long long oldval, + long long newval) +{ + int failure = __kernel_cmpxchg64 (&oldval, &newval, ptr); + return (failure == 0); +} + +long long HIDDEN +__sync_lock_test_and_set_8 (long long *ptr, long long val) +{ + int failure; + long long oldval; + + do { + oldval = *ptr; + failure = __kernel_cmpxchg64 (&oldval, &val, ptr); + } while (failure != 0); + + return oldval; +} diff --git a/libgcc/config/arm/linux-atomic.c b/libgcc/config/arm/linux-atomic.c new file mode 100644 index 00000000000..80f161d06a7 --- /dev/null +++ b/libgcc/config/arm/linux-atomic.c @@ -0,0 +1,279 @@ +/* Linux-specific atomic operations for ARM EABI. + Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. + Contributed by CodeSourcery. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Kernel helper for compare-and-exchange. */ +typedef int (__kernel_cmpxchg_t) (int oldval, int newval, int *ptr); +#define __kernel_cmpxchg (*(__kernel_cmpxchg_t *) 0xffff0fc0) + +/* Kernel helper for memory barrier. */ +typedef void (__kernel_dmb_t) (void); +#define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0) + +/* Note: we implement byte, short and int versions of atomic operations using + the above kernel helpers; see linux-atomic-64bit.c for "long long" (64-bit) + operations. 
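(Editorial sketch, not part of the commit: the byte and halfword helpers
below cannot pass subword values to __kernel_cmpxchg, so they
read-modify-write the aligned word that contains the subword.  On
little-endian ARM the INVERT_MASK_* values are 0 and the shift is simply
the byte offset times 8; on big-endian the shift is flipped so the mask
still selects the correct lanes.  A standalone model of the update step,
with an invented name, for the little-endian halfword case:)

// For a short at an address with (addr & 3) == 2, shift is 16 and mask
// is 0xffff0000, so only the upper halfword of 'word' is replaced.
unsigned int
place_halfword (unsigned int word, unsigned short val, unsigned int addr)
{
  unsigned int shift = (addr & 3) << 3;
  unsigned int mask = 0xffffu << shift;
  return (word & ~mask) | (((unsigned int) val << shift) & mask);
}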
*/ + +#define HIDDEN __attribute__ ((visibility ("hidden"))) + +#ifdef __ARMEL__ +#define INVERT_MASK_1 0 +#define INVERT_MASK_2 0 +#else +#define INVERT_MASK_1 24 +#define INVERT_MASK_2 16 +#endif + +#define MASK_1 0xffu +#define MASK_2 0xffffu + +#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP) \ + int HIDDEN \ + __sync_fetch_and_##OP##_4 (int *ptr, int val) \ + { \ + int failure, tmp; \ + \ + do { \ + tmp = *ptr; \ + failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ + } while (failure != 0); \ + \ + return tmp; \ + } + +FETCH_AND_OP_WORD (add, , +) +FETCH_AND_OP_WORD (sub, , -) +FETCH_AND_OP_WORD (or, , |) +FETCH_AND_OP_WORD (and, , &) +FETCH_AND_OP_WORD (xor, , ^) +FETCH_AND_OP_WORD (nand, ~, &) + +#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH +#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH + +/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for + subword-sized quantities. */ + +#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN) \ + TYPE HIDDEN \ + NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val) \ + { \ + int *wordptr = (int *) ((unsigned int) ptr & ~3); \ + unsigned int mask, shift, oldval, newval; \ + int failure; \ + \ + shift = (((unsigned int) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + do { \ + oldval = *wordptr; \ + newval = ((PFX_OP (((oldval & mask) >> shift) \ + INF_OP (unsigned int) val)) << shift) & mask; \ + newval |= oldval & ~mask; \ + failure = __kernel_cmpxchg (oldval, newval, wordptr); \ + } while (failure != 0); \ + \ + return (RETURN & mask) >> shift; \ + } + +SUBWORD_SYNC_OP (add, , +, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (or, , |, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (and, , &, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, oldval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval) + +SUBWORD_SYNC_OP (add, , +, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (or, , |, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (and, , &, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, oldval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval) + +#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP) \ + int HIDDEN \ + __sync_##OP##_and_fetch_4 (int *ptr, int val) \ + { \ + int tmp, failure; \ + \ + do { \ + tmp = *ptr; \ + failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr); \ + } while (failure != 0); \ + \ + return PFX_OP (tmp INF_OP val); \ + } + +OP_AND_FETCH_WORD (add, , +) +OP_AND_FETCH_WORD (sub, , -) +OP_AND_FETCH_WORD (or, , |) +OP_AND_FETCH_WORD (and, , &) +OP_AND_FETCH_WORD (xor, , ^) +OP_AND_FETCH_WORD (nand, ~, &) + +SUBWORD_SYNC_OP (add, , +, unsigned short, 2, newval) +SUBWORD_SYNC_OP (sub, , -, unsigned short, 2, newval) +SUBWORD_SYNC_OP (or, , |, unsigned short, 2, newval) +SUBWORD_SYNC_OP (and, , &, unsigned short, 2, newval) +SUBWORD_SYNC_OP (xor, , ^, unsigned short, 2, newval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval) + +SUBWORD_SYNC_OP (add, , +, unsigned char, 1, newval) +SUBWORD_SYNC_OP (sub, , -, unsigned char, 1, newval) +SUBWORD_SYNC_OP (or, , |, unsigned char, 1, newval) +SUBWORD_SYNC_OP (and, , &, unsigned char, 1, newval) +SUBWORD_SYNC_OP (xor, , ^, unsigned char, 1, newval) +SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval) + +int HIDDEN +__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval) +{ + int actual_oldval, fail; + + 
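  /* (Editorial comment, not in the original: the loop below re-reads *ptr
     so that a spurious __kernel_cmpxchg failure -- e.g. one caused by a
     context switch between the load and the helper call -- is simply
     retried, while a genuine value mismatch returns the observed value at
     once.  On success the caller's expected OLDVAL is returned, as the
     __sync contract requires.)  */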
while (1) + { + actual_oldval = *ptr; + + if (__builtin_expect (oldval != actual_oldval, 0)) + return actual_oldval; + + fail = __kernel_cmpxchg (actual_oldval, newval, ptr); + + if (__builtin_expect (!fail, 1)) + return oldval; + } +} + +#define SUBWORD_VAL_CAS(TYPE, WIDTH) \ + TYPE HIDDEN \ + __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ + TYPE newval) \ + { \ + int *wordptr = (int *)((unsigned int) ptr & ~3), fail; \ + unsigned int mask, shift, actual_oldval, actual_newval; \ + \ + shift = (((unsigned int) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + while (1) \ + { \ + actual_oldval = *wordptr; \ + \ + if (__builtin_expect (((actual_oldval & mask) >> shift) != \ + (unsigned int) oldval, 0)) \ + return (actual_oldval & mask) >> shift; \ + \ + actual_newval = (actual_oldval & ~mask) \ + | (((unsigned int) newval << shift) & mask); \ + \ + fail = __kernel_cmpxchg (actual_oldval, actual_newval, \ + wordptr); \ + \ + if (__builtin_expect (!fail, 1)) \ + return oldval; \ + } \ + } + +SUBWORD_VAL_CAS (unsigned short, 2) +SUBWORD_VAL_CAS (unsigned char, 1) + +typedef unsigned char bool; + +bool HIDDEN +__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval) +{ + int failure = __kernel_cmpxchg (oldval, newval, ptr); + return (failure == 0); +} + +#define SUBWORD_BOOL_CAS(TYPE, WIDTH) \ + bool HIDDEN \ + __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval, \ + TYPE newval) \ + { \ + TYPE actual_oldval \ + = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval); \ + return (oldval == actual_oldval); \ + } + +SUBWORD_BOOL_CAS (unsigned short, 2) +SUBWORD_BOOL_CAS (unsigned char, 1) + +void HIDDEN +__sync_synchronize (void) +{ + __kernel_dmb (); +} + +int HIDDEN +__sync_lock_test_and_set_4 (int *ptr, int val) +{ + int failure, oldval; + + do { + oldval = *ptr; + failure = __kernel_cmpxchg (oldval, val, ptr); + } while (failure != 0); + + return oldval; +} + +#define SUBWORD_TEST_AND_SET(TYPE, WIDTH) \ + TYPE HIDDEN \ + __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val) \ + { \ + int failure; \ + unsigned int oldval, newval, shift, mask; \ + int *wordptr = (int *) ((unsigned int) ptr & ~3); \ + \ + shift = (((unsigned int) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH; \ + mask = MASK_##WIDTH << shift; \ + \ + do { \ + oldval = *wordptr; \ + newval = (oldval & ~mask) \ + | (((unsigned int) val << shift) & mask); \ + failure = __kernel_cmpxchg (oldval, newval, wordptr); \ + } while (failure != 0); \ + \ + return (oldval & mask) >> shift; \ + } + +SUBWORD_TEST_AND_SET (unsigned short, 2) +SUBWORD_TEST_AND_SET (unsigned char, 1) + +#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \ + void HIDDEN \ + __sync_lock_release_##WIDTH (TYPE *ptr) \ + { \ + /* All writes before this point must be seen before we release \ + the lock itself. */ \ + __kernel_dmb (); \ + *ptr = 0; \ + } + +SYNC_LOCK_RELEASE (long long, 8) +SYNC_LOCK_RELEASE (int, 4) +SYNC_LOCK_RELEASE (short, 2) +SYNC_LOCK_RELEASE (char, 1) diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm new file mode 100644 index 00000000000..4e17e99b4a5 --- /dev/null +++ b/libgcc/config/arm/t-arm @@ -0,0 +1,3 @@ +LIB1ASMSRC = arm/lib1funcs.S +LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \ + _thumb1_case_uhi _thumb1_case_si diff --git a/libgcc/config/arm/t-bpabi b/libgcc/config/arm/t-bpabi index a3b23dcd20c..e79cbd7064e 100644 --- a/libgcc/config/arm/t-bpabi +++ b/libgcc/config/arm/t-bpabi @@ -1,3 +1,15 @@ +# Add the bpabi.S functions. 
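# (Editorial note, not in the commit: LIB1ASMFUNCS selects per-function
# objects assembled from LIB1ASMSRC, whereas LIB2ADD adds whole C files;
# LIB2ADD_ST entries are believed to go into the static libgcc.a only,
# not into the shared libgcc.)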
+LIB1ASMFUNCS += _aeabi_lcmp _aeabi_ulcmp _aeabi_ldivmod _aeabi_uldivmod
+
+# Add the BPABI C functions.
+LIB2ADD += $(srcdir)/config/arm/bpabi.c \
+	   $(srcdir)/config/arm/unaligned-funcs.c
+
+LIB2ADD_ST += $(srcdir)/config/arm/fp16.c
+
 LIB2ADDEH = $(srcdir)/config/arm/unwind-arm.c \
   $(srcdir)/config/arm/libunwind.S \
   $(srcdir)/config/arm/pr-support.c $(srcdir)/unwind-c.c
+
+# Add the BPABI names.
+SHLIB_MAPFILES += $(srcdir)/config/arm/libgcc-bpabi.ver
diff --git a/libgcc/config/arm/t-elf b/libgcc/config/arm/t-elf
new file mode 100644
index 00000000000..d9e8064e4de
--- /dev/null
+++ b/libgcc/config/arm/t-elf
@@ -0,0 +1,18 @@
+# For most CPUs we have an assembly soft-float implementation.
+# However this is not true for ARMv6M; there we want to use the soft-fp C
+# implementation.  The soft-fp code is only built for ARMv6M, so this list
+# pulls in the asm implementation for the other CPUs.
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func \
+	_call_via_rX _interwork_call_via_rX \
+	_lshrdi3 _ashrdi3 _ashldi3 \
+	_arm_negdf2 _arm_addsubdf3 _arm_muldivdf3 _arm_cmpdf2 _arm_unorddf2 \
+	_arm_fixdfsi _arm_fixunsdfsi \
+	_arm_truncdfsf2 _arm_negsf2 _arm_addsubsf3 _arm_muldivsf3 \
+	_arm_cmpsf2 _arm_unordsf2 _arm_fixsfsi _arm_fixunssfsi \
+	_arm_floatdidf _arm_floatdisf _arm_floatundidf _arm_floatundisf \
+	_clzsi2 _clzdi2
+
+# Currently there is a bug somewhere in GCC's alias analysis
+# or scheduling code that is breaking _fpmul_parts in fp-bit.c.
+# Disabling function inlining is a workaround for this problem.
+HOST_LIBGCC2_CFLAGS += -fno-inline
diff --git a/libgcc/config/arm/t-linux b/libgcc/config/arm/t-linux
new file mode 100644
index 00000000000..4c1efebbd87
--- /dev/null
+++ b/libgcc/config/arm/t-linux
@@ -0,0 +1,7 @@
+LIB1ASMSRC = arm/lib1funcs.S
+LIB1ASMFUNCS = _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_lnx _clzsi2 _clzdi2 \
+	_arm_addsubdf3 _arm_addsubsf3
+
+# Just for these, we omit the frame pointer since it makes such a big
+# difference.
+HOST_LIBGCC2_CFLAGS += -fomit-frame-pointer
diff --git a/libgcc/config/arm/t-linux-eabi b/libgcc/config/arm/t-linux-eabi
new file mode 100644
index 00000000000..a03e2b60064
--- /dev/null
+++ b/libgcc/config/arm/t-linux-eabi
@@ -0,0 +1,5 @@
+# Use a version of div0 which raises SIGFPE, and a special __clear_cache.
+LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache
+
+LIB2ADD_ST += $(srcdir)/config/arm/linux-atomic.c \
+	$(srcdir)/config/arm/linux-atomic-64bit.c
diff --git a/libgcc/config/arm/t-netbsd b/libgcc/config/arm/t-netbsd
new file mode 100644
index 00000000000..95358f931ba
--- /dev/null
+++ b/libgcc/config/arm/t-netbsd
@@ -0,0 +1,7 @@
+# Just for these, we omit the frame pointer since it makes such a big
+# difference.  It is then pointless to add debugging.
+HOST_LIBGCC2_CFLAGS += -fomit-frame-pointer
+
+LIBGCC2_DEBUG_CFLAGS = -g0
+
+LIB2ADD += $(srcdir)/floatunsidf.c $(srcdir)/floatunsisf.c
diff --git a/libgcc/config/arm/t-strongarm-elf b/libgcc/config/arm/t-strongarm-elf
new file mode 100644
index 00000000000..45d1b993218
--- /dev/null
+++ b/libgcc/config/arm/t-strongarm-elf
@@ -0,0 +1,6 @@
+LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _clzsi2 _clzdi2
+
+# Currently there is a bug somewhere in GCC's alias analysis
+# or scheduling code that is breaking _fpmul_parts in fp-bit.c.
+# Disabling function inlining is a workaround for this problem.
+HOST_LIBGCC2_CFLAGS += -fno-inline diff --git a/libgcc/config/arm/t-symbian b/libgcc/config/arm/t-symbian index 6788d5f40b3..06d98faa6ae 100644 --- a/libgcc/config/arm/t-symbian +++ b/libgcc/config/arm/t-symbian @@ -1,2 +1,19 @@ +LIB1ASMFUNCS += _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 + +# These functions have __aeabi equivalents and will never be called by GCC. +# By putting them in LIB1ASMFUNCS, we avoid the standard libgcc2.c code being +# used -- and we make sure that definitions are not available in lib1funcs.S, +# either, so they end up undefined. +LIB1ASMFUNCS += \ + _ashldi3 _ashrdi3 _divdi3 _floatdidf _udivmoddi4 _umoddi3 \ + _udivdi3 _lshrdi3 _moddi3 _muldi3 _negdi2 _cmpdi2 \ + _fixdfdi _fixsfdi _fixunsdfdi _fixunssfdi _floatdisf \ + _negdf2 _addsubdf3 _muldivdf3 _cmpdf2 _unorddf2 _fixdfsi _fixunsdfsi \ + _truncdfsf2 _negsf2 _addsubsf3 _muldivsf3 _cmpsf2 _unordsf2 \ + _fixsfsi _fixunssfsi + +# Include half-float helpers. +LIB2ADD_ST += $(srcdir)/config/arm/fp16.c + # Include the gcc personality routine LIB2ADDEH = $(srcdir)/unwind-c.c $(srcdir)/config/arm/pr-support.c diff --git a/libgcc/config/arm/t-vxworks b/libgcc/config/arm/t-vxworks new file mode 100644 index 00000000000..70ccdc1556a --- /dev/null +++ b/libgcc/config/arm/t-vxworks @@ -0,0 +1 @@ +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _bb_init_func _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 diff --git a/libgcc/config/arm/t-wince-pe b/libgcc/config/arm/t-wince-pe new file mode 100644 index 00000000000..33ea969ccf4 --- /dev/null +++ b/libgcc/config/arm/t-wince-pe @@ -0,0 +1 @@ +LIB1ASMFUNCS += _udivsi3 _divsi3 _umodsi3 _modsi3 _dvmd_tls _call_via_rX _interwork_call_via_rX _clzsi2 _clzdi2 diff --git a/libgcc/config/arm/unaligned-funcs.c b/libgcc/config/arm/unaligned-funcs.c new file mode 100644 index 00000000000..4e684f4fc94 --- /dev/null +++ b/libgcc/config/arm/unaligned-funcs.c @@ -0,0 +1,57 @@ +/* EABI unaligned read/write functions. + + Copyright (C) 2005, 2009 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +int __aeabi_uread4 (void *); +int __aeabi_uwrite4 (int, void *); +long long __aeabi_uread8 (void *); +long long __aeabi_uwrite8 (long long, void *); + +struct __attribute__((packed)) u4 { int data; }; +struct __attribute__((packed)) u8 { long long data; }; + +int +__aeabi_uread4 (void *ptr) +{ + return ((struct u4 *) ptr)->data; +} + +int +__aeabi_uwrite4 (int data, void *ptr) +{ + ((struct u4 *) ptr)->data = data; + return data; +} + +long long +__aeabi_uread8 (void *ptr) +{ + return ((struct u8 *) ptr)->data; +} + +long long +__aeabi_uwrite8 (long long data, void *ptr) +{ + ((struct u8 *) ptr)->data = data; + return data; +} |
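(Editorial sketch, not part of the commit: the packed-struct trick above is
what lets these helpers compile to safe code on cores without hardware
unaligned-access support -- GCC may not assume any alignment for a packed
field, so it emits byte loads and shifts where needed.  A hypothetical
caller-side illustration, reusing the u4 struct shape from the file:)

// Read a 32-bit length field that sits at byte offset 1 of a buffer,
// i.e. at a misaligned address.  The packed struct makes this legal on
// any target; the equivalent cast to int * would not be.
struct __attribute__ ((packed)) u4 { int data; };

int
read_len (const unsigned char *packet)
{
  return ((const struct u4 *) (packet + 1))->data;
}

(On ARMv6 and later the compiler can usually emit a plain word load
instead, since those cores tolerate unaligned word accesses.)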