diff options
author | olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-06-09 21:32:37 +0000 |
---|---|---|
committer | olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-06-09 21:32:37 +0000 |
commit | fe9c9e23e0abf057ead745de1412855c5addcdef (patch) | |
tree | 5cfdce74e6c97d2a997232dd0f3a63aab12de94e | |
parent | 1bb66e410b45fefedd01ed3c63956e1d4c5fdf48 (diff) | |
download | gcc-fe9c9e23e0abf057ead745de1412855c5addcdef.tar.gz |
PR target/6526
* config/sh/lib1funcs.S (sdivsi3_i4, udivsi3_i4): Do not change bits
other than FPSCR.PR and FPSCR.SZ. Add SH4A implementation.
PR target/6526
* gcc.target/sh/pr6526.c: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@199873 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/sh/pr6526.c | 64 | ||||
-rw-r--r-- | libgcc/ChangeLog | 6 | ||||
-rw-r--r-- | libgcc/config/sh/lib1funcs.S | 225 |
4 files changed, 230 insertions, 70 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0ba3af62182..cae5502642f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2013-06-09 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/6526 + * gcc.target/sh/pr6526.c: New. + 2013-06-09 Jakub Jelinek <jakub@redhat.com> PR target/57568 diff --git a/gcc/testsuite/gcc.target/sh/pr6526.c b/gcc/testsuite/gcc.target/sh/pr6526.c new file mode 100644 index 00000000000..a7dd6d87af6 --- /dev/null +++ b/gcc/testsuite/gcc.target/sh/pr6526.c @@ -0,0 +1,64 @@ +/* Check that the XF registers are not clobbered by an integer division + that is done using double precision FPU division. */ +/* { dg-do run { target "sh*-*-*" } } */ +/* { dg-options "-O1 -mdiv=call-fp" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "*" } { "-m4*-single" "-m4*-single-only" } } */ + +#include <assert.h> +#include <stdlib.h> + +extern void __set_fpscr (int); + +void +write_xf0 (float* f) +{ + __asm__ __volatile__ ("frchg; fmov.s @%0,fr0; frchg" : : "r" (f) : "memory"); +} + +void +read_xf0 (float* f) +{ + __asm__ __volatile__ ("frchg; fmov.s fr0,@%0; frchg" : : "r" (f) : "memory"); +} + +int __attribute__ ((noinline)) +test_00 (int a, int b) +{ + return a / b; +} + +unsigned int __attribute__ ((noinline)) +test_01 (unsigned a, unsigned b) +{ + return a / b; +} + +int __attribute__ ((noinline)) +test_02 (int x) +{ + return x & 0; +} + +int +main (void) +{ + float test_value; + int r = 0; + + /* Set FPSCR.FR to 1. 
*/ + __set_fpscr (0x200000); + + test_value = 123; + write_xf0 (&test_value); + r += test_00 (40, 4); + read_xf0 (&test_value); + assert (test_value == 123); + + test_value = 321; + write_xf0 (&test_value); + r += test_01 (50, 5); + read_xf0 (&test_value); + assert (test_value == 321); + + return test_02 (r); +} diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 085432be4b4..832b4256e69 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,9 @@ +2013-06-09 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/6526 + * config/sh/lib1funcs.S (sdivsi3_i4, udivsi3_i4): Do not change bits + other than FPSCR.PR and FPSCR.SZ. Add SH4A implementation. + 2013-06-08 Walter Lee <walt@tilera.com> * config/tilepro/atomic.h: Don't include stdint.h or features.h. diff --git a/libgcc/config/sh/lib1funcs.S b/libgcc/config/sh/lib1funcs.S index 5f0bbff264f..51addf360cf 100644 --- a/libgcc/config/sh/lib1funcs.S +++ b/libgcc/config/sh/lib1funcs.S @@ -1003,11 +1003,17 @@ hiset: sts macl,r0 ! r0 = bb*dd ENDFUNC(GLOBAL(mulsi3)) #endif #endif /* ! __SH5__ */ + +/*------------------------------------------------------------------------------ + 32 bit signed integer division that uses FPU double precision division. */ + #ifdef L_sdivsi3_i4 .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH + #if defined (__SH4__) || defined (__SH2A__) -!! args in r4 and r5, result in fpul, clobber dr0, dr2 +/* This variant is used when FPSCR.PR = 1 (double precision) is the default + setting. + Args in r4 and r5, result in fpul, clobber dr0, dr2. */ .global GLOBAL(sdivsi3_i4) HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) @@ -1021,8 +1027,13 @@ GLOBAL(sdivsi3_i4): ftrc dr0,fpul ENDFUNC(GLOBAL(sdivsi3_i4)) + #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) -!! 
args in r4 and r5, result in fpul, clobber r2, dr0, dr2 +/* This variant is used when FPSCR.PR = 0 (single precision) is the default + setting. + Args in r4 and r5, result in fpul, clobber r2, dr0, dr2. + For this to work, we must temporarily switch the FPU to double precision, + but we had better not touch FPSCR.FR. See PR 6526. */ #if ! __SH5__ || __SH5__ == 32 #if __SH5__ @@ -1031,24 +1042,43 @@ GLOBAL(sdivsi3_i4): .global GLOBAL(sdivsi3_i4) HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) GLOBAL(sdivsi3_i4): - sts.l fpscr,@-r15 - mov #8,r2 - swap.w r2,r2 - lds r2,fpscr - lds r4,fpul - float fpul,dr0 - lds r5,fpul - float fpul,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul + +#ifndef __SH4A__ + mov.l r3,@-r15 + sts fpscr,r2 + mov #8,r3 + swap.w r3,r3 // r3 = 1 << 19 (FPSCR.PR bit) + or r2,r3 + lds r3,fpscr // Set FPSCR.PR = 1. + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + lds r2,fpscr rts - lds.l @r15+,fpscr + mov.l @r15+,r3 +#else +/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. */ + fpchg + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + fpchg + +#endif /* __SH4A__ */ ENDFUNC(GLOBAL(sdivsi3_i4)) #endif /* ! __SH5__ || __SH5__ == 32 */ #endif /* ! __SH4__ || __SH2A__ */ -#endif +#endif /* L_sdivsi3_i4 */ +//------------------------------------------------------------------------------ #ifdef L_sdivsi3 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with sh2e/sh3e code. */ @@ -1367,54 +1397,60 @@ div0: rts mov #0,r0 ENDFUNC(GLOBAL(sdivsi3)) -#endif /* ! __SHMEDIA__ */ -#endif -#ifdef L_udivsi3_i4 +#endif /* ! __SHMEDIA__ */ +#endif /* L_sdivsi3 */ + +/*------------------------------------------------------------------------------ + 32 bit unsigned integer division that uses FPU double precision division. */ +#ifdef L_udivsi3_i4 .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH + #if defined (__SH4__) || defined (__SH2A__) -!!
args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, -!! and t bit +/* This variant is used when FPSCR.PR = 1 (double precision) is the default + setting. + Args in r4 and r5, result in fpul, + clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit */ .global GLOBAL(udivsi3_i4) HIDDEN_FUNC(GLOBAL(udivsi3_i4)) GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - rotr r1 - xor r1,r4 - lds r4,fpul - mova L1,r0 + mov #1,r1 + cmp/hi r1,r5 + bf/s trivial + rotr r1 + xor r1,r4 + lds r4,fpul + mova L1,r0 #ifdef FMOVD_WORKS - fmov.d @r0+,dr4 + fmov.d @r0+,dr4 #else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 + fmov.s @r0+,DR40 + fmov.s @r0,DR41 #endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 rts - ftrc dr0,fpul + ftrc dr0,fpul trivial: rts - lds r4,fpul + lds r4,fpul .align 2 #ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. + .align 3 // Make the double below 8 byte aligned. #endif L1: .double 2147483648 ENDFUNC(GLOBAL(udivsi3_i4)) + #elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__) #if ! __SH5__ || __SH5__ == 32 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 @@ -1436,57 +1472,106 @@ GLOBAL(udivsi3_i4): ENDFUNC(GLOBAL(udivsi3_i4)) #endif /* ! __SH5__ || __SH5__ == 32 */ + #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 +/* This variant is used when FPSCR.PR = 0 (single precision) is the default + setting. + Args in r4 and r5, result in fpul, + clobber r0, r1, r4, r5, dr0, dr2, dr4. + For this to work, we must temporarily switch the FPU to double precision, + but we had better not touch FPSCR.FR. See PR 6526.
*/ .global GLOBAL(udivsi3_i4) HIDDEN_FUNC(GLOBAL(udivsi3_i4)) GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - sts.l fpscr,@-r15 - mova L1,r0 - lds.l @r0+,fpscr - rotr r1 - xor r1,r4 - lds r4,fpul + +#ifndef __SH4A__ + mov #1,r1 + cmp/hi r1,r5 + bf/s trivial + rotr r1 // r1 = 1 << 31 + sts.l fpscr,@-r15 + xor r1,r4 + mov.l @(0,r15),r0 + xor r1,r5 + mov.l L2,r1 + lds r4,fpul + or r0,r1 + mova L1,r0 + lds r1,fpscr #ifdef FMOVD_WORKS - fmov.d @r0+,dr4 + fmov.d @r0+,dr4 #else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 + fmov.s @r0+,DR40 + fmov.s @r0,DR41 #endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul rts - lds.l @r15+,fpscr + lds.l @r15+,fpscr #ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. + .align 3 // Make the double below 8 byte aligned. #endif trivial: rts - lds r4,fpul + lds r4,fpul .align 2 -L1: -#ifndef FMOVD_WORKS - .long 0x80000 +L2: +#ifdef FMOVD_WORKS + .long 0x180000 // FPSCR.PR = 1, FPSCR.SZ = 1 #else - .long 0x180000 + .long 0x80000 // FPSCR.PR = 1 #endif +L1: + .double 2147483648 + +#else +/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. + Although on SH4A fmovd usually works, it would require either additional + two fschg instructions or an FPSCR push + pop. It's not worth the effort + for loading only one double constant. */ + mov #1,r1 + cmp/hi r1,r5 + bf/s trivial + rotr r1 // r1 = 1 << 31 + fpchg + mova L1,r0 + xor r1,r4 + fmov.s @r0+,DR40 + lds r4,fpul + fmov.s @r0,DR41 + xor r1,r5 + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + fpchg + +trivial: + rts + lds r4,fpul + + .align 2 +L1: .double 2147483648 +#endif /* __SH4A__ */ + + ENDFUNC(GLOBAL(udivsi3_i4)) #endif /* ! 
__SH4__ */ -#endif +#endif /* L_udivsi3_i4 */ #ifdef L_udivsi3 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with |