diff options
author | olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-06-09 21:32:37 +0000 |
---|---|---|
committer | olegendo <olegendo@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-06-09 21:32:37 +0000 |
commit | fe9c9e23e0abf057ead745de1412855c5addcdef (patch) | |
tree | 5cfdce74e6c97d2a997232dd0f3a63aab12de94e /libgcc/config/sh | |
parent | 1bb66e410b45fefedd01ed3c63956e1d4c5fdf48 (diff) | |
download | gcc-fe9c9e23e0abf057ead745de1412855c5addcdef.tar.gz |
PR target/6526
* config/sh/lib1funcs.S (sdivsi3_i4, udivsi3_i4): Do not change bits
other than FPSCR.PR and FPSCR.SZ. Add SH4A implementation.
PR target/6526
* gcc.target/sh/pr6526.c: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@199873 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgcc/config/sh')
-rw-r--r-- | libgcc/config/sh/lib1funcs.S | 225 |
1 files changed, 155 insertions, 70 deletions
diff --git a/libgcc/config/sh/lib1funcs.S b/libgcc/config/sh/lib1funcs.S index 5f0bbff264f..51addf360cf 100644 --- a/libgcc/config/sh/lib1funcs.S +++ b/libgcc/config/sh/lib1funcs.S @@ -1003,11 +1003,17 @@ hiset: sts macl,r0 ! r0 = bb*dd ENDFUNC(GLOBAL(mulsi3)) #endif #endif /* ! __SH5__ */ + +/*------------------------------------------------------------------------------ + 32 bit signed integer division that uses FPU double precision division. */ + #ifdef L_sdivsi3_i4 .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH + #if defined (__SH4__) || defined (__SH2A__) -!! args in r4 and r5, result in fpul, clobber dr0, dr2 +/* This variant is used when FPSCR.PR = 1 (double precision) is the default + setting. + Args in r4 and r5, result in fpul, clobber dr0, dr2. */ .global GLOBAL(sdivsi3_i4) HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) @@ -1021,8 +1027,13 @@ GLOBAL(sdivsi3_i4): ftrc dr0,fpul ENDFUNC(GLOBAL(sdivsi3_i4)) + #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) -!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 +/* This variant is used when FPSCR.PR = 0 (sigle precision) is the default + setting. + Args in r4 and r5, result in fpul, clobber r2, dr0, dr2. + For this to work, we must temporarily switch the FPU do double precision, + but we better do not touch FPSCR.FR. See PR 6526. */ #if ! __SH5__ || __SH5__ == 32 #if __SH5__ @@ -1031,24 +1042,43 @@ GLOBAL(sdivsi3_i4): .global GLOBAL(sdivsi3_i4) HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) GLOBAL(sdivsi3_i4): - sts.l fpscr,@-r15 - mov #8,r2 - swap.w r2,r2 - lds r2,fpscr - lds r4,fpul - float fpul,dr0 - lds r5,fpul - float fpul,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul + +#ifndef __SH4A__ + mov.l r3,@-r15 + sts fpscr,r2 + mov #8,r3 + swap.w r3,r3 // r3 = 1 << 19 (FPSCR.PR bit) + or r2,r3 + lds r3,fpscr // Set FPSCR.PR = 1. + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + lds r2,fpscr rts - lds.l @r15+,fpscr + mov.l @r15+,r3 +#else +/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. */ + fpchg + lds r4,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + fpchg + +#endif /* __SH4A__ */ ENDFUNC(GLOBAL(sdivsi3_i4)) #endif /* ! __SH5__ || __SH5__ == 32 */ #endif /* ! __SH4__ || __SH2A__ */ -#endif +#endif /* L_sdivsi3_i4 */ +//------------------------------------------------------------------------------ #ifdef L_sdivsi3 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with sh2e/sh3e code. */ @@ -1367,54 +1397,60 @@ div0: rts mov #0,r0 ENDFUNC(GLOBAL(sdivsi3)) -#endif /* ! __SHMEDIA__ */ -#endif -#ifdef L_udivsi3_i4 +#endif /* ! __SHMEDIA__ */ +#endif /* L_sdivsi3 */ + +/*------------------------------------------------------------------------------ + 32 bit unsigned integer division that uses FPU double precision division. */ +#ifdef L_udivsi3_i4 .title "SH DIVIDE" -!! 4 byte integer Divide code for the Renesas SH + #if defined (__SH4__) || defined (__SH2A__) -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, -!! and t bit +/* This variant is used when FPSCR.PR = 1 (double precision) is the default + setting. + Args in r4 and r5, result in fpul, + clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit */ .global GLOBAL(udivsi3_i4) HIDDEN_FUNC(GLOBAL(udivsi3_i4)) GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - rotr r1 - xor r1,r4 - lds r4,fpul - mova L1,r0 + mov #1,r1 + cmp/hi r1,r5 + bf/s trivial + rotr r1 + xor r1,r4 + lds r4,fpul + mova L1,r0 #ifdef FMOVD_WORKS - fmov.d @r0+,dr4 + fmov.d @r0+,dr4 #else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 + fmov.s @r0+,DR40 + fmov.s @r0,DR41 #endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 + float fpul,dr0 + xor r1,r5 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 rts - ftrc dr0,fpul + ftrc dr0,fpul trivial: rts - lds r4,fpul + lds r4,fpul .align 2 #ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. + .align 3 // Make the double below 8 byte aligned. #endif L1: .double 2147483648 ENDFUNC(GLOBAL(udivsi3_i4)) + #elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__) #if ! __SH5__ || __SH5__ == 32 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 @@ -1436,57 +1472,106 @@ GLOBAL(udivsi3_i4): ENDFUNC(GLOBAL(udivsi3_i4)) #endif /* ! __SH5__ || __SH5__ == 32 */ + #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) -!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 +/* This variant is used when FPSCR.PR = 0 (sigle precision) is the default + setting. + Args in r4 and r5, result in fpul, + clobber r0, r1, r4, r5, dr0, dr2, dr4. + For this to work, we must temporarily switch the FPU do double precision, + but we better do not touch FPSCR.FR. See PR 6526. */ .global GLOBAL(udivsi3_i4) HIDDEN_FUNC(GLOBAL(udivsi3_i4)) GLOBAL(udivsi3_i4): - mov #1,r1 - cmp/hi r1,r5 - bf trivial - sts.l fpscr,@-r15 - mova L1,r0 - lds.l @r0+,fpscr - rotr r1 - xor r1,r4 - lds r4,fpul + +#ifndef __SH4A__ + mov #1,r1 + cmp/hi r1,r5 + bf/s trivial + rotr r1 // r1 = 1 << 31 + sts.l fpscr,@-r15 + xor r1,r4 + mov.l @(0,r15),r0 + xor r1,r5 + mov.l L2,r1 + lds r4,fpul + or r0,r1 + mova L1,r0 + lds r1,fpscr #ifdef FMOVD_WORKS - fmov.d @r0+,dr4 + fmov.d @r0+,dr4 #else - fmov.s @r0+,DR40 - fmov.s @r0,DR41 + fmov.s @r0+,DR40 + fmov.s @r0,DR41 #endif - float fpul,dr0 - xor r1,r5 - lds r5,fpul - float fpul,dr2 - fadd dr4,dr0 - fadd dr4,dr2 - fdiv dr2,dr0 - ftrc dr0,fpul + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul rts - lds.l @r15+,fpscr + lds.l @r15+,fpscr #ifdef FMOVD_WORKS - .align 3 ! make double below 8 byte aligned. + .align 3 // Make the double below 8 byte aligned. #endif trivial: rts - lds r4,fpul + lds r4,fpul .align 2 -L1: -#ifndef FMOVD_WORKS - .long 0x80000 +L2: +#ifdef FMOVD_WORKS + .long 0x180000 // FPSCR.PR = 1, FPSCR.SZ = 1 #else - .long 0x180000 + .long 0x80000 // FPSCR.PR = 1 #endif +L1: + .double 2147483648 + +#else +/* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. + Although on SH4A fmovd usually works, it would require either additional + two fschg instructions or an FPSCR push + pop. It's not worth the effort + for loading only one double constant. */ + mov #1,r1 + cmp/hi r1,r5 + bf/s trivial + rotr r1 // r1 = 1 << 31 + fpchg + mova L1,r0 + xor r1,r4 + fmov.s @r0+,DR40 + lds r4,fpul + fmov.s @r0,DR41 + xor r1,r5 + float fpul,dr0 + lds r5,fpul + float fpul,dr2 + fadd dr4,dr0 + fadd dr4,dr2 + fdiv dr2,dr0 + ftrc dr0,fpul + rts + fpchg + +trivial: + rts + lds r4,fpul + + .align 2 +L1: .double 2147483648 +#endif /* __SH4A__ */ + + ENDFUNC(GLOBAL(udivsi3_i4)) #endif /* ! __SH4__ */ -#endif +#endif /* L_udivsi3_i4 */ #ifdef L_udivsi3 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with |