From 5be308824b5ea3f09f5dea9906592199c6145165 Mon Sep 17 00:00:00 2001 From: amylaar Date: Fri, 3 Nov 2006 14:52:19 +0000 Subject: gcc: 2006-11-03 J"orn Rennecke * config/sh/crt1.asm: Fix #ifdef indent. 2006-11-03 J"orn Rennecke Merged from STMicroelectronics sources: 2006-10-06 Andrew Stubbs * config/sh/crt1.asm (vbr_600): Add missing #if. 2006-08-03 J"orn Rennecke * sh.opt (mfused-madd): New option. * sh.md (mac_media, macsf3): Make conditional on TARGET_FMAC. 2006-07-04 Andrew Stubbs * config/sh/crt1.asm (vbr_start): Move to new section .test.vbr. Remove pointless handler at VBR+0. (vbr_200, vbr_300, vbr_500): Remove pointless handler. (vbr_600): Save and restore mach and macl, fpul and fpscr and fr0 to fr7. Make sure the timer handler is called with the correct FPU precision setting, according to the ABI. 2006-06-14 J"orn Rennecke * config/sh/sh.opt (m2a-single, m2a-single-only): Fix Condition. * config/sh/sh.h (SUPPORT_SH2A_NOFPU): Fix condition. (SUPPORT_SH2A_SINGLE_ONLY, SUPPORT_SH2A_SINGLE_ONLY): Likewise. 2006-06-09 J"orn Rennecke * sh.md (cmpgeusi_t): Change into define_insn_and_split. Accept zero as second operand. 2006-04-28 J"orn Rennecke * config/sh/divtab-sh4-300.c, config/sh/lib1funcs-4-300.asm: Fixed some bugs related to negative values, in particular -0 and overflow at -0x80000000. * config/sh/divcost-analysis: Added sh4-300 figures. 2006-04-27 J"orn Rennecke * config/sh/t-sh (MULTILIB_MATCHES): Add -m4-300* / -m4-340 options. 2006-04-26 J"orn Rennecke * config/sh/t-sh (OPT_EXTRA_PARTS): Add libgcc-4-300.a. ($(T)div_table-4-300.o, $(T)libgcc-4-300.a): New rules. * config/sh/divtab-sh4-300.c, config/sh/lib1funcs-4-300.asm: New files. * config/sh/embed-elf.h (LIBGCC_SPEC): Use -lgcc-4-300 for -m4-300* / -m4-340. 2006-04-24 J"orn Rennecke SH4-300 scheduling description & fixes to SH4-[12]00 description: * sh.md: New instruction types: fstore, movi8, fpscr_toggle, gp_mac, mac_mem, mem_mac, dfp_mul, fp_cmp. (insn_class, dfp_comp, any_fp_comp): Update. 
(push_fpul, movsf_ie, fpu_switch, toggle_sz, toggle_pr): Update type. (cmpgtsf_t, cmpeqsf_t, cmpgtsf_t_i4, cmpeqsf_t_i4): Likewise. (muldf3_i): Likewise. (movsi_i): Split rI08 alternative into two separate alternatives. Update type. (movsi_ie, movsi_i_lowpart): Likewise. (movqi_i): Split ri alternative into two separate alternatives. Update type. * sh1.md (sh1_load_store, sh1_fp): Update. * sh4.md (sh4_store, sh4_mac_gp, fp_arith, fp_double_arith): Update. (mac_mem, sh4_fpscr_toggle): New insn_reservations. * sh4a.md (sh4a_mov, sh4a_load, sh4a_store, sh4a_fp_arith): Update. (sh4a_fp_double_arith): Likewise. * sh4-300.md: New file. * sh.c (sh_handle_option): Handle m4-300* options. (sh_adjust_cost): Fix latency of auto-increments. Handle SH4-300 differently than other SH4s. Check for new insn types. * sh.h (OVERRIDE_OPTIONS): Initialize sh_branch_cost if it has not been set by an option. * sh.opt (m4-300, m4-100-nofpu, m4-200-nofpu): New options. (m4-300-nofpu, m4-340, m4-300-single, m4-300-single-only): Likewise. (mbranch-cost=): Likewise. * superh.h (STARTFILE_SPEC): Take -m4-340 into account. * sh.md (mulsf3): Remove special expansion code. (mulsf3_ie): Now a define_insn_and_split. (macsf3): Allow for TARGET_SH4. * sh.md (cbranchsi4, cbranchdi4, cbranchdi4_i): New patterns. * sh.c (prepare_cbranch_operands, expand_cbranchsi4): New functions. (expand_cbranchdi4): Likewise. (sh_rtx_costs): Give lower cost for certain CONST_INT values and for CONST_DOUBLE if the outer code is COMPARE. * sh.h (OPTIMIZATION_OPTIONS): If not optimizing for size, set TARGET_CBRANCHDI4 and TARGET_EXPAND_CBRANCHDI4. (OVERRIDE_OPTIONS): For TARGET_SHMEDIA, clear TARGET_CBRANCHDI4. (LEGITIMATE_CONSTANT_P): Also allow DImode and VOIDmode CONST_DOUBLEs. Remove redundant fp_{zero,one}_operand checks. * sh.opt (mcbranchdi, mexpand-cbranchdi, mcmpeqdi): New options. * sh-protos.h (prepare_cbranch_operands, expand_cbranchsi4): Declare. (expand_cbranchdi4): Likewise. 
2006-04-20 J"orn Rennecke * sh.h (LOCAL_ALIGNMENT): Use DATA_ALIGNMENT. gcc/testsuite: 2006-11-03 J"orn Rennecke * testsuite/gcc.c-torture/execute/arith-rand-ll.c: Also test for bogus rest sign. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@118458 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 88 ++ gcc/config/sh/crt1.asm | 134 ++- gcc/config/sh/divcost-analysis | 7 +- gcc/config/sh/divtab-sh4-300.c | 81 ++ gcc/config/sh/embed-elf.h | 1 + gcc/config/sh/lib1funcs-4-300.asm | 938 +++++++++++++++++++++ gcc/config/sh/sh-protos.h | 6 +- gcc/config/sh/sh.c | 447 +++++++++- gcc/config/sh/sh.h | 20 +- gcc/config/sh/sh.md | 212 +++-- gcc/config/sh/sh.opt | 57 +- gcc/config/sh/sh1.md | 6 +- gcc/config/sh/sh4-300.md | 288 +++++++ gcc/config/sh/sh4.md | 28 +- gcc/config/sh/sh4a.md | 14 +- gcc/config/sh/superh.h | 10 +- gcc/config/sh/t-sh | 17 +- gcc/testsuite/ChangeLog | 5 + .../gcc.c-torture/execute/arith-rand-ll.c | 2 +- 19 files changed, 2131 insertions(+), 230 deletions(-) create mode 100644 gcc/config/sh/divtab-sh4-300.c create mode 100644 gcc/config/sh/lib1funcs-4-300.asm create mode 100644 gcc/config/sh/sh4-300.md diff --git a/gcc/ChangeLog b/gcc/ChangeLog index dd6b7223607..5212852822f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,91 @@ +2006-11-03 J"orn Rennecke + + * config/sh/crt1.asm: Fix #ifdef indent. + +2006-11-03 J"orn Rennecke + Merged from STMicroelectronics sources: + 2006-10-06 Andrew Stubbs + * config/sh/crt1.asm (vbr_600): Add missing #if. + 2006-08-03 J"orn Rennecke + * sh.opt (mfused-madd): New option. + * sh.md (mac_media, macsf3): Make conditional on TARGET_FMAC. + 2006-07-04 Andrew Stubbs + * config/sh/crt1.asm (vbr_start): Move to new section .test.vbr. + Remove pointless handler at VBR+0. + (vbr_200, vbr_300, vbr_500): Remove pointless handler. + (vbr_600): Save and restore mach and macl, fpul and fpscr and fr0 to + fr7. Make sure the timer handler is called with the correct FPU + precision setting, according to the ABI. 
+ 2006-06-14 J"orn Rennecke + * config/sh/sh.opt (m2a-single, m2a-single-only): Fix Condition. + * config/sh/sh.h (SUPPORT_SH2A_NOFPU): Fix condition. + (SUPPORT_SH2A_SINGLE_ONLY, SUPPORT_SH2A_SINGLE_ONLY): Likewise. + 2006-06-09 J"orn Rennecke + * sh.md (cmpgeusi_t): Change into define_insn_and_split. Accept + zero as second operand. + 2006-04-28 J"orn Rennecke + * config/sh/divtab-sh4-300.c, config/sh/lib1funcs-4-300.asm: + Fixed some bugs related to negative values, in particular -0 + and overflow at -0x80000000. + * config/sh/divcost-analysis: Added sh4-300 figures. + 2006-04-27 J"orn Rennecke + * config/sh/t-sh (MULTILIB_MATCHES): Add -m4-300* / -m4-340 options. + 2006-04-26 J"orn Rennecke + * config/sh/t-sh (OPT_EXTRA_PARTS): Add libgcc-4-300.a. + ($(T)div_table-4-300.o, $(T)libgcc-4-300.a): New rules. + * config/sh/divtab-sh4-300.c, config/sh/lib1funcs-4-300.asm: + New files. + * config/sh/embed-elf.h (LIBGCC_SPEC): Use -lgcc-4-300 for -m4-300* / + -m4-340. + 2006-04-24 J"orn Rennecke + SH4-300 scheduling description & fixes to SH4-[12]00 description: + * sh.md: New instruction types: fstore, movi8, fpscr_toggle, gp_mac, + mac_mem, mem_mac, dfp_mul, fp_cmp. + (insn_class, dfp_comp, any_fp_comp): Update. + (push_fpul, movsf_ie, fpu_switch, toggle_sz, toggle_pr): Update type. + (cmpgtsf_t, "cmpeqsf_t, cmpgtsf_t_i4, cmpeqsf_t_i4): Likewise. + (muldf3_i): Likewise. + (movsi_i): Split rI08 alternative into two separate alternatives. + Update type. + (movsi_ie, movsi_i_lowpart): Likewise. + (movqi_i): Split ri alternative into two separate alternatives. + Update type. + * sh1.md (sh1_load_store, sh1_fp): Update. + * sh4.md (sh4_store, sh4_mac_gp, fp_arith, fp_double_arith): Update. + (mac_mem, sh4_fpscr_toggle): New insn_reservations. + * sh4a.md (sh4a_mov, sh4a_load, sh4a_store, sh4a_fp_arith): Update. + (sh4a_fp_double_arith): Likewise. + * sh4-300.md: New file. + * sh.c (sh_handle_option): Handle m4-300* options. 
+ (sh_adjust_cost): Fix latency of auto-increments. + Handle SH4-300 differently than other SH4s. Check for new insn types. + * sh.h (OVERRIDE_OPTIONS): Initilize sh_branch_cost if it has not + been set by an option. + * sh.opt (m4-300, m4-100-nofpu, m4-200-nofpu): New options. + (m4-300-nofpu, -m4-340, m4-300-single, m4-300-single-only): Likewise. + (mbranch-cost=): Likewise. + * superh.h (STARTFILE_SPEC): Take -m4-340 into account. + + * sh.md (mulsf3): Remove special expansion code. + (mulsf3_ie): Now a define_insn_and_split. + (macsf3): Allow for TARGET_SH4. + + * sh.md (cbranchsi4, cbranchdi4, cbranchdi4_i): New patterns. + * sh.c (prepare_cbranch_operands, expand_cbranchsi4): New functions. + (expand_cbranchdi4): Likewise. + (sh_rtx_costs): Give lower cost for certain CONST_INT values and for + CONST_DOUBLE if the outer code is COMPARE. + * sh.h (OPTIMIZATION_OPTIONS): If not optimizing for size, set + TARGET_CBRANCHDI4 and TARGET_EXPAND_CBRANCHDI4. + (OVERRIDE_OPTIONS): For TARGET_SHMEDIA, clear TARGET_CBRANCHDI4. + (LEGITIMATE_CONSTANT_P): Also allow DImode and VOIDmode CONST_DOUBLEs. + Remove redundant fp_{zero,one}_operand checks. + * sh.opt (mcbranchdi, mexpand-cbranchdi, mcmpeqdi): New options. + * sh-protos.h (prepare_cbranch_operands, expand_cbranchsi4): Declare. + (expand_cbranchdi4): Likewise. + 2006-04-20 J"orn Rennecke + * sh.h (LOCAL_ALIGNMENT): Use DATA_ALIGNMENT. + 2006-11-02 Andrew Pinski * doc/md.texi (RS6000 constraints): Document H, Z, a, t, and W diff --git a/gcc/config/sh/crt1.asm b/gcc/config/sh/crt1.asm index c110fa07427..7aa684434d7 100644 --- a/gcc/config/sh/crt1.asm +++ b/gcc/config/sh/crt1.asm @@ -1,4 +1,5 @@ -/* Copyright (C) 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. +/* Copyright (C) 2000, 2001, 2003, 2004, 2005, 2006 + Free Software Foundation, Inc. This file was pretty much copied from newlib. This file is part of GCC. @@ -894,25 +895,12 @@ ___main: nop #ifdef VBR_SETUP ! 
Exception handlers - .balign 256 + .section .text.vbr, "ax" vbr_start: - mov.l 2f, r0 ! load the old vbr setting (if any) - mov.l @r0, r0 - cmp/eq #0, r0 - bf 1f - ! no previous vbr - jump to own generic handler - bra handler - nop -1: ! there was a previous handler - chain them - jmp @r0 - nop - .balign 4 -2: - .long old_vbr - .balign 256 + .org 0x100 vbr_100: - #ifdef PROFILE +#ifdef PROFILE ! Note on register usage. ! we use r0..r3 as scratch in this code. If we are here due to a trapa for profiling ! then this is OK as we are just before executing any function code. @@ -1017,50 +1005,7 @@ handler_100: 2: .long old_vbr - .balign 256 -vbr_200: - mov.l 2f, r0 ! load the old vbr setting (if any) - mov.l @r0, r0 - cmp/eq #0, r0 - bf 1f - ! no previous vbr - jump to own generic handler - bra handler - nop -1: ! there was a previous handler - chain them - add #0x7f, r0 ! 0x7f - add #0x7f, r0 ! 0xfe - add #0x7f, r0 ! 0x17d - add #0x7f, r0 ! 0x1fc - add #0x4, r0 ! add 0x200 without corrupting another register - jmp @r0 - nop - .balign 4 -2: - .long old_vbr - - .balign 256 -vbr_300: - mov.l 2f, r0 ! load the old vbr setting (if any) - mov.l @r0, r0 - cmp/eq #0, r0 - bf 1f - ! no previous vbr - jump to own generic handler - bra handler - nop -1: ! there was a previous handler - chain them - rotcr r0 - rotcr r0 - add #0x7f, r0 ! 0x1fc - add #0x41, r0 ! 0x300 - rotcl r0 - rotcl r0 ! Add 0x300 without corrupting another register - jmp @r0 - nop - .balign 4 -2: - .long old_vbr - - .balign 256 + .org 0x400 vbr_400: ! Should be at vbr+0x400 mov.l 2f, r0 ! load the old vbr setting (if any) mov.l @r0, r0 @@ -1103,28 +1048,7 @@ handler: jmp @r2 nop - .balign 256 -vbr_500: - mov.l 2f, r0 ! load the old vbr setting (if any) - mov.l @r0, r0 - cmp/eq #0, r0 - ! no previous vbr - jump to own generic handler - bt handler - ! there was a previous handler - chain them - rotcr r0 - rotcr r0 - add #0x7f, r0 ! 0x1fc - add #0x7f, r0 ! 0x3f8 - add #0x42, r0 ! 0x500 - rotcl r0 - rotcl r0 ! 
Add 0x500 without corrupting another register - jmp @r0 - nop - .balign 4 -2: - .long old_vbr - - .balign 256 + .org 0x600 vbr_600: #ifdef PROFILE ! Should be at vbr+0x600 @@ -1140,11 +1064,48 @@ vbr_600: mov.l r6,@-r15 mov.l r7,@-r15 sts.l pr,@-r15 + sts.l mach,@-r15 + sts.l macl,@-r15 +#if defined(__SH_FPU_ANY__) + ! Save fpul and fpscr, save fr0-fr7 in 64 bit mode + ! and set the pervading precision for the timer_handler + mov #0,r0 + sts.l fpul,@-r15 + sts.l fpscr,@-r15 + lds r0,fpscr ! Clear fpscr + fmov fr0,@-r15 + fmov fr1,@-r15 + fmov fr2,@-r15 + fmov fr3,@-r15 + mov.l pervading_precision_k,r0 + fmov fr4,@-r15 + fmov fr5,@-r15 + mov.l @r0,r0 + fmov fr6,@-r15 + fmov fr7,@-r15 + lds r0,fpscr +#endif /* __SH_FPU_ANY__ */ ! Pass interrupted pc to timer_handler as first parameter (r4). stc spc, r4 mov.l timer_handler_k, r0 jsr @r0 nop +#if defined(__SH_FPU_ANY__) + mov #0,r0 + lds r0,fpscr ! Clear the fpscr + fmov @r15+,fr7 + fmov @r15+,fr6 + fmov @r15+,fr5 + fmov @r15+,fr4 + fmov @r15+,fr3 + fmov @r15+,fr2 + fmov @r15+,fr1 + fmov @r15+,fr0 + lds.l @r15+,fpscr + lds.l @r15+,fpul +#endif /* __SH_FPU_ANY__ */ + lds.l @r15+,macl + lds.l @r15+,mach lds.l @r15+,pr mov.l @r15+,r7 mov.l @r15+,r6 @@ -1157,6 +1118,13 @@ vbr_600: stc sgr, r15 ! Restore r15, destroyed by this sequence. rte nop +#if defined(__SH_FPU_ANY__) + .balign 4 +pervading_precision_k: +#define CONCAT1(A,B) A##B +#define CONCAT(A,B) CONCAT1(A,B) + .long CONCAT(__USER_LABEL_PREFIX__,__fpscr_values)+4 +#endif #else mov.l 2f, r0 ! Load the old vbr setting (if any). 
mov.l @r0, r0 diff --git a/gcc/config/sh/divcost-analysis b/gcc/config/sh/divcost-analysis index 541e31324b3..0296269bb52 100644 --- a/gcc/config/sh/divcost-analysis +++ b/gcc/config/sh/divcost-analysis @@ -38,12 +38,17 @@ div_r8_neg -> div_r8_neg_end: 18 div_le128_neg -> div_by_1_neg: 4 div_le128_neg -> rts 18 - absolute divisor range: + sh4-200 absolute divisor range: 1 [2..128] [129..64K) [64K..|divident|/256] >=64K,>|divident/256| udiv 18 22 38 32 30 sdiv pos: 20 24 41 35 32 sdiv neg: 15 25 42 36 33 + sh4-300 absolute divisor range: + 8 bit 16 bit 24 bit > 24 bit +udiv 15 35 28 25 +sdiv 14 36 34 31 + fp-based: diff --git a/gcc/config/sh/divtab-sh4-300.c b/gcc/config/sh/divtab-sh4-300.c new file mode 100644 index 00000000000..448b0b8af8e --- /dev/null +++ b/gcc/config/sh/divtab-sh4-300.c @@ -0,0 +1,81 @@ +/* Copyright (C) 2004, 2006 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. 
If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +/* Calculate division table for ST40-300 integer division + Contributed by Joern Rennecke + joern.rennecke@st.com */ + +#include +#include + +int +main () +{ + int i, j; + double q, r, err, max_err = 0, max_s_err = 0; + + puts("/* This table has been generated by divtab-sh4.c. */"); + puts ("\t.balign 4"); + for (i = -128; i < 128; i++) + { + int n = 0; + if (i == 0) + { + /* output some dummy number for 1/0. */ + puts ("LOCAL(div_table_clz):\n\t.byte\t0"); + continue; + } + for (j = i < 0 ? -i : i; j < 128; j += j) + n++; + printf ("\t.byte\t%d\n", n - 7); + } + puts("\ +/* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,\n\ + or in bit 33 for powers of two. */\n\ + .balign 4"); + for (i = -128; i < 128; i++) + { + if (i == 0) + { + puts ("LOCAL(div_table_inv):\n\t.long\t0x0"); + continue; + } + j = i < 0 ? -i : i; + while (j < 64) + j += j; + q = 4.*(1<<30)*128/j; + r = ceil (q); + printf ("\t.long\t0x%X\n", (unsigned) r); + err = r - q; + if (err > max_err) + max_err = err; + err = err * j / 128; + if (err > max_s_err) + max_s_err = err; + } + printf ("\t/* maximum error: %f scaled: %f*/\n", max_err, max_s_err); + exit (0); +} diff --git a/gcc/config/sh/embed-elf.h b/gcc/config/sh/embed-elf.h index 4497cf34636..0d817cacf85 100644 --- a/gcc/config/sh/embed-elf.h +++ b/gcc/config/sh/embed-elf.h @@ -32,6 +32,7 @@ Boston, MA 02110-1301, USA. 
*/ #define LIBGCC_SPEC "%{!shared: \ %{m4-100*:-lic_invalidate_array_4-100} \ %{m4-200*:-lic_invalidate_array_4-200} \ + %{m4-300*|-m4-340:-lic_invalidate_array_4a %{!Os: -lgcc-4-300}} \ %{m4a*:-lic_invalidate_array_4a}} \ %{Os: -lgcc-Os-4-200} \ -lgcc \ diff --git a/gcc/config/sh/lib1funcs-4-300.asm b/gcc/config/sh/lib1funcs-4-300.asm new file mode 100644 index 00000000000..b07912425af --- /dev/null +++ b/gcc/config/sh/lib1funcs-4-300.asm @@ -0,0 +1,938 @@ +/* Copyright (C) 2004, 2006 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file into combinations with other programs, +and to distribute those combinations without any restriction coming +from the use of this file. (The General Public License restrictions +do apply in other respects; for example, they cover modification of +the file, and distribution when not linked into a combine +executable.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +/* libgcc routines for the STMicroelectronics ST40-300 CPU. + Contributed by J"orn Rennecke joern.rennecke@st.com. 
*/ + +#include "lib1funcs.h" + +#ifdef L_div_table +#if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) +/* This code used shld, thus is not suitable for SH1 / SH2. */ + +/* Signed / unsigned division without use of FPU, optimized for SH4-300. + Uses a lookup table for divisors in the range -128 .. +127, and + div1 with case distinction for larger divisors in three more ranges. + The code is lumped together with the table to allow the use of mova. */ +#ifdef __LITTLE_ENDIAN__ +#define L_LSB 0 +#define L_LSWMSB 1 +#define L_MSWLSB 2 +#else +#define L_LSB 3 +#define L_LSWMSB 2 +#define L_MSWLSB 1 +#endif + + .global GLOBAL(udivsi3_i4i) + .global GLOBAL(sdivsi3_i4i) + FUNC(GLOBAL(udivsi3_i4i)) + FUNC(GLOBAL(sdivsi3_i4i)) + + .balign 4 +LOCAL(div_ge8m): ! 10 cycles up to here + rotcr r1 ! signed shift must use original sign from r4 + div0s r5,r4 + mov #24,r7 + shld r7,r6 + shad r0,r1 + rotcl r6 + div1 r5,r1 + swap.w r5,r0 ! detect -0x80000000 : 0x800000 + rotcl r6 + swap.w r4,r7 + div1 r5,r1 + swap.b r7,r7 + rotcl r6 + or r7,r0 + div1 r5,r1 + swap.w r0,r7 + rotcl r6 + or r7,r0 + div1 r5,r1 + add #-0x80,r0 + rotcl r6 + extu.w r0,r0 + div1 r5,r1 + neg r0,r0 + rotcl r6 + swap.w r0,r0 + div1 r5,r1 + mov.l @r15+,r7 + and r6,r0 + rotcl r6 + div1 r5,r1 + shll2 r0 + rotcl r6 + exts.b r0,r0 + div1 r5,r1 + swap.w r0,r0 + exts.w r0,r1 + exts.b r6,r0 + mov.l @r15+,r6 + rotcl r0 + rts + sub r1,r0 + ! 31 cycles up to here + + .balign 4 +LOCAL(udiv_ge64k): ! 3 cycles up to here + mov r4,r0 + shlr8 r0 + div0u + cmp/hi r0,r5 + bt LOCAL(udiv_r8) + mov.l r5,@-r15 + shll8 r5 + ! 7 cycles up to here + .rept 8 + div1 r5,r0 + .endr + extu.b r4,r1 ! 15 cycles up to here + extu.b r0,r6 + xor r1,r0 + xor r6,r0 + swap.b r6,r6 + .rept 8 + div1 r5,r0 + .endr ! 25 cycles up to here + extu.b r0,r0 + mov.l @r15+,r5 + or r6,r0 + mov.l @r15+,r6 + rts + rotcl r0 ! 
28 cycles up to here + + .balign 4 +LOCAL(udiv_r8): ! 6 cycles up to here + mov.l r4,@-r15 + shll16 r4 + shll8 r4 + ! + shll r4 + mov r0,r1 + div1 r5,r1 + mov r4,r0 + rotcl r0 + mov.l @r15+,r4 + div1 r5,r1 + ! 12 cycles up to here + .rept 6 + rotcl r0; div1 r5,r1 + .endr + mov.l @r15+,r6 ! 24 cycles up to here + rts + rotcl r0 + + .balign 4 +LOCAL(div_ge32k): ! 6 cycles up to here + mov.l r7,@-r15 + swap.w r5,r6 + exts.b r6,r7 + exts.w r6,r6 + cmp/eq r6,r7 + extu.b r1,r6 + bf/s LOCAL(div_ge8m) + cmp/hi r1,r4 ! copy sign bit of r4 into T + rotcr r1 ! signed shift must use original sign from r4 + div0s r5,r4 + shad r0,r1 + shll8 r5 + div1 r5,r1 + mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00) + div1 r5,r1 + shlr8 r7 + div1 r5,r1 + swap.w r4,r0 + div1 r5,r1 + swap.b r0,r0 + div1 r5,r1 + or r0,r7 + div1 r5,r1 + add #-80,r7 + div1 r5,r1 + swap.w r7,r0 + div1 r5,r1 + or r0,r7 + extu.b r1,r0 + xor r6,r1 + xor r0,r1 + exts.b r0,r0 + div1 r5,r1 + extu.w r7,r7 + div1 r5,r1 + neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000 + div1 r5,r1 + and r0,r7 + div1 r5,r1 + swap.w r7,r7 ! 26 cycles up to here. + div1 r5,r1 + shll8 r0 + div1 r5,r1 + exts.w r7,r7 + div1 r5,r1 + add r0,r0 + div1 r5,r1 + sub r7,r0 + extu.b r1,r1 + mov.l @r15+,r7 + rotcl r1 + mov.l @r15+,r6 + add r1,r0 + mov #-8,r1 + rts + shad r1,r5 ! 34 cycles up to here + + .balign 4 +GLOBAL(udivsi3_i4i): + mov.l r6,@-r15 + extu.w r5,r6 + cmp/eq r5,r6 + mov #0x7f,r0 + bf LOCAL(udiv_ge64k) + cmp/hi r0,r5 + bf LOCAL(udiv_le128) + mov r4,r1 + shlr8 r1 + div0u + shlr r1 + shll16 r6 + div1 r6,r1 + extu.b r4,r0 ! 7 cycles up to here + .rept 8 + div1 r6,r1 + .endr ! 15 cycles up to here + xor r1,r0 ! xor dividend with result lsb + .rept 6 + div1 r6,r1 + .endr + mov.l r7,@-r15 ! 21 cycles up to here + div1 r6,r1 + extu.b r0,r7 + div1 r6,r1 + shll8 r7 + extu.w r1,r0 + xor r7,r1 ! replace lsb of result with lsb of dividend + div1 r6,r1 + mov #0,r7 + div1 r6,r1 + ! 
+ div1 r6,r1 + bra LOCAL(div_end) + div1 r6,r1 ! 28 cycles up to here + + /* This is link-compatible with a GLOBAL(sdivsi3) call, + but we effectively clobber only r1, macl and mach */ + /* Because negative quotients are calculated as one's complements, + -0x80000000 divided by the smallest positive number of a number + range (0x80, 0x8000, 0x800000) causes saturation in the one's + complement representation, and we have to suppress the + one's -> two's complement adjustment. Since positive numbers + don't get such an adjustment, it's OK to also compute one's -> two's + complement adjustment suppression for a dividend of 0. */ + .balign 4 +GLOBAL(sdivsi3_i4i): + mov.l r6,@-r15 + exts.b r5,r6 + cmp/eq r5,r6 + mov #-1,r1 + bt/s LOCAL(div_le128) + cmp/pz r4 + addc r4,r1 + exts.w r5,r6 + cmp/eq r5,r6 + mov #-7,r0 + bf/s LOCAL(div_ge32k) + cmp/hi r1,r4 ! copy sign bit of r4 into T + rotcr r1 + shll16 r6 ! 7 cycles up to here + shad r0,r1 + div0s r5,r4 + div1 r6,r1 + mov.l r7,@-r15 + div1 r6,r1 + mov r4,r0 ! re-compute adjusted dividend + div1 r6,r1 + mov #-31,r7 + div1 r6,r1 + shad r7,r0 + div1 r6,r1 + add r4,r0 ! adjusted dividend + div1 r6,r1 + mov.l r8,@-r15 + div1 r6,r1 + swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80 + div1 r6,r1 + swap.b r8,r8 + xor r1,r0 ! xor dividend with result lsb + div1 r6,r1 + div1 r6,r1 + or r5,r8 + div1 r6,r1 + add #-0x80,r8 ! r8 is 0 iff there is a match + div1 r6,r1 + swap.w r8,r7 ! or upper 16 bits... + div1 r6,r1 + or r7,r8 !...into lower 16 bits + div1 r6,r1 + extu.w r8,r8 + div1 r6,r1 + extu.b r0,r7 + div1 r6,r1 + shll8 r7 + exts.w r1,r0 + xor r7,r1 ! replace lsb of result with lsb of dividend + div1 r6,r1 + neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm. + div1 r6,r1 + and r0,r8 + div1 r6,r1 + swap.w r8,r7 + div1 r6,r1 + mov.l @r15+,r8 ! 
58 insns, 29 cycles up to here +LOCAL(div_end): + div1 r6,r1 + shll8 r0 + div1 r6,r1 + exts.w r7,r7 + div1 r6,r1 + add r0,r0 + div1 r6,r1 + sub r7,r0 + extu.b r1,r1 + mov.l @r15+,r7 + rotcl r1 + mov.l @r15+,r6 + rts + add r1,r0 + + .balign 4 +LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict) + mova LOCAL(div_table_inv),r0 + shll2 r6 + mov.l @(r0,r6),r1 + mova LOCAL(div_table_clz),r0 + lds r4,mach + ! + ! + ! + tst r1,r1 + ! + bt 0f + dmulu.l r1,r4 +0: mov.b @(r0,r5),r1 + clrt + ! + ! + sts mach,r0 + addc r4,r0 + rotcr r0 + mov.l @r15+,r6 + rts + shld r1,r0 + + .balign 4 +LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict) + mova LOCAL(div_table_inv),r0 + shll2 r6 + mov.l @(r0,r6),r1 + mova LOCAL(div_table_clz),r0 + neg r4,r6 + bf 0f + mov r4,r6 +0: lds r6,mach + tst r1,r1 + bt 0f + dmulu.l r1,r6 +0: div0s r4,r5 + mov.b @(r0,r5),r1 + bt/s LOCAL(le128_neg) + clrt + ! + sts mach,r0 + addc r6,r0 + rotcr r0 + mov.l @r15+,r6 + rts + shld r1,r0 + +/* Could trap divide by zero for the cost of one cycle more mispredict penalty: +... + dmulu.l r1,r6 +0: div0s r4,r5 + bt/s LOCAL(le128_neg) + tst r5,r5 + bt LOCAL(div_by_zero) + mov.b @(r0,r5),r1 + sts mach,r0 + addc r6,r0 +... +LOCAL(div_by_zero): + trapa # + .balign 4 +LOCAL(le128_neg): + bt LOCAL(div_by_zero) + mov.b @(r0,r5),r1 + sts mach,r0 + addc r6,r0 +... */ + + .balign 4 +LOCAL(le128_neg): + sts mach,r0 + addc r6,r0 + rotcr r0 + mov.l @r15+,r6 + shad r1,r0 + rts + neg r0,r0 + ENDFUNC(GLOBAL(udivsi3_i4i)) + ENDFUNC(GLOBAL(sdivsi3_i4i)) + +/* This table has been generated by divtab-sh4.c. 
*/ + .balign 4 + .byte -7 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -1 + .byte -1 + .byte 0 +LOCAL(div_table_clz): + .byte 0 + .byte 0 + .byte -1 + .byte -1 + .byte -2 + .byte -2 + .byte -2 + .byte -2 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -3 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -4 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + 
.byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -5 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 + .byte -6 +/* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32, + or in bit 33 for powers of two. */ + .balign 4 + .long 0x0 + .long 0x2040811 + .long 0x4104105 + .long 0x624DD30 + .long 0x8421085 + .long 0xA6810A7 + .long 0xC9714FC + .long 0xECF56BF + .long 0x11111112 + .long 0x135C8114 + .long 0x15B1E5F8 + .long 0x18118119 + .long 0x1A7B9612 + .long 0x1CF06ADB + .long 0x1F7047DD + .long 0x21FB7813 + .long 0x24924925 + .long 0x27350B89 + .long 0x29E4129F + .long 0x2C9FB4D9 + .long 0x2F684BDB + .long 0x323E34A3 + .long 0x3521CFB3 + .long 0x38138139 + .long 0x3B13B13C + .long 0x3E22CBCF + .long 0x41414142 + .long 0x446F8657 + .long 0x47AE147B + .long 0x4AFD6A06 + .long 0x4E5E0A73 + .long 0x51D07EAF + .long 0x55555556 + .long 0x58ED2309 + .long 0x5C9882BA + .long 0x60581606 + .long 0x642C8591 + .long 0x68168169 + .long 0x6C16C16D + .long 0x702E05C1 + .long 0x745D1746 + .long 0x78A4C818 + .long 0x7D05F418 + .long 0x81818182 + .long 0x86186187 + .long 0x8ACB90F7 + .long 0x8F9C18FA + .long 0x948B0FCE + .long 0x9999999A + .long 0x9EC8E952 + .long 0xA41A41A5 + .long 0xA98EF607 + .long 0xAF286BCB + .long 0xB4E81B4F + .long 
0xBACF914D + .long 0xC0E07039 + .long 0xC71C71C8 + .long 0xCD856891 + .long 0xD41D41D5 + .long 0xDAE6076C + .long 0xE1E1E1E2 + .long 0xE9131AC0 + .long 0xF07C1F08 + .long 0xF81F81F9 + .long 0x0 + .long 0x4104105 + .long 0x8421085 + .long 0xC9714FC + .long 0x11111112 + .long 0x15B1E5F8 + .long 0x1A7B9612 + .long 0x1F7047DD + .long 0x24924925 + .long 0x29E4129F + .long 0x2F684BDB + .long 0x3521CFB3 + .long 0x3B13B13C + .long 0x41414142 + .long 0x47AE147B + .long 0x4E5E0A73 + .long 0x55555556 + .long 0x5C9882BA + .long 0x642C8591 + .long 0x6C16C16D + .long 0x745D1746 + .long 0x7D05F418 + .long 0x86186187 + .long 0x8F9C18FA + .long 0x9999999A + .long 0xA41A41A5 + .long 0xAF286BCB + .long 0xBACF914D + .long 0xC71C71C8 + .long 0xD41D41D5 + .long 0xE1E1E1E2 + .long 0xF07C1F08 + .long 0x0 + .long 0x8421085 + .long 0x11111112 + .long 0x1A7B9612 + .long 0x24924925 + .long 0x2F684BDB + .long 0x3B13B13C + .long 0x47AE147B + .long 0x55555556 + .long 0x642C8591 + .long 0x745D1746 + .long 0x86186187 + .long 0x9999999A + .long 0xAF286BCB + .long 0xC71C71C8 + .long 0xE1E1E1E2 + .long 0x0 + .long 0x11111112 + .long 0x24924925 + .long 0x3B13B13C + .long 0x55555556 + .long 0x745D1746 + .long 0x9999999A + .long 0xC71C71C8 + .long 0x0 + .long 0x24924925 + .long 0x55555556 + .long 0x9999999A + .long 0x0 + .long 0x55555556 + .long 0x0 + .long 0x0 +LOCAL(div_table_inv): + .long 0x0 + .long 0x0 + .long 0x0 + .long 0x55555556 + .long 0x0 + .long 0x9999999A + .long 0x55555556 + .long 0x24924925 + .long 0x0 + .long 0xC71C71C8 + .long 0x9999999A + .long 0x745D1746 + .long 0x55555556 + .long 0x3B13B13C + .long 0x24924925 + .long 0x11111112 + .long 0x0 + .long 0xE1E1E1E2 + .long 0xC71C71C8 + .long 0xAF286BCB + .long 0x9999999A + .long 0x86186187 + .long 0x745D1746 + .long 0x642C8591 + .long 0x55555556 + .long 0x47AE147B + .long 0x3B13B13C + .long 0x2F684BDB + .long 0x24924925 + .long 0x1A7B9612 + .long 0x11111112 + .long 0x8421085 + .long 0x0 + .long 0xF07C1F08 + .long 0xE1E1E1E2 + .long 
0xD41D41D5 + .long 0xC71C71C8 + .long 0xBACF914D + .long 0xAF286BCB + .long 0xA41A41A5 + .long 0x9999999A + .long 0x8F9C18FA + .long 0x86186187 + .long 0x7D05F418 + .long 0x745D1746 + .long 0x6C16C16D + .long 0x642C8591 + .long 0x5C9882BA + .long 0x55555556 + .long 0x4E5E0A73 + .long 0x47AE147B + .long 0x41414142 + .long 0x3B13B13C + .long 0x3521CFB3 + .long 0x2F684BDB + .long 0x29E4129F + .long 0x24924925 + .long 0x1F7047DD + .long 0x1A7B9612 + .long 0x15B1E5F8 + .long 0x11111112 + .long 0xC9714FC + .long 0x8421085 + .long 0x4104105 + .long 0x0 + .long 0xF81F81F9 + .long 0xF07C1F08 + .long 0xE9131AC0 + .long 0xE1E1E1E2 + .long 0xDAE6076C + .long 0xD41D41D5 + .long 0xCD856891 + .long 0xC71C71C8 + .long 0xC0E07039 + .long 0xBACF914D + .long 0xB4E81B4F + .long 0xAF286BCB + .long 0xA98EF607 + .long 0xA41A41A5 + .long 0x9EC8E952 + .long 0x9999999A + .long 0x948B0FCE + .long 0x8F9C18FA + .long 0x8ACB90F7 + .long 0x86186187 + .long 0x81818182 + .long 0x7D05F418 + .long 0x78A4C818 + .long 0x745D1746 + .long 0x702E05C1 + .long 0x6C16C16D + .long 0x68168169 + .long 0x642C8591 + .long 0x60581606 + .long 0x5C9882BA + .long 0x58ED2309 + .long 0x55555556 + .long 0x51D07EAF + .long 0x4E5E0A73 + .long 0x4AFD6A06 + .long 0x47AE147B + .long 0x446F8657 + .long 0x41414142 + .long 0x3E22CBCF + .long 0x3B13B13C + .long 0x38138139 + .long 0x3521CFB3 + .long 0x323E34A3 + .long 0x2F684BDB + .long 0x2C9FB4D9 + .long 0x29E4129F + .long 0x27350B89 + .long 0x24924925 + .long 0x21FB7813 + .long 0x1F7047DD + .long 0x1CF06ADB + .long 0x1A7B9612 + .long 0x18118119 + .long 0x15B1E5F8 + .long 0x135C8114 + .long 0x11111112 + .long 0xECF56BF + .long 0xC9714FC + .long 0xA6810A7 + .long 0x8421085 + .long 0x624DD30 + .long 0x4104105 + .long 0x2040811 + /* maximum error: 0.987342 scaled: 0.921875*/ + +#endif /* SH3 / SH4 */ + +#endif /* L_div_table */ diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index a0661545b56..e142b1cee68 100644 --- a/gcc/config/sh/sh-protos.h +++ 
b/gcc/config/sh/sh-protos.h @@ -1,6 +1,6 @@ /* Definitions of target machine for GNU compiler for Renesas / SuperH SH. Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2003, - 2004, 2005 + 2004, 2005, 2006 Free Software Foundation, Inc. Contributed by Steve Chamberlain (sac@cygnus.com). Improved by Jim Wilson (wilson@cygnus.com). @@ -69,6 +69,10 @@ extern void print_operand (FILE *, rtx, int); extern void output_pic_addr_const (FILE *, rtx); extern int expand_block_move (rtx *); extern int prepare_move_operands (rtx[], enum machine_mode mode); +extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode, + enum rtx_code comparison); +extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int); +extern bool expand_cbranchdi4 (rtx *operands, enum rtx_code comparison); extern void from_compare (rtx *, int); extern int shift_insns_rtx (rtx); extern void gen_ashift (int, int, rtx); diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 30b87480412..9f733b852f1 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -526,10 +526,15 @@ sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, case OPT_m4: case OPT_m4_100: case OPT_m4_200: + case OPT_m4_300: target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4; return true; case OPT_m4_nofpu: + case OPT_m4_100_nofpu: + case OPT_m4_200_nofpu: + case OPT_m4_300_nofpu: + case OPT_m4_340: case OPT_m4_400: case OPT_m4_500: target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU; @@ -538,12 +543,14 @@ sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, case OPT_m4_single: case OPT_m4_100_single: case OPT_m4_200_single: + case OPT_m4_300_single: target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE; return true; case OPT_m4_single_only: case OPT_m4_100_single_only: case OPT_m4_200_single_only: + case OPT_m4_300_single_only: target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY; return true; @@ -1341,6 +1348,288 @@ 
prepare_move_operands (rtx operands[], enum machine_mode mode) return 0; } +/* Canonicalize the operands of a compare-and-branch in MODE. If COMPARISON is CODE_FOR_nothing, the comparison code is taken from operands[0] and no scratch register is used; otherwise COMPARISON is the code to use and operands[4] is the scratch. A CONST_INT operands[1] is swapped with a non-constant operands[2], comparisons against a few boundary constants are rewritten to use the adjacent constant or zero, and operands[1] / operands[2] are moved into registers where the code below requires it. Returns the comparison code that matches the updated operands[1] and operands[2]. */ enum rtx_code +prepare_cbranch_operands (rtx *operands, enum machine_mode mode, + enum rtx_code comparison) +{ + rtx op1; + rtx scratch = NULL_RTX; + + if (comparison == CODE_FOR_nothing) + comparison = GET_CODE (operands[0]); + else + scratch = operands[4]; + /* Put a constant first operand second, swapping the condition to match. */ if (GET_CODE (operands[1]) == CONST_INT + && GET_CODE (operands[2]) != CONST_INT) + { + rtx tmp = operands[1]; + + operands[1] = operands[2]; + operands[2] = tmp; + comparison = swap_condition (comparison); + } + if (GET_CODE (operands[2]) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (operands[2]); + /* x > -1 becomes x >= 0 and x > -0x81 becomes x >= -0x80; likewise LE becomes LT. */ if ((val == -1 || val == -0x81) + && (comparison == GT || comparison == LE)) + { + comparison = (comparison == GT) ? GE : LT; + operands[2] = gen_int_mode (val + 1, mode); + } + else if ((val == 1 || val == 0x80) + && (comparison == GE || comparison == LT)) + { + comparison = (comparison == GE) ? GT : LE; + operands[2] = gen_int_mode (val - 1, mode); + } + else if (val == 1 && (comparison == GEU || comparison == LTU)) + { + comparison = (comparison == GEU) ? NE : EQ; + operands[2] = CONST0_RTX (mode); + } + else if (val == 0x80 && (comparison == GEU || comparison == LTU)) + { + comparison = (comparison == GEU) ? GTU : LEU; + operands[2] = gen_int_mode (val - 1, mode); + } + else if (val == 0 && (comparison == GTU || comparison == LEU)) + comparison = (comparison == GTU) ? NE : EQ; + /* In SImode, an unsigned comparison against 0x7fffffff (GTU / LEU) or 0x80000000 (GEU / LTU) is turned into a signed comparison against zero. */ else if (mode == SImode + && ((val == 0x7fffffff + && (comparison == GTU || comparison == LEU)) + || ((unsigned HOST_WIDE_INT) val + == (unsigned HOST_WIDE_INT) 0x7fffffff + 1 + && (comparison == GEU || comparison == LTU)))) + { + comparison = (comparison == GTU || comparison == GEU) ? 
LT : GE; + operands[2] = CONST0_RTX (mode); + } + } + op1 = operands[1]; + if (!no_new_pseudos) + operands[1] = force_reg (mode, op1); + /* When we are handling DImode comparisons, we want to keep constants so + that we can optimize the component comparisons; however, memory loads + are better issued as a whole so that they can be scheduled well. + SImode equality comparisons allow I08 constants, but only when they + compare r0. Hence, if operands[1] has to be loaded from somewhere else + into a register, that register might as well be r0, and we allow the + constant. If it is already in a register, this is likely to be + allocated to a different hard register, thus we load the constant into + a register unless it is zero. */ + if (!REG_P (operands[2]) + && (GET_CODE (operands[2]) != CONST_INT + || (mode == SImode && operands[2] != CONST0_RTX (SImode) + && ((comparison != EQ && comparison != NE) + || (REG_P (op1) && REGNO (op1) != R0_REG) + || !CONST_OK_FOR_I08 (INTVAL (operands[2])))))) + { + /* Use the scratch register if one of the right mode was supplied; otherwise create a pseudo when that is still allowed. */ if (scratch && GET_MODE (scratch) == mode) + { + emit_move_insn (scratch, operands[2]); + operands[2] = scratch; + } + else if (!no_new_pseudos) + operands[2] = force_reg (mode, operands[2]); + } + return comparison; +} + +/* Emit an SImode comparison of operands[1] with operands[2] that sets the T register, followed by a conditional jump to operands[3]. COMPARISON is passed on to prepare_cbranch_operands. NE, LT, LE, LTU and LEU are emitted as the reversed comparison followed by gen_branch_false; all other codes use gen_branch_true. If PROBABILITY is non-negative, it is attached to the jump as a REG_BR_PROB note. */ void +expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability) +{ + rtx (*branch_expander) (rtx) = gen_branch_true; + rtx jump; + + comparison = prepare_cbranch_operands (operands, SImode, comparison); + switch (comparison) + { + case NE: case LT: case LE: case LTU: case LEU: + comparison = reverse_condition (comparison); + branch_expander = gen_branch_false; + /* Falls through to the empty default. */ default: ; + } + emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, T_REG), + gen_rtx_fmt_ee (comparison, SImode, + operands[1], operands[2]))); + jump = emit_jump_insn (branch_expander (operands[3])); + if (probability >= 0) + REG_NOTES (jump) + = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (probability), + REG_NOTES (jump)); + +} + +/* ??? 
How should we distribute probabilities when more than one branch + is generated. So far we only have some ad-hoc observations: + - If the operands are random, they are likely to differ in both parts. + - If comparing items in a hash chain, the operands are random or equal; + operation should be EQ or NE. + - If items are searched in an ordered tree from the root, we can expect + the highpart to be unequal about half of the time; operation should be + an inequality comparison, operands non-constant, and overall probability + about 50%. Likewise for quicksort. + - Range checks will be often made against constants. Even if we assume for + simplicity an even distribution of the non-constant operand over a + sub-range here, the same probability could be generated with differently + wide sub-ranges - as long as the ratio of the part of the subrange that + is before the threshold to the part that comes after the threshold stays + the same. Thus, we can't really tell anything here; + assuming random distribution is at least simple. + */ + +/* Emit a DImode conditional branch to operands[3] as up to three SImode compare-and-branch sequences: MSW_TAKEN compares the high words and branches to the target, MSW_SKIP compares the high words and branches past the low-word test, and LSW_TAKEN compares the low words and branches to the target. A code of CODE_FOR_nothing means that branch is not emitted. COMPARISON is passed on to prepare_cbranch_operands. Returns false if the resulting comparison code is not handled here, true once the branches have been emitted. */ bool +expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) +{ + enum rtx_code msw_taken, msw_skip, lsw_taken; + rtx skip_label; + rtx op1h, op1l, op2h, op2l; + int num_branches; + int prob, rev_prob; + int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1; + + comparison = prepare_cbranch_operands (operands, DImode, comparison); + op1h = gen_highpart_mode (SImode, DImode, operands[1]); + op2h = gen_highpart_mode (SImode, DImode, operands[2]); + op1l = gen_lowpart (SImode, operands[1]); + op2l = gen_lowpart (SImode, operands[2]); + msw_taken = msw_skip = lsw_taken = CODE_FOR_nothing; + prob = split_branch_probability; + rev_prob = REG_BR_PROB_BASE - prob; + switch (comparison) + { + /* ??? Should we use the cmpeqdi_t pattern for equality comparisons? + That costs 1 cycle more when the first branch can be predicted taken, + but saves us mispredicts because only one branch needs prediction. 
+ It also enables generating the cmpeqdi_t-1 pattern. */ + case EQ: + if (TARGET_CMPEQDI_T) + { + emit_insn (gen_cmpeqdi_t (operands[1], operands[2])); + emit_jump_insn (gen_branch_true (operands[3])); + return true; + } + msw_skip = NE; + lsw_taken = EQ; + if (prob >= 0) + { + /* If we had more precision, we'd use rev_prob - (rev_prob >> 32) . + */ + msw_skip_prob = rev_prob; + if (REG_BR_PROB_BASE <= 65535) + lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0; + else + { + gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64); + lsw_taken_prob + = (prob + ? (REG_BR_PROB_BASE + - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob + / ((HOST_WIDEST_INT) prob << 32))) + : 0); + } + } + break; + case NE: + if (TARGET_CMPEQDI_T) + { + emit_insn (gen_cmpeqdi_t (operands[1], operands[2])); + emit_jump_insn (gen_branch_false (operands[3])); + return true; + } + msw_taken = NE; + /* The high-word branch gets the overall probability; this used to be stored into lsw_taken_prob and then overwritten just below. */ msw_taken_prob = prob; + lsw_taken = NE; + lsw_taken_prob = 0; + break; + case GTU: case GT: + msw_taken = comparison; + if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1) + break; + if (comparison != GTU || op2h != CONST0_RTX (SImode)) + msw_skip = swap_condition (msw_taken); + lsw_taken = GTU; + break; + case GEU: case GE: + if (op2l == CONST0_RTX (SImode)) + msw_taken = comparison; + else + { + msw_taken = comparison == GE ? 
GT : GTU; + msw_skip = swap_condition (msw_taken); + lsw_taken = GEU; + } + break; + case LTU: case LT: + msw_taken = comparison; + if (op2l == CONST0_RTX (SImode)) + break; + msw_skip = swap_condition (msw_taken); + lsw_taken = LTU; + break; + case LEU: case LE: + if (GET_CODE (op2l) == CONST_INT && INTVAL (op2l) == -1) + msw_taken = comparison; + else + { + lsw_taken = LEU; + if (comparison == LE) + msw_taken = LT; + else if (op2h != CONST0_RTX (SImode)) + msw_taken = LTU; + else + { + /* The high word of operand 2 is zero, so the high words can never decide in favour of the branch; but a nonzero high word of operand 1 makes the condition false, so the low-word test must be skipped then. */ msw_skip = swap_condition (LTU); + break; + } + msw_skip = swap_condition (msw_taken); + } + break; + default: return false; + } + num_branches = ((msw_taken != CODE_FOR_nothing) + + (msw_skip != CODE_FOR_nothing) + + (lsw_taken != CODE_FOR_nothing)); + /* EQ / NE set their probabilities above; distribute PROB over the branches of the ordering comparisons here. */ if (comparison != EQ && comparison != NE && num_branches > 1) + { + if (!CONSTANT_P (operands[2]) + && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U) + && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U)) + { + msw_taken_prob = prob / 2U; + msw_skip_prob + = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob); + lsw_taken_prob = prob; + } + else + { + msw_taken_prob = prob; + msw_skip_prob = REG_BR_PROB_BASE; + /* ??? If we have a constant op2h, should we use that when + calculating lsw_taken_prob? */ + lsw_taken_prob = prob; + } + } + /* Emit the high-word branches first; operands[4] is cleared so that expand_cbranchsi4 gets no scratch register. */ operands[1] = op1h; + operands[2] = op2h; + operands[4] = NULL_RTX; + if (msw_taken != CODE_FOR_nothing) + expand_cbranchsi4 (operands, msw_taken, msw_taken_prob); + if (msw_skip != CODE_FOR_nothing) + { + rtx taken_label = operands[3]; + + /* Temporarily redirect the branch target to a fresh label placed after the low-word test. */ operands[3] = skip_label = gen_label_rtx (); + expand_cbranchsi4 (operands, msw_skip, msw_skip_prob); + operands[3] = taken_label; + } + operands[1] = op1l; + operands[2] = op2l; + if (lsw_taken != CODE_FOR_nothing) + expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob); + if (msw_skip != CODE_FOR_nothing) + emit_label (skip_label); + return true; +} + /* Prepare the operands for an scc instruction; make sure that the compare has been done. 
*/ rtx @@ -1723,6 +2012,12 @@ output_branch (int logic, rtx insn, rtx *operands) } } +/* Output a code sequence for INSN using TEMPLATE with OPERANDS; but before, + fill in operand 9 as a label to the successor insn. + We try to use jump threading where possible. + If CODE matches the comparison in the IF_THEN_ELSE of a following jump, + we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means + follow jmp and bt, if the address is in range. */ const char * output_branchy_insn (enum rtx_code code, const char *template, rtx insn, rtx *operands) @@ -2117,6 +2412,15 @@ sh_rtx_costs (rtx x, int code, int outer_code, int *total) else if ((outer_code == AND || outer_code == IOR || outer_code == XOR) && CONST_OK_FOR_K08 (INTVAL (x))) *total = 1; + /* prepare_cmp_insn will force costly constants into registers before + the cbranch[sd]i4 patterns can see them, so preserve potentially + interesting ones not covered by I08 above. */ + else if (outer_code == COMPARE + && ((unsigned HOST_WIDE_INT) INTVAL (x) + == (unsigned HOST_WIDE_INT) 0x7fffffff + 1 + || INTVAL (x) == 0x7fffffff + || INTVAL (x) == 0x80 || INTVAL (x) == -0x81)) + *total = 1; else *total = 8; return true; @@ -2135,6 +2439,11 @@ sh_rtx_costs (rtx x, int code, int outer_code, int *total) case CONST_DOUBLE: if (TARGET_SHMEDIA) *total = COSTS_N_INSNS (4); + /* prepare_cmp_insn will force costly constants into registers before + the cbranchdi4 pattern can see them, so preserve potentially + interesting ones. 
*/ + else if (outer_code == COMPARE && GET_MODE (x) == DImode) + *total = 1; else *total = 10; return true; @@ -8571,23 +8880,32 @@ sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost) } else if (REG_NOTE_KIND (link) == 0) { - enum attr_type dep_type, type; + enum attr_type type; + rtx dep_set; if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) return cost; - dep_type = get_attr_type (dep_insn); - if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD) - cost--; - if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI) - && (type = get_attr_type (insn)) != TYPE_CALL - && type != TYPE_SFUNC) - cost--; + dep_set = single_set (dep_insn); + /* The latency that we specify in the scheduling description refers + to the actual output, not to an auto-increment register; for that, + the latency is one. */ + if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1) + { + rtx set = single_set (insn); + + if (set + && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set)) + && (!MEM_P (SET_DEST (set)) + || !reg_mentioned_p (SET_DEST (dep_set), + XEXP (SET_DEST (set), 0)))) + cost = 1; + } /* The only input for a call that is timing-critical is the function's address. */ - if (GET_CODE(insn) == CALL_INSN) + if (GET_CODE (insn) == CALL_INSN) { rtx call = PATTERN (insn); @@ -8599,12 +8917,16 @@ sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost) /* sibcalli_thunk uses a symbol_ref in an unspec. */ && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))) - cost = 0; + cost -= TARGET_SH4_300 ? 3 : 6; } /* Likewise, the most timing critical input for an sfuncs call is the function address. However, sfuncs typically start using their arguments pretty quickly. - Assume a four cycle delay before they are needed. */ + Assume a four cycle delay for SH4 before they are needed. + Cached ST40-300 calls are quicker, so assume only a one + cycle delay there. + ??? 
Maybe we should encode the delays till input registers + are needed by sfuncs into the sfunc call insn. */ /* All sfunc calls are parallels with at least four components. Exploit this to avoid unnecessary calls to sfunc_uses_reg. */ else if (GET_CODE (PATTERN (insn)) == PARALLEL @@ -8612,50 +8934,83 @@ sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost) && (reg = sfunc_uses_reg (insn))) { if (! reg_set_p (reg, dep_insn)) - cost -= 4; - } - /* When the preceding instruction loads the shift amount of - the following SHAD/SHLD, the latency of the load is increased - by 1 cycle. */ - else if (TARGET_SH4 - && get_attr_type (insn) == TYPE_DYN_SHIFT - && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES - && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)), - XEXP (SET_SRC (single_set (insn)), - 1))) - cost++; - /* When an LS group instruction with a latency of less than - 3 cycles is followed by a double-precision floating-point - instruction, FIPR, or FTRV, the latency of the first - instruction is increased to 3 cycles. */ - else if (cost < 3 - && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP - && get_attr_dfp_comp (insn) == DFP_COMP_YES) - cost = 3; - /* The lsw register of a double-precision computation is ready one - cycle earlier. */ - else if (reload_completed - && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES - && (use_pat = single_set (insn)) - && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))), - SET_SRC (use_pat))) - cost -= 1; - - if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES - && get_attr_late_fp_use (insn) == LATE_FP_USE_YES) - cost -= 1; + cost -= TARGET_SH4_300 ? 
1 : 4; + } + if (TARGET_HARD_SH4 && !TARGET_SH4_300) + { + enum attr_type dep_type = get_attr_type (dep_insn); + + if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD) + cost--; + else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI) + && (type = get_attr_type (insn)) != TYPE_CALL + && type != TYPE_SFUNC) + cost--; + /* When the preceding instruction loads the shift amount of + the following SHAD/SHLD, the latency of the load is increased + by 1 cycle. */ + if (get_attr_type (insn) == TYPE_DYN_SHIFT + && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES + && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)), + XEXP (SET_SRC (single_set (insn)), + 1))) + cost++; + /* When an LS group instruction with a latency of less than + 3 cycles is followed by a double-precision floating-point + instruction, FIPR, or FTRV, the latency of the first + instruction is increased to 3 cycles. */ + else if (cost < 3 + && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP + && get_attr_dfp_comp (insn) == DFP_COMP_YES) + cost = 3; + /* The lsw register of a double-precision computation is ready one + cycle earlier. */ + else if (reload_completed + && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES + && (use_pat = single_set (insn)) + && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))), + SET_SRC (use_pat))) + cost -= 1; + + if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES + && get_attr_late_fp_use (insn) == LATE_FP_USE_YES) + cost -= 1; + } + else if (TARGET_SH4_300) + { + /* Stores need their input register two cycles later. */ + if (dep_set && cost >= 1 + && ((type = get_attr_type (insn)) == TYPE_STORE + || type == TYPE_PSTORE + || type == TYPE_FSTORE || type == TYPE_MAC_MEM)) + { + rtx set = single_set (insn); + + if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0)) + && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set))) + { + cost -= 2; + /* But don't reduce the cost below 1 if the address depends + on a side effect of dep_insn. 
*/ + if (cost < 1 + && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn)) + cost = 1; + } + } + } } /* An anti-dependence penalty of two applies if the first insn is a double precision fadd / fsub / fmul. */ - else if (REG_NOTE_KIND (link) == REG_DEP_ANTI + else if (!TARGET_SH4_300 + && REG_NOTE_KIND (link) == REG_DEP_ANTI && recog_memoized (dep_insn) >= 0 - && get_attr_type (dep_insn) == TYPE_DFP_ARITH + && (get_attr_type (dep_insn) == TYPE_DFP_ARITH + || get_attr_type (dep_insn) == TYPE_DFP_MUL) /* A lot of alleged anti-flow dependences are fake, so check this one is real. */ && flow_dependent_p (dep_insn, insn)) cost = 2; - return cost; } diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index fc4e1f282a4..1b659c75135 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -274,6 +274,7 @@ do { \ #endif #if SUPPORT_SH2 #define SUPPORT_SH3 1 +#define SUPPORT_SH2A_NOFPU 1 #endif #if SUPPORT_SH3 #define SUPPORT_SH4_NOFPU 1 @@ -281,16 +282,17 @@ do { \ #if SUPPORT_SH4_NOFPU #define SUPPORT_SH4A_NOFPU 1 #define SUPPORT_SH4AL 1 -#define SUPPORT_SH2A_NOFPU 1 #endif #if SUPPORT_SH2E #define SUPPORT_SH3E 1 +#define SUPPORT_SH2A_SINGLE_ONLY 1 #endif #if SUPPORT_SH3E #define SUPPORT_SH4_SINGLE_ONLY 1 +#endif +#if SUPPORT_SH4_SINGLE_ONLY #define SUPPORT_SH4A_SINGLE_ONLY 1 -#define SUPPORT_SH2A_SINGLE_ONLY 1 #endif #if SUPPORT_SH4 @@ -469,6 +471,11 @@ do { \ target_flags |= MASK_SMALLCODE; \ sh_div_str = SH_DIV_STR_FOR_SIZE ; \ } \ + else \ + { \ + TARGET_CBRANCHDI4 = 1; \ + TARGET_EXPAND_CBRANCHDI4 = 1; \ + } \ /* We can't meaningfully test TARGET_SHMEDIA here, because -m options \ haven't been parsed yet, hence we'd read only the default. \ sh_target_reg_class will return NO_REGS if this is not SHMEDIA, so \ @@ -608,6 +615,7 @@ do { \ else \ sh_div_strategy = SH_DIV_INV; \ } \ + TARGET_CBRANCHDI4 = 0; \ } \ /* -fprofile-arcs needs a working libgcov . 
In unified tree \ configurations with newlib, this requires to configure with \ @@ -668,6 +676,9 @@ do { \ sh_divsi3_libfunc = "__sdivsi3_1"; \ else \ sh_divsi3_libfunc = "__sdivsi3"; \ + if (sh_branch_cost == -1) \ + sh_branch_cost \ + = TARGET_SH5 ? 1 : ! TARGET_SH2 || TARGET_HARD_SH4 ? 2 : 1; \ if (TARGET_FMOVD) \ reg_class_from_letter['e' - 'a'] = NO_REGS; \ \ @@ -844,7 +855,7 @@ do { \ ((GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_INT \ || GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_FLOAT) \ ? (unsigned) MIN (BIGGEST_ALIGNMENT, GET_MODE_BITSIZE (TYPE_MODE (TYPE))) \ - : (unsigned) ALIGN) + : (unsigned) DATA_ALIGNMENT(TYPE, ALIGN)) /* Make arrays of chars word-aligned for the same reasons. */ #define DATA_ALIGNMENT(TYPE, ALIGN) \ @@ -2288,6 +2299,7 @@ struct sh_args { #define CONSTANT_ADDRESS_P(X) (GET_CODE (X) == LABEL_REF) /* Nonzero if the constant value X is a legitimate general operand. */ +/* can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */ #define LEGITIMATE_CONSTANT_P(X) \ (TARGET_SHMEDIA \ @@ -2298,7 +2310,7 @@ struct sh_args { || TARGET_SHMEDIA64) \ : (GET_CODE (X) != CONST_DOUBLE \ || GET_MODE (X) == DFmode || GET_MODE (X) == SFmode \ - || (TARGET_SH2E && (fp_zero_operand (X) || fp_one_operand (X))))) + || GET_MODE (X) == DImode || GET_MODE (X) == VOIDmode)) /* The macros REG_OK_FOR..._P assume that the arg is a REG rtx and check its validity for a certain class. diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index d091dfe0eff..a37c58308e3 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -204,7 +204,9 @@ ;; load_si Likewise, SImode variant for general register. ;; fload Likewise, but load to fp register. 
;; store to memory +;; fstore floating point register to memory ;; move general purpose register to register +;; movi8 8 bit immediate to general purpose register ;; mt_group other sh4 mt instructions ;; fmove register to register, floating point ;; smpy word precision integer multiply @@ -221,11 +223,15 @@ ;; sfunc special function call with known used registers ;; call function call ;; fp floating point +;; fpscr_toggle toggle a bit in the fpscr ;; fdiv floating point divide (or square root) ;; gp_fpul move from general purpose register to fpul ;; fpul_gp move from fpul to general purpose register ;; mac_gp move from mac[lh] to general purpose register -;; dfp_arith, dfp_cmp,dfp_conv +;; gp_mac move from general purpose register to mac[lh] +;; mac_mem move from mac[lh] to memory +;; mem_mac move from memory to mac[lh] +;; dfp_arith,dfp_mul, fp_cmp,dfp_cmp,dfp_conv ;; ftrc_s fix_truncsfsi2_i4 ;; dfdiv double precision floating point divide (or square root) ;; cwb ic_invalidate_line_i @@ -263,7 +269,7 @@ ;; nil no-op move, will be deleted. 
(define_attr "type" - "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fdiv,ftrc_s,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other" + "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,fload,store,fstore,move,movi8,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fpscr_toggle,fdiv,ftrc_s,dfp_arith,dfp_mul,fp_cmp,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,gp_mac,mac_mem,mem_mac,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media,nil,other" (const_string "other")) ;; We define a new attribute namely "insn_class".We use @@ -279,12 +285,12 @@ (define_attr "insn_class" "mt_group,ex_group,ls_group,br_group,fe_group,co_group,none" (cond [(eq_attr "type" "move,mt_group") (const_string "mt_group") - (eq_attr "type" "arith,dyn_shift") (const_string "ex_group") - (eq_attr "type" "fmove,load,pcload,load_si,pcload_si,fload,pcfload,store,gp_fpul,fpul_gp") (const_string "ls_group") + (eq_attr "type" "movi8,arith,dyn_shift") (const_string "ex_group") + (eq_attr "type" "fmove,load,pcload,load_si,pcload_si,fload,pcfload,store,fstore,gp_fpul,fpul_gp") (const_string 
"ls_group") (eq_attr "type" "cbranch,jump") (const_string "br_group") - (eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_conv,dfdiv") + (eq_attr "type" "fp,fp_cmp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv") (const_string "fe_group") - (eq_attr "type" "jump_ind,smpy,dmpy,mac_gp,return,pload,prset,pstore,prget,rte,sfunc,call,dfp_cmp,mem_fpscr,gp_fpscr,cwb") (const_string "co_group")] + (eq_attr "type" "jump_ind,smpy,dmpy,mac_gp,return,pload,prset,pstore,prget,rte,sfunc,call,dfp_cmp,mem_fpscr,gp_fpscr,cwb,gp_mac,mac_mem,mem_mac") (const_string "co_group")] (const_string "none"))) ;; nil are zero instructions, and arith3 / arith3b are multiple instructions, ;; so these do not belong in an insn group, although they are modeled @@ -494,14 +500,14 @@ ;; SH4 Double-precision computation with double-precision result - ;; the two halves are ready at different times. (define_attr "dfp_comp" "yes,no" - (cond [(eq_attr "type" "dfp_arith,dfp_conv,dfdiv") (const_string "yes")] + (cond [(eq_attr "type" "dfp_arith,dfp_mul,dfp_conv,dfdiv") (const_string "yes")] (const_string "no"))) ;; Insns for which the latency of a preceding fp insn is decreased by one. (define_attr "late_fp_use" "yes,no" (const_string "no")) ;; And feeding insns for which this relevant. 
(define_attr "any_fp_comp" "yes,no" - (cond [(eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_conv,dfdiv") + (cond [(eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv") (const_string "yes")] (const_string "no"))) @@ -608,16 +614,38 @@ cmp/pz %0" [(set_attr "type" "mt_group")]) +;; ------------------------------------------------------------------------- +;; SImode compare and branch +;; ------------------------------------------------------------------------- + +(define_expand "cbranchsi4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:SI 1 "arith_operand" "") + (match_operand:SI 2 "arith_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:SI T_REG))] + "TARGET_CBRANCHDI4" + "expand_cbranchsi4 (operands, CODE_FOR_nothing, -1); DONE;") + ;; ------------------------------------------------------------------------- ;; SImode unsigned integer comparisons ;; ------------------------------------------------------------------------- -(define_insn "cmpgeusi_t" +(define_insn_and_split "cmpgeusi_t" [(set (reg:SI T_REG) (geu:SI (match_operand:SI 0 "arith_reg_operand" "r") - (match_operand:SI 1 "arith_reg_operand" "r")))] + (match_operand:SI 1 "arith_reg_or_0_operand" "rN")))] "TARGET_SH1" "cmp/hs %1,%0" + "&& operands[1] == CONST0_RTX (SImode)" + [(pc)] + " +{ + /* Operand 1 is zero here, and an unsigned value is always >= zero, so just set T. Operand 0 is always a register, so it is operand 1 that the split condition has to test. */ emit_insn (gen_sett ()); + DONE; +}" [(set_attr "type" "mt_group")]) (define_insn "cmpgtusi_t" @@ -647,12 +675,64 @@ }") ;; ------------------------------------------------------------------------- -;; DImode signed integer comparisons +;; DImode compare and branch ;; ------------------------------------------------------------------------- -;; ??? Could get better scheduling by splitting the initial test from the -;; rest of the insn after reload. However, the gain would hardly justify -;; the sh.md size increase necessary to do that. + +;; arith3 patterns don't work well with the sh4-300 branch prediction mechanism. 
+;; Therefore, we aim to have a set of three branches that go straight to the +;; destination, i.e. only one of them is taken at any one time. +;; This mechanism should also be slightly better for the sh4-200. + +(define_expand "cbranchdi4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:DI 1 "arith_operand" "") + (match_operand:DI 2 "arith_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_dup 4)) + (clobber (reg:SI T_REG))] + "TARGET_CBRANCHDI4" + " +{ + enum rtx_code comparison; + + if (TARGET_EXPAND_CBRANCHDI4) + { + if (expand_cbranchdi4 (operands, CODE_FOR_nothing)) + DONE; + } + comparison = prepare_cbranch_operands (operands, DImode, CODE_FOR_nothing); + if (comparison != GET_CODE (operands[0])) + operands[0] + = gen_rtx_fmt_ee (VOIDmode, comparison, operands[1], operands[2]); + operands[4] = gen_rtx_SCRATCH (SImode); +}") + +(define_insn_and_split "cbranchdi4_i" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:DI 1 "arith_operand" "r,r") + (match_operand:DI 2 "arith_operand" "rN,i")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 4 "=X,&r")) + (clobber (reg:SI T_REG))] + "TARGET_CBRANCHDI4" + "#" + "&& reload_completed" + [(pc)] + " +{ + if (!expand_cbranchdi4 (operands, GET_CODE (operands[0]))) + FAIL; + DONE; +}") + +;; ------------------------------------------------------------------------- +;; DImode signed integer comparisons +;; ------------------------------------------------------------------------- (define_insn "" [(set (reg:SI T_REG) @@ -4736,7 +4816,7 @@ label: [(set (mem:SF (pre_dec:SI (reg:SI SP_REG))) (reg:SF FPUL_REG))] "TARGET_SH2E && ! TARGET_SH5" "sts.l fpul,@-r15" - [(set_attr "type" "store") + [(set_attr "type" "fstore") (set_attr "late_fp_use" "yes") (set_attr "hit_stack" "yes")]) @@ -4818,9 +4898,9 @@ label: ;; (made from (set (subreg:SI (reg:QI ###) 0) ) into T. 
(define_insn "movsi_i" [(set (match_operand:SI 0 "general_movdst_operand" - "=r,r,t,r,r,r,r,m,<,<,x,l,x,l,r") + "=r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,r") (match_operand:SI 1 "general_movsrc_operand" - "Q,rI08,r,mr,x,l,t,r,x,l,r,r,>,>,i"))] + "Q,r,I08,r,mr,x,l,t,r,x,l,r,r,>,>,i"))] "TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH2A @@ -4829,6 +4909,7 @@ label: "@ mov.l %1,%0 mov %1,%0 + mov %1,%0 cmp/pl %1 mov.l %1,%0 sts %1,%0 @@ -4842,8 +4923,8 @@ label: lds.l %1,%0 lds.l %1,%0 fake %1,%0" - [(set_attr "type" "pcload_si,move,mt_group,load_si,mac_gp,prget,move,store,store,pstore,move,prset,load,pload,pcload_si") - (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")]) + [(set_attr "type" "pcload_si,move,movi8,mt_group,load_si,mac_gp,prget,arith,mac_mem,store,pstore,gp_mac,prset,mem_mac,pload,pcload_si") + (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")]) ;; t/r must come after r/r, lest reload will try to reload stuff like ;; (subreg:SI (reg:SF FR14_REG) 0) into T (compiling stdlib/strtod.c -m3e -O2) @@ -4853,15 +4934,16 @@ label: ;; TARGET_FMOVD is in effect, and mode switching is done before reload. (define_insn "movsi_ie" [(set (match_operand:SI 0 "general_movdst_operand" - "=r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y") + "=r,r,r,r,t,r,r,r,r,m,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y") (match_operand:SI 1 "general_movsrc_operand" - "Q,rI08,I20,r,mr,x,l,t,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))] + "Q,r,I08,I20,r,mr,x,l,t,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))] "(TARGET_SH2E || TARGET_SH2A) && (register_operand (operands[0], SImode) || register_operand (operands[1], SImode))" "@ mov.l %1,%0 mov %1,%0 + mov %1,%0 movi20 %1,%0 cmp/pl %1 mov.l %1,%0 @@ -4884,26 +4966,27 @@ label: flds %1,fpul fmov %1,%0 ! 
move optimized away" - [(set_attr "type" "pcload_si,move,move,*,load_si,mac_gp,prget,move,store,store,pstore,move,prset,load,pload,load,store,pcload_si,gp_fpul,fpul_gp,fmove,fmove,fmove,nil") - (set_attr "late_fp_use" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*") - (set_attr "length" "*,*,4,*,4,*,*,*,4,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")]) + [(set_attr "type" "pcload_si,move,movi8,move,*,load_si,mac_gp,prget,arith,store,mac_mem,pstore,gp_mac,prset,mem_mac,pload,load,fstore,pcload_si,gp_fpul,fpul_gp,fmove,fmove,fmove,nil") + (set_attr "late_fp_use" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*") + (set_attr "length" "*,*,*,4,*,4,*,*,*,4,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")]) (define_insn "movsi_i_lowpart" - [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "+r,r,r,r,r,r,m,r")) - (match_operand:SI 1 "general_movsrc_operand" "Q,rI08,mr,x,l,t,r,i"))] + [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "+r,r,r,r,r,r,r,m,r")) + (match_operand:SI 1 "general_movsrc_operand" "Q,r,I08,mr,x,l,t,r,i"))] "TARGET_SH1 && (register_operand (operands[0], SImode) || register_operand (operands[1], SImode))" "@ mov.l %1,%0 mov %1,%0 + mov %1,%0 mov.l %1,%0 sts %1,%0 sts %1,%0 movt %0 mov.l %1,%0 fake %1,%0" - [(set_attr "type" "pcload,move,load,move,prget,move,store,pcload")]) + [(set_attr "type" "pcload,move,arith,load,mac_gp,prget,arith,store,pcload")]) (define_insn_and_split "load_ra" [(set (match_operand:SI 0 "general_movdst_operand" "") @@ -5155,19 +5238,20 @@ label: (set_attr "needs_delay_slot" "yes")]) (define_insn "movqi_i" - [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l") - (match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))] + [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m,r,r,l") + (match_operand:QI 1 "general_movsrc_operand" "r,i,m,r,t,l,r"))] "TARGET_SH1 && (arith_reg_operand (operands[0], QImode) || arith_reg_operand (operands[1], QImode))" "@ + mov %1,%0 mov %1,%0 mov.b %1,%0 mov.b 
%1,%0 movt %0 sts %1,%0 lds %1,%0" - [(set_attr "type" "move,load,store,move,move,move")]) + [(set_attr "type" "move,movi8,load,store,arith,prget,prset")]) (define_insn "*movqi_media" [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m") @@ -5769,7 +5853,7 @@ label: (if_then_else (ne (symbol_ref "TARGET_SHCOMPACT") (const_int 0)) (const_int 10) (const_int 8))]) - (set_attr "type" "fmove,move,pcfload,fload,store,pcload,load,store,load,fload") + (set_attr "type" "fmove,move,pcfload,fload,fstore,pcload,load,store,load,fload") (set_attr "late_fp_use" "*,*,*,*,yes,*,*,*,*,*") (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes") (const_string "double") @@ -6486,7 +6570,7 @@ label: sts.l %1,%0 lds.l %1,%0 ! move optimized away" - [(set_attr "type" "fmove,move,fmove,fmove,pcfload,fload,store,pcload,load,store,fmove,fmove,load,*,fpul_gp,gp_fpul,store,load,nil") + [(set_attr "type" "fmove,move,fmove,fmove,pcfload,fload,fstore,pcload,load,store,fmove,fmove,load,*,fpul_gp,gp_fpul,fstore,load,nil") (set_attr "late_fp_use" "*,*,*,*,*,*,yes,*,*,*,*,*,*,*,yes,*,yes,*,*") (set_attr "length" "*,*,*,*,4,4,4,*,*,*,2,2,2,4,2,2,2,2,0") (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes") @@ -9929,7 +10013,7 @@ mov.l\\t1f,r0\\n\\ sts fpscr,%0 sts.l fpscr,%0" [(set_attr "length" "0,2,2,4,2,2,2,2,2") - (set_attr "type" "nil,mem_fpscr,load,mem_fpscr,gp_fpscr,move,store,mac_gp,store")]) + (set_attr "type" "nil,mem_fpscr,load,mem_fpscr,gp_fpscr,move,store,mac_gp,fstore")]) (define_peephole2 [(set (reg:PSI FPSCR_REG) @@ -9980,7 +10064,7 @@ mov.l\\t1f,r0\\n\\ (xor:PSI (reg:PSI FPSCR_REG) (const_int 1048576)))] "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" "fschg" - [(set_attr "type" "fp") (set_attr "fp_set" "unknown")]) + [(set_attr "type" "fpscr_toggle") (set_attr "fp_set" "unknown")]) ;; There's no way we can use it today, since optimize mode switching ;; doesn't enable us to know from which mode we're switching to the @@ -9992,7 +10076,7 @@ mov.l\\t1f,r0\\n\\ (xor:PSI (reg:PSI 
FPSCR_REG) (const_int 524288)))] "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE" "fpchg" - [(set_attr "type" "fp")]) + [(set_attr "type" "fpscr_toggle")]) (define_expand "addsf3" [(set (match_operand:SF 0 "arith_reg_operand" "") @@ -10124,25 +10208,12 @@ mov.l\\t1f,r0\\n\\ [(set_attr "type" "fp") (set_attr "fp_mode" "single")]) -;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR -;; register in feeding fp instructions. Thus, we cannot generate fmac for -;; mixed-precision SH4 targets. To allow it to be still generated for the -;; SH3E, we use a separate insn for SH3E mulsf3. - (define_expand "mulsf3" [(set (match_operand:SF 0 "fp_arith_reg_operand" "") (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "") (match_operand:SF 2 "fp_arith_reg_operand" "")))] "TARGET_SH2E || TARGET_SHMEDIA_FPU" - " -{ - if (TARGET_SH4 || TARGET_SH2A_SINGLE) - expand_sf_binop (&gen_mulsf3_i4, operands); - else if (TARGET_SH2E) - emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2])); - if (! TARGET_SHMEDIA) - DONE; -}") + "") (define_insn "*mulsf3_media" [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") @@ -10152,6 +10223,27 @@ mov.l\\t1f,r0\\n\\ "fmul.s %1, %2, %0" [(set_attr "type" "fparith_media")]) +;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR +;; register in feeding fp instructions. Thus, in order to generate fmac, +;; we start out with a mulsf pattern that does not depend on fpscr. +;; This is split after combine to introduce the dependency, in order to +;; get mode switching and scheduling right. 
+(define_insn_and_split "mulsf3_ie" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SH2E" + "fmul %2,%0" + "TARGET_SH4 || TARGET_SH2A_SINGLE" + [(const_int 0)] + " +{ + emit_insn (gen_mulsf3_i4 (operands[0], operands[1], operands[2], + get_fpscr_rtx ())); + DONE; +}" + [(set_attr "type" "fp")]) + (define_insn "mulsf3_i4" [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0") @@ -10162,20 +10254,12 @@ mov.l\\t1f,r0\\n\\ [(set_attr "type" "fp") (set_attr "fp_mode" "single")]) -(define_insn "mulsf3_ie" - [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") - (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0") - (match_operand:SF 2 "fp_arith_reg_operand" "f")))] - "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)" - "fmul %2,%0" - [(set_attr "type" "fp")]) - (define_insn "mac_media" [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") (match_operand:SF 2 "fp_arith_reg_operand" "f")) (match_operand:SF 3 "fp_arith_reg_operand" "0")))] - "TARGET_SHMEDIA_FPU" + "TARGET_SHMEDIA_FPU && TARGET_FMAC" "fmac.s %1, %2, %0" [(set_attr "type" "fparith_media")]) @@ -10185,7 +10269,7 @@ mov.l\\t1f,r0\\n\\ (match_operand:SF 2 "fp_arith_reg_operand" "f")) (match_operand:SF 3 "arith_reg_operand" "0"))) (use (match_operand:PSI 4 "fpscr_operand" "c"))] - "TARGET_SH2E && ! TARGET_SH4" + "TARGET_SH2E && TARGET_FMAC" "fmac fr0,%2,%0" [(set_attr "type" "fp") (set_attr "fp_mode" "single")]) @@ -10336,7 +10420,7 @@ mov.l\\t1f,r0\\n\\ (match_operand:SF 1 "fp_arith_reg_operand" "f")))] "TARGET_SH2E && ! 
(TARGET_SH4 || TARGET_SH2A_SINGLE)" "fcmp/gt %1,%0" - [(set_attr "type" "fp") + [(set_attr "type" "fp_cmp") (set_attr "fp_mode" "single")]) (define_insn "cmpeqsf_t" @@ -10345,7 +10429,7 @@ mov.l\\t1f,r0\\n\\ (match_operand:SF 1 "fp_arith_reg_operand" "f")))] "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)" "fcmp/eq %1,%0" - [(set_attr "type" "fp") + [(set_attr "type" "fp_cmp") (set_attr "fp_mode" "single")]) (define_insn "ieee_ccmpeqsf_t" @@ -10365,7 +10449,7 @@ mov.l\\t1f,r0\\n\\ (use (match_operand:PSI 2 "fpscr_operand" "c"))] "(TARGET_SH4 || TARGET_SH2A_SINGLE)" "fcmp/gt %1,%0" - [(set_attr "type" "fp") + [(set_attr "type" "fp_cmp") (set_attr "fp_mode" "single")]) (define_insn "cmpeqsf_t_i4" @@ -10375,7 +10459,7 @@ mov.l\\t1f,r0\\n\\ (use (match_operand:PSI 2 "fpscr_operand" "c"))] "(TARGET_SH4 || TARGET_SH2A_SINGLE)" "fcmp/eq %1,%0" - [(set_attr "type" "fp") + [(set_attr "type" "fp_cmp") (set_attr "fp_mode" "single")]) (define_insn "*ieee_ccmpeqsf_t_4" @@ -10724,7 +10808,7 @@ mov.l\\t1f,r0\\n\\ (use (match_operand:PSI 3 "fpscr_operand" "c"))] "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" "fmul %2,%0" - [(set_attr "type" "dfp_arith") + [(set_attr "type" "dfp_mul") (set_attr "fp_mode" "double")]) (define_expand "divdf3" diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt index 7f9a87e95d9..161fdd8dcaf 100644 --- a/gcc/config/sh/sh.opt +++ b/gcc/config/sh/sh.opt @@ -57,11 +57,11 @@ Target RejectNegative Condition(SUPPORT_SH2A_NOFPU) Generate SH2a FPU-less code m2a-single -Target RejectNegative Condition (SUPPORT_SH2A_SINGLE) +Target RejectNegative Condition(SUPPORT_SH2A_SINGLE) Generate default single-precision SH2a code m2a-single-only -Target RejectNegative Condition (SUPPORT_SH2A_SINGLE_ONLY) +Target RejectNegative Condition(SUPPORT_SH2A_SINGLE_ONLY) Generate only single-precision SH2a code m2e @@ -88,10 +88,33 @@ m4-200 Target RejectNegative Condition(SUPPORT_SH4) Generate SH4-200 code +;; TARGET_SH4_300 indicates if we have the ST40-300 instruction set and 
+;; pipeline - irrespective of ABI. +m4-300 +Target RejectNegative Condition(SUPPORT_SH4) Var(TARGET_SH4_300) +Generate SH4-300 code + m4-nofpu Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Generate SH4 FPU-less code +m4-100-nofpu +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) +Generate SH4-100 FPU-less code + +m4-200-nofpu +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) +Generate SH4-200 FPU-less code + +m4-300-nofpu +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Var(TARGET_SH4_300) VarExists +Generate SH4-300 FPU-less code + +m4-340 +Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Var(TARGET_SH4_300) VarExists +Generate code for SH4 340 series (MMU/FPU-less) +;; passes -isa=sh4-nommu-nofpu to the assembler. + m4-400 Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Generate code for SH4 400 series (MMU/FPU-less) @@ -114,6 +137,10 @@ m4-200-single Target RejectNegative Condition(SUPPORT_SH4_SINGLE) Generate default single-precision SH4-200 code +m4-300-single +Target RejectNegative Condition(SUPPORT_SH4_SINGLE) Var(TARGET_SH4_300) VarExists +Generate default single-precision SH4-300 code + m4-single-only Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY) Generate only single-precision SH4 code @@ -126,6 +153,10 @@ m4-200-single-only Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY) Generate only single-precision SH4-200 code +m4-300-single-only +Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY) Var(TARGET_SH4_300) VarExists +Generate only single-precision SH4-300 code + m4a Target RejectNegative Mask(SH4A) Condition(SUPPORT_SH4A) Generate SH4a code @@ -182,6 +213,22 @@ mbigtable Target Report RejectNegative Mask(BIGTABLE) Generate 32-bit offsets in switch tables +mbranch-cost= +Target RejectNegative Joined UInteger Var(sh_branch_cost) Init(-1) +Cost to assume for a branch insn + +mcbranchdi +Target Var(TARGET_CBRANCHDI4) +Enable cbranchdi4 pattern + +mexpand-cbranchdi +Target Var(TARGET_EXPAND_CBRANCHDI4) +Expand 
cbranchdi4 pattern early into separate comparisons and branches. + +mcmpeqdi +Target Var(TARGET_CMPEQDI_T) +Emit cmpeqdi_t pattern even when -mcbranchdi and -mexpand-cbranchdi are in effect. + mcut2-workaround Target RejectNegative Var(TARGET_SH5_CUT2_WORKAROUND) Enable SH5 cut2 workaround @@ -192,7 +239,7 @@ Align doubles at 64-bit boundaries mdiv= Target RejectNegative Joined Var(sh_div_str) Init("") -Division strategy, one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp call-div1 call-fp call-table +Division strategy, one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp, call-div1, call-fp, call-table mdivsi3_libfunc= Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("") @@ -201,6 +248,10 @@ Specify name for 32 bit signed division function mfmovd Target RejectNegative Mask(FMOVD) Undocumented +mfused-madd +Target Var(TARGET_FMAC) +Enable the use of the fused floating point multiply-accumulate operation + mgettrcost= Target RejectNegative Joined UInteger Var(sh_gettrcost) Init(-1) Cost to assume for gettr insn diff --git a/gcc/config/sh/sh1.md b/gcc/config/sh/sh1.md index 9dfdd86508f..1198fe737b9 100644 --- a/gcc/config/sh/sh1.md +++ b/gcc/config/sh/sh1.md @@ -1,5 +1,5 @@ ;; DFA scheduling description for Renesas / SuperH SH. -;; Copyright (C) 2004 Free Software Foundation, Inc. +;; Copyright (C) 2004, 2006 Free Software Foundation, Inc. ;; This file is part of GCC. 
@@ -45,7 +45,7 @@ (define_insn_reservation "sh1_load_store" 2 (and (eq_attr "pipe_model" "sh1") - (eq_attr "type" "load,pcload,pload,store,pstore")) + (eq_attr "type" "load,pcload,pload,mem_mac,store,fstore,pstore,mac_mem")) "sh1memory*2") (define_insn_reservation "sh1_arith3" 3 @@ -76,7 +76,7 @@ (define_insn_reservation "sh1_fp" 2 (and (eq_attr "pipe_model" "sh1") - (eq_attr "type" "fp,fmove")) + (eq_attr "type" "fp,fpscr_toggle,fp_cmp,fmove")) "sh1fp") (define_insn_reservation "sh1_fdiv" 13 diff --git a/gcc/config/sh/sh4-300.md b/gcc/config/sh/sh4-300.md new file mode 100644 index 00000000000..228782a67fc --- /dev/null +++ b/gcc/config/sh/sh4-300.md @@ -0,0 +1,288 @@ +;; DFA scheduling description for ST40-300. +;; Copyright (C) 2004, 2006 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. If not, write to +;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. + +;; Load and store instructions save a cycle if they are aligned on a +;; four byte boundary. Using a function unit for stores encourages +;; gcc to separate load and store instructions by one instruction, +;; which makes it more likely that the linker will be able to word +;; align them when relaxing. + +;; The following description models the ST40-300 pipeline using the DFA based +;; scheduler. 
+ +;; Two automata are defined to reduce the number of states +;; which a single large automaton would have. (Factoring) + +(define_automaton "sh4_300_inst_pipeline,sh4_300_fpu_pipe") + +;; This unit is basically the decode unit of the processor. +;; Since SH4 is a dual issue machine, it is as if there are two +;; units so that any insn can be processed by either one +;; of the decoding units. + +(define_cpu_unit "sh4_300_pipe_01,sh4_300_pipe_02" "sh4_300_inst_pipeline") + +;; The floating point units. + +(define_cpu_unit "sh4_300_fpt,sh4_300_fpu,sh4_300_fds" "sh4_300_fpu_pipe") + +;; integer multiplier unit + +(define_cpu_unit "sh4_300_mul" "sh4_300_inst_pipeline") + +;; LS unit + +(define_cpu_unit "sh4_300_ls" "sh4_300_inst_pipeline") + +;; The address calculator used for branch instructions. +;; This will be reserved after "issue" of branch instructions +;; and this is to make sure that no two branch instructions +;; can be issued in parallel. + +(define_cpu_unit "sh4_300_br" "sh4_300_inst_pipeline") + +;; ---------------------------------------------------- +;; This reservation is to simplify the dual issue description. + +(define_reservation "sh4_300_issue" "sh4_300_pipe_01|sh4_300_pipe_02") + +(define_reservation "all" "sh4_300_pipe_01+sh4_300_pipe_02") + +;;(define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing") + +;; MOV RM,RN / MOV #imm8,RN / STS PR,RN +(define_insn_reservation "sh4_300_mov" 0 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "move,movi8,prget")) + "sh4_300_issue") + +;; Fixed STS from MACL / MACH +(define_insn_reservation "sh4_300_mac_gp" 0 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "mac_gp")) + "sh4_300_issue+sh4_300_mul") + +;; Fixed LDS to MACL / MACH +(define_insn_reservation "sh4_300_gp_mac" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "gp_mac")) + "sh4_300_issue+sh4_300_mul") + +;; Instructions without specific resource requirements with latency 1. 
+ +(define_insn_reservation "sh4_300_simple_arith" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "mt_group,arith,dyn_shift,prset")) + "sh4_300_issue") + +;; Load and store instructions have no alignment peculiarities for the ST40-300, +;; but they use the load-store unit, which they share with the fmove type +;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) . +;; Loads have a latency of three. + +;; Load Store instructions. +(define_insn_reservation "sh4_300_load" 3 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "load,pcload,load_si,pcload_si,pload")) + "sh4_300_issue+sh4_300_ls") + +(define_insn_reservation "sh4_300_mac_load" 3 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "mem_mac")) + "sh4_300_issue+sh4_300_ls+sh4_300_mul") + +(define_insn_reservation "sh4_300_fload" 4 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fload,pcfload")) + "sh4_300_issue+sh4_300_ls+sh4_300_fpt") + +;; sh_adjust_cost describes the reduced latency of the feeding insns of a store. +;; The latency of an auto-increment register is 1; the latency of the memory +;; output is not actually considered here anyway. 
+(define_insn_reservation "sh4_300_store" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "store,pstore")) + "sh4_300_issue+sh4_300_ls") + +(define_insn_reservation "sh4_300_fstore" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fstore")) + "sh4_300_issue+sh4_300_ls+sh4_300_fpt") + +;; Fixed STS.L from MACL / MACH +(define_insn_reservation "sh4_300_mac_store" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "mac_mem")) + "sh4_300_issue+sh4_300_mul+sh4_300_ls") + +(define_insn_reservation "sh4_300_gp_fpul" 2 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "gp_fpul")) + "sh4_300_issue+sh4_300_fpt") + +(define_insn_reservation "sh4_300_fpul_gp" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fpul_gp")) + "sh4_300_issue+sh4_300_fpt") + +;; Branch (BF,BF/S,BT,BT/S,BRA) +;; Branch Far (JMP,RTS,BRAF) +;; Group: BR +;; When displacement is 0 for BF / BT, we have effectively conditional +;; execution of one instruction, without pipeline disruption. +;; Otherwise, the latency depends on prediction success. +;; We can't really do much with the latency, even if we could express it, +;; but the pairing restrictions are useful to take into account. +;; ??? If the branch is likely, and not paired with a preceding insn, +;; or likely and likely not predicted, we might want to fill the delay slot. +;; However, there appears to be no machinery to make the compiler +;; recognize these scenarios. 
+ +(define_insn_reservation "sh4_300_branch" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "cbranch,jump,return,jump_ind")) + "sh4_300_issue+sh4_300_br") + +;; RTE +(define_insn_reservation "sh4_300_return_from_exp" 9 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "rte")) + "sh4_300_pipe_01+sh4_300_pipe_02*9") + +;; OCBP, OCBWB +;; Group: CO +;; Latency: 1-5 +;; Issue Rate: 1 + +;; cwb is used for the sequence ocbwb @%0; extu.w %0,%2; or %1,%2; mov.l %0,@%2 +;; This description is likely inexact, but this pattern should not actually +;; appear when compiling for sh4-300; we should use isbi instead. +;; If a -mtune option is added later, we should use the icache array +;; dispatch method instead. +(define_insn_reservation "sh4_300_ocbwb" 3 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "cwb")) + "all*3") + +;; JSR,BSR,BSRF +;; Calls have a mandatory delay slot, which we'd like to fill with an insn +;; that can be paired with the call itself. +;; Scheduling runs before reorg, so we approximate this by saying that we +;; want the call to be paired with a preceding insn. +;; In most cases, the insn that loads the address of the call should have +;; a non-zero latency (mov rn,rm doesn't make sense since we could use rn +;; for the address then). Thus, a preceding insn that can be paired with +;; a call should be eligible for the delay slot. +;; +;; calls introduce a longish delay that is likely to flush the pipelines +;; of the caller's instructions. Ordinary functions tend to end with a +;; load to restore a register (in the delay slot of rts), while sfuncs +;; tend to end with an EX or MT insn. But that is not actually relevant, +;; since there are no instructions that contend for memory access early. +;; We could, of course, provide exact scheduling information for specific +;; sfuncs, if that should prove useful. 
+ +(define_insn_reservation "sh4_300_call" 16 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "call,sfunc")) + "sh4_300_issue+sh4_300_br,all*15") + +;; FMOV.S / FMOV.D +(define_insn_reservation "sh4_300_fmov" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fmove")) + "sh4_300_issue+sh4_300_fpt") + +;; LDS to FPSCR +(define_insn_reservation "sh4_300_fpscr_load" 8 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "gp_fpscr")) + "sh4_300_issue+sh4_300_fpu+sh4_300_fpt") + +;; LDS.L to FPSCR +(define_insn_reservation "sh4_300_fpscr_load_mem" 8 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "mem_fpscr")) + "sh4_300_issue+sh4_300_fpu+sh4_300_fpt+sh4_300_ls") + + +;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W) +(define_insn_reservation "multi" 2 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "smpy,dmpy")) + "sh4_300_issue+sh4_300_mul") + +;; FPCHG, FRCHG, FSCHG +(define_insn_reservation "fpscr_toggle" 1 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fpscr_toggle")) + "sh4_300_issue+sh4_300_fpu+sh4_300_fpt") + +;; FCMP/EQ, FCMP/GT +(define_insn_reservation "fp_cmp" 3 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fp_cmp,dfp_cmp")) + "sh4_300_issue+sh4_300_fpu") + +;; Single precision floating point (FADD,FLOAT,FMAC,FMUL,FSUB,FTRC) +;; Double-precision floating-point (FADD,FCNVDS,FCNVSD,FLOAT,FSUB,FTRC) +(define_insn_reservation "fp_arith" 6 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fp,ftrc_s,dfp_arith,dfp_conv")) + "sh4_300_issue+sh4_300_fpu") + +;; Single Precision FDIV/SQRT +(define_insn_reservation "fp_div" 19 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "fdiv")) + "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*15") + +;; Double-precision floating-point FMUL +(define_insn_reservation "dfp_mul" 9 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "dfp_mul")) + "sh4_300_issue+sh4_300_fpu,sh4_300_fpu*3") + +;; Double precision FDIV/SQRT 
+(define_insn_reservation "dp_div" 35 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "dfdiv")) + "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*31") + + +;; ??? We don't really want these for sh4-300. +;; this pattern itself is likely to finish in 3 cycles, but also +;; to disrupt branch prediction for taken branches for the following +;; condbranch. +(define_insn_reservation "sh4_300_arith3" 5 + (and (eq_attr "pipe_model" "sh4_300") + (eq_attr "type" "arith3")) + "sh4_300_issue,all*4") + +;; arith3b insns without branch redirection make use of the 0-offset 0-latency +;; branch feature, and thus schedule the same no matter if the branch is taken +;; or not. If the branch is redirected, the taken branch might take longer, +;; but then, we don't have to take the next branch. +;; ??? should we suppress branch redirection for sh4-300 to improve branch +;; target hit rates? +(define_insn_reservation "arith3b" 2 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "arith3")) + "issue,all") diff --git a/gcc/config/sh/sh4.md b/gcc/config/sh/sh4.md index 0937db8e6a3..b390ab99d05 100644 --- a/gcc/config/sh/sh4.md +++ b/gcc/config/sh/sh4.md @@ -1,5 +1,5 @@ ;; DFA scheduling description for SH4. -;; Copyright (C) 2004 Free Software Foundation, Inc. +;; Copyright (C) 2004, 2006 Free Software Foundation, Inc. ;; This file is part of GCC. @@ -209,9 +209,14 @@ (define_insn_reservation "sh4_store" 1 (and (eq_attr "pipe_model" "sh4") - (eq_attr "type" "store")) + (eq_attr "type" "store,fstore")) "issue+load_store,nothing,memory") +(define_insn_reservation "mac_mem" 1 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "mac_mem")) + "d_lock,nothing,memory") + ;; Load Store instructions. 
;; Group: LS ;; Latency: 1 @@ -372,35 +377,42 @@ ;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W) ;; Group: CO ;; Latency: 4 / 4 -;; Issue Rate: 1 +;; Issue Rate: 2 (define_insn_reservation "multi" 4 (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "smpy,dmpy")) "d_lock,(d_lock+f1_1),(f1_1|f1_2)*3,F2") -;; Fixed STS from MACL / MACH +;; Fixed STS from, and LDS to MACL / MACH ;; Group: CO ;; Latency: 3 ;; Issue Rate: 1 (define_insn_reservation "sh4_mac_gp" 3 (and (eq_attr "pipe_model" "sh4") - (eq_attr "type" "mac_gp")) + (eq_attr "type" "mac_gp,gp_mac,mem_mac")) "d_lock") ;; Single precision floating point computation FCMP/EQ, -;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRVHG, FSCHG +;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG ;; Group: FE ;; Latency: 3/4 ;; Issue Rate: 1 (define_insn_reservation "fp_arith" 3 (and (eq_attr "pipe_model" "sh4") - (eq_attr "type" "fp")) + (eq_attr "type" "fp,fp_cmp")) "issue,F01,F2") +;; We don't model the resource usage of this exactly because that would +;; introduce a bogus latency. +(define_insn_reservation "sh4_fpscr_toggle" 1 + (and (eq_attr "pipe_model" "sh4") + (eq_attr "type" "fpscr_toggle")) + "issue") + (define_insn_reservation "fp_arith_ftrc" 3 (and (eq_attr "pipe_model" "sh4") (eq_attr "type" "ftrc_s")) @@ -437,7 +449,7 @@ (define_insn_reservation "fp_double_arith" 8 (and (eq_attr "pipe_model" "sh4") - (eq_attr "type" "dfp_arith")) + (eq_attr "type" "dfp_arith,dfp_mul")) "issue,F01,F1+F2,fpu*4,F2") ;; Double-precision FCMP (FCMP/EQ,FCMP/GT) diff --git a/gcc/config/sh/sh4a.md b/gcc/config/sh/sh4a.md index 163a4e10d85..602c6545ae9 100644 --- a/gcc/config/sh/sh4a.md +++ b/gcc/config/sh/sh4a.md @@ -1,5 +1,5 @@ ;; Scheduling description for Renesas SH4a -;; Copyright (C) 2003, 2004 Free Software Foundation, Inc. +;; Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc. ;; ;; This file is part of GCC. ;; @@ -98,9 +98,11 @@ ;; MOV ;; Group: MT ;; Latency: 0 +;; ??? 
not sure if movi8 belongs here, but that's where it was +;; effectively before. (define_insn_reservation "sh4a_mov" 0 (and (eq_attr "cpu" "sh4a") - (eq_attr "type" "move")) + (eq_attr "type" "move,movi8,gp_mac")) "ID_or") ;; Load @@ -108,7 +110,7 @@ ;; Latency: 3 (define_insn_reservation "sh4a_load" 3 (and (eq_attr "cpu" "sh4a") - (eq_attr "type" "load,pcload")) + (eq_attr "type" "load,pcload,mem_mac")) "sh4a_ls+sh4a_memory") (define_insn_reservation "sh4a_load_si" 3 @@ -121,7 +123,7 @@ ;; Latency: 0 (define_insn_reservation "sh4a_store" 0 (and (eq_attr "cpu" "sh4a") - (eq_attr "type" "store")) + (eq_attr "type" "store,fstore,mac_mem")) "sh4a_ls+sh4a_memory") ;; CWB TYPE @@ -177,7 +179,7 @@ ;; Latency: 3 (define_insn_reservation "sh4a_fp_arith" 3 (and (eq_attr "cpu" "sh4a") - (eq_attr "type" "fp")) + (eq_attr "type" "fp,fp_cmp,fpscr_toggle")) "ID_or,sh4a_fex") (define_insn_reservation "sh4a_fp_arith_ftrc" 3 @@ -207,7 +209,7 @@ ;; Latency: 5 (define_insn_reservation "sh4a_fp_double_arith" 5 (and (eq_attr "cpu" "sh4a") - (eq_attr "type" "dfp_arith")) + (eq_attr "type" "dfp_arith,dfp_mul")) "ID_or,sh4a_fex*3") ;; Double precision FDIV/SQRT diff --git a/gcc/config/sh/superh.h b/gcc/config/sh/superh.h index 49bb6206d43..65154926e33 100644 --- a/gcc/config/sh/superh.h +++ b/gcc/config/sh/superh.h @@ -75,17 +75,17 @@ Boston, MA 02110-1301, USA. 
*/ on newlib and provide the runtime support */ #undef SUBTARGET_CPP_SPEC #define SUBTARGET_CPP_SPEC \ -"-D__EMBEDDED_CROSS__ %{m4-100*:-D__SH4_100__} %{m4-200*:-D__SH4_200__} %{m4-400:-D__SH4_400__} %{m4-500:-D__SH4_500__} \ +"-D__EMBEDDED_CROSS__ %{m4-100*:-D__SH4_100__} %{m4-200*:-D__SH4_200__} %{m4-300*:-D__SH4_300__} %{m4-340:-D__SH4_340__} %{m4-400:-D__SH4_400__} %{m4-500:-D__SH4_500__} \ %(cppruntime)" /* Override the SUBTARGET_ASM_SPEC to add the runtime support */ #undef SUBTARGET_ASM_SPEC -#define SUBTARGET_ASM_SPEC "%{m4-100*|m4-200*:-isa=sh4} %{m4-400:-isa=sh4-nommu-nofpu} %{m4-500:-isa=sh4-nofpu} %(asruntime)" +#define SUBTARGET_ASM_SPEC "%{m4-100*|m4-200*:-isa=sh4} %{m4-400|m4-340:-isa=sh4-nommu-nofpu} %{m4-500:-isa=sh4-nofpu} %(asruntime)" /* Override the SUBTARGET_ASM_RELAX_SPEC so it doesn't interfere with the runtime support by adding -isa=sh4 in the wrong place. */ #undef SUBTARGET_ASM_RELAX_SPEC -#define SUBTARGET_ASM_RELAX_SPEC "%{!m4-100*:%{!m4-200*:%{!m4-400:%{!m4-500:-isa=sh4}}}}" +#define SUBTARGET_ASM_RELAX_SPEC "%{!m4-100*:%{!m4-200*:%{!m4-300*:%{!m4-340:%{!m4-400:%{!m4-500:-isa=sh4}}}}}}" /* Create the CC1_SPEC to add the runtime support */ #undef CC1_SPEC @@ -102,7 +102,7 @@ Boston, MA 02110-1301, USA. */ /* Override STARTFILE_SPEC to add profiling and MMU support. */ #undef STARTFILE_SPEC #define STARTFILE_SPEC \ - "%{!shared: %{!m4-400*: %{pg:gcrt1-mmu.o%s}%{!pg:crt1-mmu.o%s}}} \ - %{!shared: %{m4-400*: %{pg:gcrt1.o%s}%{!pg:crt1.o%s}}} \ + "%{!shared: %{!m4-400*:%{!m4-340*: %{pg:gcrt1-mmu.o%s}%{!pg:crt1-mmu.o%s}}}} \ + %{!shared: %{m4-340*|m4-400*: %{pg:gcrt1.o%s}%{!pg:crt1.o%s}}} \ crti.o%s \ %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}" diff --git a/gcc/config/sh/t-sh b/gcc/config/sh/t-sh index 3ebc09d6e3c..56b6ba1c55a 100644 --- a/gcc/config/sh/t-sh +++ b/gcc/config/sh/t-sh @@ -38,11 +38,12 @@ MULTILIB_DIRNAMES= # is why sh2a and sh2a-single need their own multilibs. 
MULTILIB_MATCHES = $(shell \ multilibs="$(MULTILIB_OPTIONS)" ; \ - for abi in m1,m2,m3,m4-nofpu,m4-400,m4-500,m4al,m4a-nofpu m1,m2,m2a-nofpu \ - m2e,m3e,m4-single-only,m4-100-single-only,m4-200-single-only,m4a-single-only \ + for abi in m1,m2,m3,m4-nofpu,m4-100-nofpu,m4-200-nofpu,m4-400,m4-500,m4-340,m4-300-nofpu,m4al,m4a-nofpu \ + m1,m2,m2a-nofpu \ + m2e,m3e,m4-single-only,m4-100-single-only,m4-200-single-only,m4-300-single-only,m4a-single-only \ m2e,m2a-single-only \ - m4-single,m4-100-single,m4-200-single,m4a-single \ - m4,m4-100,m4-200,m4a \ + m4-single,m4-100-single,m4-200-single,m4-300-single,m4a-single \ + m4,m4-100,m4-200,m4-300,m4a \ m5-32media,m5-compact,m5-32media \ m5-32media-nofpu,m5-compact-nofpu,m5-32media-nofpu; do \ subst= ; \ @@ -76,7 +77,7 @@ gt-sh.h : s-gtype ; @true IC_EXTRA_PARTS= libic_invalidate_array_4-100.a libic_invalidate_array_4-200.a \ libic_invalidate_array_4a.a -OPT_EXTRA_PARTS= libgcc-Os-4-200.a +OPT_EXTRA_PARTS= libgcc-Os-4-200.a libgcc-4-300.a EXTRA_MULTILIB_PARTS= $(IC_EXTRA_PARTS) $(OPT_EXTRA_PARTS) $(T)ic_invalidate_array_4-100.o: $(srcdir)/config/sh/lib1funcs.asm $(GCC_PASSES) @@ -104,6 +105,12 @@ OBJS_Os_4_200=$(T)sdivsi3_i4i-Os-4-200.o $(T)udivsi3_i4i-Os-4-200.o $(T)unwind-d $(T)libgcc-Os-4-200.a: $(OBJS_Os_4_200) $(GCC_PASSES) $(AR_CREATE_FOR_TARGET) $@ $(OBJS_Os_4_200) +$(T)div_table-4-300.o: $(srcdir)/config/sh/lib1funcs-4-300.asm $(GCC_PASSES) + $(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $@ -DL_div_table -x assembler-with-cpp $< + +$(T)libgcc-4-300.a: $(T)div_table-4-300.o $(GCC_PASSES) + $(AR_CREATE_FOR_TARGET) $@ $(T)div_table-4-300.o + # Local Variables: # mode: Makefile # End: diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7a204603530..1949db13719 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2006-11-03 J"orn Rennecke + + * testsuite/gcc.c-torture/execute/arith-rand-ll.c: + Also test for bogus rest sign. 
+ 2006-11-03 Francois-Xavier Coudert PR libfortran/27895 diff --git a/gcc/testsuite/gcc.c-torture/execute/arith-rand-ll.c b/gcc/testsuite/gcc.c-torture/execute/arith-rand-ll.c index d5791ec3303..6c3cf1885ba 100644 --- a/gcc/testsuite/gcc.c-torture/execute/arith-rand-ll.c +++ b/gcc/testsuite/gcc.c-torture/execute/arith-rand-ll.c @@ -79,7 +79,7 @@ main () continue; r1 = xx / yy; r2 = xx % yy; - if (ABS (r2) >= (unsigned int) ABS (yy) || (signed int) (r1 * yy + r2) != xx) + if (ABS (r2) >= (unsigned int) ABS (yy) || (signed int) (r1 * yy + r2) != xx || ((xx < 0) != (r2 < 0) && r2)) abort (); } { unsigned short xx = x, yy = y, r1, r2; -- cgit v1.2.1