-rw-r--r--  gcc/ChangeLog          |   33
-rw-r--r--  gcc/config/sh/elf.h    |   49
-rw-r--r--  gcc/config/sh/sh.c     | 1755
-rw-r--r--  gcc/config/sh/sh.h     |   27
-rw-r--r--  gcc/config/sh/sh.md    | 1117
-rw-r--r--  gcc/ginclude/va-sh.h   |  124
6 files changed, 2440 insertions, 665 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2483bfbc865..5db233fcf59 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,36 @@
+Sat Nov 8 18:20:21 1997 J"orn Rennecke <amylaar@cygnus.co.uk>
+
+ * sh.h (ENABLE_REGMOVE_PASS): Define.
+
+ Bring over from FSF:
+
+ Thu Oct 30 12:21:06 1997 J"orn Rennecke <amylaar@cygnus.co.uk>
+
+ * va-sh.h (__va_arg_sh1): Define.
+ (va_arg): Use it.
+ SH3E doesn't use any integer registers for subsequent arguments
+ once a non-float value has been passed on the stack.
+ * sh.c (machine_dependent_reorg): If optimizing, put explicit
+ alignment in front of the label for an ADDR_DIFF_VEC.
+ * sh.h (PASS_IN_REG_P): Fix SH3E case.
+ (ADJUST_INSN_LENGTH): If not optimizing, add two extra bytes length.
+
+ Tue Oct 28 15:06:44 1997 J"orn Rennecke <amylaar@cygnus.co.uk>
+
+ * sh/elf.h (PREFERRED_DEBUGGING_TYPE): Undefine before including
+ svr4.h.
+
+ Mon Oct 27 16:11:52 1997 J"orn Rennecke <amylaar@cygnus.co.uk>
+
+ * sh.c (machine_dependent_reorg): When flag_delayed_branch is set,
+ put a use_sfunc_addr before each sfunc.
+ * sh.md (use_sfunc_addr, dummy_jump): New insns.
+ (casesi): For TARGET_SH2, emit a dummy_jump after LAB.
+
+ Tue Oct 21 07:12:28 1997 J"orn Rennecke <amylaar@cygnus.co.uk>
+
+ * sh/elf.h (PREFERRED_DEBUGGING_TYPE): Don't redefine.
+
Fri Nov 7 10:22:24 1997 Jason Merrill <jason@yorick.cygnus.com>
* frame.c (add_fdes, count_fdes): Go back to checking pc_begin for
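The va-sh.h entry above concerns argument passing for variadic functions on SH3E: once a non-float argument has gone to the stack, no further integer arguments may be taken from registers, and va_arg (via the new __va_arg_sh1 helper) has to honor that. A minimal sketch of the kind of variadic callee affected, in plain ISO C rather than the actual macro (the macro itself lives in gcc/ginclude/va-sh.h and is not shown in this section):

#include <stdarg.h>

/* Illustrative only: a variadic function of the kind the __va_arg_sh1
   fix is about.  On SH3E the first integer arguments arrive in r4..r7;
   once one argument has spilled to the stack, every later integer
   argument must also be fetched from the stack, which is what the
   corrected va_arg expansion accounts for.  */
int
sum_ints (int count, ...)
{
  va_list ap;
  int i, total = 0;

  va_start (ap, count);
  for (i = 0; i < count; i++)
    total += va_arg (ap, int);
  va_end (ap);
  return total;
}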
diff --git a/gcc/config/sh/elf.h b/gcc/config/sh/elf.h
index 2c3294b4935..a56077e544e 100644
--- a/gcc/config/sh/elf.h
+++ b/gcc/config/sh/elf.h
@@ -25,10 +25,6 @@ Boston, MA 02111-1307, USA. */
/* No SDB debugging info. */
#undef SDB_DEBUGGING_INFO
-/* Prefer stabs. */
-#undef PREFERRED_DEBUGGING_TYPE
-#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
-
/* Undefine some macros defined in both sh.h and svr4.h. */
#undef IDENT_ASM_OP
#undef ASM_FILE_END
@@ -40,10 +36,30 @@ Boston, MA 02111-1307, USA. */
#undef ASM_OUTPUT_CONSTRUCTOR
#undef ASM_OUTPUT_DESTRUCTOR
#undef ASM_DECLARE_FUNCTION_NAME
+#undef PREFERRED_DEBUGGING_TYPE
/* Be ELF-like. */
#include "svr4.h"
+/* The prefix to add to user-visible assembler symbols.
+ Note that svr4.h redefines it, overriding the value we want, which
+ is set in sh.h. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+#undef ASM_FILE_START
+#define ASM_FILE_START(FILE) do { \
+ output_file_directive ((FILE), main_input_filename); \
+ if (TARGET_LITTLE_ENDIAN) \
+ fprintf ((FILE), "\t.little\n"); \
+} while (0)
+
+
+
/* Let code know that this is ELF. */
#define CPP_PREDEFINES "-D__sh__ -D__ELF__ -Acpu(sh) -Amachine(sh)"
@@ -52,7 +68,7 @@ Boston, MA 02111-1307, USA. */
#define ASM_SPEC "%{ml:-little} %{mrelax:-relax}"
#undef LINK_SPEC
-#define LINK_SPEC "%{ml:-m shl} %{mrelax:-relax}"
+#define LINK_SPEC "%{ml:-m shlelf} %{mrelax:-relax}"
/* svr4.h undefined DBX_REGISTER_NUMBER, so we need to define it
again. */
@@ -63,29 +79,26 @@ Boston, MA 02111-1307, USA. */
symbol names. */
#undef ASM_OUTPUT_LABELREF
#define ASM_OUTPUT_LABELREF(STREAM,NAME) \
- fprintf (STREAM, "_%s", NAME)
-
-/* Because SH ELF uses underscores, we don't put a '.' before local
- labels, for easy compatibility with the COFF implementation. */
+ asm_fprintf (STREAM, "%U%s", NAME)
#undef ASM_GENERATE_INTERNAL_LABEL
#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \
- sprintf (STRING, "*%s%d", PREFIX, NUM)
+ sprintf ((STRING), "*%s%s%d", LOCAL_LABEL_PREFIX, (PREFIX), (NUM))
#undef ASM_OUTPUT_INTERNAL_LABEL
#define ASM_OUTPUT_INTERNAL_LABEL(FILE,PREFIX,NUM) \
- fprintf (FILE, "%s%d:\n", PREFIX, NUM)
+ asm_fprintf ((FILE), "%L%s%d:\n", (PREFIX), (NUM))
#undef ASM_OUTPUT_SOURCE_LINE
#define ASM_OUTPUT_SOURCE_LINE(file, line) \
do \
{ \
static int sym_lineno = 1; \
- fprintf (file, ".stabn 68,0,%d,LM%d-", \
- line, sym_lineno); \
- assemble_name (file, \
+ asm_fprintf ((file), ".stabn 68,0,%d,%LLM%d-", \
+ (line), sym_lineno); \
+ assemble_name ((file), \
XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));\
- fprintf (file, "\nLM%d:\n", sym_lineno); \
+ asm_fprintf ((file), "\n%LLM%d:\n", sym_lineno); \
sym_lineno += 1; \
} \
while (0)
@@ -94,7 +107,7 @@ while (0)
#define DBX_OUTPUT_MAIN_SOURCE_FILE_END(FILE, FILENAME) \
do { \
text_section (); \
- fprintf (FILE, "\t.stabs \"\",%d,0,0,Letext\nLetext:\n", N_SO); \
+ fprintf ((FILE), "\t.stabs \"\",%d,0,0,Letext\nLetext:\n", N_SO); \
} while (0)
/* Arrange to call __main, rather than using crtbegin.o and crtend.o
@@ -103,3 +116,7 @@ do { \
#undef FINI_SECTION_ASM_OP
#undef STARTFILE_SPEC
#undef ENDFILE_SPEC
+
+/* HANDLE_SYSV_PRAGMA (defined by svr4.h) takes precedence over HANDLE_PRAGMA.
+ We want to use the HANDLE_PRAGMA from sh.h. */
+#undef HANDLE_SYSV_PRAGMA
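The elf.h changes above introduce LOCAL_LABEL_PREFIX "." and route label output through asm_fprintf's %U/%L codes, so the USER_LABEL_PREFIX "_" is applied in one place. A small host-side sketch (hypothetical, not part of the patch) of what the new ASM_GENERATE_INTERNAL_LABEL expansion produces; the leading '*' in the generated string marks the name as already decorated, so assemble_name will not prepend the user label prefix when it is printed:

#include <stdio.h>

/* Illustrative only: mimic the new ASM_GENERATE_INTERNAL_LABEL on SH ELF.
   With LOCAL_LABEL_PREFIX "." an internal label numbered 42 becomes
   "*.L42"; the '*' tells assemble_name to emit the rest verbatim,
   without adding USER_LABEL_PREFIX ("_").  */
int
main (void)
{
  char string[32];

  sprintf (string, "*%s%s%d", ".", "L", 42);  /* LOCAL_LABEL_PREFIX, PREFIX, NUM */
  printf ("%s\n", string);                    /* prints "*.L42" */
  return 0;
}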
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 30406f66acb..f26b60042c8 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -35,6 +35,8 @@ Boston, MA 02111-1307, USA. */
#include "output.h"
#include "insn-attr.h"
+int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
+
#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
@@ -84,6 +86,10 @@ enum processor_type sh_cpu;
rtx sh_compare_op0;
rtx sh_compare_op1;
+enum machine_mode sh_addr_diff_vec_mode;
+rtx *uid_align;
+int uid_align_max;
+
/* Provides the class number of the smallest class containing
reg number. */
@@ -114,6 +120,8 @@ enum reg_class reg_class_from_letter[] =
/* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
/* y */ FPUL_REGS, /* z */ R0_REGS
};
+
+static void split_branches PROTO ((rtx));
/* Print the operand address in x to the stream. */
@@ -170,6 +178,7 @@ print_operand_address (stream, x)
according to modifier code.
'.' print a .s if insn needs delay slot
+ ',' print LOCAL_LABEL_PREFIX
'@' print trap, rte or rts depending upon pragma interruptness
'#' output a nop if there is nothing to put in the delay slot
'O' print a constant without the #
@@ -188,7 +197,10 @@ print_operand (stream, x, code)
case '.':
if (final_sequence
&& ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
- fprintf (stream, ".s");
+ fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
+ break;
+ case ',':
+ fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
break;
case '@':
if (trap_exit)
@@ -389,13 +401,12 @@ prepare_scc_operands (code)
mode = GET_MODE (sh_compare_op1);
sh_compare_op0 = force_reg (mode, sh_compare_op0);
- if (code != EQ && code != NE
- && (sh_compare_op1 != const0_rtx
- || code == GTU || code == GEU || code == LTU || code == LEU))
+ if ((code != EQ && code != NE
+ && (sh_compare_op1 != const0_rtx
+ || code == GTU || code == GEU || code == LTU || code == LEU))
+ || TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)
sh_compare_op1 = force_reg (mode, sh_compare_op1);
- /* ??? This should be `mode' not `SImode' in the compare, but that would
- require fixing the branch patterns too. */
emit_insn (gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (code, SImode, sh_compare_op0,
sh_compare_op1)));
@@ -410,20 +421,31 @@ from_compare (operands, code)
rtx *operands;
int code;
{
- if (code != EQ && code != NE)
+ enum machine_mode mode = GET_MODE (sh_compare_op0);
+ rtx insn;
+ if (mode == VOIDmode)
+ mode = GET_MODE (sh_compare_op1);
+ if (code != EQ
+ || mode == DImode
+ || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
{
- enum machine_mode mode = GET_MODE (sh_compare_op0);
- if (mode == VOIDmode)
- mode = GET_MODE (sh_compare_op1);
-
/* Force args into regs, since we can't use constants here. */
sh_compare_op0 = force_reg (mode, sh_compare_op0);
if (sh_compare_op1 != const0_rtx
- || code == GTU || code == GEU || code == LTU || code == LEU)
+ || code == GTU || code == GEU
+ || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
sh_compare_op1 = force_reg (mode, sh_compare_op1);
}
- operands[1] = sh_compare_op0;
- operands[2] = sh_compare_op1;
+ if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
+ {
+ from_compare (operands, GT);
+ insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
+ }
+ else
+ insn = gen_rtx (SET, VOIDmode,
+ gen_rtx (REG, SImode, 18),
+ gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1));
+ emit_insn (insn);
}
/* Functions to output assembly code. */
@@ -520,33 +542,54 @@ print_slot (insn)
INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
-/* We can't tell if we need a register as a scratch for the jump
- until after branch shortening, and then it's too late to allocate a
- register the 'proper' way. These instruction sequences are rare
- anyway, so to avoid always using a reg up from our limited set, we'll
- grab one when we need one on output. */
-
-/* ??? Should fix compiler so that using a clobber scratch in jump
- instructions works, and then this will be unnecessary. */
-
char *
output_far_jump (insn, op)
rtx insn;
rtx op;
{
- rtx thislab = gen_label_rtx ();
+ struct { rtx lab, reg, op; } this;
+ char *jump;
+ int far;
- /* Output the delay slot insn first if any. */
- if (dbr_sequence_length ())
- print_slot (final_sequence);
+ this.lab = gen_label_rtx ();
- output_asm_insn ("mov.l r13,@-r15", 0);
- output_asm_insn ("mov.l %O0,r13", &thislab);
- output_asm_insn ("jmp @r13", 0);
- output_asm_insn ("mov.l @r15+,r13", 0);
- output_asm_insn (".align 2", 0);
- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (thislab));
- output_asm_insn (".long %O0", &op);
+ if (braf_branch_p (insn, 0))
+ {
+ far = 0;
+ jump = "mov.w %O0,%1;braf %1";
+ }
+ else
+ {
+ far = 1;
+ jump = "mov.l %O0,%1;jmp @%1";
+ }
+ /* If we have a scratch register available, use it. */
+ if (GET_CODE (PREV_INSN (insn)) == INSN
+ && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
+ {
+ this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
+ output_asm_insn (jump, &this.lab);
+ if (dbr_sequence_length ())
+ print_slot (final_sequence);
+ else
+ output_asm_insn ("nop", 0);
+ }
+ else
+ {
+ /* Output the delay slot insn first if any. */
+ if (dbr_sequence_length ())
+ print_slot (final_sequence);
+
+ this.reg = gen_rtx (REG, SImode, 13);
+ output_asm_insn ("mov.l r13,@-r15", 0);
+ output_asm_insn (jump, &this.lab);
+ output_asm_insn ("mov.l @r15+,r13", 0);
+ }
+ if (far)
+ output_asm_insn (".align 2", 0);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
+ this.op = op;
+ output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
return "";
}
@@ -563,97 +606,100 @@ output_branch (logic, insn, operands)
rtx insn;
rtx *operands;
{
- int label = lf++;
- int length = get_attr_length (insn);
- int adjusted_length;
+ int offset
+ = (insn_addresses[INSN_UID (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0))]
+ - insn_addresses[INSN_UID (insn)]);
- /* Undo the effects of ADJUST_INSN_LENGTH, so that we get the real
- length. If NEXT_INSN (PREV_INSN (insn)) != insn, then the insn
- is inside a sequence, and ADJUST_INSN_LENGTH was not called on
- it. */
- if (PREV_INSN (insn) == NULL
- || NEXT_INSN (PREV_INSN (insn)) == insn)
+ if (offset == 260
+ && final_sequence
+ && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
{
- adjusted_length = length;
- ADJUST_INSN_LENGTH (insn, adjusted_length);
- length -= (adjusted_length - length);
+ /* The filling of the delay slot has caused a forward branch to exceed
+ its range.
+ Just emit the insn from the delay slot in front of the branch. */
+ /* The call to print_slot will clobber the operands. */
+ rtx op0 = operands[0];
+ print_slot (final_sequence);
+ operands[0] = op0;
}
-
- switch (length)
+ else if (offset < -252 || offset > 258)
{
- case 2:
- /* A branch with an unfilled delay slot. */
- case 4:
- /* Simple branch in range -252..+258 bytes */
- return logic ? "bt%. %l0" : "bf%. %l0";
+ /* This can happen when other condbranches hoist the delay slot insn
+ from their destination, thus increasing code size.
+ But the branch will still be in the range -4092..+4098 bytes. */
- case 6:
- /* A branch with an unfilled delay slot. */
- case 8:
- /* Branch in range -4092..+4098 bytes. */
- {
- /* The call to print_slot will clobber the operands. */
- rtx op0 = operands[0];
+ int label = lf++;
+ /* The call to print_slot will clobber the operands. */
+ rtx op0 = operands[0];
- /* If the instruction in the delay slot is annulled (true), then
- there is no delay slot where we can put it now. The only safe
- place for it is after the label. */
+ /* If the instruction in the delay slot is annulled (true), then
+ there is no delay slot where we can put it now. The only safe
+ place for it is after the label. final will do that by default. */
- if (final_sequence)
- {
- fprintf (asm_out_file, "\tb%c%s\tLF%d\n", logic ? 'f' : 't',
- INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
- ? "" : ".s", label);
- if (! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
- print_slot (final_sequence);
- }
- else
- fprintf (asm_out_file, "\tb%c\tLF%d\n", logic ? 'f' : 't', label);
+ if (final_sequence
+ && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
+ {
+ asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
+ ASSEMBLER_DIALECT ? "/" : ".", label);
+ print_slot (final_sequence);
+ }
+ else
+ asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
- output_asm_insn ("bra %l0", &op0);
- fprintf (asm_out_file, "\tnop\n");
- fprintf (asm_out_file, "LF%d:\n", label);
+ output_asm_insn ("bra\t%l0", &op0);
+ fprintf (asm_out_file, "\tnop\n");
+ ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label);
- if (final_sequence
- && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
- print_slot (final_sequence);
- }
return "";
+ }
+ return logic ? "bt%.\t%l0" : "bf%.\t%l0";
+}
- case 16:
- /* A branch with an unfilled delay slot. */
- case 18:
- /* Branches a long way away. */
- {
- /* The call to print_slot will clobber the operands. */
- rtx op0 = operands[0];
-
- /* If the instruction in the delay slot is annulled (true), then
- there is no delay slot where we can put it now. The only safe
- place for it is after the label. */
+int branch_offset ();
- if (final_sequence)
- {
- fprintf (asm_out_file, "\tb%c%s\tLF%d\n", logic ? 'f' : 't',
- INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
- ? "" : ".s", label);
- if (! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
- print_slot (final_sequence);
- }
- else
- fprintf (asm_out_file, "\tb%c\tLF%d\n", logic ? 'f' : 't', label);
-
- output_far_jump (insn, op0);
- fprintf (asm_out_file, "LF%d:\n", label);
+char *
+output_branchy_insn (code, template, insn, operands)
+ char *template;
+ enum rtx_code code;
+ rtx insn;
+ rtx *operands;
+{
+ rtx next_insn = NEXT_INSN (insn);
+ int label_nr;
- if (final_sequence
- && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
- print_slot (final_sequence);
- }
- return "";
+ if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
+ {
+ rtx src = SET_SRC (PATTERN (next_insn));
+ if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
+ {
+ /* Following branch not taken */
+ operands[9] = gen_label_rtx ();
+ emit_label_after (operands[9], next_insn);
+ return template;
+ }
+ else
+ {
+ int offset = branch_offset (next_insn) + 4;
+ if (offset >= -252 && offset <= 256)
+ {
+ if (GET_CODE (src) == IF_THEN_ELSE)
+ /* branch_true */
+ src = XEXP (src, 1);
+ operands[9] = src;
+ return template;
+ }
+ }
}
+ operands[9] = gen_label_rtx ();
+ emit_label_after (operands[9], insn);
+ return template;
+}
- abort ();
+char *
+output_ieee_ccmpeq (insn, operands)
+ rtx insn, *operands;
+{
+ return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
}
/* Output to FILE the start of the assembler file. */
@@ -751,20 +797,15 @@ shiftcosts (x)
/* If shift by a non constant, then this will be expensive. */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
- {
- if (TARGET_SH3)
- return 2;
- /* If not an sh3 then we don't even have an instruction for it. */
- return 20;
- }
+ return SH_DYNAMIC_SHIFT_COST;
/* Otherwise, return the true cost in instructions. */
if (GET_CODE (x) == ASHIFTRT)
{
int cost = ashiftrt_insns[value];
/* If SH3, then we put the constant in a reg and use shad. */
- if (TARGET_SH3 && cost > 3)
- cost = 3;
+ if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
+ cost = 1 + SH_DYNAMIC_SHIFT_COST;
return cost;
}
else
@@ -999,9 +1040,11 @@ expand_ashiftrt (operands)
emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
return 1;
}
- else if (ashiftrt_insns[INTVAL (operands[2])] > 3)
+ else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
+ > 1 + SH_DYNAMIC_SHIFT_COST)
{
- rtx count = force_reg (SImode, GEN_INT (- INTVAL (operands[2])));
+ rtx count
+ = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
return 1;
}
@@ -1009,7 +1052,7 @@ expand_ashiftrt (operands)
if (GET_CODE (operands[2]) != CONST_INT)
return 0;
- value = INTVAL (operands[2]);
+ value = INTVAL (operands[2]) & 31;
if (value == 31)
{
@@ -1050,6 +1093,12 @@ expand_ashiftrt (operands)
return 1;
}
+int sh_dynamicalize_shift_p (count)
+ rtx count;
+{
+ return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
+}
+
/* Try to find a good way to implement the combiner pattern
[(set (match_operand:SI 0 "register_operand" "r")
(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
@@ -1434,7 +1483,7 @@ shl_sext_kind (left_rtx, size_rtx, costp)
if (TARGET_SH3)
{
/* Try to use a dynamic shift. */
- cost = shift_insns[32 - insize] + 3;
+ cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
if (cost < best_cost)
{
kind = 0;
@@ -1645,6 +1694,8 @@ typedef struct
static pool_node pool_vector[MAX_POOL_SIZE];
static int pool_size;
+static int max_uid_before_fixup_addr_diff_vecs;
+
/* ??? If we need a constant in HImode which is the truncated value of a
constant we need in SImode, we could combine the two entries thus saving
two bytes. Is this common enough to be worth the effort of implementing
@@ -1735,7 +1786,8 @@ dump_table (scan)
scan = emit_label_after (gen_label_rtx (), scan);
scan = emit_insn_after (gen_align_4 (), scan);
}
- scan = emit_label_after (p->label, scan);
+ if (p->label)
+ scan = emit_label_after (p->label, scan);
scan = emit_insn_after (gen_consttable_4 (p->value), scan);
break;
case DFmode:
@@ -1746,7 +1798,8 @@ dump_table (scan)
scan = emit_label_after (gen_label_rtx (), scan);
scan = emit_insn_after (gen_align_4 (), scan);
}
- scan = emit_label_after (p->label, scan);
+ if (p->label)
+ scan = emit_label_after (p->label, scan);
scan = emit_insn_after (gen_consttable_8 (p->value), scan);
break;
default:
@@ -1792,7 +1845,8 @@ broken_move (insn)
order bits end up as. */
&& GET_MODE (SET_DEST (pat)) != QImode
&& CONSTANT_P (SET_SRC (pat))
- && ! (GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
+ && ! (TARGET_SH3E
+ && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
&& (fp_zero_operand (SET_SRC (pat))
|| fp_one_operand (SET_SRC (pat)))
&& GET_CODE (SET_DEST (pat)) == REG
@@ -1806,28 +1860,49 @@ broken_move (insn)
return 0;
}
+int
+cache_align_p (insn)
+ rtx insn;
+{
+ rtx pat;
+
+ if (! insn)
+ return 1;
+
+ if (GET_CODE (insn) != INSN)
+ return 0;
+
+ pat = PATTERN (insn);
+ return (GET_CODE (pat) == UNSPEC_VOLATILE
+ && XINT (pat, 1) == 1
+ && INTVAL (XVECEXP (pat, 0, 0)) == CACHE_LOG);
+}
+
+static int
+mova_p (insn)
+ rtx insn;
+{
+ return (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
+ && XINT (SET_SRC (PATTERN (insn)), 1) == 1);
+}
+
/* Find the last barrier from insn FROM which is close enough to hold the
constant pool. If we can't find one, then create one near the end of
the range. */
-/* ??? It would be good to put constant pool tables between a case jump and
- the jump table. This fails for two reasons. First, there is no
- barrier after the case jump. This is a bug in the casesi pattern.
- Second, inserting the table here may break the mova instruction that
- loads the jump table address, by moving the jump table too far away.
- We fix that problem by never outputting the constant pool between a mova
- and its label. */
-
static rtx
-find_barrier (from)
- rtx from;
+find_barrier (num_mova, mova, from)
+ int num_mova;
+ rtx mova, from;
{
int count_si = 0;
int count_hi = 0;
int found_hi = 0;
int found_si = 0;
- rtx found_barrier = 0;
- rtx found_mova = 0;
+ int leading_mova = num_mova;
+ rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
int si_limit;
int hi_limit;
@@ -1843,83 +1918,100 @@ find_barrier (from)
before the table, subtract 2 for the instruction that fills the jump
delay slot. This gives 1018. */
- /* If not optimizing, then it is possible that the jump instruction we add
- won't be shortened, and thus will have a length of 14 instead of 2.
- We must adjust the limits downwards to account for this, giving a limit
- of 1008 for SImode and 500 for HImode. */
+ /* The branch will always be shortened now that the reference address for
+ forward branches is the successor address, so we no longer need to make
+ adjustments to the [sh]i_limit for -O0. */
- if (optimize)
- {
- si_limit = 1018;
- hi_limit = 510;
- }
- else
- {
- si_limit = 1008;
- hi_limit = 500;
- }
-
- /* If not optimizing for space, then the constant pool will be
- aligned to a 4 to 16 byte boundary. We must make room for that
- alignment that by reducing the limits.
- ??? It would be better to not align the constant pool, but
- ASM_OUTPUT_ALIGN_CODE does not make any provision for basing the
- alignment on the instruction. */
-
- if (! TARGET_SMALLCODE)
- {
- if (TARGET_SH3 || TARGET_SH3E)
- {
- si_limit -= 14;
- hi_limit -= 14;
- }
- else
- {
- si_limit -= 2;
- hi_limit -= 2;
- }
- }
+ si_limit = 1018;
+ hi_limit = 510;
while (from && count_si < si_limit && count_hi < hi_limit)
{
- int inc = get_attr_length (from);
+ int inc = 0;
+
+ /* The instructions created by fixup_addr_diff_vecs have no valid length
+ info yet. They should be considered to have zero length at this point. */
+ if (INSN_UID (from) < max_uid_before_fixup_addr_diff_vecs)
+ inc = get_attr_length (from);
if (GET_CODE (from) == BARRIER)
- found_barrier = from;
+ {
+ found_barrier = from;
+ /* If we are at the end of the function, or in front of an alignment
+ instruction, we need not insert an extra alignment. We prefer
+ this kind of barrier. */
+
+ if (cache_align_p (next_real_insn (found_barrier)))
+ good_barrier = from;
+ }
if (broken_move (from))
{
- rtx pat = PATTERN (from);
- rtx src = SET_SRC (pat);
- rtx dst = SET_DEST (pat);
- enum machine_mode mode = GET_MODE (dst);
+ rtx pat, src, dst;
+ enum machine_mode mode;
+
+ pat = PATTERN (from);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ src = SET_SRC (pat);
+ dst = SET_DEST (pat);
+ mode = GET_MODE (dst);
/* We must explicitly check the mode, because sometimes the
front end will generate code to load unsigned constants into
HImode targets without properly sign extending them. */
if (mode == HImode || (mode == SImode && hi_const (src)))
{
- found_hi = 1;
+ found_hi += 2;
/* We put the short constants before the long constants, so
we must count the length of short constants in the range
for the long constants. */
/* ??? This isn't optimal, but is easy to do. */
- if (found_si)
- count_si += 2;
+ si_limit -= 2;
}
else
- found_si = 1;
+ {
+ if (found_si > count_si)
+ count_si = found_si;
+ found_si += GET_MODE_SIZE (mode);
+ if (num_mova)
+ si_limit -= GET_MODE_SIZE (mode);
+ }
}
if (GET_CODE (from) == INSN
&& GET_CODE (PATTERN (from)) == SET
&& GET_CODE (SET_SRC (PATTERN (from))) == UNSPEC
&& XINT (SET_SRC (PATTERN (from)), 1) == 1)
- found_mova = from;
+ {
+ if (! num_mova++)
+ {
+ leading_mova = 0;
+ mova = from;
+ barrier_before_mova = good_barrier ? good_barrier : found_barrier;
+ }
+ if (found_si > count_si)
+ count_si = found_si;
+ }
else if (GET_CODE (from) == JUMP_INSN
&& (GET_CODE (PATTERN (from)) == ADDR_VEC
|| GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
- found_mova = 0;
+ {
+ if (num_mova)
+ num_mova--;
+ if (cache_align_p (NEXT_INSN (next_nonnote_insn (from))))
+ {
+ /* We have just passed the barrier in front of the
+ ADDR_DIFF_VEC. Since the ADDR_DIFF_VEC is accessed
+ as data, just like our pool constants, this is a good
+ opportunity to accommodate what we have gathered so far.
+ If we waited any longer, we could end up at a barrier in
+ front of code, which gives worse cache usage for separated
+ instruction / data caches. */
+ good_barrier = found_barrier;
+ break;
+ }
+ }
if (found_si)
count_si += inc;
@@ -1928,12 +2020,42 @@ find_barrier (from)
from = NEXT_INSN (from);
}
- /* Insert the constant pool table before the mova instruction, to prevent
- the mova label reference from going out of range. */
- if (found_mova)
- from = found_mova;
+ if (num_mova)
+ if (leading_mova)
+ {
+ /* Try as we might, the leading mova is out of range. Change
+ it into a load (which will become a pcload) and retry. */
+ SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
+ INSN_CODE (mova) = -1;
+ return find_barrier (0, 0, mova);
+ }
+ else
+ {
+ /* Insert the constant pool table before the mova instruction,
+ to prevent the mova label reference from going out of range. */
+ from = mova;
+ good_barrier = found_barrier = barrier_before_mova;
+ }
- if (! found_barrier)
+ if (found_barrier)
+ {
+ /* We have previously arranged for barriers to come in pairs, with an
+ alignment instruction in between. We want to use the first
+ barrier, so that the alignment applies to the code.
+ If we are compiling for SH3 or newer, there are some cases where
+ the second barrier and the alignment don't exist yet, so
+ we have to add them. */
+ if (good_barrier)
+ found_barrier = good_barrier;
+ else if (! TARGET_SMALLCODE)
+ {
+ found_barrier
+ = emit_insn_before (gen_align_log (GEN_INT (CACHE_LOG)),
+ found_barrier);
+ found_barrier = emit_barrier_before (found_barrier);
+ }
+ }
+ else
{
/* We didn't find a barrier in time to dump our stuff,
so we'll make one. */
@@ -1961,6 +2083,11 @@ find_barrier (from)
LABEL_NUSES (label) = 1;
found_barrier = emit_barrier_after (from);
emit_label_after (label, found_barrier);
+ if (! TARGET_SMALLCODE)
+ {
+ emit_barrier_after (found_barrier);
+ emit_insn_after (gen_align_log (GEN_INT (CACHE_LOG)), found_barrier);
+ }
}
return found_barrier;
@@ -1970,7 +2097,7 @@ find_barrier (from)
positively find the register that is used to call the sfunc, and this
register is not used anywhere else in this instruction - except as the
destination of a set, return this register; else, return 0. */
-static rtx
+rtx
sfunc_uses_reg (insn)
rtx insn;
{
@@ -1986,7 +2113,7 @@ sfunc_uses_reg (insn)
for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
{
part = XVECEXP (pattern, 0, i);
- if (GET_CODE (part) == USE)
+ if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
reg_part = part;
}
if (! reg_part)
@@ -2092,6 +2219,480 @@ noncall_uses_reg (reg, insn, set)
return 0;
}
+/* Given X, a pattern of an insn or a part of it, return a mask of used
+ general registers. Bits 0..15 mean that the respective registers
+ are used as inputs in the instruction. Bits 16..31 mean that the
+ registers 0..15, respectively, are used as outputs, or are clobbered.
+ IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
+int
+regs_used (x, is_dest)
+ rtx x; int is_dest;
+{
+ enum rtx_code code;
+ char *fmt;
+ int i, used = 0;
+
+ if (! x)
+ return used;
+ code = GET_CODE (x);
+ switch (code)
+ {
+ case REG:
+ if (REGNO (x) < 16)
+ return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
+ << (REGNO (x) + is_dest));
+ return 0;
+ case SUBREG:
+ {
+ rtx y = SUBREG_REG (x);
+
+ if (GET_CODE (y) != REG)
+ break;
+ if (REGNO (y) < 16)
+ return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
+ << (REGNO (y) + SUBREG_WORD (x) + is_dest));
+ return 0;
+ }
+ case SET:
+ return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
+ case RETURN:
+ /* If there was a return value, it must have been indicated with USE. */
+ return 0x00ffff00;
+ case CLOBBER:
+ is_dest = 1;
+ break;
+ case MEM:
+ is_dest = 0;
+ break;
+ case CALL:
+ used |= 0x00ff00f0;
+ break;
+ }
+
+ fmt = GET_RTX_FORMAT (code);
+
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ register int j;
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ used |= regs_used (XVECEXP (x, i, j), is_dest);
+ }
+ else if (fmt[i] == 'e')
+ used |= regs_used (XEXP (x, i), is_dest);
+ }
+ return used;
+}
+
+/* Create an instruction that prevents redirection of a conditional branch
+ to the destination of the JUMP with address ADDR.
+ If the branch needs to be implemented as an indirect jump, try to find
+ a scratch register for it.
+ If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
+ If any preceding insn that doesn't fit into a delay slot is good enough,
+ pass 1. Pass 2 if a definite blocking insn is needed.
+ -1 is used internally to avoid deep recursion.
+ If a blocking instruction is made or recognized, return it. */
+
+static rtx
+gen_block_redirect (jump, addr, need_block)
+ rtx jump;
+ int addr, need_block;
+{
+ int dead = 0;
+ rtx prev = prev_nonnote_insn (jump);
+ rtx dest;
+
+ /* First, check if we already have an instruction that satisfies our need. */
+ if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
+ {
+ if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
+ return prev;
+ if (GET_CODE (PATTERN (prev)) == USE
+ || GET_CODE (PATTERN (prev)) == CLOBBER
+ || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
+ prev = jump;
+ else if ((need_block &= ~1) < 0)
+ return prev;
+ else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
+ need_block = 0;
+ }
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ dest = XEXP (SET_SRC (PATTERN (jump)), 0);
+ /* If the branch is out of range, try to find a scratch register for it. */
+ if (optimize
+ && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098))
+ {
+ rtx scan;
+ /* Don't look for the stack pointer as a scratch register,
+ it would cause trouble if an interrupt occurred. */
+ unsigned try = 0x7fff, used;
+ int jump_left = flag_expensive_optimizations + 1;
+
+ /* It is likely that the most recent eligible instruction is wanted for
+ the delay slot. Therefore, find out which registers it uses, and
+ try to avoid using them. */
+
+ for (scan = jump; scan = PREV_INSN (scan); )
+ {
+ enum rtx_code code;
+
+ if (INSN_DELETED_P (scan))
+ continue;
+ code = GET_CODE (scan);
+ if (code == CODE_LABEL || code == JUMP_INSN)
+ break;
+ if (code == INSN
+ && GET_CODE (PATTERN (scan)) != USE
+ && GET_CODE (PATTERN (scan)) != CLOBBER
+ && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
+ {
+ try &= ~regs_used (PATTERN (scan), 0);
+ break;
+ }
+ }
+ for (used = dead = 0, scan = JUMP_LABEL (jump); scan = NEXT_INSN (scan); )
+ {
+ enum rtx_code code;
+
+ if (INSN_DELETED_P (scan))
+ continue;
+ code = GET_CODE (scan);
+ if (GET_RTX_CLASS (code) == 'i')
+ {
+ used |= regs_used (PATTERN (scan), 0);
+ if (code == CALL_INSN)
+ used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
+ dead |= (used >> 16) & ~used;
+ if (dead & try)
+ {
+ dead &= try;
+ break;
+ }
+ if (code == JUMP_INSN)
+ if (jump_left-- && simplejump_p (scan))
+ scan = JUMP_LABEL (scan);
+ else
+ break;
+ }
+ }
+ /* Mask out the stack pointer again, in case it was
+ the only 'free' register we have found. */
+ dead &= 0x7fff;
+ }
+ /* If the immediate destination is still in range, check for possible
+ threading with a jump beyond the delay slot insn.
+ Don't check if we are called recursively; the jump has been or will be
+ checked in a different invocation then.
+
+ else if (optimize && need_block >= 0)
+ {
+ rtx next = next_active_insn (next_active_insn (dest));
+ if (next && GET_CODE (next) == JUMP_INSN
+ && GET_CODE (PATTERN (next)) == SET
+ && recog_memoized (next) == CODE_FOR_jump)
+ {
+ dest = JUMP_LABEL (next);
+ if (dest
+ && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)
+ gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1);
+ }
+ }
+
+ if (dead)
+ {
+ rtx reg = gen_rtx (REG, SImode, exact_log2 (dead & -dead));
+
+ /* It would be nice if we could convert the jump into an indirect
+ jump / far branch right now, thus exposing all constituent
+ instructions to further optimization. However, reorg uses
+ simplejump_p to determine if there is an unconditional jump where
+ it should try to schedule instructions from the target of the
+ branch; simplejump_p fails for indirect jumps even if they have
+ a JUMP_LABEL. */
+ rtx insn = emit_insn_before (gen_indirect_jump_scratch
+ (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
+ , jump);
+ INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
+ return insn;
+ }
+ else if (need_block)
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ return emit_insn_before (gen_block_branch_redirect
+ (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
+ , jump);
+ return prev;
+}
+
+#define CONDJUMP_MIN -252
+#define CONDJUMP_MAX 262
+struct far_branch
+{
+ /* A label (to be placed) in front of the jump
+ that jumps to our ultimate destination. */
+ rtx near_label;
+ /* Where we are going to insert it if we cannot move the jump any farther,
+ or the jump itself if we have picked up an existing jump. */
+ rtx insert_place;
+ /* The ultimate destination. */
+ rtx far_label;
+ struct far_branch *prev;
+ /* If the branch has already been created, its address;
+ else the address of its first prospective user. */
+ int address;
+};
+
+enum mdep_reorg_phase_e mdep_reorg_phase;
+void
+gen_far_branch (bp)
+ struct far_branch *bp;
+{
+ rtx insn = bp->insert_place;
+ rtx jump;
+ rtx label = gen_label_rtx ();
+
+ emit_label_after (label, insn);
+ if (bp->far_label)
+ {
+ jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
+ LABEL_NUSES (bp->far_label)++;
+ }
+ else
+ jump = emit_jump_insn_after (gen_return (), insn);
+ emit_label_after (bp->near_label, insn);
+ JUMP_LABEL (jump) = bp->far_label;
+ if (! invert_jump (insn, label))
+ abort ();
+ /* Prevent reorg from undoing our splits. */
+ gen_block_redirect (jump, bp->address += 2, 2);
+}
+
+static void
+fixup_aligns ()
+{
+ rtx insn = get_last_insn ();
+ rtx align_tab[MAX_BITS_PER_WORD];
+ int i;
+
+ for (i = CACHE_LOG; i >= 0; i--)
+ align_tab[i] = insn;
+ bzero ((char *) uid_align, uid_align_max * sizeof *uid_align);
+ for (; insn; insn = PREV_INSN (insn))
+ {
+ int uid = INSN_UID (insn);
+ if (uid < uid_align_max)
+ uid_align[uid] = align_tab[1];
+ if (GET_CODE (insn) == INSN)
+ {
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1)
+ {
+ /* Found an alignment instruction. */
+ int log = INTVAL (XVECEXP (pat, 0, 0));
+ uid_align[uid] = align_tab[log];
+ for (i = log - 1; i >= 0; i--)
+ align_tab[i] = insn;
+ }
+ }
+ else if (GET_CODE (insn) == JUMP_INSN
+ && GET_CODE (PATTERN (insn)) == SET)
+ {
+ rtx dest = SET_SRC (PATTERN (insn));
+ if (GET_CODE (dest) == IF_THEN_ELSE)
+ dest = XEXP (dest, 1);
+ if (GET_CODE (dest) == LABEL_REF)
+ {
+ dest = XEXP (dest, 0);
+ if (! uid_align[INSN_UID (dest)])
+ /* Mark backward branch. */
+ uid_align[uid] = 0;
+ }
+ }
+ }
+}
+
+/* Fix up ADDR_DIFF_VECs. */
+void
+fixup_addr_diff_vecs (first)
+ rtx first;
+{
+ rtx insn;
+ int max_address;
+ int need_fixup_aligns = 0;
+
+ if (optimize)
+ max_address = insn_addresses[INSN_UID (get_last_insn ())] + 2;
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx vec_lab, rel_lab, pat, min_lab, max_lab, adj;
+ int len, i, min, max, size;
+
+ if (GET_CODE (insn) != JUMP_INSN
+ || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+ continue;
+ pat = PATTERN (insn);
+ rel_lab = vec_lab = XEXP (XEXP (pat, 0), 0);
+ if (TARGET_SH2)
+ {
+ rtx prev, prevpat, x;
+
+ /* Search the matching casesi_jump_2. */
+ for (prev = vec_lab; ; prev = PREV_INSN (prev))
+ {
+ if (GET_CODE (prev) != JUMP_INSN)
+ continue;
+ prevpat = PATTERN (prev);
+ if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
+ continue;
+ x = XVECEXP (prevpat, 0, 1);
+ if (GET_CODE (x) != USE)
+ continue;
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
+ break;
+ }
+ /* Fix up the ADDR_DIFF_VEC to be relative
+ to the reference address of the braf. */
+ XEXP (XEXP (pat, 0), 0)
+ = rel_lab = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
+ }
+ if (! optimize)
+ continue;
+ len = XVECLEN (pat, 1);
+ if (len <= 0)
+ abort ();
+ for (min = max_address, max = 0, i = len - 1; i >= 0; i--)
+ {
+ rtx lab = XEXP (XVECEXP (pat, 1, i), 0);
+ int addr = insn_addresses[INSN_UID (lab)];
+ if (addr < min)
+ {
+ min = addr;
+ min_lab = lab;
+ }
+ if (addr > max)
+ {
+ max = addr;
+ max_lab = lab;
+ }
+ }
+ adj
+ = emit_insn_before (gen_addr_diff_vec_adjust (min_lab, max_lab, rel_lab,
+ GEN_INT (len)), vec_lab);
+ size = (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
+ - addr_diff_vec_adjust (adj, 0));
+ /* If this is a very small table, we want to remove the alignment after
+ the table. */
+ if (! TARGET_SMALLCODE && size <= 1 << (CACHE_LOG - 2))
+ {
+ rtx align = NEXT_INSN (next_nonnote_insn (insn));
+ PUT_CODE (align, NOTE);
+ NOTE_LINE_NUMBER (align) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (align) = 0;
+ need_fixup_aligns = 1;
+ }
+ }
+ if (need_fixup_aligns)
+ fixup_aligns ();
+}
+
+/* Say how much the ADDR_DIFF_VEC following INSN can be shortened.
+ If FIRST_PASS is nonzero, the addresses and lengths of the following
+ insns are still uninitialized. */
+int
+addr_diff_vec_adjust (insn, first_pass)
+ rtx insn;
+ int first_pass;
+{
+ rtx pat = PATTERN (insn);
+ rtx min_lab = XEXP (XVECEXP (pat, 0, 0), 0);
+ rtx max_lab = XEXP (XVECEXP (pat, 0, 1), 0);
+ rtx rel_lab = XEXP (XVECEXP (pat, 0, 2), 0);
+ int len = INTVAL (XVECEXP (pat, 0, 3));
+ int addr, min_addr, max_addr, saving, prev_saving = 0, offset;
+ rtx align_insn = uid_align[INSN_UID (rel_lab)];
+ int standard_size = TARGET_BIGTABLE ? 4 : 2;
+ int last_size = GET_MODE_SIZE ( GET_MODE(pat));
+ int align_fuzz = 0;
+
+ if (! insn_addresses)
+ return 0;
+ if (first_pass)
+ /* If optimizing, we may start off with an optimistic guess. */
+ return optimize ? len & ~1 : 0;
+ addr = insn_addresses[INSN_UID (rel_lab)];
+ min_addr = insn_addresses[INSN_UID (min_lab)];
+ max_addr = insn_addresses[INSN_UID (max_lab)];
+ if (! last_size)
+ last_size = standard_size;
+ if (TARGET_SH2)
+ prev_saving = ((standard_size - last_size) * len) & ~1;
+
+ /* The savings are linear in the vector length. However, if we have an
+ odd saving, we need one byte again to reinstate 16 bit alignment. */
+ saving = ((standard_size - 1) * len) & ~1;
+ offset = prev_saving - saving;
+
+ if ((insn_addresses[INSN_UID (align_insn)] < max_addr
+ || (insn_addresses[INSN_UID (align_insn)] == max_addr
+ && next_real_insn (max_lab) != align_insn))
+ && GET_CODE (align_insn) == INSN)
+ {
+ int align = 1 << INTVAL (XVECEXP (PATTERN (align_insn), 0, 0));
+ int align_addr = insn_addresses[INSN_UID (align_insn)];
+ if (align_addr > insn_addresses[INSN_UID (insn)])
+ {
+ int old_offset = offset;
+ offset = (align_addr - 1 & align - 1) + offset & -align;
+ align_addr += old_offset;
+ }
+ align_fuzz += (align_addr - 1) & (align - 2);
+ align_insn = uid_align[INSN_UID (align_insn)];
+ if (insn_addresses[INSN_UID (align_insn)] <= max_addr
+ && GET_CODE (align_insn) == INSN)
+ {
+ int align2 = 1 << INTVAL (XVECEXP (PATTERN (align_insn), 0, 0));
+ align_addr = insn_addresses[INSN_UID (align_insn)];
+ if (align_addr > insn_addresses[INSN_UID (insn)])
+ {
+ int old_offset = offset;
+ offset = (align_addr - 1 & align2 - 1) + offset & -align2;
+ align_addr += old_offset;
+ }
+ align_fuzz += (align_addr - 1) & (align2 - align);
+ }
+ }
+
+ if (min_addr >= addr
+ && max_addr + offset - addr + align_fuzz <= 255)
+ {
+ PUT_MODE (pat, QImode);
+ return saving;
+ }
+ saving = 2 * len;
+/* Since alignment might play a role in min_addr if it is smaller than addr,
+ we may not use it without exact alignment compensation; a 'worst case'
+ estimate is not good enough, because it won't prevent infinite oscillation
+ of shorten_branches.
+ ??? We should fix that eventually, but the code to deal with alignments
+ should go in a new function. */
+#if 0
+ if (TARGET_BIGTABLE && min_addr - ((1 << CACHE_LOG) - 2) - addr >= -32768
+#else
+ if (TARGET_BIGTABLE && (min_addr >= addr || addr <= 32768)
+#endif
+ && max_addr - addr <= 32767 + saving - prev_saving)
+ {
+ PUT_MODE (pat, HImode);
+ return saving;
+ }
+ PUT_MODE (pat, TARGET_BIGTABLE ? SImode : HImode);
+ return 0;
+}
+
/* Exported to toplev.c.
Do a final pass over the function, just before delayed branch
@@ -2101,7 +2702,10 @@ void
machine_dependent_reorg (first)
rtx first;
{
- rtx insn;
+ rtx insn, mova;
+ int num_mova;
+ rtx r0_rtx = gen_rtx (REG, Pmode, 0);
+ rtx r0_inc_rtx = gen_rtx (POST_INC, Pmode, r0_rtx);
/* If relaxing, generate pseudo-ops to associate function calls with
the symbols they call. It does no harm to not generate these
@@ -2109,6 +2713,7 @@ machine_dependent_reorg (first)
linker to potentially relax the jsr to a bsr, and eliminate the
register load and, possibly, the constant pool entry. */
+ mdep_reorg_phase = SH_INSERT_USES_LABELS;
if (TARGET_RELAX)
{
/* Remove all REG_LABEL notes. We want to use them for our own
@@ -2330,21 +2935,178 @@ machine_dependent_reorg (first)
}
}
+ /* The following processing passes need length information.
+ addr_diff_vec_adjust needs to know if insn_addresses is valid. */
+ insn_addresses = 0;
+
+ /* If not optimizing for space, we want extra alignment for code after
+ a barrier, so that it starts on a word / cache line boundary.
+ We used to emit the alignment for the barrier itself and associate the
+ instruction length with the following instruction, but that had two
+ problems:
+ i) A code label that follows directly after a barrier gets too low an
+ address. When there is a forward branch to it, the incorrect distance
+ calculation can lead to out of range branches. That happened with
+ compile/920625-2 -O -fomit-frame-pointer in copyQueryResult.
+ ii) barriers before constant tables get the extra alignment too.
+ That is just a waste of space.
+
+ So what we do now is to insert align_* instructions after the
+ barriers. By doing that before literal tables are generated, we
+ don't have to care about these. */
+ /* We also want alignment in front of ADDR_DIFF_VECs; this is done already
+ by ASM_OUTPUT_CASE_LABEL, but when optimizing, we have to make it
+ explicit in the RTL in order to correctly shorten branches. */
+
+ if (optimize)
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx addr_diff_vec;
+
+ if (GET_CODE (insn) == BARRIER
+ && (addr_diff_vec = next_real_insn (insn)))
+ if (GET_CODE (PATTERN (addr_diff_vec)) == ADDR_DIFF_VEC)
+ emit_insn_before (gen_align_4 (),
+ XEXP (XEXP (PATTERN (addr_diff_vec), 0), 0));
+ else if (TARGET_SMALLCODE)
+ continue;
+ else if (TARGET_SH3)
+ {
+ /* We align for an entire cache line. If there is an immediately
+ preceding branch to the insn beyond the barrier, it does not
+ make sense to insert the align, because we are more likely
+ to discard useful information from the current cache line
+ when doing the align than to fetch unneeded insns when not. */
+ rtx prev = prev_real_insn (prev_real_insn (insn));
+ int slot, credit;
+
+ for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2;
+ credit >= 0 && prev && GET_CODE (prev) == INSN;
+ prev = prev_real_insn (prev))
+ {
+ if (GET_CODE (PATTERN (prev)) == USE
+ || GET_CODE (PATTERN (prev)) == CLOBBER)
+ continue;
+ if (slot &&
+ get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
+ slot = 0;
+ credit -= get_attr_length (prev);
+ }
+ if (! prev || GET_CODE (prev) != JUMP_INSN
+ || (next_real_insn (JUMP_LABEL (prev))
+ != next_real_insn (insn))
+ || (credit - slot
+ < (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0)))
+ {
+ insn = emit_insn_after (gen_align_log (GEN_INT (CACHE_LOG)),
+ insn);
+ insn = emit_barrier_after (insn);
+ }
+ }
+ else
+ {
+ insn = emit_insn_after (gen_align_4 (), insn);
+ insn = emit_barrier_after (insn);
+ }
+ else if (TARGET_SMALLCODE)
+ continue;
+ else if (GET_CODE (insn) == NOTE
+ && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
+ {
+ rtx next = next_nonnote_insn (insn);
+ if (next && GET_CODE (next) == CODE_LABEL)
+ emit_insn_after (gen_align_4 (), insn);
+ }
+ }
+
+ /* If TARGET_IEEE, we might have to split some branches before fixup_aligns.
+ If optimizing, the double call to shorten_branches will split insns twice,
+ unless we now split everything that is to be split and delete the original insns. */
+ if (TARGET_IEEE || optimize)
+ for (insn = NEXT_INSN (first); insn; insn = NEXT_INSN (insn))
+ if (GET_RTX_CLASS (GET_CODE (insn)) == 'i' && ! INSN_DELETED_P (insn))
+ {
+ rtx old = insn;
+ insn = try_split (PATTERN (insn), insn, 1);
+ if (INSN_DELETED_P (old))
+ {
+ PUT_CODE (old, NOTE);
+ NOTE_LINE_NUMBER (old) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (old) = 0;
+ }
+ }
+
+ max_uid_before_fixup_addr_diff_vecs = get_max_uid ();
+
+ if (optimize)
+ {
+ uid_align_max = get_max_uid ();
+ uid_align = (rtx *) alloca (uid_align_max * sizeof *uid_align);
+ fixup_aligns ();
+ mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
+ shorten_branches (first);
+ }
+ fixup_addr_diff_vecs (first);
/* Scan the function looking for move instructions which have to be
changed to pc-relative loads and insert the literal tables. */
- for (insn = first; insn; insn = NEXT_INSN (insn))
+ mdep_reorg_phase = SH_FIXUP_PCLOAD;
+ for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
{
+ if (mova_p (insn))
+ {
+ if (! num_mova++)
+ mova = insn;
+ }
+ else if (GET_CODE (insn) == JUMP_INSN
+ && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
+ && num_mova)
+ {
+ rtx scan;
+ int total;
+
+ num_mova--;
+
+ /* Some code might have been inserted between the mova and
+ its ADDR_DIFF_VEC. Check if the mova is still in range. */
+ for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
+ if (INSN_UID (scan) < max_uid_before_fixup_addr_diff_vecs)
+ total += get_attr_length (scan);
+
+ /* The range of mova is 1020; add 4 because pc counts from the address of
+ the second instruction after this one, subtract 2 in case pc is 2
+ byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
+ cancels out with alignment effects of the mova itself. */
+ if (total > 1022)
+ {
+ /* Change the mova into a load, and restart scanning
+ there. broken_move will then return true for mova. */
+ SET_SRC (PATTERN (mova))
+ = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
+ INSN_CODE (mova) = -1;
+ insn = mova;
+ }
+ }
if (broken_move (insn))
{
rtx scan;
/* Scan ahead looking for a barrier to stick the constant table
behind. */
- rtx barrier = find_barrier (insn);
+ rtx barrier = find_barrier (num_mova, mova, insn);
+ rtx last_float_move, last_float = 0, *last_float_addr;
+ if (num_mova && ! mova_p (mova))
+ {
+ /* find_barrier had to change the first mova into a
+ pcload; thus, we have to start with this new pcload. */
+ insn = mova;
+ num_mova = 0;
+ }
/* Now find all the moves between the points and modify them. */
for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
{
+ if (GET_CODE (scan) == CODE_LABEL)
+ last_float = 0;
if (broken_move (scan))
{
rtx *patp = &PATTERN (scan), pat = *patp;
@@ -2373,17 +3135,306 @@ machine_dependent_reorg (first)
dst = gen_rtx (REG, HImode, REGNO (dst) + offset);
}
- lab = add_constant (src, mode);
- newsrc = gen_rtx (MEM, mode,
- gen_rtx (LABEL_REF, VOIDmode, lab));
+ if (GET_CODE (dst) == REG
+ && ((REGNO (dst) >= FIRST_FP_REG
+ && REGNO (dst) <= LAST_FP_REG)
+ || REGNO (dst) == FPUL_REG))
+ {
+ if (last_float
+ && reg_set_between_p (r0_rtx, last_float_move, scan))
+ last_float = 0;
+ lab = add_constant (src, mode, last_float);
+ if (lab)
+ emit_insn_before (gen_mova (lab), scan);
+ else
+ *last_float_addr = r0_inc_rtx;
+ last_float_move = scan;
+ last_float = src;
+ newsrc = gen_rtx (MEM, mode,
+ (REGNO (dst) == FPUL_REG
+ ? r0_inc_rtx
+ : r0_rtx));
+ last_float_addr = &XEXP (newsrc, 0);
+ }
+ else
+ {
+ lab = add_constant (src, mode, 0);
+ newsrc = gen_rtx (MEM, mode,
+ gen_rtx (LABEL_REF, VOIDmode, lab));
+ }
RTX_UNCHANGING_P (newsrc) = 1;
*patp = gen_rtx (SET, VOIDmode, dst, newsrc);
INSN_CODE (scan) = -1;
}
}
dump_table (barrier);
+ insn = barrier;
}
}
+
+ mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
+ insn_addresses = 0;
+ split_branches (first);
+
+ /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
+ also has an effect on the register that holds the address of the sfunc.
+ Insert an extra dummy insn in front of each sfunc that pretends to
+ use this register. */
+ if (flag_delayed_branch)
+ {
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx reg = sfunc_uses_reg (insn);
+
+ if (! reg)
+ continue;
+ emit_insn_before (gen_use_sfunc_addr (reg), insn);
+ }
+ }
+ mdep_reorg_phase = SH_AFTER_MDEP_REORG;
+}
+
+int
+get_dest_uid (label, max_uid)
+ rtx label;
+ int max_uid;
+{
+ rtx dest = next_real_insn (label);
+ int dest_uid;
+ if (! dest)
+ /* This can happen for an undefined label. */
+ return 0;
+ dest_uid = INSN_UID (dest);
+ /* If this is a newly created branch redirection blocking instruction,
+ we cannot index the branch_uid or insn_addresses arrays with its
+ uid. But then, we won't need to, because the actual destination is
+ the following branch. */
+ while (dest_uid >= max_uid)
+ {
+ dest = NEXT_INSN (dest);
+ dest_uid = INSN_UID (dest);
+ }
+ if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
+ return 0;
+ return dest_uid;
+}
+
+/* Split condbranches that are out of range. Also add clobbers for
+ scratch registers that are needed in far jumps.
+ We do this before delay slot scheduling, so that it can take our
+ newly created instructions into account. It also allows us to
+ find branches with common targets more easily. */
+
+static void
+split_branches (first)
+ rtx first;
+{
+ rtx insn;
+ struct far_branch **uid_branch, *far_branch_list = 0;
+ int max_uid = get_max_uid ();
+
+ /* Find out which branches are out of range. */
+ uid_align_max = get_max_uid ();
+ uid_align = (rtx *) alloca (uid_align_max * sizeof *uid_align);
+ fixup_aligns ();
+ shorten_branches (first);
+
+ uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
+ bzero ((char *) uid_branch, max_uid * sizeof *uid_branch);
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+ continue;
+ else if (INSN_DELETED_P (insn))
+ {
+ /* Shorten_branches would split this instruction again,
+ so transform it into a note. */
+ PUT_CODE (insn, NOTE);
+ NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (insn) = 0;
+ }
+ else if (GET_CODE (insn) == JUMP_INSN
+ /* Don't mess with ADDR_DIFF_VEC */
+ && (GET_CODE (PATTERN (insn)) == SET
+ || GET_CODE (PATTERN (insn)) == RETURN))
+ {
+ enum attr_type type = get_attr_type (insn);
+ if (type == TYPE_CBRANCH)
+ {
+ rtx next, beyond;
+
+ if (get_attr_length (insn) > 4)
+ {
+ rtx src = SET_SRC (PATTERN (insn));
+ rtx cond = XEXP (src, 0);
+ rtx olabel = XEXP (XEXP (src, 1), 0);
+ rtx jump;
+ int addr = insn_addresses[INSN_UID (insn)];
+ rtx label = 0;
+ int dest_uid = get_dest_uid (olabel, max_uid);
+ struct far_branch *bp = uid_branch[dest_uid];
+
+ /* redirect_jump needs a valid JUMP_LABEL, and it might delete
+ the label if the LABEL_NUSES count drops to zero. There is
+ always a jump_optimize pass that sets these values, but it
+ proceeds to delete unreferenced code, and then, if not
+ optimizing, to un-delete the deleted instructions, thus
+ leaving labels with too low use counts. */
+ if (! optimize)
+ {
+ JUMP_LABEL (insn) = olabel;
+ LABEL_NUSES (olabel)++;
+ }
+ if (! bp)
+ {
+ bp = (struct far_branch *) alloca (sizeof *bp);
+ uid_branch[dest_uid] = bp;
+ bp->prev = far_branch_list;
+ far_branch_list = bp;
+ bp->far_label
+ = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
+ LABEL_NUSES (bp->far_label)++;
+ }
+ else
+ {
+ label = bp->near_label;
+ if (! label && bp->address - addr >= CONDJUMP_MIN)
+ {
+ rtx block = bp->insert_place;
+
+ if (GET_CODE (PATTERN (block)) == RETURN)
+ block = PREV_INSN (block);
+ else
+ block = gen_block_redirect (block,
+ bp->address, 2);
+ label = emit_label_after (gen_label_rtx (),
+ PREV_INSN (block));
+ bp->near_label = label;
+ }
+ else if (label && ! NEXT_INSN (label))
+ if (addr + 2 - bp->address <= CONDJUMP_MAX)
+ bp->insert_place = insn;
+ else
+ gen_far_branch (bp);
+ }
+ if (! label
+ || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)
+ {
+ bp->near_label = label = gen_label_rtx ();
+ bp->insert_place = insn;
+ bp->address = addr;
+ }
+ if (! redirect_jump (insn, label))
+ abort ();
+ }
+ else
+ {
+ /* get_attr_length (insn) == 2 */
+ /* Check if we have a pattern where reorg wants to redirect
+ the branch to a label from an unconditional branch that
+ is too far away. */
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ beyond
+ = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
+ 0));
+
+ if ((GET_CODE (beyond) == JUMP_INSN
+ || (GET_CODE (beyond = next_active_insn (beyond))
+ == JUMP_INSN))
+ && GET_CODE (PATTERN (beyond)) == SET
+ && recog_memoized (beyond) == CODE_FOR_jump
+ && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))]
+ - insn_addresses[INSN_UID (insn)] + 252U)
+ > 252 + 258 + 2))
+ gen_block_redirect (beyond,
+ insn_addresses[INSN_UID (beyond)], 1);
+ }
+
+ next = next_active_insn (insn);
+
+ if ((GET_CODE (next) == JUMP_INSN
+ || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
+ && GET_CODE (PATTERN (next)) == SET
+ && recog_memoized (next) == CODE_FOR_jump
+ && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))]
+ - insn_addresses[INSN_UID (insn)] + 252U)
+ > 252 + 258 + 2))
+ gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1);
+ }
+ else if (type == TYPE_JUMP || type == TYPE_RETURN)
+ {
+ int addr = insn_addresses[INSN_UID (insn)];
+ rtx far_label = 0;
+ int dest_uid = 0;
+ struct far_branch *bp;
+
+ if (type == TYPE_JUMP)
+ {
+ far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
+ dest_uid = get_dest_uid (far_label, max_uid);
+ if (! dest_uid)
+ {
+ /* Parse errors can lead to labels outside
+ the insn stream. */
+ if (! NEXT_INSN (far_label))
+ continue;
+
+ if (! optimize)
+ {
+ JUMP_LABEL (insn) = far_label;
+ LABEL_NUSES (far_label)++;
+ }
+ redirect_jump (insn, NULL_RTX);
+ far_label = 0;
+ }
+ }
+ bp = uid_branch[dest_uid];
+ if (! bp)
+ {
+ bp = (struct far_branch *) alloca (sizeof *bp);
+ uid_branch[dest_uid] = bp;
+ bp->prev = far_branch_list;
+ far_branch_list = bp;
+ bp->near_label = 0;
+ bp->far_label = far_label;
+ if (far_label)
+ LABEL_NUSES (far_label)++;
+ }
+ else if (bp->near_label && ! NEXT_INSN (bp->near_label))
+ if (addr - bp->address <= CONDJUMP_MAX)
+ emit_label_after (bp->near_label, PREV_INSN (insn));
+ else
+ {
+ gen_far_branch (bp);
+ bp->near_label = 0;
+ }
+ else
+ bp->near_label = 0;
+ bp->address = addr;
+ bp->insert_place = insn;
+ if (! far_label)
+ emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
+ else
+ gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
+ }
+ }
+ /* Generate all pending far branches,
+ and free our references to the far labels. */
+ while (far_branch_list)
+ {
+ if (far_branch_list->near_label
+ && ! NEXT_INSN (far_branch_list->near_label))
+ gen_far_branch (far_branch_list);
+ if (optimize
+ && far_branch_list->far_label
+ && ! --LABEL_NUSES (far_branch_list->far_label))
+ delete_insn (far_branch_list->far_label);
+ far_branch_list = far_branch_list->prev;
+ }
+ uid_align_max = get_max_uid ();
+ uid_align = (rtx *) oballoc (uid_align_max * sizeof *uid_align);
+ fixup_aligns ();
}
/* Dump out instruction addresses, which is useful for debugging the
@@ -2545,10 +3596,11 @@ push (rn)
rtx x;
if ((rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
|| rn == FPUL_REG)
- x = emit_insn (gen_push_e (gen_rtx (REG, SFmode, rn)));
+ x = gen_push_e (gen_rtx (REG, SFmode, rn));
else
- x = emit_insn (gen_push (gen_rtx (REG, SImode, rn)));
+ x = gen_push (gen_rtx (REG, SImode, rn));
+ x = emit_insn (x);
REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
}
@@ -2562,16 +3614,16 @@ pop (rn)
rtx x;
if ((rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
|| rn == FPUL_REG)
- x = emit_insn (gen_pop_e (gen_rtx (REG, SFmode, rn)));
+ x = gen_pop_e (gen_rtx (REG, SFmode, rn));
else
- x = emit_insn (gen_pop (gen_rtx (REG, SImode, rn)));
+ x = gen_pop (gen_rtx (REG, SImode, rn));
+ x = emit_insn (x);
REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
}
-/* Generate code to push the regs specified in the mask, and return
- the number of bytes the insns take. */
+/* Generate code to push the regs specified in the mask. */
static void
push_regs (mask, mask2)
@@ -2579,16 +3631,21 @@ push_regs (mask, mask2)
{
int i;
+ /* Push PR last; this gives better latencies after the prologue, and
+ candidates for the return delay slot when there are no general
+ registers pushed. */
for (i = 0; i < 32; i++)
- if (mask & (1 << i))
+ if (mask & (1 << i) && i != PR_REG)
push (i);
for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
if (mask2 & (1 << (i - 32)))
push (i);
+ if (mask & (1 << PR_REG))
+ push (PR_REG);
}
/* Work out the registers which need to be saved, both as a mask and a
- count.
+ count of saved words.
If doing a pragma interrupt function, then push all regs used by the
function, and if we call another function (we can tell by looking at PR),
@@ -2601,40 +3658,28 @@ calc_live_regs (count_ptr, live_regs_mask2)
{
int reg;
int live_regs_mask = 0;
- int count = 0;
+ int count;
*live_regs_mask2 = 0;
- for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
+ for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
{
- if (pragma_interrupt && ! pragma_trapa)
+ if ((pragma_interrupt && ! pragma_trapa)
+ ? (/* Need to save all the regs ever live. */
+ (regs_ever_live[reg]
+ || (call_used_regs[reg]
+ && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
+ && regs_ever_live[PR_REG]))
+ && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
+ && reg != RETURN_ADDRESS_POINTER_REGNUM
+ && reg != T_REG && reg != GBR_REG)
+ : (/* Only push those regs which are used and need to be saved. */
+ regs_ever_live[reg] && ! call_used_regs[reg]))
{
- /* Need to save all the regs ever live. */
- if ((regs_ever_live[reg]
- || (call_used_regs[reg]
- && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
- && regs_ever_live[PR_REG]))
- && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
- && reg != RETURN_ADDRESS_POINTER_REGNUM
- && reg != T_REG && reg != GBR_REG)
- {
- if (reg >= 32)
- *live_regs_mask2 |= 1 << (reg - 32);
- else
- live_regs_mask |= 1 << reg;
- count++;
- }
- }
- else
- {
- /* Only push those regs which are used and need to be saved. */
- if (regs_ever_live[reg] && ! call_used_regs[reg])
- {
- if (reg >= 32)
- *live_regs_mask2 |= 1 << (reg - 32);
- else
- live_regs_mask |= (1 << reg);
- count++;
- }
+ if (reg >= 32)
+ *live_regs_mask2 |= 1 << (reg - 32);
+ else
+ live_regs_mask |= 1 << reg;
+ count++;
}
}
@@ -2650,7 +3695,6 @@ sh_expand_prologue ()
int live_regs_mask;
int d, i;
int live_regs_mask2;
- live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
/* We have pretend args if we had an object sent partially in registers
and partially on the stack, e.g. a large structure. */
@@ -2667,7 +3711,7 @@ sh_expand_prologue ()
/* This is not used by the SH3E calling convention */
if (!TARGET_SH3E)
- {
+ {
/* Push arg regs as if they'd been provided by caller in stack. */
for (i = 0; i < NPARM_REGS(SImode); i++)
{
@@ -2679,13 +3723,14 @@ sh_expand_prologue ()
push (rn);
extra_push += 4;
}
- }
+ }
}
/* If we're supposed to switch stacks at function entry, do so now. */
if (sp_switch)
emit_insn (gen_sp_switch_1 ());
+ live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
push_regs (live_regs_mask, live_regs_mask2);
output_stack_adjust (-get_frame_size (), stack_pointer_rtx, 3);
@@ -2701,7 +3746,6 @@ sh_expand_epilogue ()
int d, i;
int live_regs_mask2;
- live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
if (frame_pointer_needed)
{
@@ -2726,10 +3770,13 @@ sh_expand_epilogue ()
/* Pop all the registers. */
+ live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
+ if (live_regs_mask & (1 << PR_REG))
+ pop (PR_REG);
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
{
int j = (FIRST_PSEUDO_REGISTER - 1) - i;
- if (j < 32 && (live_regs_mask & (1 << j)))
+ if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG)
pop (j);
else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
pop (j);
@@ -2794,17 +3841,19 @@ sh_builtin_saveregs (arglist)
named args need not be saved.
We explicitly build a pointer to the buffer because it halves the insn
count when not optimizing (otherwise the pointer is built for each reg
- saved). */
+ saved).
+ We emit the moves in reverse order so that we can use predecrement. */
fpregs = gen_reg_rtx (Pmode);
emit_move_insn (fpregs, XEXP (regbuf, 0));
- for (regno = first_floatreg; regno < NPARM_REGS (SFmode); regno ++)
- emit_move_insn (gen_rtx (MEM, SFmode,
- plus_constant (fpregs,
- GET_MODE_SIZE (SFmode)
- * (regno - first_floatreg))),
- gen_rtx (REG, SFmode,
- BASE_ARG_REG (SFmode) + regno));
+ emit_insn (gen_addsi3 (fpregs, fpregs,
+ GEN_INT (n_floatregs * UNITS_PER_WORD)));
+ for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
+ emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
+ gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno));
+ }
/* Return the address of the regbuf. */
return XEXP (regbuf, 0);
@@ -2821,9 +3870,11 @@ initial_elimination_offset (from, to)
int regs_saved;
int total_saved_regs_space;
int total_auto_space = get_frame_size ();
+ int save_flags = target_flags;
int live_regs_mask, live_regs_mask2;
live_regs_mask = calc_live_regs (&regs_saved, &live_regs_mask2);
+ target_flags = save_flags;
total_saved_regs_space = (regs_saved) * 4;
@@ -2840,13 +3891,10 @@ initial_elimination_offset (from, to)
if (from == RETURN_ADDRESS_POINTER_REGNUM
&& (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
{
- int i, n = 0;
- for (i = PR_REG+1; i < 32; i++)
+ int i, n = total_saved_regs_space;
+ for (i = PR_REG-1; i >= 0; i--)
if (live_regs_mask & (1 << i))
- n += 4;
- for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
- if (live_regs_mask2 & (1 << (i - 32)))
- n += 4;
+ n -= 4;
return n + total_auto_space;
}
@@ -3121,6 +4169,205 @@ fp_one_operand (op)
REAL_VALUE_FROM_CONST_DOUBLE (r, op);
return REAL_VALUES_EQUAL (r, dconst1);
}
+
+int
+braf_label_ref_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ rtx prev;
+
+ if (GET_CODE (op) != LABEL_REF)
+ return 0;
+ prev = prev_real_insn (XEXP (op, 0));
+ if (GET_CODE (prev) != JUMP_INSN)
+ return 0;
+ prev = PATTERN (prev);
+ if (GET_CODE (prev) != PARALLEL || XVECLEN (prev, 0) != 2)
+ return 0;
+ prev = XVECEXP (prev, 0, 0);
+ if (GET_CODE (prev) != SET)
+ return 0;
+ prev = SET_SRC (prev);
+ if (GET_CODE (prev) != PLUS || XEXP (prev, 1) != op)
+ return 0;
+  return 1;
+}
+
+/* Return the offset of a branch. Offsets for backward branches are
+ reported relative to the branch instruction, while offsets for forward
+ branches are reported relative to the following instruction. */
+
+int
+branch_offset (branch)
+ rtx branch;
+{
+ rtx dest = SET_SRC (PATTERN (branch)), dest_next;
+ int branch_uid = INSN_UID (branch);
+ int dest_uid, dest_addr;
+ rtx branch_align = uid_align[branch_uid];
+
+ if (GET_CODE (dest) == IF_THEN_ELSE)
+ dest = XEXP (dest, 1);
+ dest = XEXP (dest, 0);
+ dest_uid = INSN_UID (dest);
+ dest_addr = insn_addresses[dest_uid];
+ if (branch_align)
+ {
+ /* Forward branch. */
+ /* If branch is in a sequence, get the successor of the sequence. */
+ rtx next = NEXT_INSN (NEXT_INSN (PREV_INSN (branch)));
+ int next_addr = insn_addresses[INSN_UID (next)];
+ int diff;
+
+      /* If NEXT has been hoisted in a sequence further on, its address has
+ been clobbered in the previous pass. However, if that is the case,
+ we know that it is exactly 2 bytes long (because it fits in a delay
+ slot), and that there is a following label (the destination of the
+ instruction that filled its delay slot with NEXT). The address of
+ this label is reliable. */
+ if (NEXT_INSN (next))
+ {
+ int next_next_addr = insn_addresses[INSN_UID (NEXT_INSN (next))];
+ if (next_addr > next_next_addr)
+ next_addr = next_next_addr - 2;
+ }
+ diff = dest_addr - next_addr;
+ /* If BRANCH_ALIGN has been the last insn, it might be a barrier or
+ a note. */
+ if ((insn_addresses[INSN_UID (branch_align)] < dest_addr
+ || (insn_addresses[INSN_UID (branch_align)] == dest_addr
+ && next_real_insn (dest) != branch_align))
+ && GET_CODE (branch_align) == INSN)
+ {
+ int align = 1 << INTVAL (XVECEXP (PATTERN (branch_align), 0, 0));
+ int align_addr = insn_addresses[INSN_UID (branch_align)];
+ diff += (align_addr - 1) & (align - 2);
+ branch_align = uid_align[INSN_UID (branch_align)];
+ if (insn_addresses[INSN_UID (branch_align)] <= dest_addr
+ && GET_CODE (branch_align) == INSN)
+ {
+ int align2 = 1 << INTVAL (XVECEXP (PATTERN (branch_align), 0, 0));
+ align_addr = insn_addresses[INSN_UID (branch_align)];
+ diff += (align_addr - 1) & (align2 - align);
+ }
+ }
+ return diff;
+ }
+ else
+ {
+ /* Backward branch. */
+ int branch_addr = insn_addresses[branch_uid];
+ int diff = dest_addr - branch_addr;
+ int old_align = 2;
+
+ while (dest_uid >= uid_align_max || ! uid_align[dest_uid])
+ {
+ /* Label might be outside the insn stream, or even in a separate
+	     insn stream, after a syntax error. */
+ if (! NEXT_INSN (dest))
+ return 0;
+ dest = NEXT_INSN (dest), dest_uid = INSN_UID (dest);
+ }
+
+ /* By searching for a known destination, we might already have
+ stumbled on the alignment instruction. */
+ if (GET_CODE (dest) == INSN
+ && GET_CODE (PATTERN (dest)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (dest), 1) == 1
+ && INTVAL (XVECEXP (PATTERN (dest), 0, 0)) > 1)
+ branch_align = dest;
+ else
+ branch_align = uid_align[dest_uid];
+ while (insn_addresses[INSN_UID (branch_align)] <= branch_addr
+ && GET_CODE (branch_align) == INSN)
+ {
+ int align = 1 << INTVAL (XVECEXP (PATTERN (branch_align), 0, 0));
+ int align_addr = insn_addresses[INSN_UID (branch_align)];
+ diff -= (align_addr - 1) & (align - old_align);
+ old_align = align;
+ branch_align = uid_align[INSN_UID (branch_align)];
+ }
+ return diff;
+ }
+}
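A purely illustrative example of the convention stated above (the addresses are assumptions, and the alignment corrections the function also applies are ignored): a forward conditional branch at address 0x1000 whose following instruction sits at 0x1002 and whose target label sits at 0x10f0 reports an offset of 0x10f0 - 0x1002 = 238, while a backward branch at 0x1000 to a label at 0x0f80 reports 0x0f80 - 0x1000 = -128.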
+
+int
+short_cbranch_p (branch)
+ rtx branch;
+{
+ int offset;
+
+ if (! insn_addresses)
+ return 0;
+ if (mdep_reorg_phase <= SH_FIXUP_PCLOAD)
+ return 0;
+ offset = branch_offset (branch);
+ return (offset >= -252
+ && offset <= (NEXT_INSN (PREV_INSN (branch)) == branch ? 256 : 254));
+}
+
+/* The maximum range used for SImode constant pool entries is 1018. A final
+ instruction can add 8 bytes while only being 4 bytes in size, thus we
+ can have a total of 1022 bytes in the pool. Add 4 bytes for a branch
+ instruction around the pool table, 2 bytes of alignment before the table,
+ and 30 bytes of alignment after the table. That gives a maximum total
+ pool size of 1058 bytes.
+ Worst case code/pool content size ratio is 1:2 (using asms).
+ Thus, in the worst case, there is one instruction in front of a maximum
+ sized pool, and then there are 1052 bytes of pool for every 508 bytes of
+ code. For the last n bytes of code, there are 2n + 36 bytes of pool.
+ If we have a forward branch, the initial table will be put after the
+ unconditional branch.
+
+ ??? We could do much better by keeping track of the actual pcloads within
+ the branch range and in the pcload range in front of the branch range. */
+
+int
+med_branch_p (branch, condlen)
+ rtx branch;
+ int condlen;
+{
+ int offset;
+
+ if (! insn_addresses)
+ return 0;
+ offset = branch_offset (branch);
+ if (mdep_reorg_phase <= SH_FIXUP_PCLOAD)
+ return offset - condlen >= -990 && offset <= 998;
+ return offset - condlen >= -4092 && offset <= 4094;
+}
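Spelling out the arithmetic from the comment above: 1018 bytes of reachable entries plus the 8 - 4 = 4 extra bytes the final instruction can add gives 1022 bytes of pool contents, and 1022 + 4 (branch around the table) + 2 (alignment before) + 30 (alignment after) = 1058 bytes for the whole table.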
+
+int
+braf_branch_p (branch, condlen)
+ rtx branch;
+ int condlen;
+{
+ int offset;
+
+ if (! insn_addresses)
+ return 0;
+ if (! TARGET_SH2)
+ return 0;
+ offset = branch_offset (branch);
+ if (mdep_reorg_phase <= SH_FIXUP_PCLOAD)
+ return offset - condlen >= -10330 && offset <= 10330;
+  return offset - condlen >= -32764 && offset <= 32766;
+}
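Taken together, these three predicates drive the branch length selection added to sh.md further down in this patch. A minimal sketch of how a length choice falls out of them (illustrative only; the variable `len` and the surrounding code are not part of the patch, the byte counts mirror the cbranch "length" attribute below):

    /* Sketch only -- not part of this patch.  `insn' is a conditional
       branch and insn_addresses is valid.  */
    int len;

    if (short_cbranch_p (insn))
      len = 2;		/* bt/bf reaches the target directly.  */
    else if (med_branch_p (insn, 2))
      len = 6;		/* invert the condition, branch over a bra.  */
    else if (braf_branch_p (insn, 2))
      len = 10;		/* branch over a braf (TARGET_SH2 only).  */
    else
      len = 16;		/* load the target address from a constant pool.  */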
+
+int
+align_length (insn)
+ rtx insn;
+{
+ int align = 1 << INTVAL (XVECEXP (PATTERN (insn), 0, 0));
+  if (! insn_addresses)
+    {
+      if (optimize
+	  && (mdep_reorg_phase == SH_SHORTEN_BRANCHES0
+	      || mdep_reorg_phase == SH_SHORTEN_BRANCHES1))
+	return 0;
+      else
+	return align - 2;
+    }
+ return align - 2 - ((insn_addresses[INSN_UID (insn)] - 2) & (align - 2));
+}
/* Return non-zero if REG is not used after INSN.
We assume REG is a reload reg, and therefore does
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index a08c125cfb7..ef6aef3786e 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -804,9 +804,16 @@ struct sh_args {
This macro is only used in this file. */
#define PASS_IN_REG_P(CUM, MODE, TYPE) \
- (ROUND_REG ((CUM), (MODE)) < NPARM_REGS (MODE) \
- && ((TYPE) == 0 || ! TREE_ADDRESSABLE ((tree)(TYPE))) \
- && (! TARGET_SH3E || (ROUND_REG((CUM), (MODE)) + (GET_MODE_SIZE(MODE)/4) <= NPARM_REGS (MODE))))
+ (((TYPE) == 0 || ! TREE_ADDRESSABLE ((tree)(TYPE))) \
+ && (TARGET_SH3E \
+ ? ((MODE) == BLKmode \
+ ? (((CUM).arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD \
+ + int_size_in_bytes (TYPE)) \
+ <= NPARM_REGS (SImode) * UNITS_PER_WORD) \
+ : ((ROUND_REG((CUM), (MODE)) \
+ + HARD_REGNO_NREGS (BASE_ARG_REG (MODE), (MODE))) \
+ <= NPARM_REGS (MODE))) \
+ : ROUND_REG ((CUM), (MODE)) < NPARM_REGS (MODE)))
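A worked instance of the new SH3E test (assuming UNITS_PER_WORD is 4 and NPARM_REGS (SImode) is 4, i.e. r4..r7): with CUM.arg_count[(int) SH_ARG_INT] == 3 and an 8-byte BLKmode aggregate, 3 * 4 + 8 = 20 > 16, so the argument goes entirely on the stack; a 4-byte aggregate would give 12 + 4 = 16 <= 16 and would still be passed in a register.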
/* Define where to put the arguments to a function.
Value is zero to push the argument on the stack,
@@ -1755,9 +1762,14 @@ sh_valid_machine_decl_attribute (DECL, ATTRIBUTES, IDENTIFIER, ARGS)
(LENGTH) = align_length (X); \
if (GET_CODE (X) == JUMP_INSN \
&& GET_CODE (PATTERN (X)) == ADDR_DIFF_VEC) \
- /* The code before an ADDR_DIFF_VEC is even aligned, thus \
- any odd estimate is wrong. */ \
- (LENGTH) &= ~1;
+ { \
+ /* The code before an ADDR_DIFF_VEC is even aligned, \
+ thus any odd estimate is wrong. */ \
+ (LENGTH) &= ~1; \
+ /* If not optimizing, the alignment is implicit. */ \
+ if (! optimize) \
+ (LENGTH) += 2; \
+ }
/* Enable a bug fix for the shorten_branches pass. */
#define SHORTEN_WITH_ADJUST_INSN_LENGTH
@@ -1859,4 +1871,7 @@ do { \
/* For the sake of libgcc2.c, indicate target supports atexit. */
#define HAVE_ATEXIT
+/* Enable the register move pass to improve code. */
+#define ENABLE_REGMOVE_PASS
+
#define SH_DYNAMIC_SHIFT_COST (TARGET_SH3 ? (TARGET_SMALLCODE ? 1 : 2) : 20)
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 1b0e35d6ff7..1faad932426 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -69,67 +69,86 @@
;; Target CPU.
-(define_attr "cpu" "sh0,sh1,sh2,sh3,sh3e"
+(define_attr "cpu"
+ "sh1,sh2,sh3,sh3e"
(const (symbol_ref "sh_cpu_attr")))
+(define_attr "endian" "big,little"
+ (const (if_then_else (symbol_ref "TARGET_LITTLE_ENDIAN")
+ (const_string "little") (const_string "big"))))
+
;; cbranch conditional branch instructions
;; jump unconditional jumps
;; arith ordinary arithmetic
+;; arith3	a compound insn that behaves similarly to a sequence of
+;; three insns of type arith
+;; arith3b like above, but might end with a redirected branch
;; load from memory
+;; load_si Likewise, SImode variant for general register.
;; store to memory
;; move register to register
+;; fmove register to register, floating point
;; smpy word precision integer multiply
;; dmpy longword or doublelongword precision integer multiply
;; return rts
;; pload load of pr reg, which can't be put into delay slot of rts
;; pstore store of pr reg, which can't be put into delay slot of jsr
;; pcload pc relative load of constant value
+;; pcload_si Likewise, SImode variant for general register.
;; rte return from exception
;; sfunc special function call with known used registers
;; call function call
;; fp floating point
;; fdiv floating point divide (or square root)
+;; gp_fpul move between general purpose register and fpul
(define_attr "type"
- "cbranch,jump,arith,other,load,store,move,smpy,dmpy,return,pload,pstore,pcload,rte,sfunc,call,fp,fdiv"
+ "cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,other,load,load_si,store,move,fmove,smpy,dmpy,return,pload,pstore,pcload,pcload_si,rte,sfunc,call,fp,fdiv,gp_fpul"
(const_string "other"))
; If a conditional branch destination is within -252..258 bytes away
; from the instruction it can be 2 bytes long. Something in the
; range -4090..4100 bytes can be 6 bytes long. All other conditional
-; branches are 16 bytes long.
+; branches are initially assumed to be 16 bytes long.
+; In machine_dependent_reorg, we split all branches that are longer than
+; 2 bytes.
; An unconditional jump in the range -4092..4098 can be 2 bytes long.
-; Otherwise, it must be 14 bytes long.
+; For wider ranges, we need a combination of a code and a data part.
+; If we can get a scratch register for a long range jump, the code
+; part can be 4 bytes long; otherwise, it must be 8 bytes long.
+; If the jump is in the range -32764..32770, the data part can be 2 bytes
+; long; otherwise, it must be 6 bytes long.
; All other instructions are two bytes long by default.
-; All positive offsets have an adjustment added, which is the number of bytes
-; difference between this instruction length and the next larger instruction
-; length. This is because shorten_branches starts with the largest
-; instruction size and then tries to reduce them.
-
(define_attr "length" ""
(cond [(eq_attr "type" "cbranch")
- (if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -252))
- (le (minus (match_dup 0) (pc))
- (const_int 262)))
- (const_int 2)
- (if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -4090))
- (le (minus (match_dup 0) (pc))
- (const_int 4110)))
- (const_int 6)
- (const_int 16)))
-
+ (cond [(ne (symbol_ref "short_cbranch_p (insn)") (const_int 0))
+ (const_int 2)
+ (ne (symbol_ref "med_branch_p (insn, 2)") (const_int 0))
+ (const_int 6)
+ (ne (symbol_ref "braf_branch_p (insn, 2)") (const_int 0))
+ (const_int 10)
+ (ne (pc) (pc))
+ (const_int 12)
+ ] (const_int 16))
(eq_attr "type" "jump")
- (if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -4092))
- (le (minus (match_dup 0) (pc))
- (const_int 4110)))
- (const_int 2)
- (const_int 14))
+ (cond [(ne (symbol_ref "med_branch_p (insn, 0)") (const_int 0))
+ (const_int 2)
+ (and (eq (symbol_ref "GET_CODE (PREV_INSN (insn))")
+ (symbol_ref "INSN"))
+ (eq (symbol_ref "INSN_CODE (PREV_INSN (insn))")
+ (symbol_ref "code_for_indirect_jump_scratch")))
+ (if_then_else (ne (symbol_ref "braf_branch_p (insn, 0)")
+ (const_int 0))
+ (const_int 6)
+ (const_int 10))
+ (ne (symbol_ref "braf_branch_p (insn, 0)") (const_int 0))
+ (const_int 10)
+ (ne (pc) (pc))
+ (const_int 12)
+ ] (const_int 14))
] (const_int 2)))
;; (define_function_unit {name} {num-units} {n-users} {test}
@@ -140,16 +159,41 @@
;; gcc to separate load and store instructions by one instruction,
;; which makes it more likely that the linker will be able to word
;; align them when relaxing.
+
+;; Loads have a latency of two.
+;; However, call insns can have a delay slot, so that we want one more
+;; insn to be scheduled between the load of the function address and the call.
+;; This is equivalent to a latency of three.
+;; We cannot use a conflict list for this, because we need to distinguish
+;; between the actual call address and the function arguments.
+;; ADJUST_COST can only properly handle reductions of the cost, so we
+;; use a latency of three here.
+;; We only do this for SImode loads of general registers, to make the work
+;; for ADJUST_COST easier.
+(define_function_unit "memory" 1 0
+ (eq_attr "type" "load_si,pcload_si")
+ 3 2)
(define_function_unit "memory" 1 0
- (eq_attr "type" "load,pcload,pload,store,pstore") 2 2)
+ (eq_attr "type" "load,pcload,pload,store,pstore")
+ 2 2)
+
+(define_function_unit "int" 1 0
+ (eq_attr "type" "arith3,arith3b") 3 3)
+
+(define_function_unit "int" 1 0
+ (eq_attr "type" "dyn_shift") 2 2)
+
+(define_function_unit "int" 1 0
+ (eq_attr "type" "arith,arith3b,dyn_shift") 2 2)
;; ??? These are approximations.
(define_function_unit "mpy" 1 0 (eq_attr "type" "smpy") 2 2)
(define_function_unit "mpy" 1 0 (eq_attr "type" "dmpy") 3 3)
-(define_function_unit "fp" 1 0 (eq_attr "type" "fp") 2 1)
+(define_function_unit "fp" 1 0 (eq_attr "type" "fp,fmove") 2 1)
(define_function_unit "fp" 1 0 (eq_attr "type" "fdiv") 13 12)
+
; Definitions for filling branch delay slots.
(define_attr "needs_delay_slot" "yes,no" (const_string "no"))
@@ -161,7 +205,7 @@
(define_attr "in_delay_slot" "yes,no"
(cond [(eq_attr "type" "cbranch") (const_string "no")
- (eq_attr "type" "pcload") (const_string "no")
+ (eq_attr "type" "pcload,pcload_si") (const_string "no")
(eq_attr "needs_delay_slot" "yes") (const_string "no")
(eq_attr "length" "2") (const_string "yes")
] (const_string "no")))
@@ -197,14 +241,9 @@
;; Say that we have annulled true branches, since this gives smaller and
;; faster code when branches are predicted as not taken.
-;; ??? Branches which are out-of-range actually have two delay slots,
-;; the first is either always executed or else annulled false, and the
-;; second is always annulled false. Handling these differently from
-;; in range branches would give better code.
-
(define_delay
(and (eq_attr "type" "cbranch")
- (eq_attr "cpu" "sh2,sh3"))
+ (ne (symbol_ref "TARGET_SH2") (const_int 0)))
[(eq_attr "in_delay_slot" "yes") (eq_attr "in_delay_slot" "yes") (nil)])
;; -------------------------------------------------------------------------
@@ -212,13 +251,6 @@
;; -------------------------------------------------------------------------
(define_insn ""
- [(set (match_operand:SI 0 "arith_reg_operand" "=r")
- (eq:SI (reg:SI 18)
- (const_int 1)))]
- ""
- "movt %0")
-
-(define_insn ""
[(set (reg:SI 18)
(eq:SI (and:SI (match_operand:SI 0 "arith_reg_operand" "z,r")
(match_operand:SI 1 "arith_operand" "L,r"))
@@ -288,6 +320,93 @@
}")
;; -------------------------------------------------------------------------
+;; DImode signed integer comparisons
+;; -------------------------------------------------------------------------
+
+;; ??? Could get better scheduling by splitting the initial test from the
+;; rest of the insn after reload. However, the gain would hardly justify
+;; the sh.md size increase necessary to do that.
+
+(define_insn ""
+ [(set (reg:SI 18)
+ (eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_operand" "r"))
+ (const_int 0)))]
+ ""
+ "* return output_branchy_insn (EQ, \"tst\\t%S1,%S0\;bf\\t%l9\;tst\\t%R1,%R0\",
+ insn, operands);"
+ [(set_attr "length" "6")
+ (set_attr "type" "arith3b")])
+
+(define_insn "cmpeqdi_t"
+ [(set (reg:SI 18) (eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))]
+ ""
+ "*
+ return output_branchy_insn
+ (EQ,
+ (which_alternative
+ ? \"cmp/eq\\t%S1,%S0\;bf\\t%l9\;cmp/eq\\t%R1,%R0\"
+ : \"tst\\t%S0,%S0\;bf\\t%l9\;tst\\t%R0,%R0\"),
+ insn, operands);"
+ [(set_attr "length" "6")
+ (set_attr "type" "arith3b")])
+
+(define_insn "cmpgtdi_t"
+ [(set (reg:SI 18) (gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
+ "TARGET_SH2"
+ "@
+ cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/gt\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:
+ tst\\t%S0,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/pl\\t%S0\;cmp/hi\\t%S0,%R0\\n%,Ldi%=:"
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+(define_insn "cmpgedi_t"
+ [(set (reg:SI 18) (ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
+ "TARGET_SH2"
+ "@
+ cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/ge\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:
+ cmp/pz\\t%S0"
+ [(set_attr "length" "8,2")
+ (set_attr "type" "arith3,arith")])
+
+;; -------------------------------------------------------------------------
+;; DImode unsigned integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn "cmpgeudi_t"
+ [(set (reg:SI 18) (geu:SI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+ "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hs\\t%S1,%S0\;cmp/hs\\t%R1,%R0\\n%,Ldi%=:"
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+(define_insn "cmpgtudi_t"
+ [(set (reg:SI 18) (gtu:SI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+ "cmp/eq\\t%S1,%S0\;bf{.|/}s\\t%,Ldi%=\;cmp/hi\\t%S1,%S0\;cmp/hi\\t%R1,%R0\\n%,Ldi%=:"
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+;; We save the compare operands in the cmpxx patterns and use them when
+;; we generate the branch.
+
+(define_expand "cmpdi"
+ [(set (reg:SI 18) (compare (match_operand:DI 0 "arith_operand" "")
+ (match_operand:DI 1 "arith_operand" "")))]
+ "TARGET_SH2"
+ "
+{
+ sh_compare_op0 = operands[0];
+ sh_compare_op1 = operands[1];
+ DONE;
+}")
+
+;; -------------------------------------------------------------------------
;; Addition instructions
;; -------------------------------------------------------------------------
@@ -299,9 +418,50 @@
(match_operand:DI 2 "arith_reg_operand" "r")))
(clobber (reg:SI 18))]
""
- "clrt\;addc %R2,%R0\;addc %S2,%S0"
+ "#"
[(set_attr "length" "6")])
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "%0")
+ (match_operand:DI 2 "arith_reg_operand" "r")))
+ (clobber (reg:SI 18))]
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]);
+ high0 = gen_rtx (REG, SImode,
+ true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ high2 = gen_rtx (REG, SImode,
+ true_regnum (operands[2]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ emit_insn (gen_clrt ());
+ emit_insn (gen_addc (low0, low0, gen_lowpart (SImode, operands[2])));
+ emit_insn (gen_addc1 (high0, high0, high2));
+ DONE;
+}")
+
+(define_insn "addc"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI 18)))
+ (set (reg:SI 18)
+ (ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
+ ""
+ "addc %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "addc1"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI 18)))
+ (clobber (reg:SI 18))]
+ ""
+ "addc %2,%0"
+ [(set_attr "type" "arith")])
+
(define_insn "addsi3"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(plus:SI (match_operand:SI 1 "arith_operand" "%0")
@@ -322,9 +482,50 @@
(match_operand:DI 2 "arith_reg_operand" "r")))
(clobber (reg:SI 18))]
""
- "clrt\;subc %R2,%R0\;subc %S2,%S0"
+ "#"
[(set_attr "length" "6")])
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (minus:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (match_operand:DI 2 "arith_reg_operand" "r")))
+ (clobber (reg:SI 18))]
+ "reload_completed"
+ [(const_int 0)]
+ "
+{
+ rtx high0, high2, low0 = gen_lowpart (SImode, operands[0]);
+ high0 = gen_rtx (REG, SImode,
+ true_regnum (operands[0]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ high2 = gen_rtx (REG, SImode,
+ true_regnum (operands[2]) + (TARGET_LITTLE_ENDIAN ? 1 : 0));
+ emit_insn (gen_clrt ());
+ emit_insn (gen_subc (low0, low0, gen_lowpart (SImode, operands[2])));
+ emit_insn (gen_subc1 (high0, high0, high2));
+ DONE;
+}")
+
+(define_insn "subc"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI 18)))
+ (set (reg:SI 18)
+ (gtu:SI (minus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
+ ""
+ "subc %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "subc1"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI 18)))
+ (clobber (reg:SI 18))]
+ ""
+ "subc %2,%0"
+ [(set_attr "type" "arith")])
+
(define_insn "*subsi3_internal"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
@@ -360,6 +561,17 @@
;; We take advantage of the library routines which don't clobber as many
;; registers as a normal function call would.
+;; The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
+;; also has an effect on the register that holds the address of the sfunc.
+;; To make this work, we have an extra dummy insn that shows the use
+;; of this register for reorg.
+
+(define_insn "use_sfunc_addr"
+ [(set (reg:SI 17) (unspec [(match_operand:SI 0 "register_operand" "r")] 5))]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
;; We must use a pseudo-reg forced to reg 0 in the SET_DEST rather than
;; hard register 0. If we used hard register 0, then the next instruction
;; would be a move from hard register 0 to a pseudo-reg. If the pseudo-reg
@@ -369,14 +581,14 @@
;; If we let reload allocate r0, then this problem can never happen.
(define_insn ""
- [(set (match_operand:SI 1 "register_operand" "=z")
+ [(set (match_operand:SI 0 "register_operand" "=z")
(udiv:SI (reg:SI 4) (reg:SI 5)))
(clobber (reg:SI 18))
(clobber (reg:SI 17))
(clobber (reg:SI 4))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
""
- "jsr @%0%#"
+ "jsr @%1%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -395,16 +607,16 @@
"operands[3] = gen_reg_rtx(SImode);")
(define_insn ""
- [(set (match_operand:SI 1 "register_operand" "=z")
+ [(set (match_operand:SI 0 "register_operand" "=z")
(div:SI (reg:SI 4) (reg:SI 5)))
(clobber (reg:SI 18))
(clobber (reg:SI 17))
(clobber (reg:SI 1))
(clobber (reg:SI 2))
(clobber (reg:SI 3))
- (use (match_operand:SI 0 "arith_reg_operand" "r"))]
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
""
- "jsr @%0%#"
+ "jsr @%1%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
@@ -676,7 +888,8 @@
(ior:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
(match_operand:SI 2 "logical_operand" "r,L")))]
""
- "or %2,%0")
+ "or %2,%0"
+ [(set_attr "type" "arith")])
(define_insn "xorsi3"
[(set (match_operand:SI 0 "arith_reg_operand" "=z,r")
@@ -697,7 +910,8 @@
(set (reg:SI 18)
(lshiftrt:SI (match_dup 1) (const_int 31)))]
""
- "rotl %0")
+ "rotl %0"
+ [(set_attr "type" "arith")])
(define_insn "rotlsi3_31"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
@@ -705,14 +919,16 @@
(const_int 31)))
(clobber (reg:SI 18))]
""
- "rotr %0")
+ "rotr %0"
+ [(set_attr "type" "arith")])
-(define_insn ""
+(define_insn "rotlsi3_16"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(rotate:SI (match_operand:SI 1 "arith_reg_operand" "r")
(const_int 16)))]
""
- "swap.w %1,%0")
+ "swap.w %1,%0"
+ [(set_attr "type" "arith")])
(define_expand "rotlsi3"
[(set (match_operand:SI 0 "arith_reg_operand" "")
@@ -721,29 +937,62 @@
""
"
{
+ static char rot_tab[] = {
+ 000, 000, 000, 000, 000, 000, 010, 001,
+ 001, 001, 011, 013, 003, 003, 003, 003,
+ 003, 003, 003, 003, 003, 013, 012, 002,
+ 002, 002, 010, 000, 000, 000, 000, 000,
+ };
+
+ int count, choice;
+
if (GET_CODE (operands[2]) != CONST_INT)
FAIL;
-
- if (INTVAL (operands[2]) == 1)
- {
- emit_insn (gen_rotlsi3_1 (operands[0], operands[1]));
- DONE;
- }
- else if (INTVAL (operands[2]) == 31)
+ count = INTVAL (operands[2]);
+ choice = rot_tab[count];
+ if (choice & 010 && SH_DYNAMIC_SHIFT_COST <= 1)
+ FAIL;
+ choice &= 7;
+ switch (choice)
{
- emit_insn (gen_rotlsi3_31 (operands[0], operands[1]));
- DONE;
+ case 0:
+ emit_move_insn (operands[0], operands[1]);
+ count -= (count & 16) * 2;
+ break;
+ case 3:
+ emit_insn (gen_rotlsi3_16 (operands[0], operands[1]));
+ count -= 16;
+ break;
+ case 1:
+ case 2:
+ {
+ rtx parts[2];
+ parts[0] = gen_reg_rtx (SImode);
+ parts[1] = gen_reg_rtx (SImode);
+ emit_insn (gen_rotlsi3_16 (parts[2-choice], operands[1]));
+ parts[choice-1] = operands[1];
+ emit_insn (gen_ashlsi3 (parts[0], parts[0], GEN_INT (8)));
+ emit_insn (gen_lshrsi3 (parts[1], parts[1], GEN_INT (8)));
+ emit_insn (gen_iorsi3 (operands[0], parts[0], parts[1]));
+ count = (count & ~16) - 8;
+ }
}
- else if (INTVAL (operands[2]) != 16)
- FAIL;
+
+ for (; count > 0; count--)
+ emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
+ for (; count < 0; count++)
+ emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
+
+ DONE;
}")
-(define_insn ""
+(define_insn "*rotlhi3_8"
[(set (match_operand:HI 0 "arith_reg_operand" "=r")
(rotate:HI (match_operand:HI 1 "arith_reg_operand" "r")
(const_int 8)))]
""
- "swap.b %1,%0")
+ "swap.b %1,%0"
+ [(set_attr "type" "arith")])
(define_expand "rotlhi3"
[(set (match_operand:HI 0 "arith_reg_operand" "")
@@ -764,7 +1013,8 @@
(ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
(match_operand:SI 2 "arith_reg_operand" "r")))]
"TARGET_SH3"
- "shld %2,%0")
+ "shld %2,%0"
+ [(set_attr "type" "dyn_shift")])
(define_insn "ashlsi3_k"
[(set (match_operand:SI 0 "arith_reg_operand" "=r,r")
@@ -773,7 +1023,8 @@
"CONST_OK_FOR_K (INTVAL (operands[2]))"
"@
add %0,%0
- shll%O2 %0")
+ shll%O2 %0"
+ [(set_attr "type" "arith")])
(define_insn "ashlhi3_k"
[(set (match_operand:HI 0 "arith_reg_operand" "=r,r")
@@ -782,14 +1033,15 @@
"CONST_OK_FOR_K (INTVAL (operands[2]))"
"@
add %0,%0
- shll%O2 %0")
+ shll%O2 %0"
+ [(set_attr "type" "arith")])
(define_insn "ashlsi3_n"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
(match_operand:SI 2 "const_int_operand" "n")))
(clobber (reg:SI 18))]
- ""
+ "! sh_dynamicalize_shift_p (operands[2])"
"#"
[(set (attr "length")
(cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1))
@@ -822,6 +1074,9 @@
""
"
{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && sh_dynamicalize_shift_p (operands[2]))
+ operands[2] = force_reg (SImode, operands[2]);
if (TARGET_SH3 && arith_reg_operand (operands[2], GET_MODE (operands[2])))
{
emit_insn (gen_ashlsi3_d (operands[0], operands[1], operands[2]));
@@ -888,27 +1143,59 @@
(ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r")
(const_int 16)))]
""
- "swap.w %1,%0\;exts.w %0,%0"
+ "#"
[(set_attr "length" "4")])
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16)))]
+ ""
+ [(set (match_dup 0) (rotate:SI (match_dup 1) (const_int 16)))
+ (set (match_dup 0) (sign_extend:SI (match_dup 2)))]
+ "operands[2] = gen_lowpart (HImode, operands[0]);")
+
;; ??? This should be a define expand.
(define_insn "ashrsi2_31"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
- (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
- (const_int 31)))
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 31)))
(clobber (reg:SI 18))]
""
- "@
- shll %0\;subc %0,%0"
+ "#"
[(set_attr "length" "4")])
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 31)))
+ (clobber (reg:SI 18))]
+ ""
+ [(const_int 0)]
+ "
+{
+ emit_insn (gen_ashlsi_c (operands[0], operands[1], operands[1]));
+ emit_insn (gen_subc1 (operands[0], operands[0], operands[0]));
+ DONE;
+}")
+
+(define_insn "ashlsi_c"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (const_int 1)))
+ (set (reg:SI 18) (lt:SI (match_operand:SI 2 "arith_reg_operand" "0")
+ (const_int 0)))]
+ ""
+ "shll %0"
+ [(set_attr "type" "arith")])
+
(define_insn "ashrsi3_d"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
(neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
"TARGET_SH3"
- "shad %2,%0")
+ "shad %2,%0"
+ [(set_attr "type" "dyn_shift")])
(define_insn "ashrsi3_n"
[(set (reg:SI 4)
@@ -937,7 +1224,8 @@
(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
(neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
"TARGET_SH3"
- "shld %2,%0")
+ "shld %2,%0"
+ [(set_attr "type" "dyn_shift")])
;; Only the single bit shift clobbers the T bit.
@@ -947,7 +1235,8 @@
(match_operand:SI 2 "const_int_operand" "M")))
(clobber (reg:SI 18))]
"CONST_OK_FOR_M (INTVAL (operands[2]))"
- "shlr %0")
+ "shlr %0"
+ [(set_attr "type" "arith")])
(define_insn "lshrsi3_k"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
@@ -955,7 +1244,8 @@
(match_operand:SI 2 "const_int_operand" "K")))]
"CONST_OK_FOR_K (INTVAL (operands[2]))
&& ! CONST_OK_FOR_M (INTVAL (operands[2]))"
- "shlr%O2 %0")
+ "shlr%O2 %0"
+ [(set_attr "type" "arith")])
(define_insn "lshrhi3_m"
[(set (match_operand:HI 0 "arith_reg_operand" "=r")
@@ -963,7 +1253,8 @@
(match_operand:HI 2 "const_int_operand" "M")))
(clobber (reg:SI 18))]
"CONST_OK_FOR_M (INTVAL (operands[2]))"
- "shlr %0")
+ "shlr %0"
+ [(set_attr "type" "arith")])
(define_insn "lshrhi3_k"
[(set (match_operand:HI 0 "arith_reg_operand" "=r")
@@ -971,14 +1262,15 @@
(match_operand:HI 2 "const_int_operand" "K")))]
"CONST_OK_FOR_K (INTVAL (operands[2]))
&& ! CONST_OK_FOR_M (INTVAL (operands[2]))"
- "shlr%O2 %0")
+ "shlr%O2 %0"
+ [(set_attr "type" "arith")])
(define_insn "lshrsi3_n"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
(match_operand:SI 2 "const_int_operand" "n")))
(clobber (reg:SI 18))]
- ""
+ "! sh_dynamicalize_shift_p (operands[2])"
"#"
[(set (attr "length")
(cond [(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 1))
@@ -1011,6 +1303,9 @@
""
"
{
+ if (GET_CODE (operands[2]) == CONST_INT
+ && sh_dynamicalize_shift_p (operands[2]))
+ operands[2] = force_reg (SImode, operands[2]);
if (TARGET_SH3 && arith_reg_operand (operands[2], GET_MODE (operands[2])))
{
rtx count = copy_to_mode_reg (SImode, operands[2]);
@@ -1060,7 +1355,8 @@
(clobber (reg:SI 18))]
""
"shll %R0\;rotcl %S0"
- [(set_attr "length" "4")])
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
(define_expand "ashldi3"
[(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
@@ -1080,7 +1376,8 @@
(clobber (reg:SI 18))]
""
"shlr %S0\;rotcr %R0"
- [(set_attr "length" "4")])
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
(define_expand "lshrdi3"
[(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
@@ -1100,7 +1397,8 @@
(clobber (reg:SI 18))]
""
"shar %S0\;rotcr %R0"
- [(set_attr "length" "4")])
+ [(set_attr "length" "4")
+ (set_attr "type" "arith")])
(define_expand "ashrdi3"
[(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
@@ -1306,7 +1604,8 @@
(lshiftrt:SI (match_operand:SI 2 "arith_reg_operand" "0")
(const_int 16))))]
""
- "xtrct %1,%0")
+ "xtrct %1,%0"
+ [(set_attr "type" "arith")])
(define_insn "xtrct_right"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
@@ -1315,7 +1614,8 @@
(ashift:SI (match_operand:SI 2 "arith_reg_operand" "r")
(const_int 16))))]
""
- "xtrct %2,%0")
+ "xtrct %2,%0"
+ [(set_attr "type" "arith")])
;; -------------------------------------------------------------------------
;; Unary arithmetic
@@ -1400,13 +1700,8 @@
;; ??? This should be a define expand.
;; ??? Or perhaps it should be dropped?
-(define_insn "extendsidi2"
- [(set (match_operand:DI 0 "arith_reg_operand" "=r")
- (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
- (clobber (reg:SI 18))]
- ""
- "mov %1,%S0\;mov %1,%R0\;shll %S0\;subc %S0,%S0"
- [(set_attr "length" "8")])
+/* There is no point in defining extendsidi2; convert_move generates good
+ code for that. */
(define_insn "extendhisi2"
[(set (match_operand:SI 0 "arith_reg_operand" "=r,r")
@@ -1499,17 +1794,17 @@
""
"sett")
-;; t/z is first, so that it will be preferred over r/r when reloading a move
+;; t/r is first, so that it will be preferred over r/r when reloading a move
;; of a pseudo-reg into the T reg
(define_insn "movsi_i"
- [(set (match_operand:SI 0 "general_movdst_operand" "=t,r,r,r,r,r,m,<,xl,xl,r")
- (match_operand:SI 1 "general_movsrc_operand" "z,Q,rI,m,xl,t,r,xl,r,>,i"))]
+ [(set (match_operand:SI 0 "general_movdst_operand" "=t,r,r,r,r,r,m,<,<,xl,x,l,r")
+ (match_operand:SI 1 "general_movsrc_operand" "r,Q,rI,m,xl,t,r,x,l,r,>,>,i"))]
"
- ! TARGET_SH3E &&
- (register_operand (operands[0], SImode)
- || register_operand (operands[1], SImode))"
+ ! TARGET_SH3E
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
"@
- tst %1,%1\;rotcl %1\;xor #1,%1\;rotcr %1
+ cmp/pl %1
mov.l %1,%0
mov %1,%0
mov.l %1,%0
@@ -1517,39 +1812,58 @@
movt %0
mov.l %1,%0
sts.l %1,%0
+ sts.l %1,%0
lds %1,%0
lds.l %1,%0
+ lds.l %1,%0
fake %1,%0"
- [(set_attr "type" "move,pcload,move,load,move,store,store,move,load,move,pcload")
- (set_attr "length" "8,*,*,*,*,*,*,*,*,*,*")])
+ [(set_attr "type" "*,pcload_si,move,load_si,move,move,store,store,pstore,move,load,pload,pcload_si")
+ (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*")])
-;; t/z is first, so that it will be preferred over r/r when reloading a move
-;; of a pseudo-reg into the T reg
+;; t/r must come after r/r, lest reload try to reload stuff like
+;; (subreg:SI (reg:SF 38 fr14) 0) into T (compiling stdlib/strtod.c -m3e -O2)
;; ??? This allows moves from macl to fpul to be recognized, but these moves
;; will require a reload.
(define_insn "movsi_ie"
- [(set (match_operand:SI 0 "general_movdst_operand" "=t,r,r,r,r,r,m,<,xl,xl,r,y,r")
- (match_operand:SI 1 "general_movsrc_operand" "z,Q,rI,m,xl,t,r,xl,r,>,i,r,y"))]
+ [(set (match_operand:SI 0 "general_movdst_operand" "=r,r,t,r,r,r,m,<,<,xl,x,l,r,y,r,y")
+ (match_operand:SI 1 "general_movsrc_operand" "Q,rI,r,m,xl,t,r,x,l,r,>,>,i,r,y,y"))]
"TARGET_SH3E
&& (register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
"@
- tst %1,%1\;rotcl %1\;xor #1,%1\;rotcr %1
mov.l %1,%0
mov %1,%0
+ cmp/pl %1
mov.l %1,%0
sts %1,%0
movt %0
mov.l %1,%0
sts.l %1,%0
+ sts.l %1,%0
lds %1,%0
lds.l %1,%0
+ lds.l %1,%0
fake %1,%0
lds %1,%0
- sts %1,%0"
- [(set_attr "type" "move,pcload,move,load,move,store,store,move,load,move,pcload,move,move")
- (set_attr "length" "8,*,*,*,*,*,*,*,*,*,*,*,*")])
-
+ sts %1,%0
+ ! move optimized away"
+ [(set_attr "type" "pcload_si,move,*,load_si,move,move,store,store,pstore,move,load,pload,pcload_si,gp_fpul,gp_fpul,other")
+ (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")])
+
+(define_insn "movsi_i_lowpart"
+ [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "=r,r,r,r,r,m,r"))
+ (match_operand:SI 1 "general_movsrc_operand" "Q,rI,m,xl,t,r,i"))]
+ "register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode)"
+ "@
+ mov.l %1,%0
+ mov %1,%0
+ mov.l %1,%0
+ sts %1,%0
+ movt %0
+ mov.l %1,%0
+ fake %1,%0"
+ [(set_attr "type" "pcload,move,load,move,move,store,pcload")])
(define_expand "movsi"
[(set (match_operand:SI 0 "general_movdst_operand" "")
(match_operand:SI 1 "general_movsrc_operand" ""))]
@@ -1600,14 +1914,16 @@
;; ??? This should be a define expand.
+;; x/r can be created by inlining/cse, e.g. for execute/961213-1.c
+;; compiled with -m2 -ml -O3 -funroll-loops
(define_insn ""
- [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m,r,r,r")
- (match_operand:DI 1 "general_movsrc_operand" "Q,r,m,r,I,i,x"))]
+ [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m,r,r,r,*!x")
+ (match_operand:DI 1 "general_movsrc_operand" "Q,r,m,r,I,i,x,r"))]
"arith_reg_operand (operands[0], DImode)
|| arith_reg_operand (operands[1], DImode)"
"* return output_movedouble (insn, operands, DImode);"
[(set_attr "length" "4")
- (set_attr "type" "pcload,move,load,store,move,pcload,move")])
+ (set_attr "type" "pcload,move,load,store,move,pcload,move,move")])
;; If the output is a register and the input is memory or a register, we have
;; to be careful and see which word needs to be loaded first.
@@ -1779,15 +2095,19 @@
[(set (match_operand:DF 0 "general_movdst_operand" "")
(match_operand:DF 1 "general_movsrc_operand" ""))]
""
- "{ if (prepare_move_operands (operands, DFmode)) DONE; }")
+ "
+{
+ if (prepare_move_operands (operands, DFmode)) DONE;
+}")
+
(define_insn "movsf_i"
[(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r")
(match_operand:SF 1 "general_movsrc_operand" "r,I,FQ,m,r,r,l"))]
"
- ! TARGET_SH3E &&
- (arith_reg_operand (operands[0], SFmode)
- || arith_reg_operand (operands[1], SFmode))"
+ ! TARGET_SH3E
+ && (arith_reg_operand (operands[0], SFmode)
+ || arith_reg_operand (operands[1], SFmode))"
"@
mov %1,%0
mov %1,%0
@@ -1800,10 +2120,10 @@
(define_insn "movsf_ie"
[(set (match_operand:SF 0 "general_movdst_operand"
- "=f,r,f,f,?f,f,m,r,r,m,!??r,!??f")
+ "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,ry")
(match_operand:SF 1 "general_movsrc_operand"
- "f,r,G,H,FQ,m,f,FQ,m,r,f,r"))
- (clobber (match_scratch:SI 2 "=X,X,X,X,&z,X,X,X,X,X,X,X"))]
+ "f,r,G,H,FQ,m,f,FQ,m,r,y,f,>,fr,yr"))
+ (clobber (match_scratch:SI 2 "=X,X,X,X,&z,X,X,X,X,X,X,X,X,y,X"))]
"TARGET_SH3E
&& (arith_reg_operand (operands[0], SFmode)
@@ -1819,29 +2139,39 @@
mov.l %1,%0
mov.l %1,%0
mov.l %1,%0
- flds %1,fpul\;sts fpul,%0
- lds %1,fpul\;fsts fpul,%0"
- [(set_attr "type" "move,move,fp,fp,pcload,load,store,pcload,load,store,move,fp")
- (set_attr "length" "*,*,*,*,4,*,*,*,*,*,4,4")])
+ fsts fpul,%0
+ flds %1,fpul
+ lds.l %1,%0
+ #
+ #"
+ [(set_attr "type" "fmove,move,fmove,fmove,pcload,load,store,pcload,load,store,fmove,fmove,load,*,*")
+ (set_attr "length" "*,*,*,*,4,*,*,*,*,*,2,2,2,*,*")])
(define_split
- [(set (match_operand:SF 0 "general_movdst_operand" "")
- (match_operand:SF 1 "general_movsrc_operand" ""))
- (clobber (reg:SI 0))]
- "GET_CODE (operands[0]) == REG && REGNO (operands[0]) < FIRST_PSEUDO_REGISTER"
- [(parallel [(set (match_dup 0) (match_dup 1))
- (clobber (scratch:SI))])]
+ [(set (match_operand:SF 0 "register_operand" "ry")
+ (match_operand:SF 1 "register_operand" "ry"))
+ (clobber (match_scratch:SI 2 "X"))]
+ "reload_completed
+ && true_regnum (operands[0]) < FIRST_FP_REG
+ && true_regnum (operands[1]) < FIRST_FP_REG"
+ [(set (match_dup 0) (match_dup 1))]
"
{
- if (REGNO (operands[0]) >= FIRST_FP_REG && REGNO (operands[0]) <= LAST_FP_REG)
- {
- if (GET_CODE (operands[1]) != MEM)
- FAIL;
- emit_insn (gen_mova (XEXP (operands[1], 0)));
- XEXP (operands[1], 0) = gen_rtx (REG, Pmode, 0);
- }
+ operands[0] = gen_rtx (REG, SImode, true_regnum (operands[0]));
+ operands[1] = gen_rtx (REG, SImode, true_regnum (operands[1]));
}")
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" ""))
+ (clobber (reg:SI 22))]
+ ""
+ [(parallel [(set (reg:SF 22) (match_dup 1))
+ (clobber (scratch:SI))])
+ (parallel [(set (match_dup 0) (reg:SF 22))
+ (clobber (scratch:SI))])]
+ "")
+
(define_expand "movsf"
[(set (match_operand:SF 0 "general_movdst_operand" "")
(match_operand:SF 1 "general_movsrc_operand" ""))]
@@ -1884,47 +2214,45 @@
"* return output_branch (0, insn, operands);"
[(set_attr "type" "cbranch")])
-(define_insn "inverse_branch_true"
- [(set (pc) (if_then_else (ne (reg:SI 18) (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
+;; Patterns to prevent reorg from re-combining a condbranch with a branch
+;; whose destination is too far away.
+;; The const_int_operand is distinct for each branch target; it avoids
+;; unwanted matches with redundant_insn.
+(define_insn "block_branch_redirect"
+ [(set (pc) (unspec [(match_operand 0 "const_int_operand" "")] 4))]
""
- "* return output_branch (0, insn, operands);"
- [(set_attr "type" "cbranch")])
+ ""
+ [(set_attr "length" "0")])
-(define_insn "inverse_branch_false"
- [(set (pc) (if_then_else (eq (reg:SI 18) (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
+;; This one has the additional purpose of recording a possible scratch register
+;; for the following branch.
+(define_insn "indirect_jump_scratch"
+ [(set (match_operand 0 "register_operand" "r")
+ (unspec [(match_operand 1 "const_int_operand" "")] 4))]
""
- "* return output_branch (1, insn, operands);"
- [(set_attr "type" "cbranch")])
+ ""
+ [(set_attr "length" "0")])
;; Conditional branch insns
(define_expand "beq"
- [(set (reg:SI 18) (eq:SI (match_dup 1) (match_dup 2)))
- (set (pc)
+ [(set (pc)
(if_then_else (ne (reg:SI 18) (const_int 0))
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"from_compare (operands, EQ);")
-; There is no bne compare, so we reverse the branch arms.
-
(define_expand "bne"
- [(set (reg:SI 18) (eq:SI (match_dup 1) (match_dup 2)))
- (set (pc)
- (if_then_else (ne (reg:SI 18) (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
+ [(set (pc)
+ (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
""
- "from_compare (operands, NE);")
+ "from_compare (operands, EQ);")
(define_expand "bgt"
- [(set (reg:SI 18) (gt:SI (match_dup 1) (match_dup 2)))
- (set (pc)
+ [(set (pc)
(if_then_else (ne (reg:SI 18) (const_int 0))
(label_ref (match_operand 0 "" ""))
(pc)))]
@@ -1932,11 +2260,10 @@
"from_compare (operands, GT);")
(define_expand "blt"
- [(set (reg:SI 18) (ge:SI (match_dup 1) (match_dup 2)))
- (set (pc)
- (if_then_else (ne (reg:SI 18) (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
+ [(set (pc)
+ (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
""
"
{
@@ -1948,28 +2275,41 @@
emit_insn (gen_bgt (operands[0]));
DONE;
}
- from_compare (operands, LT);
+ from_compare (operands, GE);
}")
(define_expand "ble"
- [(set (reg:SI 18) (gt:SI (match_dup 1) (match_dup 2)))
- (set (pc)
- (if_then_else (ne (reg:SI 18) (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
+ [(set (pc)
+ (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
""
- "from_compare (operands, LE);")
+ "
+{
+ if (TARGET_SH3E
+ && TARGET_IEEE
+ && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
+ {
+ rtx tmp = sh_compare_op0;
+ sh_compare_op0 = sh_compare_op1;
+ sh_compare_op1 = tmp;
+ emit_insn (gen_bge (operands[0]));
+ DONE;
+ }
+ from_compare (operands, GT);
+}")
(define_expand "bge"
- [(set (reg:SI 18) (ge:SI (match_dup 1) (match_dup 2)))
- (set (pc)
+ [(set (pc)
(if_then_else (ne (reg:SI 18) (const_int 0))
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- if (GET_MODE (sh_compare_op0) == SFmode)
+ if (TARGET_SH3E
+ && ! TARGET_IEEE
+ && GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
{
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
@@ -1981,8 +2321,7 @@
}")
(define_expand "bgtu"
- [(set (reg:SI 18) (gtu:SI (match_dup 1) (match_dup 2)))
- (set (pc)
+ [(set (pc)
(if_then_else (ne (reg:SI 18) (const_int 0))
(label_ref (match_operand 0 "" ""))
(pc)))]
@@ -1990,17 +2329,15 @@
"from_compare (operands, GTU); ")
(define_expand "bltu"
- [(set (reg:SI 18) (geu:SI (match_dup 1) (match_dup 2)))
- (set (pc)
- (if_then_else (ne (reg:SI 18) (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
+ [(set (pc)
+ (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
""
- "from_compare (operands, LTU);")
+ "from_compare (operands, GEU);")
(define_expand "bgeu"
- [(set (reg:SI 18) (geu:SI (match_dup 1) (match_dup 2)))
- (set (pc)
+ [(set (pc)
(if_then_else (ne (reg:SI 18) (const_int 0))
(label_ref (match_operand 0 "" ""))
(pc)))]
@@ -2008,13 +2345,12 @@
"from_compare (operands, GEU);")
(define_expand "bleu"
- [(set (reg:SI 18) (gtu:SI (match_dup 1) (match_dup 2)))
- (set (pc)
- (if_then_else (ne (reg:SI 18) (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
+ [(set (pc)
+ (if_then_else (eq (reg:SI 18) (const_int 0))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
""
- "from_compare (operands, LEU);")
+ "from_compare (operands, GTU);")
;; ------------------------------------------------------------------------
;; Jump and linkage insns
@@ -2027,7 +2363,7 @@
"*
{
/* The length is 16 if the delay slot is unfilled. */
- if (get_attr_length(insn) >= 14)
+ if (get_attr_length(insn) > 4)
return output_far_jump(insn, operands[0]);
else
return \"bra %l0%#\";
@@ -2074,19 +2410,38 @@
(match_operand:SI 0 "arith_reg_operand" "r"))]
""
"jmp @%0%#"
- [(set_attr "needs_delay_slot" "yes")])
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
-;; This might seem redundant, but it helps us distinguish case table jumps
+;; The use of operand 1 / 2 helps us distinguish case table jumps
;; which can be present in structured code from indirect jumps which can not
;; be present in structured code. This allows -fprofile-arcs to work.
-(define_insn "*casesi_jump"
+;; For SH1 processors.
+(define_insn "casesi_jump_1"
[(set (pc)
- (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand:SI 0 "register_operand" "r"))
(use (label_ref (match_operand 1 "" "")))]
""
- "jmp @%0%#"
- [(set_attr "needs_delay_slot" "yes")])
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+;; For all later processors.
+(define_insn "casesi_jump_2"
+ [(set (pc) (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand 1 "braf_label_ref_operand" "")))
+ (use (label_ref (match_operand 2 "" "")))]
+ ""
+ "braf %0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+(define_insn "dummy_jump"
+ [(set (pc) (const_int 0))]
+ ""
+ ""
+ [(set_attr "length" "0")])
;; Call subroutine returning any type.
;; ??? This probably doesn't work.
@@ -2127,22 +2482,26 @@
(eq:SI (match_operand:SI 0 "arith_reg_operand" "+r") (const_int 1)))
(set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))]
"TARGET_SH2"
- "dt %0")
+ "dt %0"
+ [(set_attr "type" "arith")])
(define_insn "nop"
[(const_int 0)]
""
"nop")
-;; Load address of a label. This is only generated by the casesi expand.
-;; This must use unspec, because this only works immediately before a casesi.
+;; Load address of a label. This is only generated by the casesi expand,
+;; and by machine_dependent_reorg (fixing up fp moves).
+;; This must use unspec, because this only works for labels that are
+;; within range.
(define_insn "mova"
[(set (reg:SI 0)
(unspec [(label_ref (match_operand 0 "" ""))] 1))]
""
"mova %O0,r0"
- [(set_attr "in_delay_slot" "no")])
+ [(set_attr "in_delay_slot" "no")
+ (set_attr "type" "arith")])
;; case instruction for switch statements.
@@ -2152,56 +2511,157 @@
;; operand 3 is CODE_LABEL for the table;
;; operand 4 is the CODE_LABEL to go to if index out of range.
-;; ??? There should be a barrier after the jump at the end.
-
(define_expand "casesi"
- [(set (match_dup 5) (match_operand:SI 0 "arith_reg_operand" ""))
- (set (match_dup 5) (minus:SI (match_dup 5)
+ [(match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")
+ (match_operand 3 "" "") (match_operand 4 "" "")]
+ ""
+ "
+{
+ rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (SImode);
+ operands[1] = copy_to_mode_reg (SImode, operands[1]);
+ operands[2] = copy_to_mode_reg (SImode, operands[2]);
+ /* If optimizing, casesi_worker depends on the mode of the instruction
+     before the label it 'uses' - operands[3]. */
+ emit_insn (gen_casesi_0 (operands[0], operands[1], operands[2], operands[4],
+ reg));
+ emit_insn (gen_casesi_worker_0 (reg2, reg, operands[3]));
+ if (TARGET_SH2)
+ {
+ rtx lab = gen_label_rtx ();
+ emit_jump_insn (gen_casesi_jump_2 (reg2,
+ gen_rtx (LABEL_REF, VOIDmode, lab),
+ operands[3]));
+ emit_label (lab);
+      /* Put a fake jump after the label, lest some optimization
+ delete the barrier and LAB. */
+ emit_jump_insn (gen_dummy_jump ());
+ }
+ else
+ {
+ emit_jump_insn (gen_casesi_jump_1 (reg2, operands[3]));
+ }
+ /* For SH2 and newer, the ADDR_DIFF_VEC is not actually relative to
+ operands[3], but to lab. We will fix this up in
+ machine_dependent_reorg. */
+ emit_barrier ();
+ DONE;
+}")
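For TARGET_SH2 the sequence built here roughly assembles as follows; this is an illustrative sketch only -- the register numbers, the nop in the delay slot and the HImode table are assumptions, while the instruction templates come from the mova, casesi_worker and casesi_jump_2 patterns in this file:

	mova	.L_table,r0	! r0 <- address of the jump table
	add	r1,r1		! scale the bounds-checked index (HImode entries)
	mov.w	@(r0,r1),r2	! r2 <- table entry, i.e. target - .L_ref
	braf	r2		! branches to address after delay slot + r2
	nop			! delay slot
.L_ref:				! the LAB emitted above; the ADDR_DIFF_VEC
				! entries end up relative to this point

This is why the comment above notes that the table is not actually relative to operands[3]; machine_dependent_reorg fixes that up later.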
+
+(define_expand "casesi_0"
+ [(set (match_operand:SI 4 "" "") (match_operand:SI 0 "arith_reg_operand" ""))
+ (set (match_dup 4) (minus:SI (match_dup 4)
(match_operand:SI 1 "arith_operand" "")))
(set (reg:SI 18)
- (gtu:SI (match_dup 5)
+ (gtu:SI (match_dup 4)
(match_operand:SI 2 "arith_reg_operand" "")))
(set (pc)
(if_then_else (ne (reg:SI 18)
(const_int 0))
- (label_ref (match_operand 4 "" ""))
- (pc)))
- (set (match_dup 6) (match_dup 5))
- (set (match_dup 6) (ashift:SI (match_dup 6) (match_dup 7)))
- (set (reg:SI 0) (unspec [(label_ref (match_operand 3 "" ""))] 1))
- (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0)
- (mem:HI (plus:SI (reg:SI 0)
- (match_dup 6)))))
- (set (match_dup 6) (mem:HI (plus:SI (reg:SI 0) (match_dup 6))))])
- (parallel [(set (pc) (reg:SI 0))
- (use (label_ref (match_dup 3)))])]
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
""
- "
-{
- operands[1] = copy_to_mode_reg (SImode, operands[1]);
- operands[2] = copy_to_mode_reg (SImode, operands[2]);
- operands[5] = gen_reg_rtx (SImode);
- operands[6] = gen_reg_rtx (SImode);
- operands[7] = GEN_INT (TARGET_BIGTABLE ? 2 : 1);
-}")
+ "")
-(define_insn "casesi_worker"
- [(set (reg:SI 0)
- (plus:SI (reg:SI 0)
- (mem:HI (plus:SI (reg:SI 0)
- (match_operand:SI 0 "arith_reg_operand" "+r")))))
- (set (match_dup 0) (mem:HI (plus:SI (reg:SI 0)
- (match_dup 0))))]
+;; ??? reload might clobber r0 if we use it explicitly in the RTL before
+;; reload; using an R0_REGS pseudo reg is likely to give poor code.
+;; So we keep the use of r0 hidden in an R0_REGS clobber until after reload.
+
+(define_insn "casesi_worker_0"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec [(match_operand 1 "register_operand" "0,r")
+ (label_ref (match_operand 2 "" ""))] 2))
+ (clobber (match_scratch:SI 3 "=X,1"))
+ (clobber (match_scratch:SI 4 "=&z,z"))]
+ ""
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec [(match_operand 1 "register_operand" "")
+ (label_ref (match_operand 2 "" ""))] 2))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "! TARGET_SH2 && reload_completed"
+ [(set (reg:SI 0) (unspec [(label_ref (match_dup 2))] 1))
+ (parallel [(set (match_dup 0)
+ (unspec [(reg:SI 0) (match_dup 1) (label_ref (match_dup 2))] 2))
+ (clobber (match_dup 3))])
+ (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI 0)))]
+ "LABEL_NUSES (operands[2])++;")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec [(match_operand 1 "register_operand" "")
+ (label_ref (match_operand 2 "" ""))] 2))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_SH2 && reload_completed"
+ [(set (reg:SI 0) (unspec [(label_ref (match_dup 2))] 1))
+ (parallel [(set (match_dup 0)
+ (unspec [(reg:SI 0) (match_dup 1) (label_ref (match_dup 2))] 2))
+ (clobber (match_dup 3))])]
+ "LABEL_NUSES (operands[2])++;")
+
+(define_insn "*casesi_worker"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec [(reg:SI 0) (match_operand 1 "register_operand" "0,r")
+ (label_ref (match_operand 2 "" ""))] 2))
+ (clobber (match_scratch:SI 3 "=X,1"))]
""
"*
{
- if (TARGET_BIGTABLE)
- return \"mov.l @(r0,%0),%0\;add %0,r0\";
- else
- return \"mov.w @(r0,%0),%0\;add %0,r0\";
+ enum machine_mode mode
+ = optimize
+ ? GET_MODE (PATTERN (prev_real_insn (operands[2])))
+ : sh_addr_diff_vec_mode;
+ switch (mode)
+ {
+ case SImode:
+ return \"shll2 %1\;mov.l @(r0,%1),%0\";
+ case HImode:
+ return \"add %1,%1\;mov.w @(r0,%1),%0\";
+ case QImode:
+ {
+ rtx adj = PATTERN (prev_real_insn (operands[2]));
+ if ((insn_addresses[INSN_UID (XEXP ( XVECEXP (adj, 0, 1), 0))]
+ - insn_addresses[INSN_UID (XEXP (XVECEXP (adj, 0, 2), 0))])
+ <= 126)
+ return \"mov.b @(r0,%1),%0\";
+ return \"mov.b @(r0,%1),%0\;extu.b %0,%0\";
+ }
+ default:
+ abort ();
+ }
}"
[(set_attr "length" "4")])
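As an aside on the QImode arm above, a short sketch (illustrative only, with a hypothetical helper name) of why extu.b becomes necessary once the label span exceeds 126 bytes: mov.b is a sign-extending load.

    #include <stdint.h>

    /* A sign-extending byte load mis-reads offsets above 127, so a zero
       extension (extu.b) must follow for wide tables.  */
    static uint32_t
    load_byte_offset (const int8_t *table, uint32_t i, int table_is_wide)
    {
      int32_t v = table[i];         /* mov.b @(r0,%1): sign-extending load   */
      if (table_is_wide)            /* label span exceeds 126 bytes          */
        v = (uint8_t) v;            /* extu.b: recover the unsigned offset   */
      return (uint32_t) v;
    }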
+;; Include ADDR_DIFF_VECS in the shorten_branches pass; we have to
+;; use a negative-length instruction to actually accomplish this.
+(define_insn "addr_diff_vec_adjust"
+ [(unspec_volatile [(label_ref (match_operand 0 "" ""))
+ (label_ref (match_operand 1 "" ""))
+ (label_ref (match_operand 2 "" ""))
+ (match_operand 3 "const_int_operand" "")] 7)]
+ ""
+ "*
+{
+ /* ??? ASM_OUTPUT_ADDR_DIFF_ELT gets passed no context information, so
+ we must use a kludge with a global variable. */
+ sh_addr_diff_vec_mode = GET_MODE (PATTERN (insn));
+ return \"\";
+}"
+;; Need a variable length for this to be processed in each shorten_branches pass.
+;; The actual work is done in ADJUST_INSN_LENGTH, because length attributes
+;; need to be (a choice of) constants.
+;; We use the calculated length before ADJUST_INSN_LENGTH to
+;; determine if the insn_addresses array contents are valid.
+ [(set (attr "length")
+ (if_then_else (eq (pc) (const_int -1))
+ (const_int 2) (const_int 0)))])
+
(define_insn "return"
[(return)]
"reload_completed"
@@ -2233,7 +2693,8 @@
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(eq:SI (reg:SI 18) (const_int 1)))]
""
- "movt %0")
+ "movt %0"
+ [(set_attr "type" "arith")])
(define_expand "seq"
[(set (match_operand:SI 0 "arith_reg_operand" "")
@@ -2248,18 +2709,15 @@
"operands[1] = prepare_scc_operands (LT);")
(define_expand "sle"
- [(set (match_operand:SI 0 "arith_reg_operand" "")
- (match_dup 1))]
+ [(match_operand:SI 0 "arith_reg_operand" "")]
""
"
{
- if (GET_MODE (sh_compare_op0) == SFmode)
- {
- emit_insn (gen_sgt (operands[0]));
- emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
- DONE;
- }
- operands[1] = prepare_scc_operands (LE);
+ rtx tmp = sh_compare_op0;
+ sh_compare_op0 = sh_compare_op1;
+ sh_compare_op1 = tmp;
+ emit_insn (gen_sge (operands[0]));
+ DONE;
}")
(define_expand "sgt"
@@ -2276,8 +2734,22 @@
{
if (GET_MODE (sh_compare_op0) == SFmode)
{
- emit_insn (gen_slt (operands[0]));
- emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
+ if (TARGET_IEEE)
+ {
+ rtx t_reg = gen_rtx (REG, SImode, T_REG);
+ rtx lab = gen_label_rtx ();
+ emit_insn (gen_rtx (SET, VOIDmode, t_reg,
+ gen_rtx (EQ, SImode, sh_compare_op0,
+ sh_compare_op1)));
+ emit_jump_insn (gen_branch_true (lab));
+ emit_insn (gen_rtx (SET, VOIDmode, t_reg,
+ gen_rtx (GT, SImode, sh_compare_op0,
+ sh_compare_op1)));
+ emit_label (lab);
+ emit_insn (gen_movt (operands[0]));
+ }
+ else
+ emit_insn (gen_movnegt (operands[0], prepare_scc_operands (LT)));
DONE;
}
operands[1] = prepare_scc_operands (GE);
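A short sketch of the semantics the new TARGET_IEEE path preserves (an illustration, not the emitted code): a >= b is formed as (a == b) || (a > b) rather than !(a < b), so a NaN operand yields false as IEEE requires.

    /* fcmp/eq, then fcmp/gt only if the equality failed, leaves the T bit as: */
    static int ieee_ge (float a, float b)
    {
      return a == b || a > b;     /* false whenever either operand is a NaN */
    }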
@@ -2330,6 +2802,18 @@
operands[2] = gen_reg_rtx (SImode);
}")
+;; Use the same trick for FP sle / sge
+(define_expand "movnegt"
+ [(set (match_dup 2) (const_int -1))
+ (parallel [(set (match_operand 0 "" "")
+ (neg:SI (plus:SI (match_dup 1)
+ (match_dup 2))))
+ (set (reg:SI 18)
+ (ne:SI (ior:SI (match_operand 1 "" "") (match_dup 2))
+ (const_int 0)))])]
+ ""
+ "operands[2] = gen_reg_rtx (SImode);")
+
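The movnegt expander leans on a small arithmetic identity; a hedged sketch (hypothetical helper name) of the value it computes from a T-bit value t in {0, 1}:

    /* -(t + (-1)) == 1 - t == !t for t in {0, 1}, so the negated condition
       falls out of the existing neg/plus pattern.  */
    static int movnegt_value (int t)
    {
      return -(t + -1);           /* 1 -> 0, 0 -> 1 */
    }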
;; Recognize mov #-1/negc/neg sequence, and change it to movt/add #-1.
+;; This prevents a regression that occurred when we switched from xor to
;; mov/neg for sne.
@@ -2416,22 +2900,42 @@
[(set_attr "length" "8")
(set_attr "in_delay_slot" "no")])
+;; Alignment is needed for some constant tables; it may also be added for
+;; instructions at the start of loops, or after unconditional branches.
+;; ??? We would get more accurate lengths if we did instruction
+;; alignment based on the value of INSN_CURRENT_ADDRESS; the approach used
+;; here is too conservative.
+
; align to a two byte boundary
(define_insn "align_2"
- [(unspec_volatile [(const_int 0)] 10)]
+ [(unspec_volatile [(const_int 1)] 1)]
""
".align 1"
[(set_attr "length" "0")
(set_attr "in_delay_slot" "no")])
; align to a four byte boundary
+;; align_4 and align_log are used at the starts of loops, or after
+;; unconditional branches; such alignment may take up extra room.
+
+(define_expand "align_4"
+ [(unspec_volatile [(const_int 2)] 1)]
+ ""
+ "")
+
+; align to a cache line boundary
-(define_insn "align_4"
- [(unspec_volatile [(const_int 0)] 5)]
+(define_insn "align_log"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "")] 1)]
""
- ".align 2"
- [(set_attr "in_delay_slot" "no")])
+ ".align %O0"
+;; Need a variable length for this to be processed in each shorten_branches pass.
+;; The actual work is done in ADJUST_INSN_LENGTH, because length attributes
+;; need to be (a choice of) constants.
+ [(set (attr "length")
+ (if_then_else (ne (pc) (pc)) (const_int 2) (const_int 0)))
+ (set_attr "in_delay_slot" "no")])
; emitted at the end of the literal table, used to emit the
; 32bit branch labels if needed.
@@ -2508,7 +3012,7 @@
(define_insn "subsf3"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(minus:SF (match_operand:SF 1 "arith_reg_operand" "0")
- (match_operand:SF 2 "arith_reg_operand" "f")))]
+ (match_operand:SF 2 "arith_reg_operand" "f")))]
"TARGET_SH3E"
"fsub %2,%0"
[(set_attr "type" "fp")])
@@ -2533,81 +3037,64 @@
(define_insn "divsf3"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(div:SF (match_operand:SF 1 "arith_reg_operand" "0")
- (match_operand:SF 2 "arith_reg_operand" "f")))]
+ (match_operand:SF 2 "arith_reg_operand" "f")))]
"TARGET_SH3E"
"fdiv %2,%0"
[(set_attr "type" "fdiv")])
-;; ??? This is the right solution, but it fails because the movs[if] patterns
-;; silently clobber FPUL (r22) for int<->fp moves. Thus we can not explicitly
-;; use FPUL here.
-;;
-;;(define_expand "floatsisf2"
-;; [(set (reg:SI 22)
-;; (match_operand:SI 1 "arith_reg_operand" ""))
-;; (set (match_operand:SF 0 "arith_reg_operand" "")
-;; (float:SF (reg:SI 22)))]
-;; "TARGET_SH3E"
-;; "")
-;;
-;;(define_insn "*floatsisf"
-;; [(set (match_operand:SF 0 "arith_reg_operand" "=f")
-;; (float:SF (reg:SI 22)))]
-;; "TARGET_SH3E"
-;; "float fpul,%0")
+(define_expand "floatsisf2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "arith_reg_operand" ""))
+ (set (match_operand:SF 0 "arith_reg_operand" "")
+ (float:SF (reg:SI 22)))]
+ "TARGET_SH3E"
+ "")
-(define_insn "floatsisf2"
+(define_insn "*floatsisf2_ie"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
- (float:SF (match_operand:SI 1 "arith_reg_operand" "r")))]
+ (float:SF (reg:SI 22)))]
"TARGET_SH3E"
- "lds %1,fpul\;float fpul,%0"
- [(set_attr "length" "4")
- (set_attr "type" "fp")])
+ "float fpul,%0"
+ [(set_attr "type" "fp")])
-;; ??? This is the right solution, but it fails because the movs[if] patterns
-;; silently clobber FPUL (r22) for int<->fp moves. Thus we can not explicitly
-;; use FPUL here.
-;;
-;;(define_expand "fix_truncsfsi2"
-;; [(set (reg:SI 22)
-;; (fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
-;; (set (match_operand:SI 0 "arith_reg_operand" "=r")
-;; (reg:SI 22))]
-;; "TARGET_SH3E"
-;; "")
-;;
-;;(define_insn "*fixsfsi"
-;; [(set (reg:SI 22)
-;; (fix:SI (match_operand:SF 0 "arith_reg_operand" "f")))]
-;; "TARGET_SH3E"
-;; "ftrc %0,fpul")
+(define_expand "fix_truncsfsi2"
+ [(set (reg:SI 22)
+ (fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
+ (set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (reg:SI 22))]
+ "TARGET_SH3E"
+ "")
-(define_insn "fix_truncsfsi2"
- [(set (match_operand:SI 0 "arith_reg_operand" "=r")
- (fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))]
+(define_insn "*fixsfsi"
+ [(set (reg:SI 22)
+ (fix:SI (match_operand:SF 0 "arith_reg_operand" "f")))]
"TARGET_SH3E"
- "ftrc %1,fpul\;sts fpul,%0"
- [(set_attr "length" "4")
- (set_attr "type" "move")])
+ "ftrc %0,fpul"
+ [(set_attr "type" "fp")])
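In C terms the two conversion expanders correspond to plain casts, split so the transfer through FPUL (reg 22) is visible to the register allocator; a minimal sketch (illustrative only):

    /* int -> float:  lds Rn,fpul ; float fpul,FRn  */
    static float int_to_float (int x)   { return (float) x; }

    /* float -> int (truncating, as ftrc does):  ftrc FRn,fpul ; sts fpul,Rn  */
    static int   float_to_int (float x) { return (int) x; }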
-;; ??? This should be SFmode not SImode in the compare, but that would
-;; require fixing the branch patterns too.
-(define_insn "*cmpgtsf_t"
+(define_insn "cmpgtsf_t"
[(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f")
(match_operand:SF 1 "arith_reg_operand" "f")))]
"TARGET_SH3E"
"fcmp/gt %1,%0"
[(set_attr "type" "fp")])
-;; ??? This should be SFmode not SImode in the compare, but that would
-;; require fixing the branch patterns too.
-(define_insn "*cmpeqsf_t"
+(define_insn "cmpeqsf_t"
[(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f")
(match_operand:SF 1 "arith_reg_operand" "f")))]
"TARGET_SH3E"
"fcmp/eq %1,%0"
[(set_attr "type" "fp")])
+(define_insn "ieee_ccmpeqsf_t"
+ [(set (reg:SI 18) (ior:SI (reg:SI 18)
+ (eq:SI (match_operand:SF 0 "arith_reg_operand" "f")
+ (match_operand:SF 1 "arith_reg_operand" "f"))))]
+ "TARGET_SH3E && TARGET_IEEE"
+ "* return output_ieee_ccmpeq (insn, operands);"
+ [(set_attr "length" "4")])
+
+
(define_expand "cmpsf"
[(set (reg:SI 18) (compare (match_operand:SF 0 "arith_operand" "")
(match_operand:SF 1 "arith_operand" "")))]
@@ -2813,7 +3300,7 @@
|| (GET_CODE (operands[2]) == SUBREG
&& REGNO (SUBREG_REG (operands[2])) >= FIRST_FP_REG))
&& reg_unused_after (operands[0], insn)"
- "fmov.s %2,@(%0,%1)")
+ "fmov{.s|} %2,@(%0,%1)")
(define_peephole
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -2826,7 +3313,7 @@
|| (GET_CODE (operands[2]) == SUBREG
&& REGNO (SUBREG_REG (operands[2])) >= FIRST_FP_REG))
&& reg_unused_after (operands[0], insn)"
- "fmov.s @(%0,%1),%2")
+ "fmov{.s|} @(%0,%1),%2")
;; Switch to a new stack with its address in sp_switch (a SYMBOL_REF). */
(define_insn "sp_switch_1"
diff --git a/gcc/ginclude/va-sh.h b/gcc/ginclude/va-sh.h
index 4bae1e0dcd2..f1671c7b0b6 100644
--- a/gcc/ginclude/va-sh.h
+++ b/gcc/ginclude/va-sh.h
@@ -23,9 +23,6 @@ typedef struct {
typedef void *__gnuc_va_list;
-#define __va_rounded_size(TYPE) \
- (((sizeof (TYPE) + sizeof (int) - 1) / sizeof (int)) * sizeof (int))
-
#endif /* ! SH3E */
#endif /* __GNUC_VA_LIST */
@@ -116,104 +113,83 @@ enum __va_type_classes {
#endif
#define va_end(pvar) ((void)0)
+#ifdef __LITTLE_ENDIAN__
+#define __LITTLE_ENDIAN_P 1
+#else
+#define __LITTLE_ENDIAN_P 0
+#endif
+
+#define __SCALAR_TYPE(TYPE) \
+ ((TYPE) == __integer_type_class \
+ || (TYPE) == __char_type_class \
+ || (TYPE) == __enumeral_type_class)
+
/* RECORD_TYPE args passed using the C calling convention are
passed by invisible reference. ??? RECORD_TYPE args passed
in the stack are made to be word-aligned; for an aggregate that is
not word-aligned, we advance the pointer to the first non-reg slot. */
+ /* When this is a smaller-than-int integer, using
+    auto-increment in the promoted mode (SImode) is fastest;
+    however, there is no way to express that in C.  Therefore,
+ we use an asm.
+ We want the MEM_IN_STRUCT_P bit set in the emitted RTL, therefore we
+ use unions even when it would otherwise be unnecessary. */
+
+#define __va_arg_sh1(AP, TYPE) __extension__ \
+__extension__ \
+({(sizeof (TYPE) == 1 \
+ ? ({union {TYPE t; char c;} __t; \
+ asm("" \
+ : "=r" (__t.c) \
+ : "0" ((((union { int i, j; } *) (AP))++)->i)); \
+ __t.t;}) \
+ : sizeof (TYPE) == 2 \
+ ? ({union {TYPE t; short s;} __t; \
+ asm("" \
+ : "=r" (__t.s) \
+ : "0" ((((union { int i, j; } *) (AP))++)->i)); \
+ __t.t;}) \
+ : sizeof (TYPE) >= 4 || __LITTLE_ENDIAN_P \
+ ? (((union { TYPE t; int i;} *) (AP))++)->t \
+ : ((union {TYPE t;TYPE u;}*) ((char *)++(int *)(AP) - sizeof (TYPE)))->t);})
+
#ifdef __SH3E__
-#ifdef __LITTLE_ENDIAN__
+#define __PASS_AS_FLOAT(TYPE_CLASS,SIZE) \
+ (TYPE_CLASS == __real_type_class && SIZE == 4)
#define va_arg(pvar,TYPE) \
__extension__ \
-(*({int __type = __builtin_classify_type (* (TYPE *) 0); \
- void * __result; \
- if (__type == __real_type_class && sizeof(TYPE) == 4) \
- /* float? */ \
+({int __type = __builtin_classify_type (* (TYPE *) 0); \
+ void * __result_p; \
+ if (__PASS_AS_FLOAT (__type, sizeof(TYPE))) \
{ \
- __va_freg *__r; \
if (pvar.__va_next_fp < pvar.__va_next_fp_limit) \
- __r = (__va_freg *) pvar.__va_next_fp++; \
- else \
- __r = (__va_freg *) pvar.__va_next_stack++; \
- __result = (char *) __r; \
- } \
- else \
- { \
- __va_greg *_r; \
- if (pvar.__va_next_o + ((sizeof (TYPE) + 3) / 4) \
- <= pvar.__va_next_o_limit) \
{ \
- _r = pvar.__va_next_o; \
- pvar.__va_next_o += (sizeof (TYPE) + 3) / 4; \
+ __result_p = &pvar.__va_next_fp; \
} \
else \
- { \
- _r = pvar.__va_next_stack; \
- pvar.__va_next_stack += (sizeof (TYPE) + 3) / 4; \
- } \
- __result = (char *) _r; \
- } \
- (TYPE *) __result;}))
-
-#else /* ! __LITTLE_ENDIAN__ */
-
-#define va_arg(pvar,TYPE) \
-__extension__ \
-(*({int __type = __builtin_classify_type (* (TYPE *) 0); \
- void * __result; \
- if (__type == __real_type_class && sizeof(TYPE) == 4) \
- /* float? */ \
- { \
- __va_freg *__r; \
- if (pvar.__va_next_fp < pvar.__va_next_fp_limit) \
- __r = (__va_freg *) pvar.__va_next_fp++; \
- else \
- __r = (__va_freg *) pvar.__va_next_stack++; \
- __result = (char *) __r; \
+ __result_p = &pvar.__va_next_stack; \
} \
else \
{ \
- __va_greg *_r; \
if (pvar.__va_next_o + ((sizeof (TYPE) + 3) / 4) \
<= pvar.__va_next_o_limit) \
- { \
- pvar.__va_next_o += (sizeof (TYPE) + 3) / 4; \
- _r = pvar.__va_next_o; \
- } \
+ __result_p = &pvar.__va_next_o; \
else \
{ \
- pvar.__va_next_stack += (sizeof (TYPE) + 3) / 4; \
- _r = pvar.__va_next_stack; \
+ if (sizeof (TYPE) > 4) \
+ pvar.__va_next_o = pvar.__va_next_o_limit; \
+ \
+ __result_p = &pvar.__va_next_stack; \
} \
- __result = ((char *) _r \
- - (sizeof (TYPE) < 4 ? sizeof (TYPE) \
- : ((sizeof (TYPE) + 3) / 4) * 4)); \
} \
- (TYPE *) __result;}))
-
-#endif /* __LITTLE_ENDIAN__ */
+ __va_arg_sh1(*(void **)__result_p, TYPE);})
#else /* ! SH3E */
-#ifdef __LITTLE_ENDIAN__
-
-/* This is for little-endian machines; small args are padded upward. */
-#define va_arg(AP, TYPE) \
- (AP = (__gnuc_va_list) ((char *) (AP) + __va_rounded_size (TYPE)), \
- *((TYPE *) (void *) ((char *) (AP) - __va_rounded_size (TYPE))))
-
-#else /* ! __LITTLE_ENDIAN__ */
-
-/* This is for big-endian machines; small args are padded downward. */
-#define va_arg(AP, TYPE) \
- (AP = (__gnuc_va_list) ((char *) (AP) + __va_rounded_size (TYPE)), \
- *((TYPE *) (void *) ((char *) (AP) \
- - ((sizeof (TYPE) < __va_rounded_size (char) \
- ? sizeof (TYPE) : __va_rounded_size (TYPE))))))
-
-#endif /* __LITTLE_ENDIAN__ */
+#define va_arg(AP, TYPE) __va_arg_sh1((AP), TYPE)
#endif /* SH3E */
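Finally, a usage sketch (not part of the patch) showing that the new __va_arg_sh1 path is still reached only through the standard interface; the helper macro is never invoked directly:

    #include <stdarg.h>

    /* Sums 'count' additional int arguments; each va_arg here expands to
       the __va_arg_sh1-based definition above on SH targets.  */
    static int
    sum_ints (int count, ...)
    {
      va_list ap;
      int i, total = 0;

      va_start (ap, count);
      for (i = 0; i < count; i++)
        total += va_arg (ap, int);
      va_end (ap);
      return total;
    }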