summaryrefslogtreecommitdiff
path: root/gcc/config/sparc
diff options
context:
space:
mode:
authorrth <rth@138bc75d-0d04-0410-961f-82ee72b054a4>1998-04-18 01:24:59 +0000
committerrth <rth@138bc75d-0d04-0410-961f-82ee72b054a4>1998-04-18 01:24:59 +0000
commit367242d3da71d3774f5cf58898e561dcf7bdc2d8 (patch)
tree4c1a46be2073c826aee2bacd4601da897875c0df /gcc/config/sparc
parent5afcaddc1c3ef3e87d9dc00324b3c5fdf087edcc (diff)
downloadgcc-367242d3da71d3774f5cf58898e561dcf7bdc2d8.tar.gz
Sat Apr 18 01:23:11 1998 John Carr <jfc@mit.edu>
* sparc.c, sparc.h, sparc.md, sol2.h: Many changes related to V9 code generation. Use 64 bit instructions in 32 bit mode when possible. Use V9 return instruction. UltraSPARC optimizations. * sparc.h: Change gen_rtx (CODE to gen_rtx_CODE (. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@19278 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/sparc')
-rw-r--r--gcc/config/sparc/sol2.h6
-rw-r--r--gcc/config/sparc/sparc.c546
-rw-r--r--gcc/config/sparc/sparc.h188
-rw-r--r--gcc/config/sparc/sparc.md926
4 files changed, 1080 insertions, 586 deletions
diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h
index f0c3b136ab4..2c8c5f3f97b 100644
--- a/gcc/config/sparc/sol2.h
+++ b/gcc/config/sparc/sol2.h
@@ -198,3 +198,9 @@ Boston, MA 02111-1307, USA. */
#define TARGET_LIVE_G0 0
#undef TARGET_BROKEN_SAVERESTORE
#define TARGET_BROKEN_SAVERESTORE 0
+
+/* Solaris allows 64 bit out and global registers in 32 bit mode.
+ sparc_override_options will disable V8+ if not generating V9 code. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT (MASK_APP_REGS + MASK_EPILOGUE + MASK_FPU + MASK_V8PLUS)
+
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 36ccb15095f..caebb088fa8 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -22,17 +22,7 @@ the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include "config.h"
-#include <stdio.h>
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-#ifdef HAVE_STRING_H
-#include <string.h>
-#else
-#ifdef HAVE_STRINGS_H
-#include <strings.h>
-#endif
-#endif
+#include "system.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
@@ -208,11 +198,9 @@ sparc_override_options ()
{ "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
/* TEMIC sparclet */
{ "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
- /* "v8plus" is what Sun calls Solaris2.5 running on UltraSPARC's. */
- { "v8plus", PROCESSOR_V8PLUS, MASK_ISA, MASK_V8PLUS },
{ "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 },
/* TI ultrasparc */
- { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V8PLUS },
+ { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
{ 0 }
};
struct cpu_table *cpu;
@@ -288,6 +276,10 @@ sparc_override_options ()
if (TARGET_V9 && TARGET_ARCH32)
target_flags |= MASK_DEPRECATED_V8_INSNS;
+ /* V8PLUS requires V9 */
+ if (! TARGET_V9)
+ target_flags &= ~MASK_V8PLUS;
+
/* Validate -malign-loops= value, or provide default. */
if (sparc_align_loops_string)
{
@@ -333,40 +325,6 @@ sparc_override_options ()
sparc_init_modes ();
}
-/* Float conversions (v9 only).
-
- The floating point registers cannot hold DImode values because SUBREG's
- on them get the wrong register. "(subreg:SI (reg:DI M int-reg) 0)" is the
- same as "(subreg:SI (reg:DI N float-reg) 1)", but gcc doesn't know how to
- turn the "0" to a "1". Therefore, we must explicitly do the conversions
- to/from int/fp regs. `sparc64_fpconv_stack_slot' is the address of an
- 8 byte stack slot used during the transfer.
- ??? I could have used [%fp-16] but I didn't want to add yet another
- dependence on this. */
-/* ??? Can we use assign_stack_temp here? */
-
-static rtx fpconv_stack_temp;
-
-/* Called once for each function. */
-
-void
-sparc_init_expanders ()
-{
- fpconv_stack_temp = NULL_RTX;
-}
-
-/* Assign a stack temp for fp/int DImode conversions. */
-
-rtx
-sparc64_fpconv_stack_temp ()
-{
- if (fpconv_stack_temp == NULL_RTX)
- fpconv_stack_temp =
- assign_stack_local (DImode, GET_MODE_SIZE (DImode), 0);
-
- return fpconv_stack_temp;
-}
-
/* Miscellaneous utilities. */
/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
@@ -380,6 +338,14 @@ v9_regcmp_p (code)
|| code == LE || code == GT);
}
+/* A 32 bit value kept in a register is zero extended, so the only
+   register comparisons that give the right answer are the tests for
+   zero and non-zero.  Return nonzero if CODE is one of those two.  */
+int
+v8plus_regcmp_p (code)
+     enum rtx_code code;
+{
+  switch (code)
+    {
+    case EQ:
+    case NE:
+      return 1;
+
+    default:
+      return 0;
+    }
+}
/* Operand constraints. */
@@ -798,6 +764,16 @@ v9_regcmp_op (op, mode)
return v9_regcmp_p (code);
}
+/* Return nonzero if the rtx code of OP is one that v8plus_regcmp_p
+   accepts (EQ or NE).  MODE is not examined.  */
+int
+v8plus_regcmp_op (op, mode)
+     register rtx op;
+     enum machine_mode mode;
+{
+  return v8plus_regcmp_p (GET_CODE (op));
+}
+
+
/* Return 1 if this is a SIGN_EXTEND or ZERO_EXTEND operation. */
int
@@ -848,8 +824,13 @@ arith_operand (op, mode)
rtx op;
enum machine_mode mode;
{
- return (register_operand (op, mode)
- || (GET_CODE (op) == CONST_INT && SMALL_INT (op)));
+ int val;
+ if (register_operand (op, mode))
+ return 1;
+ if (GET_CODE (op) != CONST_INT)
+ return 0;
+ val = INTVAL (op) & 0xffffffff;
+ return SPARC_SIMM13_P (val);
}
/* Return true if OP is a register, or is a CONST_INT that can fit in a
@@ -1059,8 +1040,15 @@ gen_compare_reg (code, x, y)
else
cc_reg = gen_rtx (REG, mode, SPARC_ICC_REG);
- emit_insn (gen_rtx (SET, VOIDmode, cc_reg,
- gen_rtx (COMPARE, mode, x, y)));
+ if (TARGET_V8PLUS && mode == CCXmode)
+ {
+ emit_insn (gen_cmpdi_v8plus (x, y));
+ }
+ else
+ {
+ emit_insn (gen_rtx (SET, VOIDmode, cc_reg,
+ gen_rtx (COMPARE, mode, x, y)));
+ }
return cc_reg;
}
@@ -1287,14 +1275,53 @@ eligible_for_epilogue_delay (trial, slot)
|| register_operand (XEXP (src, 1), DImode)))
return 1;
- /* This matches "*return_subsi". */
- else if (GET_CODE (src) == MINUS
- && register_operand (XEXP (src, 0), SImode)
- && small_int (XEXP (src, 1), VOIDmode)
- && INTVAL (XEXP (src, 1)) != -4096)
+ return 0;
+}
+
+/* Subroutine of eligible_for_return_delay.  Walk the expression X and
+   return nonzero only if every register it mentions satisfies
+   IN_OR_GLOBAL_P.  Constants and symbolic addresses are always
+   accepted; any rtx code not listed below is rejected.  */
+static int
+check_return_regs (x)
+     rtx x;
+{
+  switch (GET_CODE (x))
+    {
+    case REG:
+      return IN_OR_GLOBAL_P (x);
+
+    case CONST_INT:
+    case CONST_DOUBLE:
+    case CONST:
+    case SYMBOL_REF:
+    case LABEL_REF:
+      return 1;
+
+    /* Two operand codes: operand 1 is examined first, then operand 0.  */
+    case SET:
+    case IOR:
+    case AND:
+    case XOR:
+    case PLUS:
+    case MINUS:
+      if (! check_return_regs (XEXP (x, 1)))
+        return 0;
+      return check_return_regs (XEXP (x, 0));
+
+    /* One operand codes: only operand 0 needs to be examined.  */
+    case NOT:
+    case NEG:
+    case MEM:
+      return check_return_regs (XEXP (x, 0));
+
+    default:
+      return 0;
+    }
+}
+
+/* Return nonzero if the insn TRIAL is a single SET that references
+   only in and global registers, as judged by check_return_regs.
+   Any pattern that is not a SET is rejected.  */
+int
+eligible_for_return_delay (trial)
+     rtx trial;
+{
+  rtx body = PATTERN (trial);
+
+  return GET_CODE (body) == SET ? check_return_regs (body) : 0;
 }
int
@@ -1346,6 +1373,10 @@ reg_unused_after (reg, insn)
/* The table we use to reference PIC data. */
static rtx global_offset_table;
+/* The function we use to get at it. */
+static rtx get_pc_symbol;
+static char get_pc_symbol_name[256];
+
/* Ensure that we are not using patterns that are not OK with PIC. */
int
@@ -1499,61 +1530,11 @@ initialize_pic ()
static rtx
pic_setup_code ()
{
- rtx pic_pc_rtx;
- rtx l1, l2;
rtx seq;
start_sequence ();
-
- /* If -O0, show the PIC register remains live before this. */
- if (obey_regdecls)
- emit_insn (gen_rtx (USE, VOIDmode, pic_offset_table_rtx));
-
- l1 = gen_label_rtx ();
-
- pic_pc_rtx = gen_rtx (CONST, Pmode,
- gen_rtx (MINUS, Pmode,
- global_offset_table,
- gen_rtx (CONST, Pmode,
- gen_rtx (MINUS, Pmode,
- gen_rtx (LABEL_REF,
- VOIDmode, l1),
- pc_rtx))));
-
- /* sparc64: the RDPC instruction doesn't pair, and puts 4 bubbles in the
- pipe to boot. So don't use it here, especially when we're
- doing a save anyway because of %l7. */
-
- l2 = gen_label_rtx ();
- emit_label (l1);
-
- /* Iff we are doing delay branch optimization, slot the sethi up
- here so that it will fill the delay slot of the call. */
- if (flag_delayed_branch)
- emit_insn (gen_rtx (SET, VOIDmode, pic_offset_table_rtx,
- gen_rtx (HIGH, Pmode, pic_pc_rtx)));
-
- /* Note that we pun calls and jumps here! */
- emit_jump_insn (gen_get_pc_via_call (l2, l1));
-
- emit_label (l2);
-
- if (!flag_delayed_branch)
- emit_insn (gen_rtx (SET, VOIDmode, pic_offset_table_rtx,
- gen_rtx (HIGH, Pmode, pic_pc_rtx)));
-
- emit_insn (gen_rtx (SET, VOIDmode,
- pic_offset_table_rtx,
- gen_rtx (LO_SUM, Pmode,
- pic_offset_table_rtx, pic_pc_rtx)));
- emit_insn (gen_rtx (SET, VOIDmode,
- pic_offset_table_rtx,
- gen_rtx (PLUS, Pmode,
- pic_offset_table_rtx,
- gen_rtx (REG, Pmode, 15))));
-
- /* emit_insn (gen_rtx (ASM_INPUT, VOIDmode, "!#PROLOGUE# 1")); */
-
+ emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
+ get_pc_symbol));
seq = gen_sequence ();
end_sequence ();
@@ -1575,9 +1556,21 @@ finalize_pic ()
if (! flag_pic)
abort ();
+ /* If we haven't emitted the special get_pc helper function, do so now. */
+ if (get_pc_symbol_name[0] == 0)
+ {
+ ASM_GENERATE_INTERNAL_LABEL (get_pc_symbol_name, "LGETPC", 0);
+
+ text_section ();
+ ASM_OUTPUT_ALIGN (asm_out_file, 3);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
+ fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
+ }
+
/* Initialize every time through, since we can't easily
know this to be permanent. */
global_offset_table = gen_rtx (SYMBOL_REF, Pmode, "_GLOBAL_OFFSET_TABLE_");
+ get_pc_symbol = gen_rtx (SYMBOL_REF, Pmode, get_pc_symbol_name);
flag_pic = 0;
emit_insn_after (pic_setup_code (), get_insns ());
@@ -1618,6 +1611,15 @@ emit_move_sequence (operands, mode)
/* Handle most common case first: storing into a register. */
if (register_operand (operand0, mode))
{
+ /* Integer constant to FP register. */
+ if (GET_CODE (operand0) == REG
+ && REGNO (operand0) >= 32
+ && REGNO (operand0) < FIRST_PSEUDO_REGISTER
+ && CONSTANT_P (operand1))
+ {
+ operand1 = validize_mem (force_const_mem (GET_MODE (operand0), operand1));
+ }
+
if (register_operand (operand1, mode)
|| (GET_CODE (operand1) == CONST_INT && SMALL_INT (operand1))
|| (GET_CODE (operand1) == CONST_DOUBLE
@@ -1683,6 +1685,7 @@ emit_move_sequence (operands, mode)
}
else if (GET_CODE (operand1) == CONST_INT
? (! SMALL_INT (operand1)
+ && INTVAL (operand1) != -4096
&& ! SPARC_SETHI_P (INTVAL (operand1)))
: GET_CODE (operand1) == CONST_DOUBLE
? ! arith_double_operand (operand1, DImode)
@@ -1704,16 +1707,20 @@ emit_move_sequence (operands, mode)
rtx temp = ((reload_in_progress || mode == DImode)
? operand0 : gen_reg_rtx (mode));
+ if (mode == SImode)
+ {
+ if (GET_CODE (operand1) == CONST_INT)
+ operand1 = GEN_INT (INTVAL (operand1) & 0xffffffff);
+ else if (GET_CODE (operand1) == CONST_DOUBLE)
+ operand1 = GEN_INT (CONST_DOUBLE_LOW (operand1) & 0xffffffff);
+ }
+
if (TARGET_ARCH64 && mode == DImode)
emit_insn (gen_sethi_di_sp64 (temp, operand1));
else
emit_insn (gen_rtx (SET, VOIDmode, temp,
gen_rtx (HIGH, mode, operand1)));
- if (GET_CODE (operand1) == CONST_INT)
- operand1 = GEN_INT (INTVAL (operand1) & 0xffffffff);
- else if (GET_CODE (operand1) == CONST_DOUBLE)
- operand1 = GEN_INT (CONST_DOUBLE_LOW (operand1) & 0xffffffff);
operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
}
}
@@ -1763,10 +1770,16 @@ singlemove_string (operands)
else
return "sethi %%hi(%a1),%0";
}
- else if (GET_CODE (operands[1]) == CONST_INT
- && ! CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'I'))
+ else if (GET_CODE (operands[1]) == CONST_INT)
{
- HOST_WIDE_INT i = INTVAL (operands[1]);
+ /* Only consider the low 32 bits of the constant. */
+ int i = INTVAL (operands[1]) & 0xffffffff;
+
+ if (SPARC_SIMM13_P (i))
+ return "mov %1,%0";
+
+ if (i == 4096)
+ return "sub %%g0,-4096,%0";
/* If all low order 10 bits are clear, then we only need a single
sethi insn to load the constant. */
@@ -2291,9 +2304,9 @@ output_move_quad (operands)
operands[2] = adj_offsettable_operand (mem, 8);
/* ??? In arch64 case, shouldn't we use ldd/std for fp regs. */
if (mem == op1)
- return TARGET_ARCH64 ? "ldx %1,%0;ldx %2,%R0" : "ldd %1,%0;ldd %2,%S0";
+ return TARGET_ARCH64 ? "ldx %1,%0\n\tldx %2,%R0" : "ldd %1,%0\n\tldd %2,%S0";
else
- return TARGET_ARCH64 ? "stx %1,%0;stx %R1,%2" : "std %1,%0;std %S1,%2";
+ return TARGET_ARCH64 ? "stx %1,%0\n\tstx %R1,%2" : "std %1,%0\n\tstd %S1,%2";
}
}
@@ -2968,13 +2981,10 @@ enum sparc_mode_class {
/* Modes for double-float and smaller quantities. */
#define DF_MODES (S_MODES | D_MODES)
-/* ??? Sparc64 fp regs cannot hold DImode values. */
-#define DF_MODES64 (SF_MODES | (1 << (int) DF_MODE) /* | (1 << (int) D_MODE)*/)
+#define DF_MODES64 DF_MODES
/* Modes for double-float only quantities. */
-/* ??? Sparc64 fp regs cannot hold DImode values.
- See fix_truncsfdi2. */
-#define DF_ONLY_MODES ((1 << (int) DF_MODE) /*| (1 << (int) D_MODE)*/)
+#define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE))
/* Modes for double-float and larger quantities. */
#define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)
@@ -2985,8 +2995,6 @@ enum sparc_mode_class {
/* Modes for quad-float and smaller quantities. */
#define TF_MODES (DF_MODES | TF_ONLY_MODES)
-/* ??? Sparc64 fp regs cannot hold DImode values.
- See fix_truncsfdi2. */
#define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)
/* Modes for condition codes. */
@@ -3115,7 +3123,9 @@ sparc_init_modes ()
/* Initialize the array used by REGNO_REG_CLASS. */
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
{
- if (i < 32)
+ if (i < 16 && TARGET_V8PLUS)
+ sparc_regno_reg_class[i] = I64_REGS;
+ else if (i < 32)
sparc_regno_reg_class[i] = GENERAL_REGS;
else if (i < 64)
sparc_regno_reg_class[i] = FP_REGS;
@@ -3584,6 +3594,8 @@ output_function_epilogue (file, size, leaf_function)
PATTERN (insn)));
final_scan_insn (insn, file, 1, 0, 1);
}
+ else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
+ fputs ("\treturn %i7+8\n\tnop\n", file);
else
fprintf (file, "\t%s\n\trestore\n", ret);
}
@@ -4566,22 +4578,77 @@ output_v9branch (op, reg, label, reversed, annul, noop)
return string;
}
-/* Output assembler code to return from a function. */
+/* Renumber registers in delay slot. Replace registers instead of
+ renumbering because they may be shared.
-/* ??? v9: Update to use the new `return' instruction. Also, add patterns to
- md file for the `return' instruction. */
+ This does not handle instructions other than move. */
+
+static void
+epilogue_renumber (where)
+     rtx *where;
+{
+  rtx x = *where;
+  enum rtx_code code = GET_CODE (x);
+
+  switch (code)
+    {
+    case MEM:
+      /* Work on a copy, since the original MEM may be shared, then
+         renumber the registers in its address.  */
+      *where = x = copy_rtx (x);
+      epilogue_renumber (&XEXP (x, 0));
+      return;
+
+    case REG:
+      {
+        int regno = REGNO (x);
+
+        /* Registers 8 through 23 are the out and local registers, which
+           must never appear here; only ins (24-31) and globals are
+           expected.  The range starts at 8 inclusive: register 8 is the
+           first out register and has to be rejected like the others.  */
+        if (regno >= 8 && regno < 24)
+          abort ();
+
+        /* Map each in register onto the out register 16 below it.  A
+           fresh REG is created rather than editing X in place because
+           X may be shared.  */
+        if (regno >= 24 && regno < 32)
+          *where = gen_rtx_REG (GET_MODE (x), regno - 16);
+        return;
+      }
+
+    case CONST_INT:
+    case CONST_DOUBLE:
+    case CONST:
+    case SYMBOL_REF:
+    case LABEL_REF:
+      /* Nothing to renumber in a constant.  */
+      return;
+
+    case IOR:
+    case AND:
+    case XOR:
+    case PLUS:
+    case MINUS:
+      epilogue_renumber (&XEXP (x, 1));
+      /* Fall through to handle operand 0.  */
+    case NEG:
+    case NOT:
+      epilogue_renumber (&XEXP (x, 0));
+      return;
+
+    default:
+      /* Anything else should not have been accepted for the delay
+         slot; dump it and give up.  */
+      debug_rtx (*where);
+      abort ();
+    }
+}
+
+/* Output assembler code to return from a function. */
char *
output_return (operands)
rtx *operands;
{
+ rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;
+
if (leaf_label)
{
operands[0] = leaf_label;
- return "b,a %l0";
+ return "b%* %l0%(";
}
else if (leaf_function)
{
+ /* No delay slot in a leaf function. */
+ if (delay)
+ abort ();
+
/* If we didn't allocate a frame pointer for the current function,
the stack pointer might have been adjusted. Output code to
restore it now. */
@@ -4621,8 +4688,22 @@ output_return (operands)
return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
}
}
+ else if (TARGET_V9)
+ {
+ if (delay)
+ {
+ epilogue_renumber (&SET_DEST (PATTERN (delay)));
+ epilogue_renumber (&SET_SRC (PATTERN (delay)));
+ }
+ if (SKIP_CALLERS_UNIMP_P)
+ return "return %%i7+12%#";
+ else
+ return "return %%i7+8%#";
+ }
else
{
+ if (delay)
+ abort ();
if (SKIP_CALLERS_UNIMP_P)
return "jmp %%i7+12\n\trestore";
else
@@ -4795,14 +4876,14 @@ print_operand (file, x, code)
/* On UltraSPARC, a branch in a delay slot causes a pipeline flush.
Always emit a nop in case the next instruction is a branch. */
if (dbr_sequence_length () == 0
- && (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS))
+ && (optimize && (int)sparc_cpu < PROCESSOR_V9))
fputs (",a", file);
return;
case '(':
/* Output a 'nop' if there's nothing for the delay slot and we are
not optimizing. This is always used with '*' above. */
if (dbr_sequence_length () == 0
- && ! (optimize && (int)sparc_cpu < PROCESSOR_V8PLUS))
+ && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
fputs ("\n\tnop", file);
return;
case '_':
@@ -6066,7 +6147,8 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
dep_type = get_attr_type (dep_insn);
#define SLOW_FP(dep_type) \
-(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
+(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
+
switch (REG_NOTE_KIND (link))
{
case 0:
@@ -6080,16 +6162,16 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
case TYPE_FPSTORE:
if (! SLOW_FP (dep_type))
return 0;
- break;
+ return cost;
case TYPE_STORE:
if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
return cost;
+ if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
/* The dependency between the two instructions is on the data
that is being stored. Assume that the address of the store
is not also dependent. */
- if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
return 0;
return cost;
@@ -6109,15 +6191,15 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
compensate for a dependency which might not really
exist, and 0. */
if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
- || GET_CODE (SET_DEST (pat)) != MEM
- || GET_CODE (SET_SRC (dep_pat)) != MEM
- || ! rtx_equal_p (XEXP (SET_DEST (pat), 0),
- XEXP (SET_SRC (dep_pat), 0)))
+ || GET_CODE (SET_SRC (pat)) != MEM
+ || GET_CODE (SET_DEST (dep_pat)) != MEM
+ || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
+ XEXP (SET_DEST (dep_pat), 0)))
return cost + 2;
return cost + 8;
}
- break;
+ return cost;
case TYPE_BRANCH:
/* Compare to branch latency is 0. There is no benefit from
@@ -6128,16 +6210,15 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
compare to conditional move. */
if (dep_type == TYPE_FPCMP)
return cost - 1;
- break;
+ return cost;
case TYPE_FPCMOVE:
/* FMOVR class instructions can not issue in the same cycle
or the cycle after an instruction which writes any
integer register. Model this as cost 2 for dependent
instructions. */
- if (GET_CODE (PATTERN (insn)) == SET
- && (GET_MODE (SET_DEST (PATTERN (insn))) == SFmode
- || GET_MODE (SET_DEST (PATTERN (insn))) == DFmode)
+ if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
+ || dep_type == TYPE_BINARY)
&& cost < 2)
return 2;
/* Otherwise check as for integer conditional moves. */
@@ -6149,7 +6230,7 @@ ultrasparc_adjust_cost (insn, link, dep_insn, cost)
to model. */
if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
return cost + 3;
- break;
+ return cost;
default:
break;
@@ -6190,9 +6271,8 @@ sparc_issue_rate ()
{
default:
return 1;
- case PROCESSOR_V8PLUS:
case PROCESSOR_V9:
- /* Assume these generic V9 types are capable of at least dual-issue. */
+ /* Assume V9 processors are capable of at least dual-issue. */
return 2;
case PROCESSOR_SUPERSPARC:
return 3;
@@ -6200,3 +6280,175 @@ sparc_issue_rate ()
return 4;
}
}
+
+/* Subroutine of sparc_check_64.  INSN is a single SET whose destination
+   is the register X.  Classify the value being stored by looking at the
+   source of the SET: return 1 if its high 32 bits are known to be zero,
+   -1 if it is known to be sign extended, and 0 if nothing is known.
+   X itself is not examined; all tests are made on SET_SRC.  */
+static int
+set_extends (x, insn)
+     rtx x, insn;
+{
+  register rtx pat = PATTERN (insn);
+  rtx src = SET_SRC (pat);
+
+  switch (GET_CODE (src))
+    {
+      /* Load and some shift instructions zero extend.  */
+    case MEM:
+    case ZERO_EXTEND:
+      /* sethi clears the high bits */
+    case HIGH:
+      /* LO_SUM is used with sethi.  sethi cleared the high
+         bits and the values used with lo_sum are positive */
+    case LO_SUM:
+      /* UNSPEC is v8plus_clear_high */
+    case UNSPEC:
+      /* Store flag stores 0 or 1 */
+    case LT: case LTU:
+    case GT: case GTU:
+    case LE: case LEU:
+    case GE: case GEU:
+    case EQ:
+    case NE:
+      return 1;
+
+    case AND:
+      {
+        rtx op0 = XEXP (src, 0);
+        rtx op1 = XEXP (src, 1);
+
+        /* A non-negative mask clears the high bits by itself.  */
+        if (GET_CODE (op1) == CONST_INT)
+          return INTVAL (op1) >= 0;
+        /* Otherwise one zero extended operand is enough.  */
+        if (GET_CODE (op0) == REG && sparc_check_64 (op0, insn) == 1)
+          return 1;
+        if (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1)
+          return 1;
+        /* Neither operand is known to be zero extended, so nothing can
+           be said about the result.  Do not fall into the shift case.  */
+        return 0;
+      }
+
+      /* NOTE(review): ASHIFT is treated like LSHIFTRT here; confirm that
+         a 32 bit left shift really leaves the high bits clear.  */
+    case ASHIFT:
+    case LSHIFTRT:
+      return GET_MODE (src) == SImode;
+
+      /* Positive integers leave the high bits zero.  The constant to
+         test is the source of the SET, not the register X.  */
+    case CONST_DOUBLE:
+      return ! (CONST_DOUBLE_LOW (src) & 0x80000000);
+    case CONST_INT:
+      return ! (INTVAL (src) & 0x80000000);
+
+    case ASHIFTRT:
+    case SIGN_EXTEND:
+      return - (GET_MODE (src) == SImode);
+
+    default:
+      return 0;
+    }
+}
+
+/* Scan backwards from INSN for the insn that sets X and report what is
+   known about the high 32 bits of X (of the low word of X, if DImode):
+   1 if they are zero, -1 if X is sign extended, 0 if unknown.  */
+int
+sparc_check_64 (x, insn)
+     rtx x, insn;
+{
+  /* When a register is set only once it is safe to step over insns this
+     code does not understand: the scan either finds the single set and
+     classifies it, or fails to find it and returns 0.  */
+  int set_once = (GET_CODE (x) == REG
+                  && flag_expensive_optimizations
+                  && REG_N_SETS (REGNO (x)) == 1);
+
+  /* Without a starting insn, only a set-once register can be traced;
+     start from the last insn in that case.  */
+  if (insn == 0)
+    {
+      if (! set_once)
+        return 0;
+      insn = get_last_insn_anywhere ();
+    }
+
+  for (insn = PREV_INSN (insn); insn != 0; insn = PREV_INSN (insn))
+    {
+      enum rtx_code kind = GET_CODE (insn);
+      rtx pat;
+
+      /* Jumps and notes are always stepped over.  */
+      if (kind == JUMP_INSN || kind == NOTE)
+        continue;
+
+      /* Labels, calls and anything else unrecognized end the scan
+         unless X is set only once.  */
+      if (kind != INSN)
+        {
+          if (! set_once)
+            return 0;
+          continue;
+        }
+
+      pat = PATTERN (insn);
+      if (GET_CODE (pat) != SET)
+        return 0;
+      if (rtx_equal_p (x, SET_DEST (pat)))
+        return set_extends (x, insn);
+      if (reg_overlap_mentioned_p (SET_DEST (pat), x))
+        return 0;
+    }
+
+  return 0;
+}
+
+/* Output the insns that combine the two halves of operand 1 into one
+   register (operand 3, or operand 0 when operand 3 is a SCRATCH), then
+   return the template for the shift itself, built from OPCODE.  The
+   low half of operand 1 is first cleared of its high bits with "srl"
+   unless sparc_check_64 reports it as already zero extended.  The
+   returned string lives in a static buffer that is overwritten by the
+   next call.  */
+char *
+sparc_v8plus_shift (operands, insn, opcode)
+     rtx *operands;
+     rtx insn;
+     char *opcode;
+{
+  static char asm_code[60];
+  char *tail;
+
+  if (GET_CODE (operands[3]) == SCRATCH)
+    operands[3] = operands[0];
+
+  output_asm_insn ("sllx %H1,32,%3", operands);
+  if (sparc_check_64 (operands[1], insn) <= 0)
+    output_asm_insn ("srl %L1,0,%L1", operands);
+  output_asm_insn ("or %L1,%3,%3", operands);
+
+  /* Alternative 2 computes in operand 3 and copies the low half out
+     afterwards; the other alternatives work directly on operand 0.  */
+  if (which_alternative == 2)
+    tail = " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0";
+  else
+    tail = " %0,%2,%L0\n\tsrlx %L0,32,%H0";
+
+  strcpy (asm_code, opcode);
+  strcat (asm_code, tail);
+  return asm_code;
+}
+
+
+/* Return nonzero if DEST and SRC reference only global and in
+   registers.  SRC may also be a CONST_INT.  Always 0 unless compiling
+   for V9, and always 0 in a leaf function.  */
+
+int
+sparc_return_peephole_ok (dest, src)
+     rtx dest, src;
+{
+  if (! TARGET_V9 || leaf_function)
+    return 0;
+
+  switch (GET_CODE (src))
+    {
+    case CONST_INT:
+      break;
+
+    case REG:
+      if (! IN_OR_GLOBAL_P (src))
+        return 0;
+      break;
+
+    default:
+      return 0;
+    }
+
+  return IN_OR_GLOBAL_P (dest);
+}
+
+/* Return nonzero if OP is built only from constants, symbolic
+   addresses and registers satisfying IN_OR_GLOBAL_P, combined with
+   MEM and PLUS.  Anything else is rejected.  MODE is not examined;
+   sub-expressions are checked in Pmode.  */
+int
+delay_operand (op, mode)
+     rtx op;
+     enum machine_mode mode;
+{
+  enum rtx_code code = GET_CODE (op);
+
+  if (code == REG)
+    return IN_OR_GLOBAL_P (op);
+
+  if (code == MEM)
+    return delay_operand (XEXP (op, 0), Pmode);
+
+  if (code == PLUS)
+    {
+      /* Both addends must qualify; the second is only looked at when
+         the first one passes.  */
+      int first_ok = delay_operand (XEXP (op, 0), Pmode);
+
+      return first_ok && delay_operand (XEXP (op, 1), Pmode);
+    }
+
+  if (code == CONST || code == CONST_INT
+      || code == SYMBOL_REF || code == LABEL_REF)
+    return 1;
+
+  return 0;
+}
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 1659e68b22e..c573f40d20a 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -158,7 +158,6 @@ Unrecognized value in TARGET_CPU_DEFAULT.
%{mcpu=f930:-D__sparclite__} %{mcpu=f934:-D__sparclite__} \
%{mcpu=v8:-D__sparc_v8__} \
%{mcpu=supersparc:-D__supersparc__ -D__sparc_v8__} \
-%{mcpu=v8plus:-D__sparc_v9__} \
%{mcpu=v9:-D__sparc_v9__} \
%{mcpu=ultrasparc:-D__sparc_v9__} \
%{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \
@@ -209,9 +208,9 @@ Unrecognized value in TARGET_CPU_DEFAULT.
%{mf930:-Asparclite} %{mf934:-Asparclite} \
%{mcpu=sparclite:-Asparclite} \
%{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \
-%{mcpu=v8plus:-Av8plus} \
+%{mv8plus:-Av8plus} \
%{mcpu=v9:-Av9} \
-%{mcpu=ultrasparc:-Av9a} \
+%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
%{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(asm_cpu_default)}}}}}}} \
"
@@ -453,13 +452,17 @@ extern int target_flags;
#define MASK_VIS 0x1000000
#define TARGET_VIS (target_flags & MASK_VIS)
-/* Compile for Solaris V8+. 64 bit instructions are available but the
- high 32 bits of all registers except the globals and current outs may
- be cleared at any time. */
+/* Compile for Solaris V8+. 32 bit Solaris preserves the high bits of
+ the current out and global registers. Linux saves the high bits on
+ context switches but not signals. */
#define MASK_V8PLUS 0x2000000
#define TARGET_V8PLUS (target_flags & MASK_V8PLUS)
-/* See sparc.md */
+/* TARGET_HARD_MUL: Use hardware multiply instructions but not %y.
+ TARGET_HARD_MUL32: Use hardware multiply instructions with rd %y
+ to get high 32 bits. False in V8+ or V9 because multiply stores
+ a 64 bit result in a register. */
+
#define TARGET_HARD_MUL32 \
((TARGET_V8 || TARGET_SPARCLITE \
|| TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS) \
@@ -495,6 +498,8 @@ extern int target_flags;
{"no-app-regs", -MASK_APP_REGS}, \
{"hard-quad-float", MASK_HARD_QUAD}, \
{"soft-quad-float", -MASK_HARD_QUAD}, \
+ {"v8plus", MASK_V8PLUS}, \
+ {"no-v8plus", -MASK_V8PLUS}, \
{"vis", MASK_VIS}, \
/* ??? These are deprecated, coerced to -mcpu=. Delete in 2.9. */ \
{"cypress", 0}, \
@@ -502,7 +507,6 @@ extern int target_flags;
{"f930", 0}, \
{"f934", 0}, \
{"v8", 0}, \
- {"v8plus", 0}, \
{"supersparc", 0}, \
/* End of deprecated options. */ \
/* -mptrNN exists for *experimental* purposes. */ \
@@ -535,7 +539,6 @@ enum processor_type {
PROCESSOR_F934,
PROCESSOR_SPARCLET,
PROCESSOR_TSC701,
- PROCESSOR_V8PLUS,
PROCESSOR_V9,
PROCESSOR_ULTRASPARC
};
@@ -977,6 +980,12 @@ while (0)
: (GET_MODE_SIZE (MODE) + 3) / 4) \
: ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+/* A subreg in 64 bit mode will have the wrong offset for a floating point
+   register.  The least significant part is at offset 1, compared to 0 for
+   integer registers.  The value of this macro is a hard register number,
+   so the floating point case yields REGNO + 1, not the constant 1.  */
+#define ALTER_HARD_SUBREG(TMODE, WORD, FMODE, REGNO) \
+ (TARGET_ARCH64 && (REGNO) >= 32 && (REGNO) < 96 && (TMODE) == SImode \
+  ? ((REGNO) + 1) : ((REGNO) + (WORD)))
+
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
See sparc.c for how we initialize this. */
extern int *hard_regno_mode_classes;
@@ -1093,14 +1102,14 @@ extern int sparc_mode_class[];
#define STRUCT_VALUE \
(TARGET_ARCH64 \
? 0 \
- : gen_rtx (MEM, Pmode, \
- gen_rtx (PLUS, Pmode, stack_pointer_rtx, \
+ : gen_rtx_MEM (Pmode, \
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx, \
GEN_INT (STRUCT_VALUE_OFFSET))))
#define STRUCT_VALUE_INCOMING \
(TARGET_ARCH64 \
? 0 \
- : gen_rtx (MEM, Pmode, \
- gen_rtx (PLUS, Pmode, frame_pointer_rtx, \
+ : gen_rtx_MEM (Pmode, \
+ gen_rtx_PLUS (Pmode, frame_pointer_rtx, \
GEN_INT (STRUCT_VALUE_OFFSET))))
/* Define the classes of registers for register constraints in the
@@ -1157,8 +1166,8 @@ extern int sparc_mode_class[];
??? Should %fcc[0123] be handled similarly?
*/
-enum reg_class { NO_REGS, FPCC_REGS, GENERAL_REGS, FP_REGS, EXTRA_FP_REGS,
- GENERAL_OR_FP_REGS, GENERAL_OR_EXTRA_FP_REGS,
+enum reg_class { NO_REGS, FPCC_REGS, I64_REGS, GENERAL_REGS, FP_REGS,
+ EXTRA_FP_REGS, GENERAL_OR_FP_REGS, GENERAL_OR_EXTRA_FP_REGS,
ALL_REGS, LIM_REG_CLASSES };
#define N_REG_CLASSES (int) LIM_REG_CLASSES
@@ -1166,15 +1175,16 @@ enum reg_class { NO_REGS, FPCC_REGS, GENERAL_REGS, FP_REGS, EXTRA_FP_REGS,
/* Give names of register classes as strings for dump file. */
#define REG_CLASS_NAMES \
- { "NO_REGS", "FPCC_REGS", "GENERAL_REGS", "FP_REGS", "EXTRA_FP_REGS", \
- "GENERAL_OR_FP_REGS", "GENERAL_OR_EXTRA_FP_REGS", "ALL_REGS" }
+ { "NO_REGS", "FPCC_REGS", "I64_REGS", "GENERAL_REGS", "FP_REGS", \
+ "EXTRA_FP_REGS", "GENERAL_OR_FP_REGS", "GENERAL_OR_EXTRA_FP_REGS", \
+ "ALL_REGS" }
/* Define which registers fit in which classes.
This is an initializer for a vector of HARD_REG_SET
of length N_REG_CLASSES. */
#define REG_CLASS_CONTENTS \
- {{0, 0, 0, 0}, {0, 0, 0, 0xf}, \
+ {{0, 0, 0, 0}, {0, 0, 0, 0xf}, {0xffff, 0, 0, 0}, \
{-1, 0, 0, 0}, {0, -1, 0, 0}, {0, -1, -1, 0}, \
{-1, -1, 0, 0}, {-1, -1, -1, 0}, {-1, -1, -1, 0x1f}}
@@ -1266,15 +1276,18 @@ extern char leaf_reg_remap[];
/* Get reg_class from a letter such as appears in the machine description.
In the not-v9 case, coerce v9's 'e' class to 'f', so we can use 'e' in the
.md file for v8 and v9.
- Use 'd' and 'b' for single precision VIS operations if TARGET_VIS. */
+ 'd' and 'b' are used for single and double precision VIS operations,
+ if TARGET_VIS.
+ 'h' is used for V8+ 64 bit global and out registers. */
#define REG_CLASS_FROM_LETTER(C) \
(TARGET_V9 \
? ((C) == 'f' ? FP_REGS \
: (C) == 'e' ? EXTRA_FP_REGS \
: (C) == 'c' ? FPCC_REGS \
- : ((C) == 'd' && TARGET_VIS) ? FP_REGS \
- : ((C) == 'b' && TARGET_VIS) ? FP_REGS \
+ : ((C) == 'd' && TARGET_VIS) ? FP_REGS\
+ : ((C) == 'b' && TARGET_VIS) ? EXTRA_FP_REGS\
+ : ((C) == 'h' && TARGET_V8PLUS) ? I64_REGS\
: NO_REGS) \
: ((C) == 'f' ? FP_REGS \
: (C) == 'e' ? FP_REGS \
@@ -1299,6 +1312,8 @@ extern char leaf_reg_remap[];
/* 10 and 11 bit immediates are only used for a few specific insns.
SMALL_INT is used throughout the port so we continue to use it. */
#define SMALL_INT(X) (SPARC_SIMM13_P (INTVAL (X)))
+/* 13 bit immediate, considering only the low 32 bits.  The value is
+   masked first and then narrowed to int, so that a negative 32 bit
+   constant stays negative when it is handed to SPARC_SIMM13_P (the
+   same order of operations arith_operand uses).  */
+#define SMALL_INT32(X) (SPARC_SIMM13_P ((int) (INTVAL (X) & 0xffffffff)))
#define SPARC_SETHI_P(X) \
(((unsigned HOST_WIDE_INT) (X) & ~(unsigned HOST_WIDE_INT) 0xfffffc00) == 0)
@@ -1366,7 +1381,7 @@ extern char leaf_reg_remap[];
#define SECONDARY_MEMORY_NEEDED_RTX(MODE) \
(get_frame_size () == 0 \
? assign_stack_local (MODE, GET_MODE_SIZE (MODE), 0) \
- : gen_rtx (MEM, MODE, gen_rtx (PLUS, Pmode, frame_pointer_rtx, \
+ : gen_rtx_MEM (MODE, gen_rtx_PLUS (Pmode, frame_pointer_rtx, \
GEN_INT (STARTING_FRAME_OFFSET))))
/* Get_secondary_mem widens it's argument to BITS_PER_WORD which loses on v9
@@ -1501,18 +1516,18 @@ extern char leaf_reg_remap[];
/* On SPARC the value is found in the first "output" register. */
#define FUNCTION_VALUE(VALTYPE, FUNC) \
- gen_rtx (REG, TYPE_MODE (VALTYPE), BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE)))
+ gen_rtx_REG (TYPE_MODE (VALTYPE), BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE)))
/* But the called function leaves it in the first "input" register. */
#define FUNCTION_OUTGOING_VALUE(VALTYPE, FUNC) \
- gen_rtx (REG, TYPE_MODE (VALTYPE), BASE_OUTGOING_VALUE_REG (TYPE_MODE (VALTYPE)))
+ gen_rtx_REG (TYPE_MODE (VALTYPE), BASE_OUTGOING_VALUE_REG (TYPE_MODE (VALTYPE)))
/* Define how to find the value returned by a library function
assuming the value has mode MODE. */
#define LIBCALL_VALUE(MODE) \
- gen_rtx (REG, MODE, BASE_RETURN_VALUE_REG (MODE))
+ gen_rtx_REG (MODE, BASE_RETURN_VALUE_REG (MODE))
/* 1 if N is a possible register number for a function value
as seen by the caller.
@@ -1615,7 +1630,7 @@ function_arg_pass_by_reference (& (CUM), (MODE), (TYPE), (NAMED))
to pad out an argument with extra space. The value should be of type
`enum direction': either `upward' to pad above the argument,
`downward' to pad below, or `none' to inhibit padding. */
-extern enum direction function_arg_padding ();
+
#define FUNCTION_ARG_PADDING(MODE, TYPE) \
function_arg_padding ((MODE), (TYPE))
@@ -1630,17 +1645,6 @@ function_arg_padding ((MODE), (TYPE))
|| ((TYPE) && TYPE_ALIGN (TYPE) == 128))) \
? 128 : PARM_BOUNDARY)
-/* Initialize data used by insn expanders. This is called from
- init_emit, once for each function, before code is generated.
- For v9, clear the temp slot used by float/int DImode conversions.
- ??? There is the 16 bytes at [%fp-16], however we'd like to delete this
- space at some point.
- ??? Use assign_stack_temp? */
-
-extern void sparc_init_expanders ();
-extern struct rtx_def *sparc64_fpconv_stack_temp ();
-#define INIT_EXPANDERS sparc_init_expanders ()
-
/* Define the information needed to generate branch and scc insns. This is
stored from the compare operation. Note that we can't use "rtx" here
since it hasn't been defined! */
@@ -1691,8 +1695,8 @@ do { \
extern int leaf_function;
#define FUNCTION_PROLOGUE(FILE, SIZE) \
- (TARGET_FLAT ? sparc_flat_output_function_prologue (FILE, SIZE) \
- : output_function_prologue (FILE, SIZE, leaf_function))
+ (TARGET_FLAT ? sparc_flat_output_function_prologue (FILE, (int)SIZE) \
+ : output_function_prologue (FILE, (int)SIZE, leaf_function))
/* Output assembler code to FILE to increment profiler label # LABELNO
for profiling a function entry.
@@ -2070,8 +2074,8 @@ extern int current_function_outgoing_args_size;
extern union tree_node *current_function_decl;
#define FUNCTION_EPILOGUE(FILE, SIZE) \
- (TARGET_FLAT ? sparc_flat_output_function_epilogue (FILE, SIZE) \
- : output_function_epilogue (FILE, SIZE, leaf_function))
+ (TARGET_FLAT ? sparc_flat_output_function_epilogue (FILE, (int)SIZE) \
+ : output_function_epilogue (FILE, (int)SIZE, leaf_function))
#define DELAY_SLOTS_FOR_EPILOGUE \
(TARGET_FLAT ? sparc_flat_epilogue_delay_slots () : 1)
@@ -2120,11 +2124,11 @@ do { \
} \
else \
{ \
- ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000)); \
- ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000)); \
- ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000)); \
+ ASM_OUTPUT_INT (FILE, const0_rtx); \
+ ASM_OUTPUT_INT (FILE, const0_rtx); \
+ ASM_OUTPUT_INT (FILE, const0_rtx); \
ASM_OUTPUT_INT (FILE, GEN_INT (0x81C04000)); \
- ASM_OUTPUT_INT (FILE, GEN_INT (0x00000000)); \
+ ASM_OUTPUT_INT (FILE, const0_rtx); \
} \
} while (0)
@@ -2175,7 +2179,7 @@ extern struct rtx_def *sparc_builtin_saveregs ();
that holds the dynamic chain--the previous frame's address.
??? -mflat support? */
#define DYNAMIC_CHAIN_ADDRESS(frame) \
- gen_rtx (PLUS, Pmode, frame, GEN_INT (14 * UNITS_PER_WORD))
+ gen_rtx_PLUS (Pmode, frame, GEN_INT (14 * UNITS_PER_WORD))
/* The return address isn't on the stack, it is in a register, so we can't
access it from the current frame pointer. We can access it from the
@@ -2194,8 +2198,8 @@ extern struct rtx_def *sparc_builtin_saveregs ();
returns, and +12 for structure returns. */
#define RETURN_ADDR_RTX(count, frame) \
((count == -1) \
- ? gen_rtx (REG, Pmode, 31) \
- : gen_rtx (MEM, Pmode, \
+ ? gen_rtx_REG (Pmode, 31) \
+ : gen_rtx_MEM (Pmode, \
memory_address (Pmode, plus_constant (frame, 15 * UNITS_PER_WORD))))
/* Before the prologue, the return address is %o7 + 8. OK, sometimes it's
@@ -2203,7 +2207,7 @@ extern struct rtx_def *sparc_builtin_saveregs ();
Actually, just using %o7 is close enough for unwinding, but %o7+8
is something you can return to. */
#define INCOMING_RETURN_ADDR_RTX \
- gen_rtx (PLUS, word_mode, gen_rtx (REG, word_mode, 15), GEN_INT (8))
+ gen_rtx_PLUS (word_mode, gen_rtx_REG (word_mode, 15), GEN_INT (8))
/* The offset from the incoming value of %sp to the top of the stack frame
for the current function. On sparc64, we have to account for the stack
@@ -2250,6 +2254,9 @@ extern struct rtx_def *sparc_builtin_saveregs ();
/* 1 if X is an fp register. */
#define FP_REG_P(X) (REG_P (X) && REGNO_OK_FOR_FP_P (REGNO (X)))
+
+/* Is X, a REG, an in or global register? I.e., is its regno 0..7 or 24..31? */
+#define IN_OR_GLOBAL_P(X) (REGNO (X) < 8 || (REGNO (X) >= 24 && REGNO (X) <= 31))
/* Maximum number of registers that can appear in a valid memory address. */
@@ -2439,30 +2446,30 @@ extern struct rtx_def *legitimize_pic_address ();
#define LEGITIMIZE_ADDRESS(X,OLDX,MODE,WIN) \
{ rtx sparc_x = (X); \
if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 0)) == MULT) \
- (X) = gen_rtx (PLUS, Pmode, XEXP (X, 1), \
+ (X) = gen_rtx_PLUS (Pmode, XEXP (X, 1), \
force_operand (XEXP (X, 0), NULL_RTX)); \
if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 1)) == MULT) \
- (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0), \
+ (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0), \
force_operand (XEXP (X, 1), NULL_RTX)); \
if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 0)) == PLUS) \
- (X) = gen_rtx (PLUS, Pmode, force_operand (XEXP (X, 0), NULL_RTX),\
+ (X) = gen_rtx_PLUS (Pmode, force_operand (XEXP (X, 0), NULL_RTX),\
XEXP (X, 1)); \
if (GET_CODE (X) == PLUS && GET_CODE (XEXP (X, 1)) == PLUS) \
- (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0), \
+ (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0), \
force_operand (XEXP (X, 1), NULL_RTX)); \
if (sparc_x != (X) && memory_address_p (MODE, X)) \
goto WIN; \
if (flag_pic) (X) = legitimize_pic_address (X, MODE, 0); \
else if (GET_CODE (X) == PLUS && CONSTANT_ADDRESS_P (XEXP (X, 1))) \
- (X) = gen_rtx (PLUS, Pmode, XEXP (X, 0), \
+ (X) = gen_rtx_PLUS (Pmode, XEXP (X, 0), \
copy_to_mode_reg (Pmode, XEXP (X, 1))); \
else if (GET_CODE (X) == PLUS && CONSTANT_ADDRESS_P (XEXP (X, 0))) \
- (X) = gen_rtx (PLUS, Pmode, XEXP (X, 1), \
+ (X) = gen_rtx_PLUS (Pmode, XEXP (X, 1), \
copy_to_mode_reg (Pmode, XEXP (X, 0))); \
else if (GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == CONST \
|| GET_CODE (X) == LABEL_REF) \
- (X) = gen_rtx (LO_SUM, Pmode, \
- copy_to_mode_reg (Pmode, gen_rtx (HIGH, Pmode, X)), X); \
+ (X) = gen_rtx_LO_SUM (Pmode, \
+ copy_to_mode_reg (Pmode, gen_rtx_HIGH (Pmode, X)), X); \
if (memory_address_p (MODE, X)) \
goto WIN; }
@@ -2512,7 +2519,7 @@ extern struct rtx_def *legitimize_pic_address ();
/* This is how to refer to the variable errno. */
#define GEN_ERRNO_RTX \
- gen_rtx (MEM, SImode, gen_rtx (SYMBOL_REF, Pmode, "errno"))
+ gen_rtx_MEM (SImode, gen_rtx_SYMBOL_REF (Pmode, "errno"))
#endif /* 0 */
/* Define if operations between registers always perform the operation
@@ -2585,7 +2592,7 @@ extern struct rtx_def *legitimize_pic_address ();
: ((GET_CODE (X) == PLUS || GET_CODE (X) == MINUS \
|| GET_CODE (X) == NEG || GET_CODE (X) == ASHIFT) \
? (TARGET_ARCH64 && GET_MODE (X) == DImode ? CCX_NOOVmode : CC_NOOVmode) \
- : (TARGET_ARCH64 && GET_MODE (X) == DImode ? CCXmode : CCmode)))
+ : ((TARGET_ARCH64 || TARGET_V8PLUS) && GET_MODE (X) == DImode ? CCXmode : CCmode)))
/* Return non-zero if SELECT_CC_MODE will never return MODE for a
floating point inequality comparison. */
@@ -2645,32 +2652,32 @@ extern struct rtx_def *legitimize_pic_address ();
#define INIT_TARGET_OPTABS \
do { \
add_optab->handlers[(int) TFmode].libfunc \
- = gen_rtx (SYMBOL_REF, Pmode, ADDTF3_LIBCALL); \
+ = gen_rtx_SYMBOL_REF (Pmode, ADDTF3_LIBCALL); \
sub_optab->handlers[(int) TFmode].libfunc \
- = gen_rtx (SYMBOL_REF, Pmode, SUBTF3_LIBCALL); \
+ = gen_rtx_SYMBOL_REF (Pmode, SUBTF3_LIBCALL); \
neg_optab->handlers[(int) TFmode].libfunc \
- = gen_rtx (SYMBOL_REF, Pmode, NEGTF2_LIBCALL); \
+ = gen_rtx_SYMBOL_REF (Pmode, NEGTF2_LIBCALL); \
smul_optab->handlers[(int) TFmode].libfunc \
- = gen_rtx (SYMBOL_REF, Pmode, MULTF3_LIBCALL); \
+ = gen_rtx_SYMBOL_REF (Pmode, MULTF3_LIBCALL); \
flodiv_optab->handlers[(int) TFmode].libfunc \
- = gen_rtx (SYMBOL_REF, Pmode, DIVTF3_LIBCALL); \
- eqtf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EQTF2_LIBCALL); \
- netf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, NETF2_LIBCALL); \
- gttf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, GTTF2_LIBCALL); \
- getf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, GETF2_LIBCALL); \
- lttf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, LTTF2_LIBCALL); \
- letf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, LETF2_LIBCALL); \
- trunctfsf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, TRUNCTFSF2_LIBCALL); \
- trunctfdf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, TRUNCTFDF2_LIBCALL); \
- extendsftf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EXTENDSFTF2_LIBCALL); \
- extenddftf2_libfunc = gen_rtx (SYMBOL_REF, Pmode, EXTENDDFTF2_LIBCALL); \
- floatsitf_libfunc = gen_rtx (SYMBOL_REF, Pmode, FLOATSITF2_LIBCALL); \
- fixtfsi_libfunc = gen_rtx (SYMBOL_REF, Pmode, FIX_TRUNCTFSI2_LIBCALL); \
+ = gen_rtx_SYMBOL_REF (Pmode, DIVTF3_LIBCALL); \
+ eqtf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EQTF2_LIBCALL); \
+ netf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, NETF2_LIBCALL); \
+ gttf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, GTTF2_LIBCALL); \
+ getf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, GETF2_LIBCALL); \
+ lttf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, LTTF2_LIBCALL); \
+ letf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, LETF2_LIBCALL); \
+ trunctfsf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, TRUNCTFSF2_LIBCALL); \
+ trunctfdf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, TRUNCTFDF2_LIBCALL); \
+ extendsftf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EXTENDSFTF2_LIBCALL); \
+ extenddftf2_libfunc = gen_rtx_SYMBOL_REF (Pmode, EXTENDDFTF2_LIBCALL); \
+ floatsitf_libfunc = gen_rtx_SYMBOL_REF (Pmode, FLOATSITF2_LIBCALL); \
+ fixtfsi_libfunc = gen_rtx_SYMBOL_REF (Pmode, FIX_TRUNCTFSI2_LIBCALL); \
fixunstfsi_libfunc \
- = gen_rtx (SYMBOL_REF, Pmode, FIXUNS_TRUNCTFSI2_LIBCALL); \
+ = gen_rtx_SYMBOL_REF (Pmode, FIXUNS_TRUNCTFSI2_LIBCALL); \
if (TARGET_FPU) \
sqrt_optab->handlers[(int) TFmode].libfunc \
- = gen_rtx (SYMBOL_REF, Pmode, "_Q_sqrt"); \
+ = gen_rtx_SYMBOL_REF (Pmode, "_Q_sqrt"); \
INIT_SUBTARGET_OPTABS; \
} while (0)
@@ -2709,12 +2716,12 @@ extern struct rtx_def *legitimize_pic_address ();
/* Compute extra cost of moving data between one register class
and another. */
+#define GENERAL_OR_I64(C) ((C) == GENERAL_REGS || (C) == I64_REGS)
#define REGISTER_MOVE_COST(CLASS1, CLASS2) \
- (((FP_REG_CLASS_P (CLASS1) && (CLASS2) == GENERAL_REGS) \
- || ((CLASS1) == GENERAL_REGS && FP_REG_CLASS_P (CLASS2)) \
+ (((FP_REG_CLASS_P (CLASS1) && GENERAL_OR_I64 (CLASS2)) \
+ || (GENERAL_OR_I64 (CLASS1) && FP_REG_CLASS_P (CLASS2)) \
|| (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS) \
- ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) \
- : 2)
+ ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) : 2)
/* Provide the costs of a rtl expression. This is in the body of a
switch on CODE. The purpose for the cost of MULT is to encourage
@@ -2741,20 +2748,17 @@ extern struct rtx_def *legitimize_pic_address ();
/* Adjust the cost of dependencies. */
#define ADJUST_COST(INSN,LINK,DEP,COST) \
-do { \
if (sparc_cpu == PROCESSOR_SUPERSPARC) \
(COST) = supersparc_adjust_cost (INSN, LINK, DEP, COST); \
else if (sparc_cpu == PROCESSOR_ULTRASPARC) \
(COST) = ultrasparc_adjust_cost (INSN, LINK, DEP, COST); \
-} while (0)
+ else
/* Conditional branches with empty delay slots have a length of two. */
#define ADJUST_INSN_LENGTH(INSN, LENGTH) \
-do { \
if (GET_CODE (INSN) == CALL_INSN \
|| (GET_CODE (INSN) == JUMP_INSN && ! simplejump_p (insn))) \
- LENGTH += 1; \
-} while (0)
+ LENGTH += 1; else
/* Control the assembler format that we output. */
@@ -3252,6 +3256,16 @@ extern int v9_regcmp_p ();
extern unsigned long sparc_flat_compute_frame_size ();
extern unsigned long sparc_type_code ();
+extern char *sparc_v8plus_shift ();
+
+#ifdef __STDC__
+/* Function used for V8+ code generation. Returns 1 if the high
+ 32 bits of REG are 0 before INSN. */
+extern int sparc_check_64 (struct rtx_def *, struct rtx_def *);
+extern int sparc_return_peephole_ok (struct rtx_def *, struct rtx_def *);
+extern int compute_frame_size (int, int);
+#endif
+
/* Defined in flags.h, but insn-emit.c does not include flags.h. */
extern int flag_pic;
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index ac79f682757..8ef692d4e63 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -34,7 +34,7 @@
;; Attribute for cpu type.
;; These must match the values for enum processor_type in sparc.h.
-(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,sparclet,tsc701,v8plus,v9,ultrasparc"
+(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,sparclet,tsc701,v9,ultrasparc"
(const (symbol_ref "sparc_cpu_attr")))
;; Attribute for the instruction set.
@@ -67,7 +67,7 @@
;; type "call_no_delay_slot" is a call followed by an unimp instruction.
(define_attr "type"
- "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc"
+ "move,unary,binary,compare,load,sload,store,ialu,shift,uncond_branch,branch,call,call_no_delay_slot,return,address,imul,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrt,cmove,multi,misc"
(const_string "binary"))
;; Set true if insn uses call-clobbered intermediate register.
@@ -110,7 +110,7 @@
;; Attributes for instruction and branch scheduling
(define_attr "in_call_delay" "false,true"
- (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,multi")
+ (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,return,multi")
(const_string "false")
(eq_attr "type" "load,fpload,store,fpstore")
(if_then_else (eq_attr "length" "1")
@@ -127,6 +127,22 @@
(define_delay (eq_attr "type" "call")
[(eq_attr "in_call_delay" "true") (nil) (nil)])
+(define_attr "leaf_function" "false,true"
+ (const (symbol_ref "leaf_function")))
+
+
+(define_attr "in_return_delay" "false,true"
+ (if_then_else (and (and (and (eq_attr "type" "move,load,sload,store,binary,ialu")
+ (eq_attr "length" "1"))
+ (eq_attr "leaf_function" "false"))
+ (match_insn "eligible_for_return_delay"))
+ (const_string "true")
+ (const_string "false")))
+
+(define_delay (and (eq_attr "type" "return")
+ (eq_attr "isa" "v9"))
+ [(eq_attr "in_return_delay" "true") (nil) (nil)])
+
;; ??? Should implement the notion of predelay slots for floating point
;; branches. This would allow us to remove the nop always inserted before
;; a floating point branch.
@@ -356,7 +372,7 @@
(define_function_unit "ieu" 1 0
(and (eq_attr "cpu" "ultrasparc")
- (eq_attr "type" "ialu,shift,compare,cmove,call"))
+ (eq_attr "type" "ialu,binary,shift,compare,cmove,call"))
1 1)
(define_function_unit "ieu_shift" 1 0
@@ -370,12 +386,15 @@
2 1)
;; Timings; throughput/latency
-;; ?? FADD 1/3 add/sub, format conv, compar, abs, neg
-;; ?? FMUL 1/3
-;; ?? FDIVs 1/12
-;; ?? FDIVd 1/22
-;; ?? FSQRTs 1/12
-;; ?? FSQRTd 1/22
+;; FMOV 1/1 fmov, fabs, fneg
+;; FMOVcc 1/2
+;; FADD 1/4 add/sub, format conv, compar
+;; FMUL 1/4
+;; FDIVs 12/12
+;; FDIVd 22/22
+;; FSQRTs 12/12
+;; FSQRTd 22/22
+;; FCMP takes 1 cycle to branch, 2 cycles to conditional move.
(define_function_unit "fadd" 1 0
(and (eq_attr "cpu" "ultrasparc")
@@ -456,7 +475,7 @@
[(set (reg:CCX 100)
(compare:CCX (match_operand:DI 0 "register_operand" "")
(match_operand:DI 1 "arith_double_operand" "")))]
- "TARGET_ARCH64"
+ "TARGET_ARCH64 || TARGET_V8PLUS"
"
{
sparc_compare_op0 = operands[0];
@@ -521,6 +540,37 @@
"cmp %0,%1"
[(set_attr "type" "compare")])
+(define_insn "cmpdi_v8plus"
+ [(set (reg:CCX 100)
+ (compare:CCX (match_operand:DI 0 "register_operand" "r,r,r")
+ (match_operand:DI 1 "arith_double_operand" "J,I,r")))
+ (clobber (match_scratch:SI 2 "=&h,&h,&h"))
+ (clobber (match_scratch:SI 3 "=X,X,&h"))]
+ "TARGET_V8PLUS"
+ "*
+{
+ /* The srl can be omitted if the value in %L0 or %L1 is already
+ zero extended. */
+
+ output_asm_insn (\"sllx %H0,32,%2\", operands);
+
+ if (sparc_check_64 (operands[0], insn) <= 0)
+ output_asm_insn (\"srl %L0,0,%L0\", operands);
+
+ switch (which_alternative)
+ {
+ case 0:
+ return \"orcc %L0,%2,%%g0\";
+ case 1:
+ return \"or %L0,%2,%2\;cmp %2,%1\";
+ case 2:
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn (\"srl %L1,0,%L1\", operands);
+ return \"sllx %H1,32,%3\;or %L0,%2,%2\;or %L1,%3,%3\;cmp %2,%3\";
+ }
+}"
+ [(set_attr "length" "3,4,7")])
+
(define_insn "*cmpsf_fpe"
[(set (match_operand:CCFPE 0 "fcc_reg_operand" "=c")
(compare:CCFPE (match_operand:SF 1 "register_operand" "f")
@@ -1008,7 +1058,7 @@
(const_int 0)))]
"TARGET_ARCH64"
"mov 0,%0\;movrnz %1,1,%0"
- [(set_attr "type" "unary")
+ [(set_attr "type" "cmove")
(set_attr "length" "2")])
(define_insn "*neg_snedi_zero"
@@ -1017,7 +1067,7 @@
(const_int 0))))]
"TARGET_ARCH64"
"mov 0,%0\;movrnz %1,-1,%0"
- [(set_attr "type" "unary")
+ [(set_attr "type" "cmove")
(set_attr "length" "2")])
(define_insn "*snedi_zero_trunc"
@@ -1026,7 +1076,7 @@
(const_int 0)))]
"TARGET_ARCH64"
"mov 0,%0\;movrnz %1,1,%0"
- [(set_attr "type" "unary")
+ [(set_attr "type" "cmove")
(set_attr "length" "2")])
(define_insn "*seqsi_zero"
@@ -1065,7 +1115,7 @@
(const_int 0)))]
"TARGET_ARCH64"
"mov 0,%0\;movrz %1,1,%0"
- [(set_attr "type" "unary")
+ [(set_attr "type" "cmove")
(set_attr "length" "2")])
(define_insn "*neg_seqdi_zero"
@@ -1074,7 +1124,7 @@
(const_int 0))))]
"TARGET_ARCH64"
"mov 0,%0\;movrz %1,-1,%0"
- [(set_attr "type" "unary")
+ [(set_attr "type" "cmove")
(set_attr "length" "2")])
(define_insn "*seqdi_zero_trunc"
@@ -1083,7 +1133,7 @@
(const_int 0)))]
"TARGET_ARCH64"
"mov 0,%0\;movrz %1,1,%0"
- [(set_attr "type" "unary")
+ [(set_attr "type" "cmove")
(set_attr "length" "2")])
;; We can also do (x + (i == 0)) and related, so put them in.
@@ -1645,15 +1695,16 @@
[(set_attr "type" "move")
(set_attr "length" "1")])
-(define_insn "get_pc_via_call"
- [(set (pc) (label_ref (match_operand 0 "" "")))
- (set (reg:SI 15) (label_ref (match_operand 1 "" "")))]
- ""
- "call %l0%#"
- [(set_attr "type" "uncond_branch")])
+(define_insn "get_pc"
+ [(clobber (reg:SI 15))
+ (set (match_operand 0 "register_operand" "=r")
+ (unspec [(match_operand 1 "" "") (match_operand 2 "" "")] 2))]
+ "flag_pic && REGNO (operands[0]) == 23"
+ "sethi %%hi(%a1-4),%0\;call %a2\;add %0,%%lo(%a1+4),%0"
+ [(set_attr "length" "3")])
(define_insn "get_pc_via_rdpc"
- [(set (match_operand:DI 0 "register_operand" "=r") (pc))]
+ [(set (match_operand 0 "register_operand" "=r") (pc))]
"TARGET_V9"
"rd %%pc,%0"
[(set_attr "type" "move")])
@@ -2089,7 +2140,10 @@
"! TARGET_LIVE_G0
&& (register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode)
- || operands[1] == const0_rtx)"
+ || operands[1] == const0_rtx)
+ && (GET_CODE (operands[0]) != REG || ! CONSTANT_P (operands[1])
+ || REGNO (operands[0]) < 32
+ || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)"
"@
mov %1,%0
fmovs %1,%0
@@ -2099,7 +2153,7 @@
st %r1,%0
st %1,%0
fzeros %0"
- [(set_attr "type" "move,fp,move,load,fpload,store,fpstore,fpmove")
+ [(set_attr "type" "move,fpmove,move,load,fpload,store,fpstore,fpmove")
(set_attr "length" "1")])
(define_insn "*movsi_insn_liveg0"
@@ -2141,16 +2195,20 @@
DONE;
}")
-;; V8+ movdi is like regular 32 bit except that a 64 bit zero can be stored
-;; to aligned memory with a single instruction and the ldd/std instructions
-;; are not used.
-(define_insn "*movdi_v8plus"
- [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,f,f,Q,b")
- (match_operand:DI 1 "general_operand" "r,J,r,Q,i,?f,?Q,?f,?J"))]
- "TARGET_V8PLUS
+;; 32 bit V9 movdi is like regular 32 bit except: a 64 bit zero can be stored
+;; to aligned memory with a single instruction, the ldd/std instructions
+;; are not used, and constants can not be moved to floating point registers.
+
+(define_insn "*movdi_sp32_v9"
+ [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,Q,r,r,?e,?e,?Q,?b")
+ (match_operand:DI 1 "general_operand" "r,J,r,Q,i,e,Q,e,J"))]
+ "TARGET_V9
&& (register_operand (operands[0], DImode)
|| register_operand (operands[1], DImode)
- || operands[1] == const0_rtx)"
+ || operands[1] == const0_rtx)
+ && (GET_CODE (operands[0]) != REG || ! CONSTANT_P (operands[1])
+ || REGNO (operands[0]) < 32
+ || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)"
"*
{
if (which_alternative == 1)
@@ -2164,13 +2222,11 @@
[(set_attr "type" "move,store,store,load,multi,fp,fpload,fpstore,fpmove")
(set_attr "length" "2,1,3,3,3,2,3,3,1")])
-;; ??? The Haifa scheduler does not split instructions after reload if
-;; it also ran before reload.
-
+;; SPARC V9 deprecates std. Split it here.
(define_split
[(set (match_operand:DI 0 "memory_operand" "=m")
(match_operand:DI 1 "register_operand" "r"))]
- "TARGET_V8PLUS && !TARGET_ARCH64 && reload_completed
+ "TARGET_V9 && ! TARGET_ARCH64 && reload_completed
&& REGNO (operands[1]) < 32 && ! MEM_VOLATILE_P (operands[0])
&& offsettable_memref_p (operands[0])"
[(set (match_dup 2) (match_dup 3))
@@ -2182,10 +2238,10 @@
operands[2] = copy_rtx (operands[0]);
PUT_MODE (operands[2], SImode);")
-(define_insn "*movdi_sp32_insn"
+(define_insn "*movdi_sp32"
[(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,U,Q,r,r,?f,?f,?Q")
(match_operand:DI 1 "general_operand" "r,U,T,r,Q,i,f,Q,f"))]
- "! TARGET_ARCH64
+ "! TARGET_V9
&& (register_operand (operands[0], DImode)
|| register_operand (operands[1], DImode)
|| operands[1] == const0_rtx)"
@@ -2207,8 +2263,8 @@
;;; This needs the original value of operands[1], not the inverted value.
(define_insn "*movdi_sp64_insn"
- [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q,?f,?f,?Q")
- (match_operand:DI 1 "move_operand" "rI,K,Q,rJ,f,Q,f"))]
+ [(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,r,r,Q,?e,?e,?Q")
+ (match_operand:DI 1 "move_operand" "rI,K,Q,rJ,e,Q,e"))]
"TARGET_ARCH64
&& (register_operand (operands[0], DImode)
|| register_operand (operands[1], DImode)
@@ -2693,24 +2749,22 @@
"
{
enum rtx_code code = GET_CODE (operands[1]);
-
- if (GET_MODE (sparc_compare_op0) == DImode
- && ! TARGET_ARCH64)
- FAIL;
+ enum machine_mode op0_mode = GET_MODE (sparc_compare_op0);
if (sparc_compare_op1 == const0_rtx
&& GET_CODE (sparc_compare_op0) == REG
- && GET_MODE (sparc_compare_op0) == DImode
- && v9_regcmp_p (code))
+ && ((TARGET_ARCH64 && op0_mode == DImode && v9_regcmp_p (code))
+ || (op0_mode == SImode && v8plus_regcmp_p (code))))
{
- operands[1] = gen_rtx (code, DImode,
+ operands[1] = gen_rtx_fmt_ee (code, op0_mode,
sparc_compare_op0, sparc_compare_op1);
}
else
{
rtx cc_reg = gen_compare_reg (code,
sparc_compare_op0, sparc_compare_op1);
- operands[1] = gen_rtx (code, GET_MODE (cc_reg), cc_reg, const0_rtx);
+ operands[1] = gen_rtx_fmt_ee (code, GET_MODE (cc_reg),
+ cc_reg, const0_rtx);
}
}")
@@ -2729,14 +2783,15 @@
&& GET_MODE (sparc_compare_op0) == DImode
&& v9_regcmp_p (code))
{
- operands[1] = gen_rtx (code, DImode,
+ operands[1] = gen_rtx_fmt_ee (code, DImode,
sparc_compare_op0, sparc_compare_op1);
}
else
{
rtx cc_reg = gen_compare_reg (code,
sparc_compare_op0, sparc_compare_op1);
- operands[1] = gen_rtx (code, GET_MODE (cc_reg), cc_reg, const0_rtx);
+ operands[1] = gen_rtx_fmt_ee (code, GET_MODE (cc_reg),
+ cc_reg, const0_rtx);
}
}")
@@ -2963,6 +3018,57 @@
movr%d1 %2,%r4,%0"
[(set_attr "type" "cmove")])
+;; On UltraSPARC this is slightly worse than cmp/mov %icc if the register
+;; needs to be zero extended but better on average.
+(define_insn "*movsi_cc_reg_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (if_then_else:SI (match_operator 1 "v8plus_regcmp_op"
+ [(match_operand:SI 2 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:SI 3 "arith10_operand" "rM,0")
+ (match_operand:SI 4 "arith10_operand" "0,rM")))]
+ "TARGET_V9"
+ "*
+{
+ if (! sparc_check_64 (operands[2], insn))
+ output_asm_insn (\"srl %2,0,%2\", operands);
+ if (which_alternative == 0)
+ return \"movr%D1 %2,%r3,%0\";
+ return \"movr%d1 %2,%r4,%0\";
+}"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "2")])
+
+;; To work well this needs to know the current insn, but that is not an
+;; argument to gen_split_*.
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (if_then_else:SI (match_operator 1 "v8plus_regcmp_op"
+ [(match_operand:SI 2 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:SI 3 "arith10_operand" "rM,0")
+ (match_operand:SI 4 "arith10_operand" "0,rM")))]
+ "reload_completed"
+ [(set (match_dup 0)
+ (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 4)] 9))]
+ "if (! sparc_check_64 (operands[2], NULL_RTX))
+ emit_insn (gen_v8plus_clear_high (operands[2], operands[2]));")
+
+;; A conditional move with the condition argument known to be zero extended
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operator 1 "v8plus_regcmp_op"
+ [(match_operand:SI 2 "register_operand" "r,r")
+ (const_int 0)])
+ (match_operand:SI 3 "arith10_operand" "rM,0")
+ (match_operand:SI 4 "arith10_operand" "0,rM")] 9))]
+ "TARGET_V9"
+ "@
+ movr%D1 %2,%r3,%0
+ movr%d1 %2,%r4,%0"
+ [(set_attr "type" "cmove")])
+
;; ??? The constraints of operands 3,4 need work.
(define_insn "*movdi_cc_reg_sp64"
[(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -3130,6 +3236,7 @@
"lduh %1,%0"
[(set_attr "type" "load")])
+
;; ??? Write truncdisi pattern using sra?
(define_expand "zero_extendsidi2"
@@ -3148,6 +3255,20 @@
[(set_attr "type" "unary,load")
(set_attr "length" "1")])
+;; Zero extend a 32 bit value in a 64 bit register.
+(define_insn "v8plus_clear_high"
+ [(set (match_operand:SI 0 "reg_or_nonsymb_mem_operand" "=r,Q")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r,r")] 10))]
+ "TARGET_V9"
+ "*
+if (which_alternative == 1)
+ return \"st %1,%0\";
+if (sparc_check_64 (operands[1], insn) > 0)
+ return \"nop\";
+return \"srl %1,0,%0\";
+"
+ [(set_attr "type" "shift,store")])
+
;; Simplify comparisons of extended values.
(define_insn "*cmp_zero_extendqisi2"
@@ -3480,115 +3601,25 @@
[(set_attr "type" "fp")])
;; Now the same for 64 bit sources.
-;; ??? We cannot put DImode values in fp regs (see below near fix_truncdfsi2).
-
-(define_expand "floatdisf2"
- [(parallel [(set (match_operand:SF 0 "register_operand" "")
- (float:SF (match_operand:DI 1 "general_operand" "")))
- (clobber (match_dup 2))
- (clobber (match_dup 3))])]
- "TARGET_ARCH64 && TARGET_FPU"
- "
-{
- operands[2] = gen_reg_rtx (DFmode);
- operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_expand "floatdidf2"
- [(parallel [(set (match_operand:DF 0 "register_operand" "")
- (float:DF (match_operand:DI 1 "general_operand" "")))
- (clobber (match_dup 2))
- (clobber (match_dup 3))])]
- "TARGET_ARCH64 && TARGET_FPU"
- "
-{
- operands[2] = gen_reg_rtx (DFmode);
- operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_expand "floatditf2"
- [(parallel [(set (match_operand:TF 0 "register_operand" "")
- (float:TF (match_operand:DI 1 "general_operand" "")))
- (clobber (match_dup 2))
- (clobber (match_dup 3))])]
- "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
- "
-{
- operands[2] = gen_reg_rtx (DFmode);
- operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_insn "*floatdisf2_insn"
- [(parallel [(set (match_operand:SF 0 "register_operand" "=f")
- (float:SF (match_operand:DI 1 "general_operand" "rm")))
- (clobber (match_operand:DF 2 "register_operand" "=&e"))
- (clobber (match_operand:DI 3 "memory_operand" "m"))])]
- "TARGET_ARCH64 && TARGET_FPU"
- "*
-{
- if (GET_CODE (operands[1]) == MEM)
- output_asm_insn (\"ldd %1,%2\", operands);
- else
- output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands);
- return \"fxtos %2,%0\";
-}"
- [(set_attr "type" "fp")
- (set_attr "length" "3")])
-
-(define_insn "*floatdidf2_insn"
- [(parallel [(set (match_operand:DF 0 "register_operand" "=e")
- (float:DF (match_operand:DI 1 "general_operand" "rm")))
- (clobber (match_operand:DF 2 "register_operand" "=&e"))
- (clobber (match_operand:DI 3 "memory_operand" "m"))])]
- "TARGET_ARCH64 && TARGET_FPU"
- "*
-{
- if (GET_CODE (operands[1]) == MEM)
- output_asm_insn (\"ldd %1,%2\", operands);
- else
- output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands);
- return \"fxtod %2,%0\";
-}"
- [(set_attr "type" "fp")
- (set_attr "length" "3")])
-(define_insn "*floatditf2_insn"
- [(parallel [(set (match_operand:TF 0 "register_operand" "=e")
- (float:TF (match_operand:DI 1 "general_operand" "rm")))
- (clobber (match_operand:DF 2 "register_operand" "=&e"))
- (clobber (match_operand:DI 3 "memory_operand" "m"))])]
- "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
- "*
-{
- if (GET_CODE (operands[1]) == MEM)
- output_asm_insn (\"ldd %1,%2\", operands);
- else
- output_asm_insn (\"stx %1,%3\;ldd %3,%2\", operands);
- return \"fxtoq %2,%0\";
-}"
- [(set_attr "type" "fp")
- (set_attr "length" "3")])
-
-;; ??? Ideally, these are what we would like to use.
-
-(define_insn "floatdisf2_sp64"
+(define_insn "floatdisf2"
[(set (match_operand:SF 0 "register_operand" "=f")
(float:SF (match_operand:DI 1 "register_operand" "e")))]
- "0 && TARGET_ARCH64 && TARGET_FPU"
+ "TARGET_V9 && TARGET_FPU"
"fxtos %1,%0"
[(set_attr "type" "fp")])
-(define_insn "floatdidf2_sp64"
+(define_insn "floatdidf2"
[(set (match_operand:DF 0 "register_operand" "=e")
(float:DF (match_operand:DI 1 "register_operand" "e")))]
- "0 && TARGET_ARCH64 && TARGET_FPU"
+ "TARGET_V9 && TARGET_FPU"
"fxtod %1,%0"
[(set_attr "type" "fp")])
-(define_insn "floatditf2_sp64"
+(define_insn "floatditf2"
[(set (match_operand:TF 0 "register_operand" "=e")
(float:TF (match_operand:DI 1 "register_operand" "e")))]
- "0 && TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
+ "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
"fxtoq %1,%0"
[(set_attr "type" "fp")])
@@ -3616,121 +3647,26 @@
"fqtoi %1,%0"
[(set_attr "type" "fp")])
-;; Now the same, for 64-bit targets
-;; ??? We try to work around an interesting problem.
-;; If gcc tries to do a subreg on the result it will get the wrong answer:
-;; "(subreg:SI (reg:DI M int-reg) 0)" is the same as
-;; "(subreg:SI (reg:DI N float-reg) 1)", but gcc does not know how to change
-;; the "0" to a "1". One could enhance alter_subreg but it is not clear how to
-;; do this cleanly.
-
-(define_expand "fix_truncsfdi2"
- [(parallel [(set (match_operand:DI 0 "general_operand" "")
- (fix:DI (fix:SF (match_operand:SF 1 "register_operand" ""))))
- (clobber (match_dup 2))
- (clobber (match_dup 3))])]
- "TARGET_ARCH64 && TARGET_FPU"
- "
-{
- operands[2] = gen_reg_rtx (DFmode);
- operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_expand "fix_truncdfdi2"
- [(parallel [(set (match_operand:DI 0 "general_operand" "")
- (fix:DI (fix:DF (match_operand:DF 1 "register_operand" ""))))
- (clobber (match_dup 2))
- (clobber (match_dup 3))])]
- "TARGET_ARCH64 && TARGET_FPU"
- "
-{
- operands[2] = gen_reg_rtx (DFmode);
- operands[3] = sparc64_fpconv_stack_temp ();
-}")
+;; Now the same, for V9 targets
-(define_expand "fix_trunctfdi2"
- [(parallel [(set (match_operand:DI 0 "general_operand" "")
- (fix:DI (fix:TF (match_operand:TF 1 "register_operand" ""))))
- (clobber (match_dup 2))
- (clobber (match_dup 3))])]
- "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
- "
-{
- operands[2] = gen_reg_rtx (DFmode);
- operands[3] = sparc64_fpconv_stack_temp ();
-}")
-
-(define_insn "*fix_truncsfdi2_insn"
- [(parallel [(set (match_operand:DI 0 "general_operand" "=rm")
- (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))
- (clobber (match_operand:DF 2 "register_operand" "=&e"))
- (clobber (match_operand:DI 3 "memory_operand" "m"))])]
- "TARGET_ARCH64 && TARGET_FPU"
- "*
-{
- output_asm_insn (\"fstox %1,%2\", operands);
- if (GET_CODE (operands[0]) == MEM)
- return \"std %2,%0\";
- else
- return \"std %2,%3\;ldx %3,%0\";
-}"
- [(set_attr "type" "fp")
- (set_attr "length" "3")])
-
-(define_insn "*fix_truncdfdi2_insn"
- [(parallel [(set (match_operand:DI 0 "general_operand" "=rm")
- (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))
- (clobber (match_operand:DF 2 "register_operand" "=&e"))
- (clobber (match_operand:DI 3 "memory_operand" "m"))])]
- "TARGET_ARCH64 && TARGET_FPU"
- "*
-{
- output_asm_insn (\"fdtox %1,%2\", operands);
- if (GET_CODE (operands[0]) == MEM)
- return \"std %2,%0\";
- else
- return \"std %2,%3\;ldx %3,%0\";
-}"
- [(set_attr "type" "fp")
- (set_attr "length" "3")])
-
-(define_insn "*fix_trunctfdi2_insn"
- [(parallel [(set (match_operand:DI 0 "general_operand" "=rm")
- (fix:DI (fix:TF (match_operand:TF 1 "register_operand" "e"))))
- (clobber (match_operand:DF 2 "register_operand" "=&e"))
- (clobber (match_operand:DI 3 "memory_operand" "m"))])]
- "TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
- "*
-{
- output_asm_insn (\"fqtox %1,%2\", operands);
- if (GET_CODE (operands[0]) == MEM)
- return \"std %2,%0\";
- else
- return \"std %2,%3\;ldx %3,%0\";
-}"
- [(set_attr "type" "fp")
- (set_attr "length" "3")])
-
-;; ??? Ideally, these are what we would like to use.
-
-(define_insn "fix_truncsfdi2_sp64"
+(define_insn "fix_truncsfdi2"
[(set (match_operand:DI 0 "register_operand" "=e")
(fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f"))))]
- "0 && TARGET_ARCH64 && TARGET_FPU"
+ "TARGET_V9 && TARGET_FPU"
"fstox %1,%0"
[(set_attr "type" "fp")])
-(define_insn "fix_truncdfdi2_sp64"
+(define_insn "fix_truncdfdi2"
[(set (match_operand:DI 0 "register_operand" "=e")
(fix:DI (fix:DF (match_operand:DF 1 "register_operand" "e"))))]
- "0 && TARGET_ARCH64 && TARGET_FPU"
+ "TARGET_V9 && TARGET_FPU"
"fdtox %1,%0"
[(set_attr "type" "fp")])
-(define_insn "fix_trunctfdi2_sp64"
+(define_insn "fix_trunctfdi2"
[(set (match_operand:DI 0 "register_operand" "=e")
(fix:DI (fix:TF (match_operand:TF 1 "register_operand" "e"))))]
- "0 && TARGET_ARCH64 && TARGET_FPU && TARGET_HARD_QUAD"
+ "TARGET_V9 && TARGET_FPU && TARGET_HARD_QUAD"
"fqtox %1,%0"
[(set_attr "type" "fp")])
@@ -3785,6 +3721,77 @@
}"
[(set_attr "length" "2")])
+
+;; Split DImode arithmetic
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
+ (match_operand:DI 2 "arith_double_operand" "rHI")))
+ (clobber (reg:SI 100))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(parallel [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (plus:SI (match_dup 4)
+ (match_dup 5))
+ (const_int 0)))
+ (set (match_dup 3)
+ (plus:SI (match_dup 4) (match_dup 5)))])
+ (set (match_dup 6)
+ (plus:SI (plus:SI (match_dup 7)
+ (match_dup 8))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+ "operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_lowpart (SImode, operands[1]);
+ operands[5] = gen_lowpart (SImode, operands[2]);
+ operands[6] = gen_highpart (SImode, operands[0]);
+ operands[7] = gen_highpart (SImode, operands[1]);
+ if (GET_CODE (operands[2]) == CONST_INT)
+ operands[8] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
+ else
+ operands[8] = gen_highpart (SImode, operands[2]);")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (match_operand:DI 1 "arith_double_operand" "r")
+ (match_operand:DI 2 "arith_double_operand" "rHI")))
+ (clobber (reg:SI 100))]
+ "! TARGET_ARCH64 && reload_completed"
+ [(parallel [(set (reg:CC_NOOV 100)
+ (compare:CC_NOOV (minus:SI (match_dup 4)
+ (match_dup 5))
+ (const_int 0)))
+ (set (match_dup 3)
+ (minus:SI (match_dup 4) (match_dup 5)))])
+ (set (match_dup 6)
+ (minus:SI (minus:SI (match_dup 7)
+ (match_dup 8))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+ "operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_lowpart (SImode, operands[1]);
+ operands[5] = gen_lowpart (SImode, operands[2]);
+ operands[6] = gen_highpart (SImode, operands[0]);
+ operands[7] = gen_highpart (SImode, operands[1]);
+ operands[8] = gen_highpart (SImode, operands[2]);")
+
+;; LTU here means "carry set"
+(define_insn "*addx"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_operand" "%r")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+ ""
+ "addx %1,%2,%0"
+ [(set_attr "type" "unary")])
+
+(define_insn "*subx"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI"))
+ (ltu:SI (reg:CC_NOOV 100) (const_int 0))))]
+ ""
+ "subx %1,%2,%0"
+ [(set_attr "type" "unary")])
+
(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
@@ -3976,13 +3983,50 @@
"smul %1,%2,%0"
[(set_attr "type" "imul")])
-(define_insn "muldi3"
+(define_expand "muldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (match_operand:DI 1 "arith_double_operand" "%r")
+ (match_operand:DI 2 "arith_double_operand" "rHI")))]
+ "TARGET_ARCH64 || TARGET_V8PLUS"
+ "
+{
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_muldi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn "*muldi3_sp64"
[(set (match_operand:DI 0 "register_operand" "=r")
(mult:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))]
"TARGET_ARCH64"
"mulx %1,%2,%0")
+;; V8plus wide multiply.
+(define_insn "muldi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=r,h")
+ (mult:DI (match_operand:DI 1 "arith_double_operand" "%r,0")
+ (match_operand:DI 2 "arith_double_operand" "rHI,rHI")))
+ (clobber (match_scratch:SI 3 "=&h,X"))
+ (clobber (match_scratch:SI 4 "=&h,X"))]
+ "TARGET_V8PLUS"
+ "*
+{
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn (\"srl %L1,0,%L1\", operands);
+ if (which_alternative == 1)
+ output_asm_insn (\"sllx %H1,32,%H1\", operands);
+ if (sparc_check_64 (operands[2], insn) <= 0)
+ output_asm_insn (\"srl %L2,0,%L2\", operands);
+ if (which_alternative == 1)
+ return \"or %L1,%H1,%H1\;sllx %H2,32,%L1\;or %L2,%L1,%L1\;mulx %H1,%L1,%L0\;srlx %L0,32,%H0\";
+ else
+ return \"sllx %H1,32,%3\;sllx %H2,32,%4\;or %L1,%3,%3\;or %L2,%4,%4\;mulx %3,%4,%3\;srlx %3,32,%H0\;mov %3,%L0\";
+}"
+ [(set_attr "length" "9,8")])
+
;; It is not known whether this will match.
(define_insn "*cmp_mul_set"
@@ -4010,11 +4054,35 @@
}
}")
+;; V9 puts the 64 bit product in a 64 bit register. Only out or global
+;; registers can hold 64 bit values in the V8plus environment.
+(define_insn "*mulsidi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=h,r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r"))))
+ (clobber (match_scratch:SI 3 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ smul %1,%2,%L0\;srlx %L0,32,%H0
+ smul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+ [(set_attr "length" "2,3")])
+
+(define_insn "*const_mulsidi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=h,r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (match_operand:SI 2 "small_int" "I,I")))
+ (clobber (match_scratch:SI 3 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ smul %1,%2,%L0\;srlx %L0,32,%H0
+ smul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+ [(set_attr "length" "2,3")])
+
(define_insn "*mulsidi3_sp32"
[(set (match_operand:DI 0 "register_operand" "=r")
(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
(sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
- "TARGET_HARD_MUL"
+ "TARGET_HARD_MUL32"
"*
{
return TARGET_SPARCLET ? \"smuld %1,%2,%L0\" : \"smul %1,%2,%L0\;rd %%y,%H0\";
@@ -4052,15 +4120,34 @@
emit_insn (gen_const_smulsi3_highpart (operands[0], operands[1], operands[2]));
DONE;
}
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_smulsidi3_highpart_v8plus (operands[0], operands[1],
+ operands[2], GEN_INT (32)));
+ DONE;
+ }
}")
+(define_insn "smulsidi3_highpart_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=h,r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+ (match_operand:SI 3 "const_int_operand" "i,i"))))
+ (clobber (match_scratch:SI 4 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ smul %1,%2,%0\;srlx %0,%3,%0
+ smul %1,%2,%4\;srlx %4,%3,%0"
+ [(set_attr "length" "2")])
+
(define_insn "*smulsidi3_highpart_sp32"
[(set (match_operand:SI 0 "register_operand" "=r")
(truncate:SI
(lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
(sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
(const_int 32))))]
- "TARGET_HARD_MUL"
+ "TARGET_HARD_MUL32"
"smul %1,%2,%%g0\;rd %%y,%0"
[(set_attr "length" "2")])
@@ -4070,7 +4157,7 @@
(lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
(match_operand:SI 2 "register_operand" "r"))
(const_int 32))))]
- "TARGET_V8 || TARGET_SPARCLITE || TARGET_SPARCLET || TARGET_DEPRECATED_V8_INSNS"
+ "TARGET_HARD_MUL32"
"smul %1,%2,%%g0\;rd %%y,%0"
[(set_attr "length" "2")])
@@ -4086,13 +4173,29 @@
emit_insn (gen_const_umulsidi3 (operands[0], operands[1], operands[2]));
DONE;
}
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_umulsidi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
}")
+(define_insn "umulsidi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=h,r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r"))))
+ (clobber (match_scratch:SI 3 "=X,&h"))]
+ "TARGET_V8PLUS"
+ "@
+ umul %1,%2,%L0\;srlx %L0,32,%H0
+ umul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+ [(set_attr "length" "2,3")])
+
(define_insn "*umulsidi3_sp32"
[(set (match_operand:DI 0 "register_operand" "=r")
(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
(zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
- "TARGET_HARD_MUL"
+ "TARGET_HARD_MUL32"
"*
{
return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\";
@@ -4107,7 +4210,7 @@
[(set (match_operand:DI 0 "register_operand" "=r")
(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
(match_operand:SI 2 "uns_small_int" "")))]
- "TARGET_HARD_MUL"
+ "TARGET_HARD_MUL32"
"*
{
return TARGET_SPARCLET ? \"umuld %1,%2,%L0\" : \"umul %1,%2,%L0\;rd %%y,%H0\";
@@ -4116,6 +4219,17 @@
(if_then_else (eq_attr "isa" "sparclet")
(const_int 1) (const_int 2)))])
+(define_insn "const_umulsidi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=h,r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (match_operand:SI 2 "uns_small_int" "")))
+ (clobber (match_scratch:SI 3 "=X,h"))]
+ "TARGET_V8PLUS"
+ "@
+ umul %1,%2,%L0\;srlx %L0,32,%H0
+ umul %1,%2,%3\;srlx %3,32,%H0\;mov %3,%L0"
+ [(set_attr "length" "2,3")])
+
(define_expand "umulsi3_highpart"
[(set (match_operand:SI 0 "register_operand" "")
(truncate:SI
@@ -4125,6 +4239,12 @@
"TARGET_HARD_MUL"
"
{
+ if (TARGET_V8PLUS)
+ {
+ emit_insn (gen_umulsidi3_highpart_v8plus (operands[0], operands[1],
+ operands[2], GEN_INT (32)));
+ DONE;
+ }
if (CONSTANT_P (operands[2]))
{
emit_insn (gen_const_umulsi3_highpart (operands[0], operands[1], operands[2]));
@@ -4132,13 +4252,39 @@
}
}")
+(define_insn "umulsidi3_highpart_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=h,r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
+ (match_operand:SI 3 "const_int_operand" "i,i"))))
+ (clobber (match_scratch:SI 4 "=X,h"))]
+ "TARGET_V8PLUS"
+ "@
+ umul %1,%2,%0\;srlx %0,%3,%0
+ umul %1,%2,%4\;srlx %4,%3,%0"
+ [(set_attr "length" "2")])
+
+(define_insn "const_umulsi3_highpart_v8plus"
+ [(set (match_operand:SI 0 "register_operand" "=h,r")
+ (truncate:SI
+ (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
+ (match_operand:SI 2 "uns_small_int" ""))
+ (match_operand:SI 3 "const_int_operand" "i,i"))))
+ (clobber (match_scratch:SI 4 "=X,h"))]
+ "TARGET_V8PLUS"
+ "@
+ umul %1,%2,%0\;srlx %0,%3,%0
+ umul %1,%2,%4\;srlx %4,%3,%0"
+ [(set_attr "length" "2")])
+
(define_insn "*umulsidi3_highpart_sp32"
[(set (match_operand:SI 0 "register_operand" "=r")
(truncate:SI
(lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
(zero_extend:DI (match_operand:SI 2 "register_operand" "r")))
(const_int 32))))]
- "TARGET_HARD_MUL"
+ "TARGET_HARD_MUL32"
"umul %1,%2,%%g0\;rd %%y,%0"
[(set_attr "length" "2")])
@@ -4148,7 +4294,7 @@
(lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
(match_operand:SI 2 "uns_small_int" ""))
(const_int 32))))]
- "TARGET_HARD_MUL"
+ "TARGET_HARD_MUL32"
"umul %1,%2,%%g0\;rd %%y,%0"
[(set_attr "length" "2")])
@@ -4156,21 +4302,27 @@
;; a y register write and a use of it for correct results.
(define_insn "divsi3"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (div:SI (match_operand:SI 1 "register_operand" "r")
- (match_operand:SI 2 "arith_operand" "rI")))
- (clobber (match_scratch:SI 3 "=&r"))]
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (div:SI (match_operand:SI 1 "register_operand" "r,r")
+ (match_operand:SI 2 "move_operand" "rI,m")))
+ (clobber (match_scratch:SI 3 "=&r,&r"))]
"TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
"*
{
+ if (which_alternative == 0)
if (TARGET_V9)
return \"sra %1,31,%3\;wr %%g0,%3,%%y\;sdiv %1,%2,%0\";
else
return \"sra %1,31,%3\;wr %%g0,%3,%%y\;nop\;nop\;nop\;sdiv %1,%2,%0\";
+ else
+ if (TARGET_V9)
+ return \"sra %1,31,%3\;wr %%g0,%3,%%y\;ld %2,%3\;sdiv %1,%3,%0\";
+ else
+ return \"sra %1,31,%3\;wr %%g0,%3,%%y\;ld %2,%3\;nop\;nop\;sdiv %1,%3,%0\";
}"
[(set (attr "length")
(if_then_else (eq_attr "isa" "v9")
- (const_int 3) (const_int 6)))])
+ (const_int 4) (const_int 7)))])
(define_insn "divdi3"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -4202,19 +4354,28 @@
(const_int 3) (const_int 6)))])
(define_insn "udivsi3"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (udiv:SI (match_operand:SI 1 "register_operand" "r")
- (match_operand:SI 2 "arith_operand" "rI")))]
+ [(set (match_operand:SI 0 "register_operand" "=r,&r,&r")
+ (udiv:SI (match_operand:SI 1 "reg_or_nonsymb_mem_operand" "r,r,m")
+ (match_operand:SI 2 "move_operand" "rI,m,r")))]
"TARGET_V8 || TARGET_DEPRECATED_V8_INSNS"
"*
{
+ output_asm_insn (\"wr %%g0,%%g0,%%y\", operands);
+ switch (which_alternative)
+ {
+ default:
if (TARGET_V9)
- return \"wr %%g0,%%g0,%%y\;udiv %1,%2,%0\";
- else
- return \"wr %%g0,%%g0,%%y\;nop\;nop\;nop\;udiv %1,%2,%0\";
+ return \"udiv %1,%2,%0\";
+ return \"nop\;nop\;nop\;udiv %1,%2,%0\";
+ case 1:
+ return \"ld %2,%0\;nop\;nop\;udiv %1,%0,%0\";
+ case 2:
+ return \"ld %1,%0\;nop\;nop\;udiv %0,%2,%0\";
+ }
}"
[(set (attr "length")
- (if_then_else (eq_attr "isa" "v9")
+ (if_then_else (and (eq_attr "isa" "v9")
+ (eq_attr "alternative" "0"))
(const_int 2) (const_int 5)))])
(define_insn "udivdi3"
@@ -4341,13 +4502,13 @@
(match_operand:SI 2 "" "")))
(clobber (match_operand:SI 3 "register_operand" ""))]
"GET_CODE (operands[2]) == CONST_INT
- && !SMALL_INT (operands[2])
+ && !SMALL_INT32 (operands[2])
&& (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
[(set (match_dup 3) (match_dup 4))
(set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 1)))]
"
{
- operands[4] = GEN_INT (~INTVAL (operands[2]));
+ operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
}")
(define_insn "*and_not_di_sp32"
@@ -4436,13 +4597,13 @@
(match_operand:SI 2 "" "")))
(clobber (match_operand:SI 3 "register_operand" ""))]
"GET_CODE (operands[2]) == CONST_INT
- && !SMALL_INT (operands[2])
+ && !SMALL_INT32 (operands[2])
&& (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
[(set (match_dup 3) (match_dup 4))
(set (match_dup 0) (ior:SI (not:SI (match_dup 3)) (match_dup 1)))]
"
{
- operands[4] = GEN_INT (~INTVAL (operands[2]));
+ operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
}")
(define_insn "*or_not_di_sp32"
@@ -4479,7 +4640,7 @@
""
"")
-(define_insn "*xorsi3_sp32"
+(define_insn "*xordi3_sp32"
[(set (match_operand:DI 0 "register_operand" "=r,b")
(xor:DI (match_operand:DI 1 "arith_double_operand" "%r,b")
(match_operand:DI 2 "arith_double_operand" "rHI,b")))]
@@ -4506,7 +4667,8 @@
}
return \"xor %1,%2,%0\;xor %R1,%R2,%R0\";
}"
- [(set_attr "length" "2,1")])
+ [(set_attr "length" "2,1")
+ (set_attr "type" "ialu,fp")])
(define_insn "*xordi3_sp64"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -4531,13 +4693,13 @@
(match_operand:SI 2 "" "")))
(clobber (match_operand:SI 3 "register_operand" ""))]
"GET_CODE (operands[2]) == CONST_INT
- && !SMALL_INT (operands[2])
+ && !SMALL_INT32 (operands[2])
&& (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
[(set (match_dup 3) (match_dup 4))
(set (match_dup 0) (not:SI (xor:SI (match_dup 3) (match_dup 1))))]
"
{
- operands[4] = GEN_INT (~INTVAL (operands[2]));
+ operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
}")
(define_split
@@ -4546,13 +4708,13 @@
(match_operand:SI 2 "" ""))))
(clobber (match_operand:SI 3 "register_operand" ""))]
"GET_CODE (operands[2]) == CONST_INT
- && !SMALL_INT (operands[2])
+ && !SMALL_INT32 (operands[2])
&& (INTVAL (operands[2]) & 0x3ff) == 0x3ff"
[(set (match_dup 3) (match_dup 4))
(set (match_dup 0) (xor:SI (match_dup 3) (match_dup 1)))]
"
{
- operands[4] = GEN_INT (~INTVAL (operands[2]));
+ operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
}")
;; xnor patterns. Note that (a ^ ~b) == (~a ^ b) == ~(a ^ b).
@@ -4849,7 +5011,7 @@
{
if (which_alternative == 0)
return \"xnor %1,0,%0\";
- if (which_alternative == 1)
+ if (which_alternative == 2)
return \"fnot1s %1,%0\";
if (TARGET_LIVE_G0)
output_asm_insn (\"and %%g0,0,%%g0\", operands);
@@ -5138,7 +5300,23 @@
}"
[(set_attr "type" "shift")])
-(define_insn "ashldi3"
+(define_expand "ashldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64 || TARGET_V8PLUS"
+ "
+{
+ if (! TARGET_ARCH64)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ FAIL;
+ emit_insn (gen_ashldi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}")
+
+(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r")
(ashift:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:SI 2 "arith_operand" "rI")))]
@@ -5152,6 +5330,15 @@
return \"sllx %1,%2,%0\";
}")
+(define_insn "ashldi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+ (ashift:DI (match_operand:DI 1 "register_operand" "r,0,r")
+ (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+ (clobber (match_scratch:SI 3 "=X,X,&h"))]
+ "TARGET_V8PLUS"
+ "*return sparc_v8plus_shift (operands, insn, \"sllx\");"
+ [(set_attr "length" "5,5,6")])
+
(define_insn "*cmp_cc_ashift_1"
[(set (reg:CC_NOOV 100)
(compare:CC_NOOV (ashift:SI (match_operand:SI 0 "register_operand" "r")
@@ -5186,7 +5373,21 @@
}"
[(set_attr "type" "shift")])
-(define_insn "ashrdi3"
+(define_expand "ashrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64 || TARGET_V8PLUS"
+ "
+if (! TARGET_ARCH64)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ FAIL; /* prefer generic code in this case */
+ emit_insn (gen_ashrdi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }")
+
+(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r")
(ashiftrt:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:SI 2 "arith_operand" "rI")))]
@@ -5200,6 +5401,15 @@
return \"srax %1,%2,%0\";
}")
+(define_insn "ashrdi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+ (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+ (clobber (match_scratch:SI 3 "=X,X,&h"))]
+ "TARGET_V8PLUS"
+ "*return sparc_v8plus_shift (operands, insn, \"srax\");"
+ [(set_attr "length" "5,5,6")])
+
(define_insn "lshrsi3"
[(set (match_operand:SI 0 "register_operand" "=r")
(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
@@ -5215,7 +5425,21 @@
}"
[(set_attr "type" "shift")])
-(define_insn "lshrdi3"
+(define_expand "lshrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:SI 2 "arith_operand" "rI")))]
+ "TARGET_ARCH64 || TARGET_V8PLUS"
+ "
+if (! TARGET_ARCH64)
+ {
+ if (GET_CODE (operands[2]) == CONST_INT)
+ FAIL;
+ emit_insn (gen_lshrdi3_v8plus (operands[0], operands[1], operands[2]));
+ DONE;
+ }")
+
+(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r")
(lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:SI 2 "arith_operand" "rI")))]
@@ -5228,6 +5452,15 @@
return \"srlx %1,%2,%0\";
}")
+
+(define_insn "lshrdi3_v8plus"
+ [(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
+ (match_operand:SI 2 "arith_operand" "rI,rI,rI")))
+ (clobber (match_scratch:SI 3 "=X,X,&h"))]
+ "TARGET_V8PLUS"
+ "*return sparc_v8plus_shift (operands, insn, \"srlx\");"
+ [(set_attr "length" "5,5,6")])
;; Unconditional and other jump instructions
;; On the Sparc, by setting the annul bit on an unconditional branch, the
@@ -5658,7 +5891,15 @@
(use (reg:SI 31))]
"! TARGET_EPILOGUE"
"* return output_return (operands);"
- [(set_attr "type" "multi")])
+ [(set_attr "type" "return")])
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "arith_operand" "rI"))
+ (parallel [(return)
+ (use (reg:SI 31))])]
+ "sparc_return_peephole_ok (operands[0], operands[1])"
+ "return %%i7+8\;mov %Y1,%Y0")
(define_insn "nop"
[(const_int 0)]
@@ -5684,10 +5925,10 @@
;; ??? Doesn't work with -mflat.
(define_expand "nonlocal_goto"
- [(match_operand:SI 0 "" "")
+ [(match_operand:SI 0 "general_operand" "")
(match_operand:SI 1 "general_operand" "")
(match_operand:SI 2 "general_operand" "")
- (match_operand:SI 3 "general_operand" "")]
+ (match_operand:SI 3 "" "")]
""
"
{
@@ -5715,15 +5956,20 @@
and reload the appropriate value into %fp. */
emit_move_insn (frame_pointer_rtx, stack);
- /* Put in the static chain register the nonlocal label address. */
- emit_move_insn (static_chain_rtx, chain);
-
/* USE of frame_pointer_rtx added for consistency; not clear if
really needed. */
- emit_insn (gen_rtx (USE, VOIDmode, frame_pointer_rtx));
+ /*emit_insn (gen_rtx (USE, VOIDmode, frame_pointer_rtx));*/
emit_insn (gen_rtx (USE, VOIDmode, stack_pointer_rtx));
- emit_insn (gen_rtx (USE, VOIDmode, static_chain_rtx));
/* Return, restoring reg window and jumping to goto handler. */
+ if (TARGET_V9 && GET_CODE (chain) == CONST_INT)
+ {
+ emit_insn (gen_goto_handler_and_restore_v9 (static_chain_rtx, chain));
+ emit_barrier ();
+ DONE;
+ }
+ /* Put in the static chain register the nonlocal label address. */
+ emit_move_insn (static_chain_rtx, chain);
+ emit_insn (gen_rtx (USE, VOIDmode, static_chain_rtx));
emit_insn (gen_goto_handler_and_restore ());
emit_barrier ();
DONE;
@@ -5733,22 +5979,32 @@
(define_insn "flush_register_windows"
[(unspec_volatile [(const_int 0)] 1)]
""
- ;; ??? Use TARGET_V9 instead?
- "* return TARGET_ARCH64 ? \"flushw\" : \"ta 3\";"
+ "* return TARGET_V9 ? \"flushw\" : \"ta 3\";"
[(set_attr "type" "misc")])
(define_insn "goto_handler_and_restore"
- [(unspec_volatile [(const_int 0)] 2)
- (use (reg:SI 8))]
- ""
+ [(unspec_volatile [(reg:SI 8)] 2)]
+ "! TARGET_V9"
"jmp %%o0+0\;restore"
[(set_attr "type" "misc")
(set_attr "length" "2")])
-;; Implement setjmp. Step one, set up the buffer.
+(define_insn "goto_handler_and_restore_v9"
+ [(unspec_volatile [(reg:SI 8)
+ (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 1 "const_int_operand" "I,n")] 3)]
+ "TARGET_V9"
+ "@
+ return %%o0+0\;mov %1,%Y0
+ sethi %%hi(%1),%0\;return %%o0+0\;or %Y0,%%lo(%1),%Y0"
+ [(set_attr "type" "misc")
+ (set_attr "length" "2,3")])
+
+;; Pattern for use after a setjmp to store FP and the return register
+;; into the stack area.
-(define_expand "builtin_setjmp_setup"
- [(unspec [(match_operand 0 "" "")] 3)]
+(define_expand "setjmp"
+ [(const_int 0)]
""
"
{
@@ -6146,54 +6402,6 @@
&& ! FP_REG_P (operands[0]) && ! FP_REG_P (operands[1])"
"orcc %1,0,%0")
-;; Do {sign,zero}-extended compares somewhat more efficiently.
-;; ??? Is this now the Right Way to do this? Or will SCRATCH
-;; eventually have some impact here?
-
-(define_peephole
- [(set (match_operand:HI 0 "register_operand" "")
- (match_operand:HI 1 "memory_operand" ""))
- (set (match_operand:SI 2 "register_operand" "")
- (sign_extend:SI (match_dup 0)))
- (set (reg:CC 100)
- (compare:CC (match_dup 2)
- (const_int 0)))]
- ""
- "ldsh %1,%0\;orcc %0,0,%2")
-
-(define_peephole
- [(set (match_operand:HI 0 "register_operand" "")
- (match_operand:HI 1 "memory_operand" ""))
- (set (match_operand:DI 2 "register_operand" "")
- (sign_extend:DI (match_dup 0)))
- (set (reg:CCX 100)
- (compare:CCX (match_dup 2)
- (const_int 0)))]
- "TARGET_ARCH64"
- "ldsh %1,%0\;orcc %0,0,%2")
-
-(define_peephole
- [(set (match_operand:QI 0 "register_operand" "")
- (match_operand:QI 1 "memory_operand" ""))
- (set (match_operand:SI 2 "register_operand" "")
- (sign_extend:SI (match_dup 0)))
- (set (reg:CC 100)
- (compare:CC (match_dup 2)
- (const_int 0)))]
- ""
- "ldsb %1,%0\;orcc %0,0,%2")
-
-(define_peephole
- [(set (match_operand:QI 0 "register_operand" "")
- (match_operand:QI 1 "memory_operand" ""))
- (set (match_operand:DI 2 "register_operand" "")
- (sign_extend:DI (match_dup 0)))
- (set (reg:CCX 100)
- (compare:CCX (match_dup 2)
- (const_int 0)))]
- "TARGET_ARCH64"
- "ldsb %1,%0\;orcc %0,0,%2")
-
;; Floating-point move peepholes
;; ??? v9: Do we want similar ones?
@@ -6235,6 +6443,9 @@
{
if (! TARGET_ARCH64 && current_function_returns_struct)
return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+ else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
+ || IN_OR_GLOBAL_P (operands[1])))
+ return \"return %%i7+8\;mov %Y1,%Y0\";
else
return \"ret\;restore %%g0,%1,%Y0\";
}"
@@ -6249,6 +6460,9 @@
{
if (! TARGET_ARCH64 && current_function_returns_struct)
return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+ else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
+ || IN_OR_GLOBAL_P (operands[1])))
+ return \"return %%i7+8\;mov %Y1,%Y0\";
else
return \"ret\;restore %%g0,%1,%Y0\";
}"
@@ -6263,6 +6477,9 @@
{
if (! TARGET_ARCH64 && current_function_returns_struct)
return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+ else if (TARGET_V9 && (GET_CODE (operands[1]) == CONST_INT
+ || IN_OR_GLOBAL_P (operands[1])))
+ return \"return %%i7+8\;mov %Y1,%Y0\";
else
return \"ret\;restore %%g0,%1,%Y0\";
}"
@@ -6280,6 +6497,8 @@
{
if (! TARGET_ARCH64 && current_function_returns_struct)
return \"jmp %%i7+12\;restore %%g0,%1,%Y0\";
+ else if (TARGET_V9 && IN_OR_GLOBAL_P (operands[1]))
+ return \"return %%i7+8\;mov %Y1,%Y0\";
else
return \"ret\;restore %%g0,%1,%Y0\";
}"
@@ -6287,16 +6506,19 @@
(define_insn "*return_addsi"
[(set (match_operand:SI 0 "restore_operand" "")
- (plus:SI (match_operand:SI 1 "arith_operand" "%r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "arith_operand" "rI")))
(return)]
- "! TARGET_EPILOGUE && ! TARGET_LIVE_G0
- && (register_operand (operands[1], SImode)
- || register_operand (operands[2], SImode))"
+ "! TARGET_EPILOGUE && ! TARGET_LIVE_G0"
"*
{
if (! TARGET_ARCH64 && current_function_returns_struct)
return \"jmp %%i7+12\;restore %r1,%2,%Y0\";
+ /* If operands are global or in registers, can use return */
+ else if (TARGET_V9 && IN_OR_GLOBAL_P (operands[1])
+ && (GET_CODE (operands[2]) == CONST_INT
+ || IN_OR_GLOBAL_P (operands[2])))
+ return \"return %%i7+8\;add %Y1,%Y2,%Y0\";
else
return \"ret\;restore %r1,%2,%Y0\";
}"