/* Output routines for Motorola MCore processor Copyright (C) 1993-2017 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ #define IN_TARGET_CODE 1 #include "config.h" #include "system.h" #include "coretypes.h" #include "backend.h" #include "target.h" #include "rtl.h" #include "tree.h" #include "df.h" #include "memmodel.h" #include "tm_p.h" #include "stringpool.h" #include "attribs.h" #include "emit-rtl.h" #include "diagnostic-core.h" #include "stor-layout.h" #include "varasm.h" #include "calls.h" #include "mcore.h" #include "output.h" #include "explow.h" #include "expr.h" #include "cfgrtl.h" #include "builtins.h" #include "regs.h" /* This file should be included last. */ #include "target-def.h" /* For dumping information about frame sizes. */ char * mcore_current_function_name = 0; long mcore_current_compilation_timestamp = 0; /* Global variables for machine-dependent things. */ /* Provides the class number of the smallest class containing reg number. */ const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] = { GENERAL_REGS, ONLYR1_REGS, LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS, LRW_REGS, GENERAL_REGS, GENERAL_REGS, C_REGS, NO_REGS, NO_REGS, }; struct mcore_frame { int arg_size; /* Stdarg spills (bytes). */ int reg_size; /* Non-volatile reg saves (bytes). */ int reg_mask; /* Non-volatile reg saves. */ int local_size; /* Locals. */ int outbound_size; /* Arg overflow on calls out. */ int pad_outbound; int pad_local; int pad_reg; /* Describe the steps we'll use to grow it. */ #define MAX_STACK_GROWS 4 /* Gives us some spare space. */ int growth[MAX_STACK_GROWS]; int arg_offset; int reg_offset; int reg_growth; int local_growth; }; typedef enum { COND_NO, COND_MOV_INSN, COND_CLR_INSN, COND_INC_INSN, COND_DEC_INSN, COND_BRANCH_INSN } cond_type; static void output_stack_adjust (int, int); static int calc_live_regs (int *); static int try_constant_tricks (HOST_WIDE_INT, HOST_WIDE_INT *, HOST_WIDE_INT *); static const char * output_inline_const (machine_mode, rtx *); static void layout_mcore_frame (struct mcore_frame *); static void mcore_setup_incoming_varargs (cumulative_args_t, machine_mode, tree, int *, int); static cond_type is_cond_candidate (rtx); static rtx_insn *emit_new_cond_insn (rtx_insn *, int); static rtx_insn *conditionalize_block (rtx_insn *); static void conditionalize_optimization (void); static void mcore_reorg (void); static rtx handle_structs_in_regs (machine_mode, const_tree, int); static void mcore_mark_dllexport (tree); static void mcore_mark_dllimport (tree); static int mcore_dllexport_p (tree); static int mcore_dllimport_p (tree); static tree mcore_handle_naked_attribute (tree *, tree, tree, int, bool *); #ifdef OBJECT_FORMAT_ELF static void mcore_asm_named_section (const char *, unsigned int, tree); #endif static void mcore_print_operand (FILE *, rtx, int); static void mcore_print_operand_address (FILE *, machine_mode, rtx); static bool mcore_print_operand_punct_valid_p (unsigned char code); static void mcore_unique_section (tree, int); static void mcore_encode_section_info (tree, rtx, int); static const char *mcore_strip_name_encoding (const char *); static int mcore_const_costs (rtx, RTX_CODE); static int mcore_and_cost (rtx); static int mcore_ior_cost (rtx); static bool mcore_rtx_costs (rtx, machine_mode, int, int, int *, bool); static void mcore_external_libcall (rtx); static bool mcore_return_in_memory (const_tree, const_tree); static int mcore_arg_partial_bytes (cumulative_args_t, machine_mode, tree, bool); static rtx mcore_function_arg (cumulative_args_t, machine_mode, const_tree, bool); static void mcore_function_arg_advance (cumulative_args_t, machine_mode, const_tree, bool); static unsigned int mcore_function_arg_boundary (machine_mode, const_tree); static void mcore_asm_trampoline_template (FILE *); static void mcore_trampoline_init (rtx, tree, rtx); static bool mcore_warn_func_return (tree); static void mcore_option_override (void); static bool mcore_legitimate_constant_p (machine_mode, rtx); static bool mcore_legitimate_address_p (machine_mode, rtx, bool, addr_space_t); static bool mcore_hard_regno_mode_ok (unsigned int, machine_mode); static bool mcore_modes_tieable_p (machine_mode, machine_mode); /* MCore specific attributes. */ static const struct attribute_spec mcore_attribute_table[] = { /* { name, min_len, max_len, decl_req, type_req, fn_type_req, affects_type_identity, handler, exclude } */ { "dllexport", 0, 0, true, false, false, false, NULL, NULL }, { "dllimport", 0, 0, true, false, false, false, NULL, NULL }, { "naked", 0, 0, true, false, false, false, mcore_handle_naked_attribute, NULL }, { NULL, 0, 0, false, false, false, false, NULL, NULL } }; /* Initialize the GCC target structure. */ #undef TARGET_ASM_EXTERNAL_LIBCALL #define TARGET_ASM_EXTERNAL_LIBCALL mcore_external_libcall #if TARGET_DLLIMPORT_DECL_ATTRIBUTES #undef TARGET_MERGE_DECL_ATTRIBUTES #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes #endif #ifdef OBJECT_FORMAT_ELF #undef TARGET_ASM_UNALIGNED_HI_OP #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t" #undef TARGET_ASM_UNALIGNED_SI_OP #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t" #endif #undef TARGET_PRINT_OPERAND #define TARGET_PRINT_OPERAND mcore_print_operand #undef TARGET_PRINT_OPERAND_ADDRESS #define TARGET_PRINT_OPERAND_ADDRESS mcore_print_operand_address #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P #define TARGET_PRINT_OPERAND_PUNCT_VALID_P mcore_print_operand_punct_valid_p #undef TARGET_ATTRIBUTE_TABLE #define TARGET_ATTRIBUTE_TABLE mcore_attribute_table #undef TARGET_ASM_UNIQUE_SECTION #define TARGET_ASM_UNIQUE_SECTION mcore_unique_section #undef TARGET_ASM_FUNCTION_RODATA_SECTION #define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section #undef TARGET_ENCODE_SECTION_INFO #define TARGET_ENCODE_SECTION_INFO mcore_encode_section_info #undef TARGET_STRIP_NAME_ENCODING #define TARGET_STRIP_NAME_ENCODING mcore_strip_name_encoding #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS mcore_rtx_costs #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG mcore_reorg #undef TARGET_PROMOTE_FUNCTION_MODE #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote #undef TARGET_PROMOTE_PROTOTYPES #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY mcore_return_in_memory #undef TARGET_MUST_PASS_IN_STACK #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size #undef TARGET_PASS_BY_REFERENCE #define TARGET_PASS_BY_REFERENCE hook_pass_by_reference_must_pass_in_stack #undef TARGET_ARG_PARTIAL_BYTES #define TARGET_ARG_PARTIAL_BYTES mcore_arg_partial_bytes #undef TARGET_FUNCTION_ARG #define TARGET_FUNCTION_ARG mcore_function_arg #undef TARGET_FUNCTION_ARG_ADVANCE #define TARGET_FUNCTION_ARG_ADVANCE mcore_function_arg_advance #undef TARGET_FUNCTION_ARG_BOUNDARY #define TARGET_FUNCTION_ARG_BOUNDARY mcore_function_arg_boundary #undef TARGET_SETUP_INCOMING_VARARGS #define TARGET_SETUP_INCOMING_VARARGS mcore_setup_incoming_varargs #undef TARGET_ASM_TRAMPOLINE_TEMPLATE #define TARGET_ASM_TRAMPOLINE_TEMPLATE mcore_asm_trampoline_template #undef TARGET_TRAMPOLINE_INIT #define TARGET_TRAMPOLINE_INIT mcore_trampoline_init #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE mcore_option_override #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P mcore_legitimate_constant_p #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P mcore_legitimate_address_p #undef TARGET_LRA_P #define TARGET_LRA_P hook_bool_void_false #undef TARGET_WARN_FUNC_RETURN #define TARGET_WARN_FUNC_RETURN mcore_warn_func_return #undef TARGET_HARD_REGNO_MODE_OK #define TARGET_HARD_REGNO_MODE_OK mcore_hard_regno_mode_ok #undef TARGET_MODES_TIEABLE_P #define TARGET_MODES_TIEABLE_P mcore_modes_tieable_p #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings struct gcc_target targetm = TARGET_INITIALIZER; /* Adjust the stack and return the number of bytes taken to do it. */ static void output_stack_adjust (int direction, int size) { /* If extending stack a lot, we do it incrementally. */ if (direction < 0 && size > mcore_stack_increment && mcore_stack_increment > 0) { rtx tmp = gen_rtx_REG (SImode, 1); rtx memref; emit_insn (gen_movsi (tmp, GEN_INT (mcore_stack_increment))); do { emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp)); memref = gen_rtx_MEM (SImode, stack_pointer_rtx); MEM_VOLATILE_P (memref) = 1; emit_insn (gen_movsi (memref, stack_pointer_rtx)); size -= mcore_stack_increment; } while (size > mcore_stack_increment); /* SIZE is now the residual for the last adjustment, which doesn't require a probe. */ } if (size) { rtx insn; rtx val = GEN_INT (size); if (size > 32) { rtx nval = gen_rtx_REG (SImode, 1); emit_insn (gen_movsi (nval, val)); val = nval; } if (direction > 0) insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, val); else insn = gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, val); emit_insn (insn); } } /* Work out the registers which need to be saved, both as a mask and a count. */ static int calc_live_regs (int * count) { int reg; int live_regs_mask = 0; * count = 0; for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++) { if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) { (*count)++; live_regs_mask |= (1 << reg); } } return live_regs_mask; } /* Print the operand address in x to the stream. */ static void mcore_print_operand_address (FILE * stream, machine_mode /*mode*/, rtx x) { switch (GET_CODE (x)) { case REG: fprintf (stream, "(%s)", reg_names[REGNO (x)]); break; case PLUS: { rtx base = XEXP (x, 0); rtx index = XEXP (x, 1); if (GET_CODE (base) != REG) { /* Ensure that BASE is a register (one of them must be). */ rtx temp = base; base = index; index = temp; } switch (GET_CODE (index)) { case CONST_INT: fprintf (stream, "(%s," HOST_WIDE_INT_PRINT_DEC ")", reg_names[REGNO(base)], INTVAL (index)); break; default: gcc_unreachable (); } } break; default: output_addr_const (stream, x); break; } } static bool mcore_print_operand_punct_valid_p (unsigned char code) { return (code == '.' || code == '#' || code == '*' || code == '^' || code == '!'); } /* Print operand x (an rtx) in assembler syntax to file stream according to modifier code. 'R' print the next register or memory location along, i.e. the lsw in a double word value 'O' print a constant without the # 'M' print a constant as its negative 'P' print log2 of a power of two 'Q' print log2 of an inverse of a power of two 'U' print register for ldm/stm instruction 'X' print byte number for xtrbN instruction. */ static void mcore_print_operand (FILE * stream, rtx x, int code) { switch (code) { case 'N': if (INTVAL(x) == -1) fprintf (asm_out_file, "32"); else fprintf (asm_out_file, "%d", exact_log2 (INTVAL (x) + 1)); break; case 'P': fprintf (asm_out_file, "%d", exact_log2 (INTVAL (x) & 0xffffffff)); break; case 'Q': fprintf (asm_out_file, "%d", exact_log2 (~INTVAL (x))); break; case 'O': fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); break; case 'M': fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, - INTVAL (x)); break; case 'R': /* Next location along in memory or register. */ switch (GET_CODE (x)) { case REG: fputs (reg_names[REGNO (x) + 1], (stream)); break; case MEM: mcore_print_operand_address (stream, GET_MODE (x), XEXP (adjust_address (x, SImode, 4), 0)); break; default: gcc_unreachable (); } break; case 'U': fprintf (asm_out_file, "%s-%s", reg_names[REGNO (x)], reg_names[REGNO (x) + 3]); break; case 'x': fprintf (asm_out_file, HOST_WIDE_INT_PRINT_HEX, INTVAL (x)); break; case 'X': fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, 3 - INTVAL (x) / 8); break; default: switch (GET_CODE (x)) { case REG: fputs (reg_names[REGNO (x)], (stream)); break; case MEM: output_address (GET_MODE (x), XEXP (x, 0)); break; default: output_addr_const (stream, x); break; } break; } } /* What does a constant cost ? */ static int mcore_const_costs (rtx exp, enum rtx_code code) { HOST_WIDE_INT val = INTVAL (exp); /* Easy constants. */ if ( CONST_OK_FOR_I (val) || CONST_OK_FOR_M (val) || CONST_OK_FOR_N (val) || (code == PLUS && CONST_OK_FOR_L (val))) return 1; else if (code == AND && ( CONST_OK_FOR_M (~val) || CONST_OK_FOR_N (~val))) return 2; else if (code == PLUS && ( CONST_OK_FOR_I (-val) || CONST_OK_FOR_M (-val) || CONST_OK_FOR_N (-val))) return 2; return 5; } /* What does an and instruction cost - we do this b/c immediates may have been relaxed. We want to ensure that cse will cse relaxed immeds out. Otherwise we'll get bad code (multiple reloads of the same const). */ static int mcore_and_cost (rtx x) { HOST_WIDE_INT val; if (GET_CODE (XEXP (x, 1)) != CONST_INT) return 2; val = INTVAL (XEXP (x, 1)); /* Do it directly. */ if (CONST_OK_FOR_K (val) || CONST_OK_FOR_M (~val)) return 2; /* Takes one instruction to load. */ else if (const_ok_for_mcore (val)) return 3; /* Takes two instructions to load. */ else if (TARGET_HARDLIT && mcore_const_ok_for_inline (val)) return 4; /* Takes a lrw to load. */ return 5; } /* What does an or cost - see and_cost(). */ static int mcore_ior_cost (rtx x) { HOST_WIDE_INT val; if (GET_CODE (XEXP (x, 1)) != CONST_INT) return 2; val = INTVAL (XEXP (x, 1)); /* Do it directly with bclri. */ if (CONST_OK_FOR_M (val)) return 2; /* Takes one instruction to load. */ else if (const_ok_for_mcore (val)) return 3; /* Takes two instructions to load. */ else if (TARGET_HARDLIT && mcore_const_ok_for_inline (val)) return 4; /* Takes a lrw to load. */ return 5; } static bool mcore_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code, int opno ATTRIBUTE_UNUSED, int * total, bool speed ATTRIBUTE_UNUSED) { int code = GET_CODE (x); switch (code) { case CONST_INT: *total = mcore_const_costs (x, (enum rtx_code) outer_code); return true; case CONST: case LABEL_REF: case SYMBOL_REF: *total = 5; return true; case CONST_DOUBLE: *total = 10; return true; case AND: *total = COSTS_N_INSNS (mcore_and_cost (x)); return true; case IOR: *total = COSTS_N_INSNS (mcore_ior_cost (x)); return true; case DIV: case UDIV: case MOD: case UMOD: case FLOAT: case FIX: *total = COSTS_N_INSNS (100); return true; default: return false; } } /* Prepare the operands for a comparison. Return whether the branch/setcc should reverse the operands. */ bool mcore_gen_compare (enum rtx_code code, rtx op0, rtx op1) { rtx cc_reg = gen_rtx_REG (CCmode, CC_REG); bool invert; if (GET_CODE (op1) == CONST_INT) { HOST_WIDE_INT val = INTVAL (op1); switch (code) { case GTU: /* Unsigned > 0 is the same as != 0; everything else is converted below to LEU (reversed cmphs). */ if (val == 0) code = NE; break; /* Check whether (LE A imm) can become (LT A imm + 1), or (GT A imm) can become (GE A imm + 1). */ case GT: case LE: if (CONST_OK_FOR_J (val + 1)) { op1 = GEN_INT (val + 1); code = code == LE ? LT : GE; } break; default: break; } } if (CONSTANT_P (op1) && GET_CODE (op1) != CONST_INT) op1 = force_reg (SImode, op1); /* cmpnei: 0-31 (K immediate) cmplti: 1-32 (J immediate, 0 using btsti x,31). */ invert = false; switch (code) { case EQ: /* Use inverted condition, cmpne. */ code = NE; invert = true; /* FALLTHRU */ case NE: /* Use normal condition, cmpne. */ if (GET_CODE (op1) == CONST_INT && ! CONST_OK_FOR_K (INTVAL (op1))) op1 = force_reg (SImode, op1); break; case LE: /* Use inverted condition, reversed cmplt. */ code = GT; invert = true; /* FALLTHRU */ case GT: /* Use normal condition, reversed cmplt. */ if (GET_CODE (op1) == CONST_INT) op1 = force_reg (SImode, op1); break; case GE: /* Use inverted condition, cmplt. */ code = LT; invert = true; /* FALLTHRU */ case LT: /* Use normal condition, cmplt. */ if (GET_CODE (op1) == CONST_INT && /* covered by btsti x,31. */ INTVAL (op1) != 0 && ! CONST_OK_FOR_J (INTVAL (op1))) op1 = force_reg (SImode, op1); break; case GTU: /* Use inverted condition, cmple. */ /* We coped with unsigned > 0 above. */ gcc_assert (GET_CODE (op1) != CONST_INT || INTVAL (op1) != 0); code = LEU; invert = true; /* FALLTHRU */ case LEU: /* Use normal condition, reversed cmphs. */ if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0) op1 = force_reg (SImode, op1); break; case LTU: /* Use inverted condition, cmphs. */ code = GEU; invert = true; /* FALLTHRU */ case GEU: /* Use normal condition, cmphs. */ if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0) op1 = force_reg (SImode, op1); break; default: break; } emit_insn (gen_rtx_SET (cc_reg, gen_rtx_fmt_ee (code, CCmode, op0, op1))); return invert; } int mcore_symbolic_address_p (rtx x) { switch (GET_CODE (x)) { case SYMBOL_REF: case LABEL_REF: return 1; case CONST: x = XEXP (x, 0); return ( (GET_CODE (XEXP (x, 0)) == SYMBOL_REF || GET_CODE (XEXP (x, 0)) == LABEL_REF) && GET_CODE (XEXP (x, 1)) == CONST_INT); default: return 0; } } /* Functions to output assembly code for a function call. */ char * mcore_output_call (rtx operands[], int index) { static char buffer[20]; rtx addr = operands [index]; if (REG_P (addr)) { if (TARGET_CG_DATA) { gcc_assert (mcore_current_function_name); ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name, "unknown", 1); } sprintf (buffer, "jsr\t%%%d", index); } else { if (TARGET_CG_DATA) { gcc_assert (mcore_current_function_name); gcc_assert (GET_CODE (addr) == SYMBOL_REF); ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name, XSTR (addr, 0), 0); } sprintf (buffer, "jbsr\t%%%d", index); } return buffer; } /* Can we load a constant with a single instruction ? */ int const_ok_for_mcore (HOST_WIDE_INT value) { if (value >= 0 && value <= 127) return 1; /* Try exact power of two. */ if (CONST_OK_FOR_M (value)) return 1; /* Try exact power of two - 1. */ if (CONST_OK_FOR_N (value) && value != -1) return 1; return 0; } /* Can we load a constant inline with up to 2 instructions ? */ int mcore_const_ok_for_inline (HOST_WIDE_INT value) { HOST_WIDE_INT x, y; return try_constant_tricks (value, & x, & y) > 0; } /* Are we loading the constant using a not ? */ int mcore_const_trick_uses_not (HOST_WIDE_INT value) { HOST_WIDE_INT x, y; return try_constant_tricks (value, & x, & y) == 2; } /* Try tricks to load a constant inline and return the trick number if success (0 is non-inlinable). 0: not inlinable 1: single instruction (do the usual thing) 2: single insn followed by a 'not' 3: single insn followed by a subi 4: single insn followed by an addi 5: single insn followed by rsubi 6: single insn followed by bseti 7: single insn followed by bclri 8: single insn followed by rotli 9: single insn followed by lsli 10: single insn followed by ixh 11: single insn followed by ixw. */ static int try_constant_tricks (HOST_WIDE_INT value, HOST_WIDE_INT * x, HOST_WIDE_INT * y) { HOST_WIDE_INT i; unsigned HOST_WIDE_INT bit, shf, rot; if (const_ok_for_mcore (value)) return 1; /* Do the usual thing. */ if (! TARGET_HARDLIT) return 0; if (const_ok_for_mcore (~value)) { *x = ~value; return 2; } for (i = 1; i <= 32; i++) { if (const_ok_for_mcore (value - i)) { *x = value - i; *y = i; return 3; } if (const_ok_for_mcore (value + i)) { *x = value + i; *y = i; return 4; } } bit = 0x80000000ULL; for (i = 0; i <= 31; i++) { if (const_ok_for_mcore (i - value)) { *x = i - value; *y = i; return 5; } if (const_ok_for_mcore (value & ~bit)) { *y = bit; *x = value & ~bit; return 6; } if (const_ok_for_mcore (value | bit)) { *y = ~bit; *x = value | bit; return 7; } bit >>= 1; } shf = value; rot = value; for (i = 1; i < 31; i++) { int c; /* MCore has rotate left. */ c = rot << 31; rot >>= 1; rot &= 0x7FFFFFFF; rot |= c; /* Simulate rotate. */ if (const_ok_for_mcore (rot)) { *y = i; *x = rot; return 8; } if (shf & 1) shf = 0; /* Can't use logical shift, low order bit is one. */ shf >>= 1; if (shf != 0 && const_ok_for_mcore (shf)) { *y = i; *x = shf; return 9; } } if ((value % 3) == 0 && const_ok_for_mcore (value / 3)) { *x = value / 3; return 10; } if ((value % 5) == 0 && const_ok_for_mcore (value / 5)) { *x = value / 5; return 11; } return 0; } /* Check whether reg is dead at first. This is done by searching ahead for either the next use (i.e., reg is live), a death note, or a set of reg. Don't just use dead_or_set_p() since reload does not always mark deaths (especially if PRESERVE_DEATH_NOTES_REGNO_P is not defined). We can ignore subregs by extracting the actual register. BRC */ int mcore_is_dead (rtx_insn *first, rtx reg) { rtx_insn *insn; /* For mcore, subregs can't live independently of their parent regs. */ if (GET_CODE (reg) == SUBREG) reg = SUBREG_REG (reg); /* Dies immediately. */ if (dead_or_set_p (first, reg)) return 1; /* Look for conclusive evidence of live/death, otherwise we have to assume that it is live. */ for (insn = NEXT_INSN (first); insn; insn = NEXT_INSN (insn)) { if (JUMP_P (insn)) return 0; /* We lose track, assume it is alive. */ else if (CALL_P (insn)) { /* Call's might use it for target or register parms. */ if (reg_referenced_p (reg, PATTERN (insn)) || find_reg_fusage (insn, USE, reg)) return 0; else if (dead_or_set_p (insn, reg)) return 1; } else if (NONJUMP_INSN_P (insn)) { if (reg_referenced_p (reg, PATTERN (insn))) return 0; else if (dead_or_set_p (insn, reg)) return 1; } } /* No conclusive evidence either way, we cannot take the chance that control flow hid the use from us -- "I'm not dead yet". */ return 0; } /* Count the number of ones in mask. */ int mcore_num_ones (HOST_WIDE_INT mask) { /* A trick to count set bits recently posted on comp.compilers. */ mask = (mask >> 1 & 0x55555555) + (mask & 0x55555555); mask = ((mask >> 2) & 0x33333333) + (mask & 0x33333333); mask = ((mask >> 4) + mask) & 0x0f0f0f0f; mask = ((mask >> 8) + mask); return (mask + (mask >> 16)) & 0xff; } /* Count the number of zeros in mask. */ int mcore_num_zeros (HOST_WIDE_INT mask) { return 32 - mcore_num_ones (mask); } /* Determine byte being masked. */ int mcore_byte_offset (unsigned int mask) { if (mask == 0x00ffffffL) return 0; else if (mask == 0xff00ffffL) return 1; else if (mask == 0xffff00ffL) return 2; else if (mask == 0xffffff00L) return 3; return -1; } /* Determine halfword being masked. */ int mcore_halfword_offset (unsigned int mask) { if (mask == 0x0000ffffL) return 0; else if (mask == 0xffff0000L) return 1; return -1; } /* Output a series of bseti's corresponding to mask. */ const char * mcore_output_bseti (rtx dst, int mask) { rtx out_operands[2]; int bit; out_operands[0] = dst; for (bit = 0; bit < 32; bit++) { if ((mask & 0x1) == 0x1) { out_operands[1] = GEN_INT (bit); output_asm_insn ("bseti\t%0,%1", out_operands); } mask >>= 1; } return ""; } /* Output a series of bclri's corresponding to mask. */ const char * mcore_output_bclri (rtx dst, int mask) { rtx out_operands[2]; int bit; out_operands[0] = dst; for (bit = 0; bit < 32; bit++) { if ((mask & 0x1) == 0x0) { out_operands[1] = GEN_INT (bit); output_asm_insn ("bclri\t%0,%1", out_operands); } mask >>= 1; } return ""; } /* Output a conditional move of two constants that are +/- 1 within each other. See the "movtK" patterns in mcore.md. I'm not sure this is really worth the effort. */ const char * mcore_output_cmov (rtx operands[], int cmp_t, const char * test) { HOST_WIDE_INT load_value; HOST_WIDE_INT adjust_value; rtx out_operands[4]; out_operands[0] = operands[0]; /* Check to see which constant is loadable. */ if (const_ok_for_mcore (INTVAL (operands[1]))) { out_operands[1] = operands[1]; out_operands[2] = operands[2]; } else if (const_ok_for_mcore (INTVAL (operands[2]))) { out_operands[1] = operands[2]; out_operands[2] = operands[1]; /* Complement test since constants are swapped. */ cmp_t = (cmp_t == 0); } load_value = INTVAL (out_operands[1]); adjust_value = INTVAL (out_operands[2]); /* First output the test if folded into the pattern. */ if (test) output_asm_insn (test, operands); /* Load the constant - for now, only support constants that can be generated with a single instruction. maybe add general inlinable constants later (this will increase the # of patterns since the instruction sequence has a different length attribute). */ if (load_value >= 0 && load_value <= 127) output_asm_insn ("movi\t%0,%1", out_operands); else if (CONST_OK_FOR_M (load_value)) output_asm_insn ("bgeni\t%0,%P1", out_operands); else if (CONST_OK_FOR_N (load_value)) output_asm_insn ("bmaski\t%0,%N1", out_operands); /* Output the constant adjustment. */ if (load_value > adjust_value) { if (cmp_t) output_asm_insn ("decf\t%0", out_operands); else output_asm_insn ("dect\t%0", out_operands); } else { if (cmp_t) output_asm_insn ("incf\t%0", out_operands); else output_asm_insn ("inct\t%0", out_operands); } return ""; } /* Outputs the peephole for moving a constant that gets not'ed followed by an and (i.e. combine the not and the and into andn). BRC */ const char * mcore_output_andn (rtx insn ATTRIBUTE_UNUSED, rtx operands[]) { HOST_WIDE_INT x, y; rtx out_operands[3]; const char * load_op; char buf[256]; int trick_no; trick_no = try_constant_tricks (INTVAL (operands[1]), &x, &y); gcc_assert (trick_no == 2); out_operands[0] = operands[0]; out_operands[1] = GEN_INT (x); out_operands[2] = operands[2]; if (x >= 0 && x <= 127) load_op = "movi\t%0,%1"; /* Try exact power of two. */ else if (CONST_OK_FOR_M (x)) load_op = "bgeni\t%0,%P1"; /* Try exact power of two - 1. */ else if (CONST_OK_FOR_N (x)) load_op = "bmaski\t%0,%N1"; else { load_op = "BADMOVI-andn\t%0, %1"; gcc_unreachable (); } sprintf (buf, "%s\n\tandn\t%%2,%%0", load_op); output_asm_insn (buf, out_operands); return ""; } /* Output an inline constant. */ static const char * output_inline_const (machine_mode mode, rtx operands[]) { HOST_WIDE_INT x = 0, y = 0; int trick_no; rtx out_operands[3]; char buf[256]; char load_op[256]; const char *dst_fmt; HOST_WIDE_INT value; value = INTVAL (operands[1]); trick_no = try_constant_tricks (value, &x, &y); /* lrw's are handled separately: Large inlinable constants never get turned into lrw's. Our caller uses try_constant_tricks to back off to an lrw rather than calling this routine. */ gcc_assert (trick_no != 0); if (trick_no == 1) x = value; /* operands: 0 = dst, 1 = load immed., 2 = immed. adjustment. */ out_operands[0] = operands[0]; out_operands[1] = GEN_INT (x); if (trick_no > 2) out_operands[2] = GEN_INT (y); /* Select dst format based on mode. */ if (mode == DImode && (! TARGET_LITTLE_END)) dst_fmt = "%R0"; else dst_fmt = "%0"; if (x >= 0 && x <= 127) sprintf (load_op, "movi\t%s,%%1", dst_fmt); /* Try exact power of two. */ else if (CONST_OK_FOR_M (x)) sprintf (load_op, "bgeni\t%s,%%P1", dst_fmt); /* Try exact power of two - 1. */ else if (CONST_OK_FOR_N (x)) sprintf (load_op, "bmaski\t%s,%%N1", dst_fmt); else { sprintf (load_op, "BADMOVI-inline_const %s, %%1", dst_fmt); gcc_unreachable (); } switch (trick_no) { case 1: strcpy (buf, load_op); break; case 2: /* not */ sprintf (buf, "%s\n\tnot\t%s\t// %ld 0x%lx", load_op, dst_fmt, value, value); break; case 3: /* add */ sprintf (buf, "%s\n\taddi\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value); break; case 4: /* sub */ sprintf (buf, "%s\n\tsubi\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value); break; case 5: /* rsub */ /* Never happens unless -mrsubi, see try_constant_tricks(). */ sprintf (buf, "%s\n\trsubi\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value); break; case 6: /* bseti */ sprintf (buf, "%s\n\tbseti\t%s,%%P2\t// %ld 0x%lx", load_op, dst_fmt, value, value); break; case 7: /* bclr */ sprintf (buf, "%s\n\tbclri\t%s,%%Q2\t// %ld 0x%lx", load_op, dst_fmt, value, value); break; case 8: /* rotl */ sprintf (buf, "%s\n\trotli\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value); break; case 9: /* lsl */ sprintf (buf, "%s\n\tlsli\t%s,%%2\t// %ld 0x%lx", load_op, dst_fmt, value, value); break; case 10: /* ixh */ sprintf (buf, "%s\n\tixh\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, dst_fmt, value, value); break; case 11: /* ixw */ sprintf (buf, "%s\n\tixw\t%s,%s\t// %ld 0x%lx", load_op, dst_fmt, dst_fmt, value, value); break; default: return ""; } output_asm_insn (buf, out_operands); return ""; } /* Output a move of a word or less value. */ const char * mcore_output_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[], machine_mode mode ATTRIBUTE_UNUSED) { rtx dst = operands[0]; rtx src = operands[1]; if (GET_CODE (dst) == REG) { if (GET_CODE (src) == REG) { if (REGNO (src) == CC_REG) /* r-c */ return "mvc\t%0"; else return "mov\t%0,%1"; /* r-r*/ } else if (GET_CODE (src) == MEM) { if (GET_CODE (XEXP (src, 0)) == LABEL_REF) return "lrw\t%0,[%1]"; /* a-R */ else switch (GET_MODE (src)) /* r-m */ { case E_SImode: return "ldw\t%0,%1"; case E_HImode: return "ld.h\t%0,%1"; case E_QImode: return "ld.b\t%0,%1"; default: gcc_unreachable (); } } else if (GET_CODE (src) == CONST_INT) { HOST_WIDE_INT x, y; if (CONST_OK_FOR_I (INTVAL (src))) /* r-I */ return "movi\t%0,%1"; else if (CONST_OK_FOR_M (INTVAL (src))) /* r-M */ return "bgeni\t%0,%P1\t// %1 %x1"; else if (CONST_OK_FOR_N (INTVAL (src))) /* r-N */ return "bmaski\t%0,%N1\t// %1 %x1"; else if (try_constant_tricks (INTVAL (src), &x, &y)) /* R-P */ return output_inline_const (SImode, operands); /* 1-2 insns */ else return "lrw\t%0,%x1\t// %1"; /* Get it from literal pool. */ } else return "lrw\t%0, %1"; /* Into the literal pool. */ } else if (GET_CODE (dst) == MEM) /* m-r */ switch (GET_MODE (dst)) { case E_SImode: return "stw\t%1,%0"; case E_HImode: return "st.h\t%1,%0"; case E_QImode: return "st.b\t%1,%0"; default: gcc_unreachable (); } gcc_unreachable (); } /* Return a sequence of instructions to perform DI or DF move. Since the MCORE cannot move a DI or DF in one instruction, we have to take care when we see overlapping source and dest registers. */ const char * mcore_output_movedouble (rtx operands[], machine_mode mode ATTRIBUTE_UNUSED) { rtx dst = operands[0]; rtx src = operands[1]; if (GET_CODE (dst) == REG) { if (GET_CODE (src) == REG) { int dstreg = REGNO (dst); int srcreg = REGNO (src); /* Ensure the second source not overwritten. */ if (srcreg + 1 == dstreg) return "mov %R0,%R1\n\tmov %0,%1"; else return "mov %0,%1\n\tmov %R0,%R1"; } else if (GET_CODE (src) == MEM) { rtx memexp = XEXP (src, 0); int dstreg = REGNO (dst); int basereg = -1; if (GET_CODE (memexp) == LABEL_REF) return "lrw\t%0,[%1]\n\tlrw\t%R0,[%R1]"; else if (GET_CODE (memexp) == REG) basereg = REGNO (memexp); else if (GET_CODE (memexp) == PLUS) { if (GET_CODE (XEXP (memexp, 0)) == REG) basereg = REGNO (XEXP (memexp, 0)); else if (GET_CODE (XEXP (memexp, 1)) == REG) basereg = REGNO (XEXP (memexp, 1)); else gcc_unreachable (); } else gcc_unreachable (); /* ??? length attribute is wrong here. */ if (dstreg == basereg) { /* Just load them in reverse order. */ return "ldw\t%R0,%R1\n\tldw\t%0,%1"; /* XXX: alternative: move basereg to basereg+1 and then fall through. */ } else return "ldw\t%0,%1\n\tldw\t%R0,%R1"; } else if (GET_CODE (src) == CONST_INT) { if (TARGET_LITTLE_END) { if (CONST_OK_FOR_I (INTVAL (src))) output_asm_insn ("movi %0,%1", operands); else if (CONST_OK_FOR_M (INTVAL (src))) output_asm_insn ("bgeni %0,%P1", operands); else if (CONST_OK_FOR_N (INTVAL (src))) output_asm_insn ("bmaski %0,%N1", operands); else gcc_unreachable (); if (INTVAL (src) < 0) return "bmaski %R0,32"; else return "movi %R0,0"; } else { if (CONST_OK_FOR_I (INTVAL (src))) output_asm_insn ("movi %R0,%1", operands); else if (CONST_OK_FOR_M (INTVAL (src))) output_asm_insn ("bgeni %R0,%P1", operands); else if (CONST_OK_FOR_N (INTVAL (src))) output_asm_insn ("bmaski %R0,%N1", operands); else gcc_unreachable (); if (INTVAL (src) < 0) return "bmaski %0,32"; else return "movi %0,0"; } } else gcc_unreachable (); } else if (GET_CODE (dst) == MEM && GET_CODE (src) == REG) return "stw\t%1,%0\n\tstw\t%R1,%R0"; else gcc_unreachable (); } /* Predicates used by the templates. */ int mcore_arith_S_operand (rtx op) { if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (~INTVAL (op))) return 1; return 0; } /* Expand insert bit field. BRC */ int mcore_expand_insv (rtx operands[]) { int width = INTVAL (operands[1]); int posn = INTVAL (operands[2]); int mask; rtx mreg, sreg, ereg; /* To get width 1 insv, the test in store_bit_field() (expmed.c, line 191) for width==1 must be removed. Look around line 368. This is something we really want the md part to do. */ if (width == 1 && GET_CODE (operands[3]) == CONST_INT) { /* Do directly with bseti or bclri. */ /* RBE: 2/97 consider only low bit of constant. */ if ((INTVAL (operands[3]) & 1) == 0) { mask = ~(1 << posn); emit_insn (gen_rtx_SET (operands[0], gen_rtx_AND (SImode, operands[0], GEN_INT (mask)))); } else { mask = 1 << posn; emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (SImode, operands[0], GEN_INT (mask)))); } return 1; } /* Look at some bit-field placements that we aren't interested in handling ourselves, unless specifically directed to do so. */ if (! TARGET_W_FIELD) return 0; /* Generally, give up about now. */ if (width == 8 && posn % 8 == 0) /* Byte sized and aligned; let caller break it up. */ return 0; if (width == 16 && posn % 16 == 0) /* Short sized and aligned; let caller break it up. */ return 0; /* The general case - we can do this a little bit better than what the machine independent part tries. This will get rid of all the subregs that mess up constant folding in combine when working with relaxed immediates. */ /* If setting the entire field, do it directly. */ if (GET_CODE (operands[3]) == CONST_INT && INTVAL (operands[3]) == ((1 << width) - 1)) { mreg = force_reg (SImode, GEN_INT (INTVAL (operands[3]) << posn)); emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (SImode, operands[0], mreg))); return 1; } /* Generate the clear mask. */ mreg = force_reg (SImode, GEN_INT (~(((1 << width) - 1) << posn))); /* Clear the field, to overlay it later with the source. */ emit_insn (gen_rtx_SET (operands[0], gen_rtx_AND (SImode, operands[0], mreg))); /* If the source is constant 0, we've nothing to add back. */ if (GET_CODE (operands[3]) == CONST_INT && INTVAL (operands[3]) == 0) return 1; /* XXX: Should we worry about more games with constant values? We've covered the high profile: set/clear single-bit and many-bit fields. How often do we see "arbitrary bit pattern" constants? */ sreg = copy_to_mode_reg (SImode, operands[3]); /* Extract src as same width as dst (needed for signed values). We always have to do this since we widen everything to SImode. We don't have to mask if we're shifting this up against the MSB of the register (e.g., the shift will push out any hi-order bits. */ if (width + posn != (int) GET_MODE_SIZE (SImode)) { ereg = force_reg (SImode, GEN_INT ((1 << width) - 1)); emit_insn (gen_rtx_SET (sreg, gen_rtx_AND (SImode, sreg, ereg))); } /* Insert source value in dest. */ if (posn != 0) emit_insn (gen_rtx_SET (sreg, gen_rtx_ASHIFT (SImode, sreg, GEN_INT (posn)))); emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (SImode, operands[0], sreg))); return 1; } /* ??? Block move stuff stolen from m88k. This code has not been verified for correctness. */ /* Emit code to perform a block move. Choose the best method. OPERANDS[0] is the destination. OPERANDS[1] is the source. OPERANDS[2] is the size. OPERANDS[3] is the alignment safe to use. */ /* Emit code to perform a block move with an offset sequence of ldw/st instructions (..., ldw 0, stw 1, ldw 1, stw 0, ...). SIZE and ALIGN are known constants. DEST and SRC are registers. OFFSET is the known starting point for the output pattern. */ static const machine_mode mode_from_align[] = { VOIDmode, QImode, HImode, VOIDmode, SImode, }; static void block_move_sequence (rtx dst_mem, rtx src_mem, int size, int align) { rtx temp[2]; machine_mode mode[2]; int amount[2]; bool active[2]; int phase = 0; int next; int offset_ld = 0; int offset_st = 0; rtx x; x = XEXP (dst_mem, 0); if (!REG_P (x)) { x = force_reg (Pmode, x); dst_mem = replace_equiv_address (dst_mem, x); } x = XEXP (src_mem, 0); if (!REG_P (x)) { x = force_reg (Pmode, x); src_mem = replace_equiv_address (src_mem, x); } active[0] = active[1] = false; do { next = phase; phase ^= 1; if (size > 0) { int next_amount; next_amount = (size >= 4 ? 4 : (size >= 2 ? 2 : 1)); next_amount = MIN (next_amount, align); amount[next] = next_amount; mode[next] = mode_from_align[next_amount]; temp[next] = gen_reg_rtx (mode[next]); x = adjust_address (src_mem, mode[next], offset_ld); emit_insn (gen_rtx_SET (temp[next], x)); offset_ld += next_amount; size -= next_amount; active[next] = true; } if (active[phase]) { active[phase] = false; x = adjust_address (dst_mem, mode[phase], offset_st); emit_insn (gen_rtx_SET (x, temp[phase])); offset_st += amount[phase]; } } while (active[next]); } bool mcore_expand_block_move (rtx *operands) { HOST_WIDE_INT align, bytes, max; if (GET_CODE (operands[2]) != CONST_INT) return false; bytes = INTVAL (operands[2]); align = INTVAL (operands[3]); if (bytes <= 0) return false; if (align > 4) align = 4; switch (align) { case 4: if (bytes & 1) max = 4*4; else if (bytes & 3) max = 8*4; else max = 16*4; break; case 2: max = 4*2; break; case 1: max = 4*1; break; default: gcc_unreachable (); } if (bytes <= max) { block_move_sequence (operands[0], operands[1], bytes, align); return true; } return false; } /* Code to generate prologue and epilogue sequences. */ static int number_of_regs_before_varargs; /* Set by TARGET_SETUP_INCOMING_VARARGS to indicate to prolog that this is for a varargs function. */ static int current_function_anonymous_args; #define STACK_BYTES (STACK_BOUNDARY/BITS_PER_UNIT) #define STORE_REACH (64) /* Maximum displace of word store + 4. */ #define ADDI_REACH (32) /* Maximum addi operand. */ static void layout_mcore_frame (struct mcore_frame * infp) { int n; unsigned int i; int nbytes; int regarg; int localregarg; int outbounds; unsigned int growths; int step; /* Might have to spill bytes to re-assemble a big argument that was passed partially in registers and partially on the stack. */ nbytes = crtl->args.pretend_args_size; /* Determine how much space for spilled anonymous args (e.g., stdarg). */ if (current_function_anonymous_args) nbytes += (NPARM_REGS - number_of_regs_before_varargs) * UNITS_PER_WORD; infp->arg_size = nbytes; /* How much space to save non-volatile registers we stomp. */ infp->reg_mask = calc_live_regs (& n); infp->reg_size = n * 4; /* And the rest of it... locals and space for overflowed outbounds. */ infp->local_size = get_frame_size (); infp->outbound_size = crtl->outgoing_args_size; /* Make sure we have a whole number of words for the locals. */ if (infp->local_size % STACK_BYTES) infp->local_size = (infp->local_size + STACK_BYTES - 1) & ~ (STACK_BYTES -1); /* Only thing we know we have to pad is the outbound space, since we've aligned our locals assuming that base of locals is aligned. */ infp->pad_local = 0; infp->pad_reg = 0; infp->pad_outbound = 0; if (infp->outbound_size % STACK_BYTES) infp->pad_outbound = STACK_BYTES - (infp->outbound_size % STACK_BYTES); /* Now we see how we want to stage the prologue so that it does the most appropriate stack growth and register saves to either: (1) run fast, (2) reduce instruction space, or (3) reduce stack space. */ for (i = 0; i < ARRAY_SIZE (infp->growth); i++) infp->growth[i] = 0; regarg = infp->reg_size + infp->arg_size; localregarg = infp->local_size + regarg; outbounds = infp->outbound_size + infp->pad_outbound; growths = 0; /* XXX: Consider one where we consider localregarg + outbound too! */ /* Frame of <= 32 bytes and using stm would get <= 2 registers. use stw's with offsets and buy the frame in one shot. */ if (localregarg <= ADDI_REACH && (infp->reg_size <= 8 || (infp->reg_mask & 0xc000) != 0xc000)) { /* Make sure we'll be aligned. */ if (localregarg % STACK_BYTES) infp->pad_reg = STACK_BYTES - (localregarg % STACK_BYTES); step = localregarg + infp->pad_reg; infp->reg_offset = infp->local_size; if (outbounds + step <= ADDI_REACH && !frame_pointer_needed) { step += outbounds; infp->reg_offset += outbounds; outbounds = 0; } infp->arg_offset = step - 4; infp->growth[growths++] = step; infp->reg_growth = growths; infp->local_growth = growths; /* If we haven't already folded it in. */ if (outbounds) infp->growth[growths++] = outbounds; goto finish; } /* Frame can't be done with a single subi, but can be done with 2 insns. If the 'stm' is getting <= 2 registers, we use stw's and shift some of the stack purchase into the first subi, so both are single instructions. */ if (localregarg <= STORE_REACH && (infp->local_size > ADDI_REACH) && (infp->reg_size <= 8 || (infp->reg_mask & 0xc000) != 0xc000)) { int all; /* Make sure we'll be aligned; use either pad_reg or pad_local. */ if (localregarg % STACK_BYTES) infp->pad_reg = STACK_BYTES - (localregarg % STACK_BYTES); all = localregarg + infp->pad_reg + infp->pad_local; step = ADDI_REACH; /* As much up front as we can. */ if (step > all) step = all; /* XXX: Consider whether step will still be aligned; we believe so. */ infp->arg_offset = step - 4; infp->growth[growths++] = step; infp->reg_growth = growths; infp->reg_offset = step - infp->pad_reg - infp->reg_size; all -= step; /* Can we fold in any space required for outbounds? */ if (outbounds + all <= ADDI_REACH && !frame_pointer_needed) { all += outbounds; outbounds = 0; } /* Get the rest of the locals in place. */ step = all; infp->growth[growths++] = step; infp->local_growth = growths; all -= step; gcc_assert (all == 0); /* Finish off if we need to do so. */ if (outbounds) infp->growth[growths++] = outbounds; goto finish; } /* Registers + args is nicely aligned, so we'll buy that in one shot. Then we buy the rest of the frame in 1 or 2 steps depending on whether we need a frame pointer. */ if ((regarg % STACK_BYTES) == 0) { infp->growth[growths++] = regarg; infp->reg_growth = growths; infp->arg_offset = regarg - 4; infp->reg_offset = 0; if (infp->local_size % STACK_BYTES) infp->pad_local = STACK_BYTES - (infp->local_size % STACK_BYTES); step = infp->local_size + infp->pad_local; if (!frame_pointer_needed) { step += outbounds; outbounds = 0; } infp->growth[growths++] = step; infp->local_growth = growths; /* If there's any left to be done. */ if (outbounds) infp->growth[growths++] = outbounds; goto finish; } /* XXX: optimizations that we'll want to play with.... -- regarg is not aligned, but it's a small number of registers; use some of localsize so that regarg is aligned and then save the registers. */ /* Simple encoding; plods down the stack buying the pieces as it goes. -- does not optimize space consumption. -- does not attempt to optimize instruction counts. -- but it is safe for all alignments. */ if (regarg % STACK_BYTES != 0) infp->pad_reg = STACK_BYTES - (regarg % STACK_BYTES); infp->growth[growths++] = infp->arg_size + infp->reg_size + infp->pad_reg; infp->reg_growth = growths; infp->arg_offset = infp->growth[0] - 4; infp->reg_offset = 0; if (frame_pointer_needed) { if (infp->local_size % STACK_BYTES != 0) infp->pad_local = STACK_BYTES - (infp->local_size % STACK_BYTES); infp->growth[growths++] = infp->local_size + infp->pad_local; infp->local_growth = growths; infp->growth[growths++] = outbounds; } else { if ((infp->local_size + outbounds) % STACK_BYTES != 0) infp->pad_local = STACK_BYTES - ((infp->local_size + outbounds) % STACK_BYTES); infp->growth[growths++] = infp->local_size + infp->pad_local + outbounds; infp->local_growth = growths; } /* Anything else that we've forgotten?, plus a few consistency checks. */ finish: gcc_assert (infp->reg_offset >= 0); gcc_assert (growths <= MAX_STACK_GROWS); for (i = 0; i < growths; i++) gcc_assert (!(infp->growth[i] % STACK_BYTES)); } /* Define the offset between two registers, one to be eliminated, and the other its replacement, at the start of a routine. */ int mcore_initial_elimination_offset (int from, int to) { int above_frame; int below_frame; struct mcore_frame fi; layout_mcore_frame (& fi); /* fp to ap */ above_frame = fi.local_size + fi.pad_local + fi.reg_size + fi.pad_reg; /* sp to fp */ below_frame = fi.outbound_size + fi.pad_outbound; if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) return above_frame; if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) return above_frame + below_frame; if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) return below_frame; gcc_unreachable (); } /* Keep track of some information about varargs for the prolog. */ static void mcore_setup_incoming_varargs (cumulative_args_t args_so_far_v, machine_mode mode, tree type, int * ptr_pretend_size ATTRIBUTE_UNUSED, int second_time ATTRIBUTE_UNUSED) { CUMULATIVE_ARGS *args_so_far = get_cumulative_args (args_so_far_v); current_function_anonymous_args = 1; /* We need to know how many argument registers are used before the varargs start, so that we can push the remaining argument registers during the prologue. */ number_of_regs_before_varargs = *args_so_far + mcore_num_arg_regs (mode, type); /* There is a bug somewhere in the arg handling code. Until I can find it this workaround always pushes the last named argument onto the stack. */ number_of_regs_before_varargs = *args_so_far; /* The last named argument may be split between argument registers and the stack. Allow for this here. */ if (number_of_regs_before_varargs > NPARM_REGS) number_of_regs_before_varargs = NPARM_REGS; } void mcore_expand_prolog (void) { struct mcore_frame fi; int space_allocated = 0; int growth = 0; /* Find out what we're doing. */ layout_mcore_frame (&fi); space_allocated = fi.arg_size + fi.reg_size + fi.local_size + fi.outbound_size + fi.pad_outbound + fi.pad_local + fi.pad_reg; if (TARGET_CG_DATA) { /* Emit a symbol for this routine's frame size. */ rtx x; x = DECL_RTL (current_function_decl); gcc_assert (GET_CODE (x) == MEM); x = XEXP (x, 0); gcc_assert (GET_CODE (x) == SYMBOL_REF); free (mcore_current_function_name); mcore_current_function_name = xstrdup (XSTR (x, 0)); ASM_OUTPUT_CG_NODE (asm_out_file, mcore_current_function_name, space_allocated); if (cfun->calls_alloca) ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name, "alloca", 1); /* 970425: RBE: We're looking at how the 8byte alignment affects stack layout and where we had to pad things. This emits information we can extract which tells us about frame sizes and the like. */ fprintf (asm_out_file, "\t.equ\t__$frame$info$_%s_$_%d_%d_x%x_%d_%d_%d,0\n", mcore_current_function_name, fi.arg_size, fi.reg_size, fi.reg_mask, fi.local_size, fi.outbound_size, frame_pointer_needed); } if (mcore_naked_function_p ()) return; /* Handle stdarg+regsaves in one shot: can't be more than 64 bytes. */ output_stack_adjust (-1, fi.growth[growth++]); /* Grows it. */ /* If we have a parameter passed partially in regs and partially in memory, the registers will have been stored to memory already in function.c. So we only need to do something here for varargs functions. */ if (fi.arg_size != 0 && crtl->args.pretend_args_size == 0) { int offset; int rn = FIRST_PARM_REG + NPARM_REGS - 1; int remaining = fi.arg_size; for (offset = fi.arg_offset; remaining >= 4; offset -= 4, rn--, remaining -= 4) { emit_insn (gen_movsi (gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, offset)), gen_rtx_REG (SImode, rn))); } } /* Do we need another stack adjustment before we do the register saves? */ if (growth < fi.reg_growth) output_stack_adjust (-1, fi.growth[growth++]); /* Grows it. */ if (fi.reg_size != 0) { int i; int offs = fi.reg_offset; for (i = 15; i >= 0; i--) { if (offs == 0 && i == 15 && ((fi.reg_mask & 0xc000) == 0xc000)) { int first_reg = 15; while (fi.reg_mask & (1 << first_reg)) first_reg--; first_reg++; emit_insn (gen_store_multiple (gen_rtx_MEM (SImode, stack_pointer_rtx), gen_rtx_REG (SImode, first_reg), GEN_INT (16 - first_reg))); i -= (15 - first_reg); offs += (16 - first_reg) * 4; } else if (fi.reg_mask & (1 << i)) { emit_insn (gen_movsi (gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, offs)), gen_rtx_REG (SImode, i))); offs += 4; } } } /* Figure the locals + outbounds. */ if (frame_pointer_needed) { /* If we haven't already purchased to 'fp'. */ if (growth < fi.local_growth) output_stack_adjust (-1, fi.growth[growth++]); /* Grows it. */ emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx)); /* ... and then go any remaining distance for outbounds, etc. */ if (fi.growth[growth]) output_stack_adjust (-1, fi.growth[growth++]); } else { if (growth < fi.local_growth) output_stack_adjust (-1, fi.growth[growth++]); /* Grows it. */ if (fi.growth[growth]) output_stack_adjust (-1, fi.growth[growth++]); } } void mcore_expand_epilog (void) { struct mcore_frame fi; int i; int offs; int growth = MAX_STACK_GROWS - 1 ; /* Find out what we're doing. */ layout_mcore_frame(&fi); if (mcore_naked_function_p ()) return; /* If we had a frame pointer, restore the sp from that. */ if (frame_pointer_needed) { emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx)); growth = fi.local_growth - 1; } else { /* XXX: while loop should accumulate and do a single sell. */ while (growth >= fi.local_growth) { if (fi.growth[growth] != 0) output_stack_adjust (1, fi.growth[growth]); growth--; } } /* Make sure we've shrunk stack back to the point where the registers were laid down. This is typically 0/1 iterations. Then pull the register save information back off the stack. */ while (growth >= fi.reg_growth) output_stack_adjust ( 1, fi.growth[growth--]); offs = fi.reg_offset; for (i = 15; i >= 0; i--) { if (offs == 0 && i == 15 && ((fi.reg_mask & 0xc000) == 0xc000)) { int first_reg; /* Find the starting register. */ first_reg = 15; while (fi.reg_mask & (1 << first_reg)) first_reg--; first_reg++; emit_insn (gen_load_multiple (gen_rtx_REG (SImode, first_reg), gen_rtx_MEM (SImode, stack_pointer_rtx), GEN_INT (16 - first_reg))); i -= (15 - first_reg); offs += (16 - first_reg) * 4; } else if (fi.reg_mask & (1 << i)) { emit_insn (gen_movsi (gen_rtx_REG (SImode, i), gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, offs)))); offs += 4; } } /* Give back anything else. */ /* XXX: Should accumulate total and then give it back. */ while (growth >= 0) output_stack_adjust ( 1, fi.growth[growth--]); } /* This code is borrowed from the SH port. */ /* The MCORE cannot load a large constant into a register, constants have to come from a pc relative load. The reference of a pc relative load instruction must be less than 1k in front of the instruction. This means that we often have to dump a constant inside a function, and generate code to branch around it. It is important to minimize this, since the branches will slow things down and make things bigger. Worst case code looks like: lrw L1,r0 br L2 align L1: .long value L2: .. lrw L3,r0 br L4 align L3: .long value L4: .. We fix this by performing a scan before scheduling, which notices which instructions need to have their operands fetched from the constant table and builds the table. The algorithm is: scan, find an instruction which needs a pcrel move. Look forward, find the last barrier which is within MAX_COUNT bytes of the requirement. If there isn't one, make one. Process all the instructions between the find and the barrier. In the above example, we can tell that L3 is within 1k of L1, so the first move can be shrunk from the 2 insn+constant sequence into just 1 insn, and the constant moved to L3 to make: lrw L1,r0 .. lrw L3,r0 bra L4 align L3:.long value L4:.long value Then the second move becomes the target for the shortening process. */ typedef struct { rtx value; /* Value in table. */ rtx label; /* Label of value. */ } pool_node; /* The maximum number of constants that can fit into one pool, since the pc relative range is 0...1020 bytes and constants are at least 4 bytes long. We subtract 4 from the range to allow for the case where we need to add a branch/align before the constant pool. */ #define MAX_COUNT 1016 #define MAX_POOL_SIZE (MAX_COUNT/4) static pool_node pool_vector[MAX_POOL_SIZE]; static int pool_size; /* Dump out any constants accumulated in the final pass. These will only be labels. */ const char * mcore_output_jump_label_table (void) { int i; if (pool_size) { fprintf (asm_out_file, "\t.align 2\n"); for (i = 0; i < pool_size; i++) { pool_node * p = pool_vector + i; (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (p->label)); output_asm_insn (".long %0", &p->value); } pool_size = 0; } return ""; } /* Check whether insn is a candidate for a conditional. */ static cond_type is_cond_candidate (rtx insn) { /* The only things we conditionalize are those that can be directly changed into a conditional. Only bother with SImode items. If we wanted to be a little more aggressive, we could also do other modes such as DImode with reg-reg move or load 0. */ if (NONJUMP_INSN_P (insn)) { rtx pat = PATTERN (insn); rtx src, dst; if (GET_CODE (pat) != SET) return COND_NO; dst = XEXP (pat, 0); if ((GET_CODE (dst) != REG && GET_CODE (dst) != SUBREG) || GET_MODE (dst) != SImode) return COND_NO; src = XEXP (pat, 1); if ((GET_CODE (src) == REG || (GET_CODE (src) == SUBREG && GET_CODE (SUBREG_REG (src)) == REG)) && GET_MODE (src) == SImode) return COND_MOV_INSN; else if (GET_CODE (src) == CONST_INT && INTVAL (src) == 0) return COND_CLR_INSN; else if (GET_CODE (src) == PLUS && (GET_CODE (XEXP (src, 0)) == REG || (GET_CODE (XEXP (src, 0)) == SUBREG && GET_CODE (SUBREG_REG (XEXP (src, 0))) == REG)) && GET_MODE (XEXP (src, 0)) == SImode && GET_CODE (XEXP (src, 1)) == CONST_INT && INTVAL (XEXP (src, 1)) == 1) return COND_INC_INSN; else if (((GET_CODE (src) == MINUS && GET_CODE (XEXP (src, 1)) == CONST_INT && INTVAL( XEXP (src, 1)) == 1) || (GET_CODE (src) == PLUS && GET_CODE (XEXP (src, 1)) == CONST_INT && INTVAL (XEXP (src, 1)) == -1)) && (GET_CODE (XEXP (src, 0)) == REG || (GET_CODE (XEXP (src, 0)) == SUBREG && GET_CODE (SUBREG_REG (XEXP (src, 0))) == REG)) && GET_MODE (XEXP (src, 0)) == SImode) return COND_DEC_INSN; /* Some insns that we don't bother with: (set (rx:DI) (ry:DI)) (set (rx:DI) (const_int 0)) */ } else if (JUMP_P (insn) && GET_CODE (PATTERN (insn)) == SET && GET_CODE (XEXP (PATTERN (insn), 1)) == LABEL_REF) return COND_BRANCH_INSN; return COND_NO; } /* Emit a conditional version of insn and replace the old insn with the new one. Return the new insn if emitted. */ static rtx_insn * emit_new_cond_insn (rtx_insn *insn, int cond) { rtx c_insn = 0; rtx pat, dst, src; cond_type num; if ((num = is_cond_candidate (insn)) == COND_NO) return NULL; pat = PATTERN (insn); if (NONJUMP_INSN_P (insn)) { dst = SET_DEST (pat); src = SET_SRC (pat); } else { dst = JUMP_LABEL (insn); src = NULL_RTX; } switch (num) { case COND_MOV_INSN: case COND_CLR_INSN: if (cond) c_insn = gen_movt0 (dst, src, dst); else c_insn = gen_movt0 (dst, dst, src); break; case COND_INC_INSN: if (cond) c_insn = gen_incscc (dst, dst); else c_insn = gen_incscc_false (dst, dst); break; case COND_DEC_INSN: if (cond) c_insn = gen_decscc (dst, dst); else c_insn = gen_decscc_false (dst, dst); break; case COND_BRANCH_INSN: if (cond) c_insn = gen_branch_true (dst); else c_insn = gen_branch_false (dst); break; default: return NULL; } /* Only copy the notes if they exist. */ if (rtx_length [GET_CODE (c_insn)] >= 7 && rtx_length [GET_CODE (insn)] >= 7) { /* We really don't need to bother with the notes and links at this point, but go ahead and save the notes. This will help is_dead() when applying peepholes (links don't matter since they are not used any more beyond this point for the mcore). */ REG_NOTES (c_insn) = REG_NOTES (insn); } if (num == COND_BRANCH_INSN) { /* For jumps, we need to be a little bit careful and emit the new jump before the old one and to update the use count for the target label. This way, the barrier following the old (uncond) jump will get deleted, but the label won't. */ c_insn = emit_jump_insn_before (c_insn, insn); ++ LABEL_NUSES (dst); JUMP_LABEL (c_insn) = dst; } else c_insn = emit_insn_after (c_insn, insn); delete_insn (insn); return as_a (c_insn); } /* Attempt to change a basic block into a series of conditional insns. This works by taking the branch at the end of the 1st block and scanning for the end of the 2nd block. If all instructions in the 2nd block have cond. versions and the label at the start of block 3 is the same as the target from the branch at block 1, then conditionalize all insn in block 2 using the inverse condition of the branch at block 1. (Note I'm bending the definition of basic block here.) e.g., change: bt L2 <-- end of block 1 (delete) mov r7,r8 addu r7,1 br L3 <-- end of block 2 L2: ... <-- start of block 3 (NUSES==1) L3: ... to: movf r7,r8 incf r7 bf L3 L3: ... we can delete the L2 label if NUSES==1 and re-apply the optimization starting at the last instruction of block 2. This may allow an entire if-then-else statement to be conditionalized. BRC */ static rtx_insn * conditionalize_block (rtx_insn *first) { rtx_insn *insn; rtx br_pat; rtx_insn *end_blk_1_br = 0; rtx_insn *end_blk_2_insn = 0; rtx_insn *start_blk_3_lab = 0; int cond; int br_lab_num; int blk_size = 0; /* Check that the first insn is a candidate conditional jump. This is the one that we'll eliminate. If not, advance to the next insn to try. */ if (! JUMP_P (first) || GET_CODE (PATTERN (first)) != SET || GET_CODE (XEXP (PATTERN (first), 1)) != IF_THEN_ELSE) return NEXT_INSN (first); /* Extract some information we need. */ end_blk_1_br = first; br_pat = PATTERN (end_blk_1_br); /* Complement the condition since we use the reverse cond. for the insns. */ cond = (GET_CODE (XEXP (XEXP (br_pat, 1), 0)) == EQ); /* Determine what kind of branch we have. */ if (GET_CODE (XEXP (XEXP (br_pat, 1), 1)) == LABEL_REF) { /* A normal branch, so extract label out of first arm. */ br_lab_num = CODE_LABEL_NUMBER (XEXP (XEXP (XEXP (br_pat, 1), 1), 0)); } else { /* An inverse branch, so extract the label out of the 2nd arm and complement the condition. */ cond = (cond == 0); br_lab_num = CODE_LABEL_NUMBER (XEXP (XEXP (XEXP (br_pat, 1), 2), 0)); } /* Scan forward for the start of block 2: it must start with a label and that label must be the same as the branch target label from block 1. We don't care about whether block 2 actually ends with a branch or a label (an uncond. branch is conditionalizable). */ for (insn = NEXT_INSN (first); insn; insn = NEXT_INSN (insn)) { enum rtx_code code; code = GET_CODE (insn); /* Look for the label at the start of block 3. */ if (code == CODE_LABEL && CODE_LABEL_NUMBER (insn) == br_lab_num) break; /* Skip barriers, notes, and conditionalizable insns. If the insn is not conditionalizable or makes this optimization fail, just return the next insn so we can start over from that point. */ if (code != BARRIER && code != NOTE && !is_cond_candidate (insn)) return NEXT_INSN (insn); /* Remember the last real insn before the label (i.e. end of block 2). */ if (code == JUMP_INSN || code == INSN) { blk_size ++; end_blk_2_insn = insn; } } if (!insn) return insn; /* It is possible for this optimization to slow performance if the blocks are long. This really depends upon whether the branch is likely taken or not. If the branch is taken, we slow performance in many cases. But, if the branch is not taken, we always help performance (for a single block, but for a double block (i.e. when the optimization is re-applied) this is not true since the 'right thing' depends on the overall length of the collapsed block). As a compromise, don't apply this optimization on blocks larger than size 2 (unlikely for the mcore) when speed is important. the best threshold depends on the latencies of the instructions (i.e., the branch penalty). */ if (optimize > 1 && blk_size > 2) return insn; /* At this point, we've found the start of block 3 and we know that it is the destination of the branch from block 1. Also, all instructions in the block 2 are conditionalizable. So, apply the conditionalization and delete the branch. */ start_blk_3_lab = insn; for (insn = NEXT_INSN (end_blk_1_br); insn != start_blk_3_lab; insn = NEXT_INSN (insn)) { rtx_insn *newinsn; if (insn->deleted ()) continue; /* Try to form a conditional variant of the instruction and emit it. */ if ((newinsn = emit_new_cond_insn (insn, cond))) { if (end_blk_2_insn == insn) end_blk_2_insn = newinsn; insn = newinsn; } } /* Note whether we will delete the label starting blk 3 when the jump gets deleted. If so, we want to re-apply this optimization at the last real instruction right before the label. */ if (LABEL_NUSES (start_blk_3_lab) == 1) { start_blk_3_lab = 0; } /* ??? we probably should redistribute the death notes for this insn, esp. the death of cc, but it doesn't really matter this late in the game. The peepholes all use is_dead() which will find the correct death regardless of whether there is a note. */ delete_insn (end_blk_1_br); if (! start_blk_3_lab) return end_blk_2_insn; /* Return the insn right after the label at the start of block 3. */ return NEXT_INSN (start_blk_3_lab); } /* Apply the conditionalization of blocks optimization. This is the outer loop that traverses through the insns scanning for a branch that signifies an opportunity to apply the optimization. Note that this optimization is applied late. If we could apply it earlier, say before cse 2, it may expose more optimization opportunities. but, the pay back probably isn't really worth the effort (we'd have to update all reg/flow/notes/links/etc to make it work - and stick it in before cse 2). */ static void conditionalize_optimization (void) { rtx_insn *insn; for (insn = get_insns (); insn; insn = conditionalize_block (insn)) continue; } /* This is to handle loads from the constant pool. */ static void mcore_reorg (void) { /* Reset this variable. */ current_function_anonymous_args = 0; if (optimize == 0) return; /* Conditionalize blocks where we can. */ conditionalize_optimization (); /* Literal pool generation is now pushed off until the assembler. */ } /* Return true if X is something that can be moved directly into r15. */ bool mcore_r15_operand_p (rtx x) { switch (GET_CODE (x)) { case CONST_INT: return mcore_const_ok_for_inline (INTVAL (x)); case REG: case SUBREG: case MEM: return 1; default: return 0; } } /* Implement SECONDARY_RELOAD_CLASS. If RCLASS contains r15, and we can't directly move X into it, use r1-r14 as a temporary. */ enum reg_class mcore_secondary_reload_class (enum reg_class rclass, machine_mode mode ATTRIBUTE_UNUSED, rtx x) { if (TEST_HARD_REG_BIT (reg_class_contents[rclass], 15) && !mcore_r15_operand_p (x)) return LRW_REGS; return NO_REGS; } /* Return the reg_class to use when reloading the rtx X into the class RCLASS. If X is too complex to move directly into r15, prefer to use LRW_REGS instead. */ enum reg_class mcore_reload_class (rtx x, enum reg_class rclass) { if (reg_class_subset_p (LRW_REGS, rclass) && !mcore_r15_operand_p (x)) return LRW_REGS; return rclass; } /* Tell me if a pair of reg/subreg rtx's actually refer to the same register. Note that the current version doesn't worry about whether they are the same mode or note (e.g., a QImode in r2 matches an HImode in r2 matches an SImode in r2. Might think in the future about whether we want to be able to say something about modes. */ int mcore_is_same_reg (rtx x, rtx y) { /* Strip any and all of the subreg wrappers. */ while (GET_CODE (x) == SUBREG) x = SUBREG_REG (x); while (GET_CODE (y) == SUBREG) y = SUBREG_REG (y); if (GET_CODE(x) == REG && GET_CODE(y) == REG && REGNO(x) == REGNO(y)) return 1; return 0; } static void mcore_option_override (void) { /* Only the m340 supports little endian code. */ if (TARGET_LITTLE_END && ! TARGET_M340) target_flags |= MASK_M340; } /* Compute the number of word sized registers needed to hold a function argument of mode MODE and type TYPE. */ int mcore_num_arg_regs (machine_mode mode, const_tree type) { int size; if (targetm.calls.must_pass_in_stack (mode, type)) return 0; if (type && mode == BLKmode) size = int_size_in_bytes (type); else size = GET_MODE_SIZE (mode); return ROUND_ADVANCE (size); } static rtx handle_structs_in_regs (machine_mode mode, const_tree type, int reg) { int size; /* The MCore ABI defines that a structure whose size is not a whole multiple of bytes is passed packed into registers (or spilled onto the stack if not enough registers are available) with the last few bytes of the structure being packed, left-justified, into the last register/stack slot. GCC handles this correctly if the last word is in a stack slot, but we have to generate a special, PARALLEL RTX if the last word is in an argument register. */ if (type && TYPE_MODE (type) == BLKmode && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST && (size = int_size_in_bytes (type)) > UNITS_PER_WORD && (size % UNITS_PER_WORD != 0) && (reg + mcore_num_arg_regs (mode, type) <= (FIRST_PARM_REG + NPARM_REGS))) { rtx arg_regs [NPARM_REGS]; int nregs; rtx result; rtvec rtvec; for (nregs = 0; size > 0; size -= UNITS_PER_WORD) { arg_regs [nregs] = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, reg ++), GEN_INT (nregs * UNITS_PER_WORD)); nregs ++; } /* We assume here that NPARM_REGS == 6. The assert checks this. */ gcc_assert (ARRAY_SIZE (arg_regs) == 6); rtvec = gen_rtvec (nregs, arg_regs[0], arg_regs[1], arg_regs[2], arg_regs[3], arg_regs[4], arg_regs[5]); result = gen_rtx_PARALLEL (mode, rtvec); return result; } return gen_rtx_REG (mode, reg); } rtx mcore_function_value (const_tree valtype, const_tree func) { machine_mode mode; int unsigned_p; mode = TYPE_MODE (valtype); /* Since we promote return types, we must promote the mode here too. */ mode = promote_function_mode (valtype, mode, &unsigned_p, func, 1); return handle_structs_in_regs (mode, valtype, FIRST_RET_REG); } /* Define where to put the arguments to a function. Value is zero to push the argument on the stack, or a hard register in which to store the argument. MODE is the argument's machine mode. TYPE is the data type of the argument (as a tree). This is null for libcalls where that information may not be available. CUM is a variable of type CUMULATIVE_ARGS which gives info about the preceding args and about the function being called. NAMED is nonzero if this argument is a named parameter (otherwise it is an extra parameter matching an ellipsis). On MCore the first args are normally in registers and the rest are pushed. Any arg that starts within the first NPARM_REGS words is at least partially passed in a register unless its data type forbids. */ static rtx mcore_function_arg (cumulative_args_t cum, machine_mode mode, const_tree type, bool named) { int arg_reg; if (! named || mode == VOIDmode) return 0; if (targetm.calls.must_pass_in_stack (mode, type)) return 0; arg_reg = ROUND_REG (*get_cumulative_args (cum), mode); if (arg_reg < NPARM_REGS) return handle_structs_in_regs (mode, type, FIRST_PARM_REG + arg_reg); return 0; } static void mcore_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, const_tree type, bool named ATTRIBUTE_UNUSED) { CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); *cum = (ROUND_REG (*cum, mode) + (int)named * mcore_num_arg_regs (mode, type)); } static unsigned int mcore_function_arg_boundary (machine_mode mode, const_tree type ATTRIBUTE_UNUSED) { /* Doubles must be aligned to an 8 byte boundary. */ return (mode != BLKmode && GET_MODE_SIZE (mode) == 8 ? BIGGEST_ALIGNMENT : PARM_BOUNDARY); } /* Returns the number of bytes of argument registers required to hold *part* of a parameter of machine mode MODE and type TYPE (which may be NULL if the type is not known). If the argument fits entirely in the argument registers, or entirely on the stack, then 0 is returned. CUM is the number of argument registers already used by earlier parameters to the function. */ static int mcore_arg_partial_bytes (cumulative_args_t cum, machine_mode mode, tree type, bool named) { int reg = ROUND_REG (*get_cumulative_args (cum), mode); if (named == 0) return 0; if (targetm.calls.must_pass_in_stack (mode, type)) return 0; /* REG is not the *hardware* register number of the register that holds the argument, it is the *argument* register number. So for example, the first argument to a function goes in argument register 0, which translates (for the MCore) into hardware register 2. The second argument goes into argument register 1, which translates into hardware register 3, and so on. NPARM_REGS is the number of argument registers supported by the target, not the maximum hardware register number of the target. */ if (reg >= NPARM_REGS) return 0; /* If the argument fits entirely in registers, return 0. */ if (reg + mcore_num_arg_regs (mode, type) <= NPARM_REGS) return 0; /* The argument overflows the number of available argument registers. Compute how many argument registers have not yet been assigned to hold an argument. */ reg = NPARM_REGS - reg; /* Return partially in registers and partially on the stack. */ return reg * UNITS_PER_WORD; } /* Return nonzero if SYMBOL is marked as being dllexport'd. */ int mcore_dllexport_name_p (const char * symbol) { return symbol[0] == '@' && symbol[1] == 'e' && symbol[2] == '.'; } /* Return nonzero if SYMBOL is marked as being dllimport'd. */ int mcore_dllimport_name_p (const char * symbol) { return symbol[0] == '@' && symbol[1] == 'i' && symbol[2] == '.'; } /* Mark a DECL as being dllexport'd. */ static void mcore_mark_dllexport (tree decl) { const char * oldname; char * newname; rtx rtlname; tree idp; rtlname = XEXP (DECL_RTL (decl), 0); if (GET_CODE (rtlname) == MEM) rtlname = XEXP (rtlname, 0); gcc_assert (GET_CODE (rtlname) == SYMBOL_REF); oldname = XSTR (rtlname, 0); if (mcore_dllexport_name_p (oldname)) return; /* Already done. */ newname = XALLOCAVEC (char, strlen (oldname) + 4); sprintf (newname, "@e.%s", oldname); /* We pass newname through get_identifier to ensure it has a unique address. RTL processing can sometimes peek inside the symbol ref and compare the string's addresses to see if two symbols are identical. */ /* ??? At least I think that's why we do this. */ idp = get_identifier (newname); XEXP (DECL_RTL (decl), 0) = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp)); } /* Mark a DECL as being dllimport'd. */ static void mcore_mark_dllimport (tree decl) { const char * oldname; char * newname; tree idp; rtx rtlname; rtx newrtl; rtlname = XEXP (DECL_RTL (decl), 0); if (GET_CODE (rtlname) == MEM) rtlname = XEXP (rtlname, 0); gcc_assert (GET_CODE (rtlname) == SYMBOL_REF); oldname = XSTR (rtlname, 0); gcc_assert (!mcore_dllexport_name_p (oldname)); if (mcore_dllimport_name_p (oldname)) return; /* Already done. */ /* ??? One can well ask why we're making these checks here, and that would be a good question. */ /* Imported variables can't be initialized. */ if (TREE_CODE (decl) == VAR_DECL && !DECL_VIRTUAL_P (decl) && DECL_INITIAL (decl)) { error ("initialized variable %q+D is marked dllimport", decl); return; } /* `extern' needn't be specified with dllimport. Specify `extern' now and hope for the best. Sigh. */ if (TREE_CODE (decl) == VAR_DECL /* ??? Is this test for vtables needed? */ && !DECL_VIRTUAL_P (decl)) { DECL_EXTERNAL (decl) = 1; TREE_PUBLIC (decl) = 1; } newname = XALLOCAVEC (char, strlen (oldname) + 11); sprintf (newname, "@i.__imp_%s", oldname); /* We pass newname through get_identifier to ensure it has a unique address. RTL processing can sometimes peek inside the symbol ref and compare the string's addresses to see if two symbols are identical. */ /* ??? At least I think that's why we do this. */ idp = get_identifier (newname); newrtl = gen_rtx_MEM (Pmode, gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp))); XEXP (DECL_RTL (decl), 0) = newrtl; } static int mcore_dllexport_p (tree decl) { if ( TREE_CODE (decl) != VAR_DECL && TREE_CODE (decl) != FUNCTION_DECL) return 0; return lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl)) != 0; } static int mcore_dllimport_p (tree decl) { if ( TREE_CODE (decl) != VAR_DECL && TREE_CODE (decl) != FUNCTION_DECL) return 0; return lookup_attribute ("dllimport", DECL_ATTRIBUTES (decl)) != 0; } /* We must mark dll symbols specially. Definitions of dllexport'd objects install some info in the .drective (PE) or .exports (ELF) sections. */ static void mcore_encode_section_info (tree decl, rtx rtl ATTRIBUTE_UNUSED, int first ATTRIBUTE_UNUSED) { /* Mark the decl so we can tell from the rtl whether the object is dllexport'd or dllimport'd. */ if (mcore_dllexport_p (decl)) mcore_mark_dllexport (decl); else if (mcore_dllimport_p (decl)) mcore_mark_dllimport (decl); /* It might be that DECL has already been marked as dllimport, but a subsequent definition nullified that. The attribute is gone but DECL_RTL still has @i.__imp_foo. We need to remove that. */ else if ((TREE_CODE (decl) == FUNCTION_DECL || TREE_CODE (decl) == VAR_DECL) && DECL_RTL (decl) != NULL_RTX && GET_CODE (DECL_RTL (decl)) == MEM && GET_CODE (XEXP (DECL_RTL (decl), 0)) == MEM && GET_CODE (XEXP (XEXP (DECL_RTL (decl), 0), 0)) == SYMBOL_REF && mcore_dllimport_name_p (XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0))) { const char * oldname = XSTR (XEXP (XEXP (DECL_RTL (decl), 0), 0), 0); tree idp = get_identifier (oldname + 9); rtx newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp)); XEXP (DECL_RTL (decl), 0) = newrtl; /* We previously set TREE_PUBLIC and DECL_EXTERNAL. ??? We leave these alone for now. */ } } /* Undo the effects of the above. */ static const char * mcore_strip_name_encoding (const char * str) { return str + (str[0] == '@' ? 3 : 0); } /* MCore specific attribute support. dllexport - for exporting a function/variable that will live in a dll dllimport - for importing a function/variable from a dll naked - do not create a function prologue/epilogue. */ /* Handle a "naked" attribute; arguments as in struct attribute_spec.handler. */ static tree mcore_handle_naked_attribute (tree * node, tree name, tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED, bool * no_add_attrs) { if (TREE_CODE (*node) != FUNCTION_DECL) { warning (OPT_Wattributes, "%qE attribute only applies to functions", name); *no_add_attrs = true; } return NULL_TREE; } /* ??? It looks like this is PE specific? Oh well, this is what the old code did as well. */ static void mcore_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED) { int len; const char * name; char * string; const char * prefix; name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); /* Strip off any encoding in name. */ name = (* targetm.strip_name_encoding) (name); /* The object is put in, for example, section .text$foo. The linker will then ultimately place them in .text (everything from the $ on is stripped). */ if (TREE_CODE (decl) == FUNCTION_DECL) prefix = ".text$"; /* For compatibility with EPOC, we ignore the fact that the section might have relocs against it. */ else if (decl_readonly_section (decl, 0)) prefix = ".rdata$"; else prefix = ".data$"; len = strlen (name) + strlen (prefix); string = XALLOCAVEC (char, len + 1); sprintf (string, "%s%s", prefix, name); set_decl_section_name (decl, string); } int mcore_naked_function_p (void) { return lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) != NULL_TREE; } static bool mcore_warn_func_return (tree decl) { /* Naked functions are implemented entirely in assembly, including the return sequence, so suppress warnings about this. */ return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE; } #ifdef OBJECT_FORMAT_ELF static void mcore_asm_named_section (const char *name, unsigned int flags ATTRIBUTE_UNUSED, tree decl ATTRIBUTE_UNUSED) { fprintf (asm_out_file, "\t.section %s\n", name); } #endif /* OBJECT_FORMAT_ELF */ /* Worker function for TARGET_ASM_EXTERNAL_LIBCALL. */ static void mcore_external_libcall (rtx fun) { fprintf (asm_out_file, "\t.import\t"); assemble_name (asm_out_file, XSTR (fun, 0)); fprintf (asm_out_file, "\n"); } /* Worker function for TARGET_RETURN_IN_MEMORY. */ static bool mcore_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) { const HOST_WIDE_INT size = int_size_in_bytes (type); return (size == -1 || size > 2 * UNITS_PER_WORD); } /* Worker function for TARGET_ASM_TRAMPOLINE_TEMPLATE. Output assembler code for a block containing the constant parts of a trampoline, leaving space for the variable parts. On the MCore, the trampoline looks like: lrw r1, function lrw r13, area jmp r13 or r0, r0 .literals */ static void mcore_asm_trampoline_template (FILE *f) { fprintf (f, "\t.short 0x7102\n"); fprintf (f, "\t.short 0x7d02\n"); fprintf (f, "\t.short 0x00cd\n"); fprintf (f, "\t.short 0x1e00\n"); fprintf (f, "\t.long 0\n"); fprintf (f, "\t.long 0\n"); } /* Worker function for TARGET_TRAMPOLINE_INIT. */ static void mcore_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) { rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); rtx mem; emit_block_move (m_tramp, assemble_trampoline_template (), GEN_INT (2*UNITS_PER_WORD), BLOCK_OP_NORMAL); mem = adjust_address (m_tramp, SImode, 8); emit_move_insn (mem, chain_value); mem = adjust_address (m_tramp, SImode, 12); emit_move_insn (mem, fnaddr); } /* Implement TARGET_LEGITIMATE_CONSTANT_P On the MCore, allow anything but a double. */ static bool mcore_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { return GET_CODE (x) != CONST_DOUBLE; } /* Helper function for `mcore_legitimate_address_p'. */ static bool mcore_reg_ok_for_base_p (const_rtx reg, bool strict_p) { if (strict_p) return REGNO_OK_FOR_BASE_P (REGNO (reg)); else return (REGNO (reg) <= 16 || !HARD_REGISTER_P (reg)); } static bool mcore_base_register_rtx_p (const_rtx x, bool strict_p) { return REG_P(x) && mcore_reg_ok_for_base_p (x, strict_p); } /* A legitimate index for a QI is 0..15, for HI is 0..30, for SI is 0..60, and for DI is 0..56 because we use two SI loads, etc. */ static bool mcore_legitimate_index_p (machine_mode mode, const_rtx op) { if (CONST_INT_P (op)) { if (GET_MODE_SIZE (mode) >= 4 && (((unsigned HOST_WIDE_INT) INTVAL (op)) % 4) == 0 && ((unsigned HOST_WIDE_INT) INTVAL (op)) <= (unsigned HOST_WIDE_INT) 64 - GET_MODE_SIZE (mode)) return true; if (GET_MODE_SIZE (mode) == 2 && (((unsigned HOST_WIDE_INT) INTVAL (op)) % 2) == 0 && ((unsigned HOST_WIDE_INT) INTVAL (op)) <= 30) return true; if (GET_MODE_SIZE (mode) == 1 && ((unsigned HOST_WIDE_INT) INTVAL (op)) <= 15) return true; } return false; } /* Worker function for TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P. Allow REG REG + disp */ static bool mcore_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, addr_space_t as) { gcc_assert (ADDR_SPACE_GENERIC_P (as)); if (mcore_base_register_rtx_p (x, strict_p)) return true; else if (GET_CODE (x) == PLUS || GET_CODE (x) == LO_SUM) { rtx xop0 = XEXP (x, 0); rtx xop1 = XEXP (x, 1); if (mcore_base_register_rtx_p (xop0, strict_p) && mcore_legitimate_index_p (mode, xop1)) return true; if (mcore_base_register_rtx_p (xop1, strict_p) && mcore_legitimate_index_p (mode, xop0)) return true; } return false; } /* Implement TARGET_HARD_REGNO_MODE_OK. We may keep double values in even registers. */ static bool mcore_hard_regno_mode_ok (unsigned int regno, machine_mode mode) { if (TARGET_8ALIGN && GET_MODE_SIZE (mode) > UNITS_PER_WORD) return (regno & 1) == 0; return regno < 18; } /* Implement TARGET_MODES_TIEABLE_P. */ static bool mcore_modes_tieable_p (machine_mode mode1, machine_mode mode2) { return mode1 == mode2 || GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2); }