diff options
author | Chung-Ju Wu <jasonwucj@gmail.com> | 2013-10-31 17:08:16 +0000 |
---|---|---|
committer | Chung-Ju Wu <jasonwucj@gcc.gnu.org> | 2013-10-31 17:08:16 +0000 |
commit | 9304f876116aae3d297ab5bf7341749f09406a50 (patch) | |
tree | 5479731a485c268d194925363d10034093696843 /gcc/config | |
parent | 7214306b3e376bff94b20e468018fb356264b33a (diff) | |
download | gcc-9304f876116aae3d297ab5bf7341749f09406a50.tar.gz |
Add new nds32 port, including machine description, libgcc, and documentation.
[gcc/ChangeLog]
* config.gcc (nds32*-*-*): Add nds32 target.
* config/nds32/nds32.c: New file.
* config/nds32/nds32.h: New file.
* config/nds32/nds32.md: New file.
* config/nds32/constants.md: New file.
* config/nds32/constraints.md: New file.
* config/nds32/iterators.md: New file.
* config/nds32/nds32-doubleword.md: New file.
* config/nds32/nds32-intrinsic.md: New file.
* config/nds32/nds32_intrinsic.h: New file.
* config/nds32/nds32-modes.def: New file.
* config/nds32/nds32-multiple.md: New file.
* config/nds32/nds32.opt: New file.
* config/nds32/nds32-opts.h: New file.
* config/nds32/nds32-protos.h: New file.
* config/nds32/nds32-peephole2.md: New file.
* config/nds32/pipelines.md: New file.
* config/nds32/predicates.md: New file.
* config/nds32/t-mlibs: New file.
* common/config/nds32: New directory and files.
* doc/invoke.texi (NDS32 options): Document nds32 specific options.
* doc/md.texi (NDS32 family): Document nds32 specific constraints.
* doc/install.texi (Cross-Compiler-Specific Options): Document
--with-nds32-lib for nds32 target.
* doc/extend.texi (Function Attributes, Target Builtins): Document
nds32 specific attributes.
[libgcc/ChangeLog]
* config.host (nds32*-elf*): Add nds32 target.
* config/nds32 : New directory and files.
[contrib/ChangeLog]
* config-list.mk (nds32le-elf, nds32be-elf): Add nds32 target.
Co-Authored-By: Shiva Chen <shiva0217@gmail.com>
From-SVN: r204269
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/nds32/constants.md | 46 | ||||
-rw-r--r-- | gcc/config/nds32/constraints.md | 254 | ||||
-rw-r--r-- | gcc/config/nds32/iterators.md | 55 | ||||
-rw-r--r-- | gcc/config/nds32/nds32-doubleword.md | 251 | ||||
-rw-r--r-- | gcc/config/nds32/nds32-intrinsic.md | 97 | ||||
-rw-r--r-- | gcc/config/nds32/nds32-modes.def | 21 | ||||
-rw-r--r-- | gcc/config/nds32/nds32-multiple.md | 410 | ||||
-rw-r--r-- | gcc/config/nds32/nds32-opts.h | 35 | ||||
-rw-r--r-- | gcc/config/nds32/nds32-peephole2.md | 25 | ||||
-rw-r--r-- | gcc/config/nds32/nds32-protos.h | 128 | ||||
-rw-r--r-- | gcc/config/nds32/nds32.c | 5721 | ||||
-rw-r--r-- | gcc/config/nds32/nds32.h | 982 | ||||
-rw-r--r-- | gcc/config/nds32/nds32.md | 2221 | ||||
-rw-r--r-- | gcc/config/nds32/nds32.opt | 102 | ||||
-rw-r--r-- | gcc/config/nds32/nds32_intrinsic.h | 37 | ||||
-rw-r--r-- | gcc/config/nds32/pipelines.md | 29 | ||||
-rw-r--r-- | gcc/config/nds32/predicates.md | 92 | ||||
-rw-r--r-- | gcc/config/nds32/t-mlibs | 38 |
18 files changed, 10544 insertions, 0 deletions
diff --git a/gcc/config/nds32/constants.md b/gcc/config/nds32/constants.md new file mode 100644 index 00000000000..03a21dba1b2 --- /dev/null +++ b/gcc/config/nds32/constants.md @@ -0,0 +1,46 @@ +;; Constant definitions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; Register numbers. +(define_constants + [(R8_REGNUM 8) + (TA_REGNUM 15) + (FP_REGNUM 28) + (GP_REGNUM 29) + (LP_REGNUM 30) + (SP_REGNUM 31) + ]) + + +;; The unspec_volatile operation index. +(define_c_enum "unspec_volatile_element" [ + UNSPEC_VOLATILE_FUNC_RETURN + UNSPEC_VOLATILE_ISYNC + UNSPEC_VOLATILE_ISB + UNSPEC_VOLATILE_MFSR + UNSPEC_VOLATILE_MFUSR + UNSPEC_VOLATILE_MTSR + UNSPEC_VOLATILE_MTUSR + UNSPEC_VOLATILE_SETGIE_EN + UNSPEC_VOLATILE_SETGIE_DIS +]) + +;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/constraints.md b/gcc/config/nds32/constraints.md new file mode 100644 index 00000000000..b4ae6c7258c --- /dev/null +++ b/gcc/config/nds32/constraints.md @@ -0,0 +1,254 @@ +;; Constraint definitions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Check 16.8.7 Defining Machine-Specific Constraints for detail. + +;; NO constraints can be prefixed with: E F V X g i m n o p r s +;; Machine-dependent integer: I J K L M N O P +;; Machine-dependent floating: G H + + +(define_register_constraint "w" "(TARGET_ISA_V3 || TARGET_ISA_V3M) ? LOW_REGS : NO_REGS" + "LOW register class $r0 ~ $r7 constraint for V3/V3M ISA") + +(define_register_constraint "l" "LOW_REGS" + "LOW register class $r0 ~ $r7") + +(define_register_constraint "d" "MIDDLE_REGS" + "MIDDLE register class $r0 ~ $r11, $r16 ~ $r19") + +(define_register_constraint "h" "HIGH_REGS" + "HIGH register class $r12 ~ $r14, $r20 ~ $r31") + + +(define_register_constraint "t" "R15_TA_REG" + "Temporary Assist register $ta (i.e. 
$r15)") + +(define_register_constraint "k" "STACK_REG" + "Stack register $sp") + + +(define_constraint "Iu03" + "Unsigned immediate 3-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 3) && ival >= 0"))) + +(define_constraint "In03" + "Negative immediate 3-bit value in the range of -7 to 0" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -7, 0)"))) + +(define_constraint "Iu04" + "Unsigned immediate 4-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 4) && ival >= 0"))) + +(define_constraint "Is05" + "Signed immediate 5-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 4) && ival >= -(1 << 4)"))) + +(define_constraint "Iu05" + "Unsigned immediate 5-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 5) && ival >= 0"))) + +(define_constraint "In05" + "Negative immediate 5-bit value in the range of -31 to 0" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -31, 0)"))) + +;; Ip05 is special and dedicated for v3 movpi45 instruction. +;; movpi45 has imm5u field but the range is 16 ~ 47. 
+(define_constraint "Ip05" + "Unsigned immediate 5-bit value for movpi45 instruction with range 16-47" + (and (match_code "const_int") + (match_test "ival < ((1 << 5) + 16) + && ival >= (0 + 16) + && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) + +(define_constraint "Iu06" + "Unsigned immediate 6-bit value constraint for addri36.sp instruction" + (and (match_code "const_int") + (match_test "ival < (1 << 6) + && ival >= 0 + && (ival % 4 == 0) + && (TARGET_ISA_V3 || TARGET_ISA_V3M)"))) + +(define_constraint "Iu08" + "Unsigned immediate 8-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 8) && ival >= 0"))) + +(define_constraint "Iu09" + "Unsigned immediate 9-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 9) && ival >= 0"))) + + +(define_constraint "Is10" + "Signed immediate 10-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 9) && ival >= -(1 << 9)"))) + +(define_constraint "Is11" + "Signed immediate 11-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 10) && ival >= -(1 << 10)"))) + + +(define_constraint "Is15" + "Signed immediate 15-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 14) && ival >= -(1 << 14)"))) + +(define_constraint "Iu15" + "Unsigned immediate 15-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 15) && ival >= 0"))) + + +;; Ic15 is special and dedicated for performance extension +;; 'bclr' (single-bit-clear) instruction. +;; It is used in andsi3 pattern and recognized for the immediate +;; which is NOT in the range of imm15u but OK for 'bclr' instruction. +;; (If the immediate value IS in the range of imm15u, +;; we can directly use 'andi' instruction.) 
+(define_constraint "Ic15" + "A constant which is not in the range of imm15u but ok for bclr instruction" + (and (match_code "const_int") + (match_test "(ival & 0xffff8000) && nds32_can_use_bclr_p (ival)"))) + +;; Ie15 is special and dedicated for performance extension +;; 'bset' (single-bit-set) instruction. +;; It is used in iorsi3 pattern and recognized for the immediate +;; which is NOT in the range of imm15u but OK for 'bset' instruction. +;; (If the immediate value IS in the range of imm15u, +;; we can directly use 'ori' instruction.) +(define_constraint "Ie15" + "A constant which is not in the range of imm15u but ok for bset instruction" + (and (match_code "const_int") + (match_test "(ival & 0xffff8000) && nds32_can_use_bset_p (ival)"))) + +;; It15 is special and dedicated for performance extension +;; 'btgl' (single-bit-toggle) instruction. +;; It is used in xorsi3 pattern and recognized for the immediate +;; which is NOT in the range of imm15u but OK for 'btgl' instruction. +;; (If the immediate value IS in the range of imm15u, +;; we can directly use 'xori' instruction.) +(define_constraint "It15" + "A constant which is not in the range of imm15u but ok for btgl instruction" + (and (match_code "const_int") + (match_test "(ival & 0xffff8000) && nds32_can_use_btgl_p (ival)"))) + + +;; Ii15 is special and dedicated for v3 isa +;; 'bitci' (bit-clear-immediate) instruction. +;; It is used in andsi3 pattern and recognized for the immediate whose +;; (~ival) value is in the range of imm15u and OK for 'bitci' instruction. +;; For example, 'andi $r0,$r0,0xfffffffc' can be presented +; with 'bitci $r0,$r0,3'. 
+(define_constraint "Ii15" + "A constant whose compliment value is in the range of imm15u + and ok for bitci instruction" + (and (match_code "const_int") + (match_test "nds32_can_use_bitci_p (ival)"))) + + +(define_constraint "Is16" + "Signed immediate 16-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 15) && ival >= -(1 << 15)"))) + +(define_constraint "Is17" + "Signed immediate 17-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 16) && ival >= -(1 << 16)"))) + + +(define_constraint "Is19" + "Signed immediate 19-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 18) && ival >= -(1 << 18)"))) + + +(define_constraint "Is20" + "Signed immediate 20-bit value" + (and (match_code "const_int") + (match_test "ival < (1 << 19) && ival >= -(1 << 19)"))) + + +(define_constraint "Ihig" + "The immediate value that can be simply set high 20-bit" + (and (match_code "const_int") + (match_test "(ival != 0) && ((ival & 0xfff) == 0)"))) + +(define_constraint "Izeb" + "The immediate value 0xff" + (and (match_code "const_int") + (match_test "(ival == 0xff)"))) + +(define_constraint "Izeh" + "The immediate value 0xffff" + (and (match_code "const_int") + (match_test "(ival == 0xffff)"))) + +(define_constraint "Ixls" + "The immediate value 0x01" + (and (match_code "const_int") + (match_test "TARGET_PERF_EXT && (ival == 0x1)"))) + +(define_constraint "Ix11" + "The immediate value 0x7ff" + (and (match_code "const_int") + (match_test "TARGET_PERF_EXT && (ival == 0x7ff)"))) + +(define_constraint "Ibms" + "The immediate value with power of 2" + (and (match_code "const_int") + (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M) + && (IN_RANGE (exact_log2 (ival), 0, 7))"))) + +(define_constraint "Ifex" + "The immediate value with power of 2 minus 1" + (and (match_code "const_int") + (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M) + && (IN_RANGE (exact_log2 (ival + 1), 1, 8))"))) + + +(define_memory_constraint "U33" + "Memory constraint 
for 333 format" + (and (match_code "mem") + (match_test "nds32_mem_format (op) == ADDRESS_LO_REG_IMM3U"))) + +(define_memory_constraint "U45" + "Memory constraint for 45 format" + (and (match_code "mem") + (match_test "(nds32_mem_format (op) == ADDRESS_REG) + && (GET_MODE (op) == SImode)"))) + +(define_memory_constraint "U37" + "Memory constraint for 37 format" + (and (match_code "mem") + (match_test "(nds32_mem_format (op) == ADDRESS_SP_IMM7U + || nds32_mem_format (op) == ADDRESS_FP_IMM7U) + && (GET_MODE (op) == SImode)"))) + +;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/iterators.md b/gcc/config/nds32/iterators.md new file mode 100644 index 00000000000..6ec519618a9 --- /dev/null +++ b/gcc/config/nds32/iterators.md @@ -0,0 +1,55 @@ +;; Code and mode iterator and attribute definitions +;; of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;;---------------------------------------------------------------------------- +;; Mode iterators. +;;---------------------------------------------------------------------------- + +;; A list of integer modes that are up to one word long. 
+(define_mode_iterator QIHISI [QI HI SI]) + +;; A list of integer modes that are up to one half-word long. +(define_mode_iterator QIHI [QI HI]) + +;; A list of the modes that are up to double-word long. +(define_mode_iterator DIDF [DI DF]) + + +;;---------------------------------------------------------------------------- +;; Mode attributes. +;;---------------------------------------------------------------------------- + +(define_mode_attr size [(QI "b") (HI "h") (SI "w")]) + +(define_mode_attr byte [(QI "1") (HI "2") (SI "4")]) + + +;;---------------------------------------------------------------------------- +;; Code iterators. +;;---------------------------------------------------------------------------- + + +;;---------------------------------------------------------------------------- +;; Code attributes. +;;---------------------------------------------------------------------------- + + +;;---------------------------------------------------------------------------- diff --git a/gcc/config/nds32/nds32-doubleword.md b/gcc/config/nds32/nds32-doubleword.md new file mode 100644 index 00000000000..4bfede4236a --- /dev/null +++ b/gcc/config/nds32/nds32-doubleword.md @@ -0,0 +1,251 @@ +;; DImode/DFmode patterns description of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; ------------------------------------------------------------- +;; Move DImode/DFmode instructions. +;; ------------------------------------------------------------- + + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" +{ + /* Need to force register if mem <- !reg. */ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); +}) + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" +{ + /* Need to force register if mem <- !reg. */ + if (MEM_P (operands[0]) && !REG_P (operands[1])) + operands[1] = force_reg (DFmode, operands[1]); +}) + + +(define_insn "move_<mode>" + [(set (match_operand:DIDF 0 "nonimmediate_operand" "=r, r, r, m") + (match_operand:DIDF 1 "general_operand" " r, i, m, r"))] + "" +{ + rtx addr; + rtx otherops[5]; + + switch (which_alternative) + { + case 0: + return "movd44\t%0, %1"; + + case 1: + /* reg <- const_int, we ask gcc to split instruction. */ + return "#"; + + case 2: + /* Refer to nds32_legitimate_address_p() in nds32.c, + we only allow "reg", "symbol_ref", "const", and "reg + const_int" + as address rtx for DImode/DFmode memory access. 
*/ + addr = XEXP (operands[1], 0); + + otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0])); + otherops[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); + otherops[2] = addr; + + if (REG_P (addr)) + { + /* (reg) <- (mem (reg)) */ + output_asm_insn ("lmw.bi\t%0, [%2], %1, 0", otherops); + } + else if (GET_CODE (addr) == PLUS) + { + /* (reg) <- (mem (plus (reg) (const_int))) */ + rtx op0 = XEXP (addr, 0); + rtx op1 = XEXP (addr, 1); + + if (REG_P (op0)) + { + otherops[2] = op0; + otherops[3] = op1; + otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode); + } + else + { + otherops[2] = op1; + otherops[3] = op0; + otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode); + } + + /* To avoid base overwrite when REGNO(%0) == REGNO(%2). */ + if (REGNO (otherops[0]) != REGNO (otherops[2])) + { + output_asm_insn ("lwi\t%0, [%2 + (%3)]", otherops); + output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops); + } + else + { + output_asm_insn ("lwi\t%1, [%2 + (%4)]", otherops); + output_asm_insn ("lwi\t%0,[ %2 + (%3)]", otherops); + } + } + else + { + /* (reg) <- (mem (symbol_ref ...)) + (reg) <- (mem (const ...)) */ + output_asm_insn ("lwi.gp\t%0, [ + %2]", otherops); + output_asm_insn ("lwi.gp\t%1, [ + %2 + 4]", otherops); + } + + /* We have already used output_asm_insn() by ourself, + so return an empty string. */ + return ""; + + case 3: + /* Refer to nds32_legitimate_address_p() in nds32.c, + we only allow "reg", "symbol_ref", "const", and "reg + const_int" + as address rtx for DImode/DFmode memory access. 
*/ + addr = XEXP (operands[0], 0); + + otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1])); + otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); + otherops[2] = addr; + + if (REG_P (addr)) + { + /* (mem (reg)) <- (reg) */ + output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops); + } + else if (GET_CODE (addr) == PLUS) + { + /* (mem (plus (reg) (const_int))) <- (reg) */ + rtx op0 = XEXP (addr, 0); + rtx op1 = XEXP (addr, 1); + + if (REG_P (op0)) + { + otherops[2] = op0; + otherops[3] = op1; + otherops[4] = gen_int_mode (INTVAL (op1) + 4, SImode); + } + else + { + otherops[2] = op1; + otherops[3] = op0; + otherops[4] = gen_int_mode (INTVAL (op0) + 4, SImode); + } + + /* To avoid base overwrite when REGNO(%0) == REGNO(%2). */ + if (REGNO (otherops[0]) != REGNO (otherops[2])) + { + output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops); + output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops); + } + else + { + output_asm_insn ("swi\t%1, [%2 + (%4)]", otherops); + output_asm_insn ("swi\t%0, [%2 + (%3)]", otherops); + } + } + else + { + /* (mem (symbol_ref ...)) <- (reg) + (mem (const ...)) <- (reg) */ + output_asm_insn ("swi.gp\t%0, [ + %2]", otherops); + output_asm_insn ("swi.gp\t%1, [ + %2 + 4]", otherops); + } + + /* We have already used output_asm_insn() by ourself, + so return an empty string. */ + return ""; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "move,move,move,move") + (set_attr "length" " 4, 16, 8, 8")]) + +(define_split + [(set (match_operand:DIDF 0 "register_operand" "") + (match_operand:DIDF 1 "const_double_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + /* Construct lowpart rtx. */ + operands[2] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_lowpart (SImode, operands[1]); + + /* Construct highpart rtx. */ + /* Note that operands[1] can be VOIDmode constant, + so we need to use gen_highpart_mode(). + Refer to gcc/emit-rtl.c for more information. 
*/ + operands[4] = gen_highpart (SImode, operands[0]); + operands[5] = gen_highpart_mode (SImode, + GET_MODE (operands[0]), operands[1]); + + /* Actually we would like to create move behavior by ourself. + So that movsi expander could have chance to split large constant. */ + emit_move_insn (operands[2], operands[3]); + emit_move_insn (operands[4], operands[5]); + DONE; +}) + +;; There is 'movd44' instruction for DImode/DFmode movement under V3/V3M ISA. +;; We only need to split it under V2 ISA or none-16-bit code generation. +(define_split + [(set (match_operand:DIDF 0 "register_operand" "") + (match_operand:DIDF 1 "register_operand" ""))] + "reload_completed + && (TARGET_ISA_V2 || !TARGET_16_BIT)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + operands[2] = gen_highpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + + /* Handle a partial overlap. */ + if (rtx_equal_p (operands[0], operands[3])) + { + rtx tmp0 = operands[0]; + rtx tmp1 = operands[1]; + + operands[0] = operands[2]; + operands[1] = operands[3]; + operands[2] = tmp0; + operands[3] = tmp1; + } +}) + +;; ------------------------------------------------------------- +;; Boolean DImode instructions. +;; ------------------------------------------------------------- + +;; Nowadays, the generic code is supposed to split the DImode +;; boolean operations and have good code generation. +;; Unless we find out some bad cases, there is no need to +;; define DImode boolean operations by ourself. 
+ +;; ------------------------------------------------------------- diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md new file mode 100644 index 00000000000..4ee2d851023 --- /dev/null +++ b/gcc/config/nds32/nds32-intrinsic.md @@ -0,0 +1,97 @@ +;; Intrinsic patterns description of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; ------------------------------------------------------------------------ + +;; Register Transfer. 
+ +(define_insn "unspec_volatile_mfsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_MFSR))] + "" + "mfsr\t%0, %V1" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_volatile_mfusr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_MFUSR))] + "" + "mfusr\t%0, %V1" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_volatile_mtsr" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_MTSR)] + "" + "mtsr\t%0, %V1" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +(define_insn "unspec_volatile_mtusr" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_MTUSR)] + "" + "mtusr\t%0, %V1" + [(set_attr "type" "misc") + (set_attr "length" "4")] +) + +;; ------------------------------------------------------------------------ + +;; Interrupt Instructions. 
+ +(define_insn "unspec_volatile_setgie_en" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETGIE_EN)] + "" + "setgie.e" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_volatile_setgie_dis" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETGIE_DIS)] + "" + "setgie.d" + [(set_attr "type" "misc")] +) + +;; ------------------------------------------------------------------------ + +;; Cache Synchronization Instructions + +(define_insn "unspec_volatile_isync" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_ISYNC)] + "" + "isync\t%0" + [(set_attr "type" "misc")] +) + +(define_insn "unspec_volatile_isb" + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_ISB)] + "" + "isb" + [(set_attr "type" "misc")] +) + +;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/nds32-modes.def b/gcc/config/nds32/nds32-modes.def new file mode 100644 index 00000000000..9d32ada0ce8 --- /dev/null +++ b/gcc/config/nds32/nds32-modes.def @@ -0,0 +1,21 @@ +/* Extra machine modes of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2013 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* So far, there is no need to define any modes for nds32 target. 
*/ diff --git a/gcc/config/nds32/nds32-multiple.md b/gcc/config/nds32/nds32-multiple.md new file mode 100644 index 00000000000..da89a490d3a --- /dev/null +++ b/gcc/config/nds32/nds32-multiple.md @@ -0,0 +1,410 @@ +;; Load/Store Multiple patterns description of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; Load Multiple Insns. +;; +;; operands[0] is the first of the consecutive registers. +;; operands[1] is the first memory location. +;; operands[2] is the number of consecutive registers. + +(define_expand "load_multiple" + [(match_par_dup 3 [(set (match_operand:SI 0 "" "") + (match_operand:SI 1 "" "")) + (use (match_operand:SI 2 "" ""))])] + "" +{ + int maximum; + + /* Because the reduced register set has few registers + (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31' cannot + be used for register allocation), + using 8 registers for load_multiple may easily consume all of them. + It makes register allocation/spilling hard to work. + So we only allow maximum=4 registers for load_multiple + under reduced-set registers. 
*/ + if (TARGET_REDUCED_REGS) + maximum = 4; + else + maximum = 8; + + /* Here are the conditions that must be all passed, + otherwise we have to FAIL this rtx generation: + 1. The number of consecutive registers must be integer. + 2. Maximum 4 or 8 registers for lmw.bi instruction + (based on this nds32-multiple.md design). + 3. Minimum 2 registers for lmw.bi instruction + (based on this nds32-multiple.md design). + 4. operands[0] must be register for sure. + 5. operands[1] must be memory for sure. + 6. Do not cross $r15 register because it is not allocatable. */ + if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) > maximum + || INTVAL (operands[2]) < 2 + || GET_CODE (operands[0]) != REG + || GET_CODE (operands[1]) != MEM + || REGNO (operands[0]) + INTVAL (operands[2]) > TA_REGNUM) + FAIL; + + /* For (mem addr), we force_reg on addr here, + so that nds32_expand_load_multiple can easily use it. */ + operands[3] = nds32_expand_load_multiple (REGNO (operands[0]), + INTVAL (operands[2]), + force_reg (SImode, + XEXP (operands[1], 0)), + operands[1]); +}) + +;; Ordinary Load Multiple. 
+ +(define_insn "*lmwsi8" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 20)))) + (set (match_operand:SI 8 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 24)))) + (set (match_operand:SI 9 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 28))))])] + "(XVECLEN (operands[0], 0) == 8)" + "lmw.bi\t%2, [%1], %9, 0x0" + [(set_attr "type" "load") + (set_attr "length" "4")] +) + +(define_insn "*lmwsi7" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 20)))) + (set (match_operand:SI 8 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 24))))])] + "(XVECLEN (operands[0], 0) == 7)" + "lmw.bi\t%2, [%1], %8, 0x0" + [(set_attr "type" "load") + (set_attr "length" "4")] +) + +(define_insn "*lmwsi6" + [(match_parallel 0 
"nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16)))) + (set (match_operand:SI 7 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 20))))])] + "(XVECLEN (operands[0], 0) == 6)" + "lmw.bi\t%2, [%1], %7, 0x0" + [(set_attr "type" "load") + (set_attr "length" "4")] +) + +(define_insn "*lmwsi5" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12)))) + (set (match_operand:SI 6 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 16))))])] + "(XVECLEN (operands[0], 0) == 5)" + "lmw.bi\t%2, [%1], %6, 0x0" + [(set_attr "type" "load") + (set_attr "length" "4")] +) + +(define_insn "*lmwsi4" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8)))) + (set (match_operand:SI 5 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 12))))])] + "(XVECLEN (operands[0], 
0) == 4)" + "lmw.bi\t%2, [%1], %5, 0x0" + [(set_attr "type" "load") + (set_attr "length" "4")] +) + +(define_insn "*lmwsi3" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4)))) + (set (match_operand:SI 4 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 8))))])] + "(XVECLEN (operands[0], 0) == 3)" + "lmw.bi\t%2, [%1], %4, 0x0" + [(set_attr "type" "load") + (set_attr "length" "4")] +) + +(define_insn "*lmwsi2" + [(match_parallel 0 "nds32_load_multiple_operation" + [(set (match_operand:SI 2 "register_operand" "") + (mem:SI (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (plus:SI (match_dup 1) (const_int 4))))])] + "(XVECLEN (operands[0], 0) == 2)" + "lmw.bi\t%2, [%1], %3, 0x0" + [(set_attr "type" "load") + (set_attr "length" "4")] +) + + +;; Store Multiple Insns. +;; +;; operands[0] is the first memory location. +;; operands[1] is the first of the consecutive registers. +;; operands[2] is the number of consecutive registers. + +(define_expand "store_multiple" + [(match_par_dup 3 [(set (match_operand:SI 0 "" "") + (match_operand:SI 1 "" "")) + (use (match_operand:SI 2 "" ""))])] + "" +{ + int maximum; + + /* Because the reduced register set has few registers + (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31' cannot + be used for register allocation), + using 8 registers for store_multiple may easily consume all of them. + It makes register allocation/spilling hard to work. + So we only allow maximum=4 registers for store_multiple + under reduced-set registers. */ + if (TARGET_REDUCED_REGS) + maximum = 4; + else + maximum = 8; + + /* Here are the conditions that must be all passed, + otherwise we have to FAIL this rtx generation: + 1. The number of consecutive registers must be integer. 
+ 2. Maximum 4 or 8 registers for smw.bi instruction + (based on this nds32-multiple.md design). + 3. Minimum 2 registers for smw.bi instruction + (based on this nds32-multiple.md design). + 4. operands[0] must be memory for sure. + 5. operands[1] must be register for sure. + 6. Do not cross $r15 register because it is not allocatable. */ + if (GET_CODE (operands[2]) != CONST_INT + || INTVAL (operands[2]) > maximum + || INTVAL (operands[2]) < 2 + || GET_CODE (operands[0]) != MEM + || GET_CODE (operands[1]) != REG + || REGNO (operands[1]) + INTVAL (operands[2]) > TA_REGNUM) + FAIL; + + /* For (mem addr), we force_reg on addr here, + so that nds32_expand_store_multiple can easily use it. */ + operands[3] = nds32_expand_store_multiple (REGNO (operands[1]), + INTVAL (operands[2]), + force_reg (SImode, + XEXP (operands[0], 0)), + operands[0]); +}) + +;; Ordinary Store Multiple. + +(define_insn "*stmsi8" + [(match_parallel 0 "nds32_store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) + (match_operand:SI 7 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) + (match_operand:SI 8 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 28))) + (match_operand:SI 9 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 8)" + "smw.bi\t%2, [%1], %9, 0x0" + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_insn "*stmsi7" + [(match_parallel 0 "nds32_store_multiple_operation" + [(set 
(mem:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) + (match_operand:SI 7 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 24))) + (match_operand:SI 8 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 7)" + "smw.bi\t%2, [%1], %8, 0x0" + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_insn "*stmsi6" + [(match_parallel 0 "nds32_store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) + (match_operand:SI 6 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 20))) + (match_operand:SI 7 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 6)" + "smw.bi\t%2, [%1], %7, 0x0" + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_insn "*stmsi5" + [(match_parallel 0 "nds32_store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + 
(match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 5 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 16))) + (match_operand:SI 6 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 5)" + "smw.bi\t%2, [%1], %6, 0x0" + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_insn "*stmsi4" + [(match_parallel 0 "nds32_store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 4 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 12))) + (match_operand:SI 5 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 4)" + "smw.bi\t%2, [%1], %5, 0x0" + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_insn "*stmsi3" + [(match_parallel 0 "nds32_store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 3 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 8))) + (match_operand:SI 4 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 3)" + "smw.bi\t%2, [%1], %4, 0x0" + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +(define_insn "*stmsi2" + [(match_parallel 0 "nds32_store_multiple_operation" + [(set (mem:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "")) + (set (mem:SI (plus:SI (match_dup 1) (const_int 4))) + (match_operand:SI 3 "register_operand" ""))])] + "(XVECLEN (operands[0], 0) == 2)" + "smw.bi\t%2, [%1], %3, 0x0" + [(set_attr "type" "store") + (set_attr "length" "4")] +) + +;; Move a block of memory if it is word aligned and MORE than 2 words long. 
+;; We could let this apply for blocks of less than this, but it clobbers so +;; many registers that there is then probably a better way. +;; +;; operands[0] is the destination block of memory. +;; operands[1] is the source block of memory. +;; operands[2] is the number of bytes to move. +;; operands[3] is the known shared alignment. + +(define_expand "movmemqi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" +{ + if (nds32_expand_movmemqi (operands[0], + operands[1], + operands[2], + operands[3])) + DONE; + + FAIL; +}) + +;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/nds32-opts.h b/gcc/config/nds32/nds32-opts.h new file mode 100644 index 00000000000..b38672f0e2f --- /dev/null +++ b/gcc/config/nds32/nds32-opts.h @@ -0,0 +1,35 @@ +/* Definitions for option handling of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2013 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef NDS32_OPTS_H +#define NDS32_OPTS_H + +#define NDS32_DEFAULT_CACHE_BLOCK_SIZE 16 +#define NDS32_DEFAULT_ISR_VECTOR_SIZE (TARGET_ISA_V3 ? 4 : 16) + +/* The various ANDES ISA. 
*/ +enum nds32_arch_type +{ + ARCH_V2, + ARCH_V3, + ARCH_V3M +}; + +#endif diff --git a/gcc/config/nds32/nds32-peephole2.md b/gcc/config/nds32/nds32-peephole2.md new file mode 100644 index 00000000000..dbe2d6c8a62 --- /dev/null +++ b/gcc/config/nds32/nds32-peephole2.md @@ -0,0 +1,25 @@ +;; define_peephole2 optimization patterns of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; Use define_peephole and define_peephole2 to handle possible +;; target-specific optimization in this file. + +;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h new file mode 100644 index 00000000000..2bc405db0f4 --- /dev/null +++ b/gcc/config/nds32/nds32-protos.h @@ -0,0 +1,128 @@ +/* Prototypes for exported functions of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2013 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + + +/* ------------------------------------------------------------------------ */ + +/* Defining Data Structures for Per-function Information. */ + +extern void nds32_init_expanders (void); + + +/* Register Usage. */ + +/* -- How Values Fit in Registers. */ + +extern int nds32_hard_regno_nregs (int, enum machine_mode); +extern int nds32_hard_regno_mode_ok (int, enum machine_mode); + + +/* Register Classes. */ + +extern enum reg_class nds32_regno_reg_class (int); + + +/* Stack Layout and Calling Conventions. */ + +/* -- Basic Stack Layout. */ + +extern rtx nds32_return_addr_rtx (int, rtx); + +/* -- Eliminating Frame Pointer and Arg Pointer. */ + +extern HOST_WIDE_INT nds32_initial_elimination_offset (unsigned int, + unsigned int); + +/* -- Passing Arguments in Registers. */ + +extern void nds32_init_cumulative_args (CUMULATIVE_ARGS *, + tree, rtx, tree, int); + +/* -- Function Entry and Exit. */ + +extern void nds32_expand_prologue (void); +extern void nds32_expand_epilogue (void); +extern void nds32_expand_prologue_v3push (void); +extern void nds32_expand_epilogue_v3pop (void); + +/* ------------------------------------------------------------------------ */ + +/* Auxiliary functions for auxiliary macros in nds32.h. */ + +extern bool nds32_ls_333_p (rtx, rtx, rtx, enum machine_mode); + +/* Auxiliary functions for expanding rtl used in nds32-multiple.md. 
*/ + +extern rtx nds32_expand_load_multiple (int, int, rtx, rtx); +extern rtx nds32_expand_store_multiple (int, int, rtx, rtx); +extern int nds32_expand_movmemqi (rtx, rtx, rtx, rtx); + +/* Auxiliary functions for multiple load/store predicate checking. */ + +extern bool nds32_valid_multiple_load_store (rtx, bool); + +/* Auxiliary functions for stack operation predicate checking. */ + +extern bool nds32_valid_stack_push_pop (rtx, bool); + +/* Auxiliary functions for bit operation detection. */ + +extern int nds32_can_use_bclr_p (int); +extern int nds32_can_use_bset_p (int); +extern int nds32_can_use_btgl_p (int); + +extern int nds32_can_use_bitci_p (int); + +/* Auxiliary function for 'Computing the Length of an Insn'. */ + +extern int nds32_adjust_insn_length (rtx, int); + +/* Auxiliary functions for FP_AS_GP detection. */ + +extern bool nds32_symbol_load_store_p (rtx); +extern int nds32_fp_as_gp_check_available (void); + +/* Auxiliary functions for jump table generation. */ + +extern const char *nds32_output_casesi_pc_relative (rtx *); +extern const char *nds32_output_casesi (rtx *); + +/* Auxiliary functions to identify 16 bit addressing mode. */ + +extern enum nds32_16bit_address_type nds32_mem_format (rtx); + +/* Auxiliary functions to output assembly code. */ + +extern const char *nds32_output_16bit_store (rtx *, int); +extern const char *nds32_output_16bit_load (rtx *, int); +extern const char *nds32_output_32bit_store (rtx *, int); +extern const char *nds32_output_32bit_load (rtx *, int); +extern const char *nds32_output_32bit_load_s (rtx *, int); + +/* Auxiliary functions to output stack push/pop instruction. */ + +extern const char *nds32_output_stack_push (void); +extern const char *nds32_output_stack_pop (void); + +/* Auxiliary functions to decide output alignment or not. 
*/ + +extern int nds32_target_alignment (rtx); + +/* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c new file mode 100644 index 00000000000..2700a872360 --- /dev/null +++ b/gcc/config/nds32/nds32.c @@ -0,0 +1,5721 @@ +/* Subroutines used for code generation of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2013 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function(). 
*/ +#include "ggc.h" + +/* ------------------------------------------------------------------------ */ + +/* This file is divided into five parts: + + PART 1: Auxiliary static variable definitions and + target hook static variable definitions. + + PART 2: Auxiliary static function definitions. + + PART 3: Implement target hook stuff definitions. + + PART 4: Implement extern function definitions, + the prototype is in nds32-protos.h. + + PART 5: Initialize target hook structure and definitions. */ + +/* ------------------------------------------------------------------------ */ + +/* PART 1: Auxiliary static variable definitions and + target hook static variable definitions. */ + +/* Refer to nds32.h, there are maximum 73 isr vectors in nds32 architecture. + 0 for reset handler with __attribute__((reset())), + 1-8 for exception handler with __attribute__((exception(1,...,8))), + and 9-72 for interrupt handler with __attribute__((interrupt(0,...,63))). + We use an array to record essential information for each vector. */ +static struct nds32_isr_info nds32_isr_vectors[NDS32_N_ISR_VECTORS]; + +/* Define intrinsic register names. + Please refer to nds32_intrinsic.h file, the index is corresponding to + 'enum nds32_intrinsic_registers' data type values. + NOTE that the base value starting from 1024. */ +static const char * const nds32_intrinsic_register_names[] = +{ + "$PSW", "$IPSW", "$ITYPE", "$IPC" +}; + +/* Defining target-specific uses of __attribute__. */ +static const struct attribute_spec nds32_attribute_table[] = +{ + /* Syntax: { name, min_len, max_len, decl_required, type_required, + function_type_required, handler, affects_type_identity } */ + + /* The interrupt vid: [0-63]+ (actual vector number starts from 9 to 72). */ + { "interrupt", 1, 64, false, false, false, NULL, false }, + /* The exception vid: [1-8]+ (actual vector number starts from 1 to 8). */ + { "exception", 1, 8, false, false, false, NULL, false }, + /* Argument is user's interrupt numbers. 
The vector number is always 0. */ + { "reset", 1, 1, false, false, false, NULL, false }, + + /* The attributes describing isr nested type. */ + { "nested", 0, 0, false, false, false, NULL, false }, + { "not_nested", 0, 0, false, false, false, NULL, false }, + { "nested_ready", 0, 0, false, false, false, NULL, false }, + + /* The attributes describing isr register save scheme. */ + { "save_all", 0, 0, false, false, false, NULL, false }, + { "partial_save", 0, 0, false, false, false, NULL, false }, + + /* The attributes used by reset attribute. */ + { "nmi", 1, 1, false, false, false, NULL, false }, + { "warm", 1, 1, false, false, false, NULL, false }, + + /* The attribute telling no prologue/epilogue. */ + { "naked", 0, 0, false, false, false, NULL, false }, + + /* The last attribute spec is set to be NULL. */ + { NULL, 0, 0, false, false, false, NULL, false } +}; + + +/* ------------------------------------------------------------------------ */ + +/* PART 2: Auxiliary static function definitions. */ + +/* Function to save and restore machine-specific function data. */ +static struct machine_function * +nds32_init_machine_status (void) +{ + struct machine_function *machine; + machine = ggc_alloc_cleared_machine_function (); + + /* Initially assume this function needs prologue/epilogue. */ + machine->naked_p = 0; + + /* Initially assume this function does NOT use fp_as_gp optimization. */ + machine->fp_as_gp_p = 0; + + return machine; +} + +/* Function to compute stack frame size and + store into cfun->machine structure. */ +static void +nds32_compute_stack_frame (void) +{ + int r; + int block_size; + + /* Because nds32_compute_stack_frame() will be called from different place, + everytime we enter this function, we have to assume this function + needs prologue/epilogue. */ + cfun->machine->naked_p = 0; + + /* Get variadic arguments size to prepare pretend arguments and + push them into stack at prologue. + Currently, we do not push variadic arguments by ourself. 
+ We have GCC handle all the works. + The caller will push all corresponding nameless arguments into stack, + and the callee is able to retrieve them without problems. + These variables are still preserved in case one day + we would like caller passing arguments with registers. */ + cfun->machine->va_args_size = 0; + cfun->machine->va_args_first_regno = SP_REGNUM; + cfun->machine->va_args_last_regno = SP_REGNUM; + + /* Get local variables, incoming variables, and temporary variables size. + Note that we need to make sure it is 8-byte alignment because + there may be no padding bytes if we are using LRA. */ + cfun->machine->local_size = NDS32_ROUND_UP_DOUBLE_WORD (get_frame_size ()); + + /* Get outgoing arguments size. */ + cfun->machine->out_args_size = crtl->outgoing_args_size; + + /* If $fp value is required to be saved on stack, it needs 4 bytes space. + Check whether $fp is ever live. */ + cfun->machine->fp_size = (df_regs_ever_live_p (FP_REGNUM)) ? 4 : 0; + + /* If $gp value is required to be saved on stack, it needs 4 bytes space. + Check whether we are using PIC code genration. */ + cfun->machine->gp_size = (flag_pic) ? 4 : 0; + + /* If $lp value is required to be saved on stack, it needs 4 bytes space. + Check whether $lp is ever live. */ + cfun->machine->lp_size = (df_regs_ever_live_p (LP_REGNUM)) ? 4 : 0; + + /* Initially there is no padding bytes. */ + cfun->machine->callee_saved_area_padding_bytes = 0; + + /* Calculate the bytes of saving callee-saved registers on stack. */ + cfun->machine->callee_saved_regs_size = 0; + cfun->machine->callee_saved_regs_first_regno = SP_REGNUM; + cfun->machine->callee_saved_regs_last_regno = SP_REGNUM; + /* Currently, there is no need to check $r28~$r31 + because we will save them in another way. */ + for (r = 0; r < 28; r++) + { + if (NDS32_REQUIRED_CALLEE_SAVED_P (r)) + { + /* Mark the first required callee-saved register + (only need to set it once). 
+ If first regno == SP_REGNUM, we can tell that + it is the first time to be here. */ + if (cfun->machine->callee_saved_regs_first_regno == SP_REGNUM) + cfun->machine->callee_saved_regs_first_regno = r; + /* Mark the last required callee-saved register. */ + cfun->machine->callee_saved_regs_last_regno = r; + } + } + + /* Check if this function can omit prologue/epilogue code fragment. + If there is 'naked' attribute in this function, + we can set 'naked_p' flag to indicate that + we do not have to generate prologue/epilogue. + Or, if all the following conditions succeed, + we can set this function 'naked_p' as well: + condition 1: first_regno == last_regno == SP_REGNUM, + which means we do not have to save + any callee-saved registers. + condition 2: Both $lp and $fp are NOT live in this function, + which means we do not need to save them. + condition 3: There is no local_size, which means + we do not need to adjust $sp. */ + if (lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) + || (cfun->machine->callee_saved_regs_first_regno == SP_REGNUM + && cfun->machine->callee_saved_regs_last_regno == SP_REGNUM + && !df_regs_ever_live_p (FP_REGNUM) + && !df_regs_ever_live_p (LP_REGNUM) + && cfun->machine->local_size == 0)) + { + /* Set this function 'naked_p' and + other functions can check this flag. */ + cfun->machine->naked_p = 1; + + /* No need to save $fp, $gp, and $lp. + We should set these value to be zero + so that nds32_initial_elimination_offset() can work properly. */ + cfun->machine->fp_size = 0; + cfun->machine->gp_size = 0; + cfun->machine->lp_size = 0; + + /* If stack usage computation is required, + we need to provide the static stack size. */ + if (flag_stack_usage_info) + current_function_static_stack_size = 0; + + /* No need to do following adjustment, return immediately. 
*/ + return; + } + + /* Adjustment for v3push instructions: + If we are using v3push (push25/pop25) instructions, + we need to make sure Rb is $r6 and Re is + located on $r6, $r8, $r10, or $r14. + Some results above will be discarded and recomputed. + Note that it is only available under V3/V3M ISA. */ + if (TARGET_V3PUSH) + { + /* Recompute: + cfun->machine->fp_size + cfun->machine->gp_size + cfun->machine->lp_size + cfun->machine->callee_saved_regs_first_regno + cfun->machine->callee_saved_regs_last_regno */ + + /* For v3push instructions, $fp, $gp, and $lp are always saved. */ + cfun->machine->fp_size = 4; + cfun->machine->gp_size = 4; + cfun->machine->lp_size = 4; + + /* Remember to set Rb = $r6. */ + cfun->machine->callee_saved_regs_first_regno = 6; + + if (cfun->machine->callee_saved_regs_last_regno <= 6) + { + /* Re = $r6 */ + cfun->machine->callee_saved_regs_last_regno = 6; + } + else if (cfun->machine->callee_saved_regs_last_regno <= 8) + { + /* Re = $r8 */ + cfun->machine->callee_saved_regs_last_regno = 8; + } + else if (cfun->machine->callee_saved_regs_last_regno <= 10) + { + /* Re = $r10 */ + cfun->machine->callee_saved_regs_last_regno = 10; + } + else if (cfun->machine->callee_saved_regs_last_regno <= 14) + { + /* Re = $r14 */ + cfun->machine->callee_saved_regs_last_regno = 14; + } + else if (cfun->machine->callee_saved_regs_last_regno == SP_REGNUM) + { + /* If last_regno is SP_REGNUM, which means + it is never changed, so set it to Re = $r6. */ + cfun->machine->callee_saved_regs_last_regno = 6; + } + else + { + /* The program flow should not go here. */ + gcc_unreachable (); + } + } + + /* We have correctly set callee_saved_regs_first_regno + and callee_saved_regs_last_regno. + Initially, the callee_saved_regs_size is supposed to be 0. + As long as callee_saved_regs_last_regno is not SP_REGNUM, + we can update callee_saved_regs_size with new size. 
*/ + if (cfun->machine->callee_saved_regs_last_regno != SP_REGNUM) + { + /* Compute pushed size of callee-saved registers. */ + cfun->machine->callee_saved_regs_size + = 4 * (cfun->machine->callee_saved_regs_last_regno + - cfun->machine->callee_saved_regs_first_regno + + 1); + } + + /* Important: We need to make sure that + (va_args_size + fp_size + gp_size + + lp_size + callee_saved_regs_size) + is 8-byte alignment. + If it is not, calculate the padding bytes. */ + block_size = cfun->machine->va_args_size + + cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_regs_size; + if (!NDS32_DOUBLE_WORD_ALIGN_P (block_size)) + { + cfun->machine->callee_saved_area_padding_bytes + = NDS32_ROUND_UP_DOUBLE_WORD (block_size) - block_size; + } + + /* If stack usage computation is required, + we need to provide the static stack size. */ + if (flag_stack_usage_info) + { + current_function_static_stack_size + = NDS32_ROUND_UP_DOUBLE_WORD (block_size) + + cfun->machine->local_size + + cfun->machine->out_args_size; + } +} + +/* Function to create a parallel rtx pattern + which presents stack push multiple behavior. + The overall concept are: + "push registers to memory", + "adjust stack pointer". */ +static rtx +nds32_gen_stack_push_multiple (rtx Rb, rtx Re, + rtx En4 ATTRIBUTE_UNUSED) +{ + int regno; + int extra_count; + int num_use_regs; + int par_index; + int offset; + + rtx reg; + rtx mem; + rtx push_rtx; + rtx adjust_sp_rtx; + rtx parallel_insn; + + /* We need to provide a customized rtx which contains + necessary information for data analysis, + so we create a parallel rtx like this: + (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32))) + (reg:SI Rb)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) + (reg:SI Rb+1)) + ... 
+ (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) + (reg:SI Re)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) + (reg:SI FP_REGNUM)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) + (reg:SI GP_REGNUM)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) + (reg:SI LP_REGNUM)) + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int -32)))]) */ + + /* Calculate the number of registers that will be pushed. */ + extra_count = 0; + if (cfun->machine->fp_size) + extra_count++; + if (cfun->machine->gp_size) + extra_count++; + if (cfun->machine->lp_size) + extra_count++; + /* Note that Rb and Re may be SP_REGNUM. DO NOT count it in. */ + if (REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM) + num_use_regs = extra_count; + else + num_use_regs = REGNO (Re) - REGNO (Rb) + 1 + extra_count; + + /* In addition to used registers, + we need one more space for (set sp sp-x) rtx. */ + parallel_insn = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (num_use_regs + 1)); + par_index = 0; + + /* Initialize offset and start to create push behavior. */ + offset = -(num_use_regs * 4); + + /* Create (set mem regX) from Rb, Rb+1 up to Re. */ + for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++) + { + /* Rb and Re may be SP_REGNUM. + We need to break this loop immediately. */ + if (regno == SP_REGNUM) + break; + + reg = gen_rtx_REG (SImode, regno); + mem = gen_frame_mem (SImode, plus_constant (Pmode, + stack_pointer_rtx, + offset)); + push_rtx = gen_rtx_SET (VOIDmode, mem, reg); + XVECEXP (parallel_insn, 0, par_index) = push_rtx; + RTX_FRAME_RELATED_P (push_rtx) = 1; + offset = offset + 4; + par_index++; + } + + /* Create (set mem fp), (set mem gp), and (set mem lp) if necessary. 
*/ + if (cfun->machine->fp_size) + { + reg = gen_rtx_REG (SImode, FP_REGNUM); + mem = gen_frame_mem (SImode, plus_constant (Pmode, + stack_pointer_rtx, + offset)); + push_rtx = gen_rtx_SET (VOIDmode, mem, reg); + XVECEXP (parallel_insn, 0, par_index) = push_rtx; + RTX_FRAME_RELATED_P (push_rtx) = 1; + offset = offset + 4; + par_index++; + } + if (cfun->machine->gp_size) + { + reg = gen_rtx_REG (SImode, GP_REGNUM); + mem = gen_frame_mem (SImode, plus_constant (Pmode, + stack_pointer_rtx, + offset)); + push_rtx = gen_rtx_SET (VOIDmode, mem, reg); + XVECEXP (parallel_insn, 0, par_index) = push_rtx; + RTX_FRAME_RELATED_P (push_rtx) = 1; + offset = offset + 4; + par_index++; + } + if (cfun->machine->lp_size) + { + reg = gen_rtx_REG (SImode, LP_REGNUM); + mem = gen_frame_mem (SImode, plus_constant (Pmode, + stack_pointer_rtx, + offset)); + push_rtx = gen_rtx_SET (VOIDmode, mem, reg); + XVECEXP (parallel_insn, 0, par_index) = push_rtx; + RTX_FRAME_RELATED_P (push_rtx) = 1; + offset = offset + 4; + par_index++; + } + + /* Create (set sp sp-x). */ + + /* We need to re-calculate the offset value again for adjustment. */ + offset = -(num_use_regs * 4); + adjust_sp_rtx + = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, offset)); + XVECEXP (parallel_insn, 0, par_index) = adjust_sp_rtx; + RTX_FRAME_RELATED_P (adjust_sp_rtx) = 1; + + return parallel_insn; +} + +/* Function to create a parallel rtx pattern + which presents stack pop multiple behavior. + The overall concept are: + "pop registers from memory", + "adjust stack pointer". 
*/
/* Rb and Re are REG rtxes bounding the run of consecutive registers
   to reload; when both are SP_REGNUM the run is empty.  $fp, $gp and
   $lp are appended, in that order, for each of fp_size, gp_size and
   lp_size in cfun->machine that is nonzero.  En4 is unused.

   With N registers reloaded, the result has N + 1 elements:
     (set (reg Rb)            (mem (plus sp (const_int 0))))
     ... one load every 4 bytes, addresses ascending ...
     (set (reg <last loaded>) (mem (plus sp (const_int 4*(N-1)))))
     (set sp (plus sp (const_int 4*N)))
   Every element is marked RTX_FRAME_RELATED_P.  */
static rtx
nds32_gen_stack_pop_multiple (rtx Rb, rtx Re,
                              rtx En4 ATTRIBUTE_UNUSED)
{
  /* Registers reloaded after the Rb..Re run, in load order, and
     whether the current function saved each of them.  */
  const int special_regno[3] = { FP_REGNUM, GP_REGNUM, LP_REGNUM };
  const bool special_saved[3] = { cfun->machine->fp_size != 0,
                                  cfun->machine->gp_size != 0,
                                  cfun->machine->lp_size != 0 };
  int i;
  int regno;
  int n_special = 0;
  int n_loaded;
  int slot = 0;
  int sp_offset = 0;
  rtx reg;
  rtx mem;
  rtx load;
  rtx sp_update;
  rtx pattern;

  for (i = 0; i < 3; i++)
    if (special_saved[i])
      n_special++;

  /* Rb == Re == SP_REGNUM stands for "no general register";
     it must not be counted.  */
  if (REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM)
    n_loaded = n_special;
  else
    n_loaded = REGNO (Re) - REGNO (Rb) + 1 + n_special;

  /* One extra element holds the stack pointer adjustment.  */
  pattern = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (n_loaded + 1));

  /* Loads for Rb, Rb+1, ... Re, starting at the current sp.  The walk
     stops as soon as it meets SP_REGNUM (empty-run encoding).  */
  for (regno = REGNO (Rb);
       regno <= (int) REGNO (Re) && regno != SP_REGNUM;
       regno++)
    {
      reg = gen_rtx_REG (SImode, regno);
      mem = gen_frame_mem (SImode, plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  sp_offset));
      load = gen_rtx_SET (VOIDmode, reg, mem);
      XVECEXP (pattern, 0, slot) = load;
      RTX_FRAME_RELATED_P (load) = 1;
      sp_offset += 4;
      slot++;
    }

  /* Loads for whichever of $fp, $gp, $lp were saved.  */
  for (i = 0; i < 3; i++)
    {
      if (!special_saved[i])
        continue;

      reg = gen_rtx_REG (SImode, special_regno[i]);
      mem = gen_frame_mem (SImode, plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  sp_offset));
      load = gen_rtx_SET (VOIDmode, reg, mem);
      XVECEXP (pattern, 0, slot) = load;
      RTX_FRAME_RELATED_P (load) = 1;
      sp_offset += 4;
      slot++;
    }

  /* Last element: sp_offset has advanced past every loaded word,
     so it is exactly the amount to release.  */
  sp_update
    = gen_rtx_SET (VOIDmode,
                   stack_pointer_rtx,
                   plus_constant (Pmode, stack_pointer_rtx, sp_offset));
  XVECEXP (pattern, 0, slot) = sp_update;
  RTX_FRAME_RELATED_P (sp_update) = 1;

  return pattern;
}

/* Function to create a parallel rtx pattern
   which presents stack v3push behavior.
   The overall concept are:
     "push registers to memory",
     "adjust stack pointer". 
*/
/* Rb and Re are REG rtxes bounding the run of consecutive registers
   to store.  $fp, $gp and $lp are always stored after that run.
   En4 is unused.  imm8u is a CONST_INT giving the extra amount by
   which sp is lowered beyond the pushed registers.

   With N = (Re - Rb + 1) + 3 registers stored, the result has
   N + 1 elements:
     (set (mem (plus sp (const_int -4*N))) (reg Rb))
     ... one store every 4 bytes, addresses ascending ...
     (set (mem (plus sp (const_int -4)))   (reg LP_REGNUM))
     (set sp (plus sp (const_int -4*N - imm8u)))
   Every element is marked RTX_FRAME_RELATED_P.  */
static rtx
nds32_gen_stack_v3push (rtx Rb,
                        rtx Re,
                        rtx En4 ATTRIBUTE_UNUSED,
                        rtx imm8u)
{
  /* Registers stored unconditionally after the Rb..Re run.  */
  const int special_regno[3] = { FP_REGNUM, GP_REGNUM, LP_REGNUM };
  int i;
  int regno;
  int n_stored;
  int slot = 0;
  int sp_offset;
  rtx reg;
  rtx mem;
  rtx store;
  rtx sp_update;
  rtx pattern;

  /* No SP_REGNUM special case is handled here: the whole Rb..Re
     range is counted and stored as given.  */
  n_stored = REGNO (Re) - REGNO (Rb) + 1 + 3;

  /* One extra element holds the stack pointer adjustment.  */
  pattern = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (n_stored + 1));

  /* The first store goes to the lowest address of the pushed area.  */
  sp_offset = -(n_stored * 4);

  /* Stores for Rb, Rb+1, ... Re.  */
  for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++)
    {
      reg = gen_rtx_REG (SImode, regno);
      mem = gen_frame_mem (SImode, plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  sp_offset));
      store = gen_rtx_SET (VOIDmode, mem, reg);
      XVECEXP (pattern, 0, slot) = store;
      RTX_FRAME_RELATED_P (store) = 1;
      sp_offset += 4;
      slot++;
    }

  /* Stores for $fp, $gp, $lp.  */
  for (i = 0; i < 3; i++)
    {
      reg = gen_rtx_REG (SImode, special_regno[i]);
      mem = gen_frame_mem (SImode, plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  sp_offset));
      store = gen_rtx_SET (VOIDmode, mem, reg);
      XVECEXP (pattern, 0, slot) = store;
      RTX_FRAME_RELATED_P (store) = 1;
      sp_offset += 4;
      slot++;
    }

  /* Last element: lower sp by the register area plus imm8u.  */
  sp_update
    = gen_rtx_SET (VOIDmode,
                   stack_pointer_rtx,
                   plus_constant (Pmode,
                                  stack_pointer_rtx,
                                  -(n_stored * 4) - INTVAL (imm8u)));
  XVECEXP (pattern, 0, slot) = sp_update;
  RTX_FRAME_RELATED_P (sp_update) = 1;

  return pattern;
}

/* Function to create a parallel rtx pattern
   which presents stack v3pop behavior.
   The overall concept are:
     "pop registers from memory",
     "adjust stack pointer". 
*/
/* Rb and Re are REG rtxes bounding the run of consecutive registers
   to reload.  $fp, $gp and $lp are always reloaded after that run.
   En4 is unused.  imm8u is a CONST_INT giving the extra amount by
   which sp is raised beyond the popped registers.

   With N = (Re - Rb + 1) + 3 registers reloaded, the result has
   N + 1 elements:
     (set (reg Rb)        (mem (plus sp (const_int 0))))
     ... one load every 4 bytes, addresses ascending ...
     (set (reg LP_REGNUM) (mem (plus sp (const_int 4*(N-1)))))
     (set sp (plus sp (const_int 4*N + imm8u)))
   Every element is marked RTX_FRAME_RELATED_P.  */
static rtx
nds32_gen_stack_v3pop (rtx Rb,
                       rtx Re,
                       rtx En4 ATTRIBUTE_UNUSED,
                       rtx imm8u)
{
  /* Registers reloaded unconditionally after the Rb..Re run.  */
  const int special_regno[3] = { FP_REGNUM, GP_REGNUM, LP_REGNUM };
  int i;
  int regno;
  int n_loaded;
  int slot = 0;
  int sp_offset = 0;
  rtx reg;
  rtx mem;
  rtx load;
  rtx sp_update;
  rtx pattern;

  /* No SP_REGNUM special case is handled here: the whole Rb..Re
     range is counted and reloaded as given.  */
  n_loaded = REGNO (Re) - REGNO (Rb) + 1 + 3;

  /* One extra element holds the stack pointer adjustment.  */
  pattern = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (n_loaded + 1));

  /* Loads for Rb, Rb+1, ... Re, starting at the current sp.  */
  for (regno = REGNO (Rb); regno <= (int) REGNO (Re); regno++)
    {
      reg = gen_rtx_REG (SImode, regno);
      mem = gen_frame_mem (SImode, plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  sp_offset));
      load = gen_rtx_SET (VOIDmode, reg, mem);
      XVECEXP (pattern, 0, slot) = load;
      RTX_FRAME_RELATED_P (load) = 1;
      sp_offset += 4;
      slot++;
    }

  /* Loads for $fp, $gp, $lp.  */
  for (i = 0; i < 3; i++)
    {
      reg = gen_rtx_REG (SImode, special_regno[i]);
      mem = gen_frame_mem (SImode, plus_constant (Pmode,
                                                  stack_pointer_rtx,
                                                  sp_offset));
      load = gen_rtx_SET (VOIDmode, reg, mem);
      XVECEXP (pattern, 0, slot) = load;
      RTX_FRAME_RELATED_P (load) = 1;
      sp_offset += 4;
      slot++;
    }

  /* Last element: sp_offset has advanced past every loaded word;
     raise sp by that plus imm8u.  */
  sp_update
    = gen_rtx_SET (VOIDmode,
                   stack_pointer_rtx,
                   plus_constant (Pmode,
                                  stack_pointer_rtx,
                                  sp_offset + INTVAL (imm8u)));
  XVECEXP (pattern, 0, slot) = sp_update;
  RTX_FRAME_RELATED_P (sp_update) = 1;

  return pattern;
}

/* A subroutine that checks multiple load and store
   using consecutive registers.
     OP is a parallel rtx we would like to check.
     LOAD_P indicates whether we are checking load operation.
     PAR_INDEX is starting element of parallel rtx. 
+ FIRST_ELT_REGNO is used to tell starting register number. + COUNT helps us to check consecutive register numbers. */ +static bool +nds32_consecutive_registers_load_store_p (rtx op, + bool load_p, + int par_index, + int first_elt_regno, + int count) +{ + int i; + int check_regno; + rtx elt; + rtx elt_reg; + rtx elt_mem; + + for (i = 0; i < count; i++) + { + /* Pick up each element from parallel rtx. */ + elt = XVECEXP (op, 0, i + par_index); + + /* If this element is not a 'set' rtx, return false immediately. */ + if (GET_CODE (elt) != SET) + return false; + + /* Pick up reg and mem of this element. */ + elt_reg = load_p ? SET_DEST (elt) : SET_SRC (elt); + elt_mem = load_p ? SET_SRC (elt) : SET_DEST (elt); + + /* If elt_reg is not a expected reg rtx, return false. */ + if (GET_CODE (elt_reg) != REG || GET_MODE (elt_reg) != SImode) + return false; + /* If elt_mem is not a expected mem rtx, return false. */ + if (GET_CODE (elt_mem) != MEM || GET_MODE (elt_mem) != SImode) + return false; + + /* The consecutive registers should be in (Rb,Rb+1...Re) order. */ + check_regno = first_elt_regno + i; + + /* If the register number is not continuous, return false. */ + if (REGNO (elt_reg) != (unsigned int) check_regno) + return false; + } + + return true; +} + +/* A helper function to emit section head template. */ +static void +nds32_emit_section_head_template (char section_name[], + char symbol_name[], + int align_value, + bool object_p) +{ + const char *flags_str; + const char *type_str; + + flags_str = (object_p) ? "\"a\"" : "\"ax\""; + type_str = (object_p) ? "@object" : "@function"; + + fprintf (asm_out_file, "\t.section\t%s, %s\n", section_name, flags_str); + fprintf (asm_out_file, "\t.align\t%d\n", align_value); + fprintf (asm_out_file, "\t.global\t%s\n", symbol_name); + fprintf (asm_out_file, "\t.type\t%s, %s\n", symbol_name, type_str); + fprintf (asm_out_file, "%s:\n", symbol_name); +} + +/* A helper function to emit section tail template. 
*/ +static void +nds32_emit_section_tail_template (char symbol_name[]) +{ + fprintf (asm_out_file, "\t.size\t%s, .-%s\n", symbol_name, symbol_name); +} + +/* Function to emit isr jump table section. */ +static void +nds32_emit_isr_jmptbl_section (int vector_id) +{ + char section_name[100]; + char symbol_name[100]; + + /* Prepare jmptbl section and symbol name. */ + snprintf (section_name, sizeof (section_name), + ".nds32_jmptbl.%02d", vector_id); + snprintf (symbol_name, sizeof (symbol_name), + "_nds32_jmptbl_%02d", vector_id); + + nds32_emit_section_head_template (section_name, symbol_name, 2, true); + fprintf (asm_out_file, "\t.word\t%s\n", + nds32_isr_vectors[vector_id].func_name); + nds32_emit_section_tail_template (symbol_name); +} + +/* Function to emit isr vector section. */ +static void +nds32_emit_isr_vector_section (int vector_id) +{ + unsigned int vector_number_offset = 0; + const char *c_str = "CATEGORY"; + const char *sr_str = "SR"; + const char *nt_str = "NT"; + const char *vs_str = "VS"; + char first_level_handler_name[100]; + char section_name[100]; + char symbol_name[100]; + + /* Set the vector number offset so that we can calculate + the value that user specifies in the attribute. + We also prepare the category string for first level handler name. */ + switch (nds32_isr_vectors[vector_id].category) + { + case NDS32_ISR_INTERRUPT: + vector_number_offset = 9; + c_str = "i"; + break; + case NDS32_ISR_EXCEPTION: + vector_number_offset = 0; + c_str = "e"; + break; + case NDS32_ISR_NONE: + case NDS32_ISR_RESET: + /* Normally it should not be here. */ + gcc_unreachable (); + break; + } + + /* Prepare save reg string for first level handler name. */ + switch (nds32_isr_vectors[vector_id].save_reg) + { + case NDS32_SAVE_ALL: + sr_str = "sa"; + break; + case NDS32_PARTIAL_SAVE: + sr_str = "ps"; + break; + } + + /* Prepare nested type string for first level handler name. 
*/ + switch (nds32_isr_vectors[vector_id].nested_type) + { + case NDS32_NESTED: + nt_str = "ns"; + break; + case NDS32_NOT_NESTED: + nt_str = "nn"; + break; + case NDS32_NESTED_READY: + nt_str = "nr"; + break; + } + + /* Currently we have 4-byte or 16-byte size for each vector. + If it is 4-byte, the first level handler name has suffix string "_4b". */ + vs_str = (nds32_isr_vector_size == 4) ? "_4b" : ""; + + /* Now we can create first level handler name. */ + snprintf (first_level_handler_name, sizeof (first_level_handler_name), + "_nds32_%s_%s_%s%s", c_str, sr_str, nt_str, vs_str); + + /* Prepare vector section and symbol name. */ + snprintf (section_name, sizeof (section_name), + ".nds32_vector.%02d", vector_id); + snprintf (symbol_name, sizeof (symbol_name), + "_nds32_vector_%02d%s", vector_id, vs_str); + + + /* Everything is ready. We can start emit vector section content. */ + nds32_emit_section_head_template (section_name, symbol_name, + floor_log2 (nds32_isr_vector_size), false); + + /* According to the vector size, the instructions in the + vector section may be different. */ + if (nds32_isr_vector_size == 4) + { + /* This block is for 4-byte vector size. + Hardware $VID support is necessary and only one instruction + is needed in vector section. */ + fprintf (asm_out_file, "\tj\t%s ! jump to first level handler\n", + first_level_handler_name); + } + else + { + /* This block is for 16-byte vector size. + There is NO hardware $VID so that we need several instructions + such as pushing GPRs and preparing software vid at vector section. + For pushing GPRs, there are four variations for + 16-byte vector content and we have to handle each combination. + For preparing software vid, note that the vid need to + be substracted vector_number_offset. */ + if (TARGET_REDUCED_REGS) + { + if (nds32_isr_vectors[vector_id].save_reg == NDS32_SAVE_ALL) + { + /* Case of reduced set registers and save_all attribute. */ + fprintf (asm_out_file, "\t! 
reduced set regs + save_all\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r15, [$sp], $r15, 0xf\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r10, 0x0\n"); + + } + else + { + /* Case of reduced set registers and partial_save attribute. */ + fprintf (asm_out_file, "\t! reduced set regs + partial_save\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r15, [$sp], $r15, 0x2\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r5, 0x0\n"); + } + } + else + { + if (nds32_isr_vectors[vector_id].save_reg == NDS32_SAVE_ALL) + { + /* Case of full set registers and save_all attribute. */ + fprintf (asm_out_file, "\t! full set regs + save_all\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r27, 0xf\n"); + } + else + { + /* Case of full set registers and partial_save attribute. */ + fprintf (asm_out_file, "\t! full set regs + partial_save\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r15, [$sp], $r27, 0x2\n"); + fprintf (asm_out_file, "\tsmw.adm\t$r0, [$sp], $r5, 0x0\n"); + } + } + + fprintf (asm_out_file, "\tmovi\t$r0, %d ! preparing software vid\n", + vector_id - vector_number_offset); + fprintf (asm_out_file, "\tj\t%s ! jump to first level handler\n", + first_level_handler_name); + } + + nds32_emit_section_tail_template (symbol_name); +} + +/* Function to emit isr reset handler content. + Including all jmptbl/vector references, jmptbl section, + vector section, nmi handler section, and warm handler section. */ +static void +nds32_emit_isr_reset_content (void) +{ + unsigned int i; + unsigned int total_n_vectors; + const char *vs_str; + char reset_handler_name[100]; + char section_name[100]; + char symbol_name[100]; + + total_n_vectors = nds32_isr_vectors[0].total_n_vectors; + vs_str = (nds32_isr_vector_size == 4) ? "_4b" : ""; + + fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - BEGIN !\n"); + + /* Create references in .rodata according to total number of vectors. 
*/ + fprintf (asm_out_file, "\t.section\t.rodata\n"); + fprintf (asm_out_file, "\t.align\t2\n"); + + /* Emit jmptbl references. */ + fprintf (asm_out_file, "\t ! references to jmptbl section entries\n"); + for (i = 0; i < total_n_vectors; i++) + fprintf (asm_out_file, "\t.word\t_nds32_jmptbl_%02d\n", i); + + /* Emit vector references. */ + fprintf (asm_out_file, "\t ! references to vector section entries\n"); + for (i = 0; i < total_n_vectors; i++) + fprintf (asm_out_file, "\t.word\t_nds32_vector_%02d%s\n", i, vs_str); + + /* Emit jmptbl_00 section. */ + snprintf (section_name, sizeof (section_name), ".nds32_jmptbl.00"); + snprintf (symbol_name, sizeof (symbol_name), "_nds32_jmptbl_00"); + + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_section_head_template (section_name, symbol_name, 2, true); + fprintf (asm_out_file, "\t.word\t%s\n", + nds32_isr_vectors[0].func_name); + nds32_emit_section_tail_template (symbol_name); + + /* Emit vector_00 section. */ + snprintf (section_name, sizeof (section_name), ".nds32_vector.00"); + snprintf (symbol_name, sizeof (symbol_name), "_nds32_vector_00%s", vs_str); + snprintf (reset_handler_name, sizeof (reset_handler_name), + "_nds32_reset%s", vs_str); + + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_section_head_template (section_name, symbol_name, + floor_log2 (nds32_isr_vector_size), false); + fprintf (asm_out_file, "\tj\t%s ! jump to reset handler\n", + reset_handler_name); + nds32_emit_section_tail_template (symbol_name); + + /* Emit nmi handler section. */ + snprintf (section_name, sizeof (section_name), ".nds32_nmih"); + snprintf (symbol_name, sizeof (symbol_name), "_nds32_nmih"); + + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_section_head_template (section_name, symbol_name, 2, true); + fprintf (asm_out_file, "\t.word\t%s\n", + (strlen (nds32_isr_vectors[0].nmi_name) == 0) + ? 
"0" + : nds32_isr_vectors[0].nmi_name); + nds32_emit_section_tail_template (symbol_name); + + /* Emit warm handler section. */ + snprintf (section_name, sizeof (section_name), ".nds32_wrh"); + snprintf (symbol_name, sizeof (symbol_name), "_nds32_wrh"); + + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_section_head_template (section_name, symbol_name, 2, true); + fprintf (asm_out_file, "\t.word\t%s\n", + (strlen (nds32_isr_vectors[0].warm_name) == 0) + ? "0" + : nds32_isr_vectors[0].warm_name); + nds32_emit_section_tail_template (symbol_name); + + fprintf (asm_out_file, "\t! RESET HANDLER CONTENT - END !\n"); +} + +/* Function for nds32_merge_decl_attributes() and nds32_insert_attributes() + to check if there are any conflict isr-specific attributes being set. + We need to check: + 1. Only 'save_all' or 'partial_save' in the attributes. + 2. Only 'nested', 'not_nested', or 'nested_ready' in the attributes. + 3. Only 'interrupt', 'exception', or 'reset' in the attributes. */ +static void +nds32_check_isr_attrs_conflict (tree func_decl, tree func_attrs) +{ + int save_all_p, partial_save_p; + int nested_p, not_nested_p, nested_ready_p; + int intr_p, excp_p, reset_p; + + /* Initialize variables. */ + save_all_p = partial_save_p = 0; + nested_p = not_nested_p = nested_ready_p = 0; + intr_p = excp_p = reset_p = 0; + + /* We must check at MOST one attribute to set save-reg. */ + if (lookup_attribute ("save_all", func_attrs)) + save_all_p = 1; + if (lookup_attribute ("partial_save", func_attrs)) + partial_save_p = 1; + + if ((save_all_p + partial_save_p) > 1) + error ("multiple save reg attributes to function %qD", func_decl); + + /* We must check at MOST one attribute to set nested-type. 
*/ + if (lookup_attribute ("nested", func_attrs)) + nested_p = 1; + if (lookup_attribute ("not_nested", func_attrs)) + not_nested_p = 1; + if (lookup_attribute ("nested_ready", func_attrs)) + nested_ready_p = 1; + + if ((nested_p + not_nested_p + nested_ready_p) > 1) + error ("multiple nested types attributes to function %qD", func_decl); + + /* We must check at MOST one attribute to + set interrupt/exception/reset. */ + if (lookup_attribute ("interrupt", func_attrs)) + intr_p = 1; + if (lookup_attribute ("exception", func_attrs)) + excp_p = 1; + if (lookup_attribute ("reset", func_attrs)) + reset_p = 1; + + if ((intr_p + excp_p + reset_p) > 1) + error ("multiple interrupt attributes to function %qD", func_decl); +} + +/* Function to construct isr vectors information array. + We DO NOT HAVE TO check if the attributes are valid + because those works are supposed to be done on + nds32_merge_decl_attributes() and nds32_insert_attributes(). */ +static void +nds32_construct_isr_vectors_information (tree func_attrs, + const char *func_name) +{ + tree save_all, partial_save; + tree nested, not_nested, nested_ready; + tree intr, excp, reset; + + save_all = lookup_attribute ("save_all", func_attrs); + partial_save = lookup_attribute ("partial_save", func_attrs); + + nested = lookup_attribute ("nested", func_attrs); + not_nested = lookup_attribute ("not_nested", func_attrs); + nested_ready = lookup_attribute ("nested_ready", func_attrs); + + intr = lookup_attribute ("interrupt", func_attrs); + excp = lookup_attribute ("exception", func_attrs); + reset = lookup_attribute ("reset", func_attrs); + + /* If there is no interrupt/exception/reset, we can return immediately. */ + if (!intr && !excp && !reset) + return; + + /* If we are here, either we have interrupt/exception, + or reset attribute. */ + if (intr || excp) + { + tree id_list; + + /* Prepare id list so that we can traverse and set vector id. */ + id_list = (intr) ? 
(TREE_VALUE (intr)) : (TREE_VALUE (excp)); + + while (id_list) + { + tree id; + int vector_id; + unsigned int vector_number_offset; + + /* The way to handle interrupt or exception is the same, + we just need to take care of actual vector number. + For interrupt(0..63), the actual vector number is (9..72). + For exception(1..8), the actual vector number is (1..8). */ + vector_number_offset = (intr) ? (9) : (0); + + /* Pick up each vector id value. */ + id = TREE_VALUE (id_list); + /* Add vector_number_offset to get actual vector number. */ + vector_id = TREE_INT_CST_LOW (id) + vector_number_offset; + + /* Enable corresponding vector and set function name. */ + nds32_isr_vectors[vector_id].category = (intr) + ? (NDS32_ISR_INTERRUPT) + : (NDS32_ISR_EXCEPTION); + strcpy (nds32_isr_vectors[vector_id].func_name, func_name); + + /* Set register saving scheme. */ + if (save_all) + nds32_isr_vectors[vector_id].save_reg = NDS32_SAVE_ALL; + else if (partial_save) + nds32_isr_vectors[vector_id].save_reg = NDS32_PARTIAL_SAVE; + + /* Set nested type. */ + if (nested) + nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED; + else if (not_nested) + nds32_isr_vectors[vector_id].nested_type = NDS32_NOT_NESTED; + else if (nested_ready) + nds32_isr_vectors[vector_id].nested_type = NDS32_NESTED_READY; + + /* Advance to next id. */ + id_list = TREE_CHAIN (id_list); + } + } + else + { + tree id_list; + tree id; + tree nmi, warm; + + /* Deal with reset attribute. Its vector number is always 0. */ + nds32_isr_vectors[0].category = NDS32_ISR_RESET; + + /* Prepare id_list and identify id value so that + we can set total number of vectors. */ + id_list = TREE_VALUE (reset); + id = TREE_VALUE (id_list); + + /* The total vectors = interrupt + exception numbers + reset. + There are 8 exception and 1 reset in nds32 architecture. */ + nds32_isr_vectors[0].total_n_vectors = TREE_INT_CST_LOW (id) + 8 + 1; + strcpy (nds32_isr_vectors[0].func_name, func_name); + + /* Retrieve nmi and warm function. 
*/ + nmi = lookup_attribute ("nmi", func_attrs); + warm = lookup_attribute ("warm", func_attrs); + + if (nmi != NULL_TREE) + { + tree nmi_func_list; + tree nmi_func; + + nmi_func_list = TREE_VALUE (nmi); + nmi_func = TREE_VALUE (nmi_func_list); + + /* Record nmi function name. */ + strcpy (nds32_isr_vectors[0].nmi_name, + IDENTIFIER_POINTER (nmi_func)); + } + + if (warm != NULL_TREE) + { + tree warm_func_list; + tree warm_func; + + warm_func_list = TREE_VALUE (warm); + warm_func = TREE_VALUE (warm_func_list); + + /* Record warm function name. */ + strcpy (nds32_isr_vectors[0].warm_name, + IDENTIFIER_POINTER (warm_func)); + } + } +} + +/* Function that may creates more instructions + for large value on adjusting stack pointer. + + In nds32 target, 'addi' can be used for stack pointer + adjustment in prologue/epilogue stage. + However, sometimes there are too many local variables so that + the adjustment value is not able to be fit in the 'addi' instruction. + One solution is to move value into a register + and then use 'add' instruction. + In practice, we use TA_REGNUM ($r15) to accomplish this purpose. + Also, we need to return zero for sp adjustment so that + proglogue/epilogue knows there is no need to create 'addi' instruction. */ +static int +nds32_force_addi_stack_int (int full_value) +{ + int adjust_value; + + rtx tmp_reg; + rtx sp_adjust_insn; + + if (!satisfies_constraint_Is15 (GEN_INT (full_value))) + { + /* The value is not able to fit in single addi instruction. + Create more instructions of moving value into a register + and then add stack pointer with it. */ + + /* $r15 is going to be temporary register to hold the value. */ + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + + /* Create one more instruction to move value + into the temporary register. */ + emit_move_insn (tmp_reg, GEN_INT (full_value)); + + /* Create new 'add' rtx. 
*/ + sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + tmp_reg); + /* Emit rtx into insn list and receive its transformed insn rtx. */ + sp_adjust_insn = emit_insn (sp_adjust_insn); + + /* At prologue, we need to tell GCC that this is frame related insn, + so that we can consider this instruction to output debug information. + If full_value is NEGATIVE, it means this function + is invoked by expand_prologue. */ + if (full_value < 0) + { + /* Because (tmp_reg <- full_value) may be split into two + rtl patterns, we can not set its RTX_FRAME_RELATED_P. + We need to construct another (sp <- sp + full_value) + and then insert it into sp_adjust_insn's reg note to + represent a frame related expression. + GCC knows how to refer it and output debug information. */ + + rtx plus_rtx; + rtx set_rtx; + + plus_rtx = plus_constant (Pmode, stack_pointer_rtx, full_value); + set_rtx = gen_rtx_SET (VOIDmode, stack_pointer_rtx, plus_rtx); + add_reg_note (sp_adjust_insn, REG_FRAME_RELATED_EXPR, set_rtx); + + RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; + } + + /* We have used alternative way to adjust stack pointer value. + Return zero so that prologue/epilogue + will not generate other instructions. */ + return 0; + } + else + { + /* The value is able to fit in addi instruction. + However, remember to make it to be positive value + because we want to return 'adjustment' result. */ + adjust_value = (full_value < 0) ? (-full_value) : (full_value); + + return adjust_value; + } +} + +/* Return true if MODE/TYPE need double word alignment. */ +static bool +nds32_needs_double_word_align (enum machine_mode mode, const_tree type) +{ + unsigned int align; + + /* When 'type' is nonnull, there is no need to look at 'mode'. */ + align = (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)); + + return (align > PARM_BOUNDARY); +} + +/* Return true if FUNC is a naked function. 
*/ +static bool nds32_naked_function_p (tree func) +{ + tree t; + + if (TREE_CODE (func) != FUNCTION_DECL) + abort (); + + t = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); + + return (t != NULL_TREE); +} + +/* Function that check if 'X' is a valid address register. + The variable 'STRICT' is very important to + make decision for register number. + + STRICT : true + => We are in reload pass or after reload pass. + The register number should be strictly limited in general registers. + + STRICT : false + => Before reload pass, we are free to use any register number. */ +static bool +nds32_address_register_rtx_p (rtx x, bool strict) +{ + int regno; + + if (GET_CODE (x) != REG) + return false; + + regno = REGNO (x); + + if (strict) + return REGNO_OK_FOR_BASE_P (regno); + else + return true; +} + +/* Function that check if 'INDEX' is valid to be a index rtx for address. + + OUTER_MODE : Machine mode of outer address rtx. + INDEX : Check if this rtx is valid to be a index for address. + STRICT : If it is true, we are in reload pass or after reload pass. */ +static bool +nds32_legitimate_index_p (enum machine_mode outer_mode, + rtx index, + bool strict) +{ + int regno; + rtx op0; + rtx op1; + + switch (GET_CODE (index)) + { + case REG: + regno = REGNO (index); + /* If we are in reload pass or after reload pass, + we need to limit it to general register. */ + if (strict) + return REGNO_OK_FOR_INDEX_P (regno); + else + return true; + + case CONST_INT: + /* The alignment of the integer value is determined by 'outer_mode'. */ + if (GET_MODE_SIZE (outer_mode) == 1) + { + /* Further check if the value is legal for the 'outer_mode'. */ + if (!satisfies_constraint_Is15 (index)) + return false; + + /* Pass all test, the value is valid, return true. */ + return true; + } + if (GET_MODE_SIZE (outer_mode) == 2 + && NDS32_HALF_WORD_ALIGN_P (INTVAL (index))) + { + /* Further check if the value is legal for the 'outer_mode'. 
*/ + if (!satisfies_constraint_Is16 (index)) + return false; + + /* All tests passed, the value is valid. */ + return true; + } + if (GET_MODE_SIZE (outer_mode) == 4 + && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) + { + /* Further check if the value is legal for the 'outer_mode'. */ + if (!satisfies_constraint_Is17 (index)) + return false; + + /* All tests passed, the value is valid. */ + return true; + } + if (GET_MODE_SIZE (outer_mode) == 8 + && NDS32_SINGLE_WORD_ALIGN_P (INTVAL (index))) + { + /* Further check if the value is legal for the 'outer_mode'. + The constraint is tested on INDEX + 4 rather than on INDEX itself; + presumably the second word of the 8-byte access + has to be addressable as well (to be confirmed). */ + if (!satisfies_constraint_Is17 (gen_int_mode (INTVAL (index) + 4, + SImode))) + return false; + + /* All tests passed, the value is valid. */ + return true; + } + + /* Any other size, or a misaligned value, is not a valid index. */ + return false; + + case MULT: + op0 = XEXP (index, 0); + op1 = XEXP (index, 1); + + if (REG_P (op0) && CONST_INT_P (op1)) + { + int multiplier; + multiplier = INTVAL (op1); + + /* We only allow (mult reg const_int_1) + or (mult reg const_int_2) or (mult reg const_int_4). */ + if (multiplier != 1 && multiplier != 2 && multiplier != 4) + return false; + + regno = REGNO (op0); + /* Limit it in general registers if we are + in reload pass or after reload pass. */ + if(strict) + return REGNO_OK_FOR_INDEX_P (regno); + else + return true; + } + + return false; + + case ASHIFT: + op0 = XEXP (index, 0); + op1 = XEXP (index, 1); + + if (REG_P (op0) && CONST_INT_P (op1)) + { + int sv; + /* op1 is already the shift amount (sv) of the left shift. */ + sv = INTVAL (op1); + + /* We only allow (ashift reg const_int_0) + or (ashift reg const_int_1) or (ashift reg const_int_2). */ + if (sv != 0 && sv != 1 && sv !=2) + return false; + + regno = REGNO (op0); + /* Limit it in general registers if we are + in reload pass or after reload pass. 
*/ + if(strict) + return REGNO_OK_FOR_INDEX_P (regno); + else + return true; + } + + return false; + + default: + return false; + } +} + +/* Function to expand builtin function for + '[(unspec_volatile [(reg)])]'. + Only one input operand is created, and TARGET is returned as it is. */ +static rtx +nds32_expand_builtin_null_ftype_reg (enum insn_code icode, + tree exp, rtx target) +{ + /* Mapping: + ops[0] <--> value0 <--> arg0 */ + struct expand_operand ops[1]; + tree arg0; + rtx value0; + + /* Grab the incoming argument and expand it into rtx. */ + arg0 = CALL_EXPR_ARG (exp, 0); + value0 = expand_normal (arg0); + + /* Create operands. */ + create_input_operand (&ops[0], value0, TYPE_MODE (TREE_TYPE (arg0))); + + /* Emit new instruction; report an error if it can not be expanded. */ + if (!maybe_expand_insn (icode, 1, ops)) + error ("invalid argument to built-in function"); + + return target; +} + +/* Function to expand builtin function for + '[(set (reg) (unspec_volatile [(imm)]))]'. + TARGET is used as the output operand and is returned. */ +static rtx +nds32_expand_builtin_reg_ftype_imm (enum insn_code icode, + tree exp, rtx target) +{ + /* Mapping: + ops[0] <--> target <--> exp + ops[1] <--> value0 <--> arg0 */ + struct expand_operand ops[2]; + tree arg0; + rtx value0; + + /* Grab the incoming argument and expand it into rtx. */ + arg0 = CALL_EXPR_ARG (exp, 0); + value0 = expand_normal (arg0); + + /* Create operands. */ + create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (exp))); + create_input_operand (&ops[1], value0, TYPE_MODE (TREE_TYPE (arg0))); + + /* Emit new instruction; report an error if it can not be expanded. */ + if (!maybe_expand_insn (icode, 2, ops)) + error ("invalid argument to built-in function"); + + return target; +} + +/* Function to expand builtin function for + '[(unspec_volatile [(reg) (imm)])]' pattern. + Two input operands are created, and TARGET is returned as it is. */ +static rtx +nds32_expand_builtin_null_ftype_reg_imm (enum insn_code icode, + tree exp, rtx target) +{ + /* Mapping: + ops[0] <--> value0 <--> arg0 + ops[1] <--> value1 <--> arg1 */ + struct expand_operand ops[2]; + tree arg0, arg1; + rtx value0, value1; + + /* Grab the incoming arguments and extract its rtx. 
*/ + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + value0 = expand_normal (arg0); + value1 = expand_normal (arg1); + + /* Create operands. */ + create_input_operand (&ops[0], value0, TYPE_MODE (TREE_TYPE (arg0))); + create_input_operand (&ops[1], value1, TYPE_MODE (TREE_TYPE (arg1))); + + /* Emit new instruction; report an error if it can not be expanded. */ + if (!maybe_expand_insn (icode, 2, ops)) + error ("invalid argument to built-in function"); + + return target; +} + +/* A helper function to return character based on byte size: + 'w' for 4 bytes, 'h' for 2 bytes and 'b' for 1 byte. */ +static char +nds32_byte_to_size (int byte) +{ + switch (byte) + { + case 4: + return 'w'; + case 2: + return 'h'; + case 1: + return 'b'; + default: + /* Normally it should not be here. */ + gcc_unreachable (); + } +} + +/* A helper function to check if this function should contain prologue. + The result is nonzero if any of the registers 0 to 27, FP_REGNUM + or LP_REGNUM is required to be saved, or if flag_pic is set. */ +static int +nds32_have_prologue_p (void) +{ + int i; + + for (i = 0; i < 28; i++) + if (NDS32_REQUIRED_CALLEE_SAVED_P (i)) + return 1; + + return (flag_pic + || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) + || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM)); +} + +/* ------------------------------------------------------------------------ */ + +/* PART 3: Implement target hook stuff definitions. */ + +/* Register Classes. */ + +static unsigned char +nds32_class_max_nregs (reg_class_t rclass ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + /* Return the maximum number of consecutive registers + needed to represent "mode" in a register of "rclass". + It is the size of "mode" in words, rounded up. */ + return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD); +} + +static int +nds32_register_priority (int hard_regno) +{ + /* Encourage to use r0-r7 for LRA when optimize for size: + they get priority 4, every other case gets priority 3. */ + if (optimize_size && hard_regno < 8) + return 4; + return 3; +} + + +/* Stack Layout and Calling Conventions. */ + +/* There are three kinds of pointer concepts used in the GCC compiler: + + frame pointer: A pointer to the first location of local variables. + stack pointer: A pointer to the top of a stack frame. 
+ argument pointer: A pointer to the incoming arguments. + + In nds32 target calling convention, we are using 8-byte alignment. + Besides, we would like to have each stack frame of a function include: + + [Block A] + 1. previous hard frame pointer + 2. return address + 3. callee-saved registers + 4. <padding bytes> (we will calculate it in nds32_compute_stack_frame() + and save it at + cfun->machine->callee_saved_area_padding_bytes) + + [Block B] + 1. local variables + 2. spilling location + 3. <padding bytes> (it will be calculated by GCC itself) + 4. incoming arguments + 5. <padding bytes> (it will be calculated by GCC itself) + + [Block C] + 1. <padding bytes> (it will be calculated by GCC itself) + 2. outgoing arguments + + We 'wrap' these blocks together with + hard frame pointer ($r28) and stack pointer ($r31). + By applying the basic frame/stack/argument pointers concept, + the layout of a stack frame should be like this: + + | | + old stack pointer -> ---- + | | \ + | | saved arguments for + | | vararg functions + | | / + hard frame pointer -> -- + & argument pointer | | \ + | | previous hardware frame pointer + | | return address + | | callee-saved registers + | | / + frame pointer -> -- + | | \ + | | local variables + | | and incoming arguments + | | / + -- + | | \ + | | outgoing + | | arguments + | | / + stack pointer -> ---- + + $SFP and $AP are used to represent frame pointer and argument pointer, + which will be both eliminated as hard frame pointer. */ + +/* -- Eliminating Frame Pointer and Arg Pointer. 
*/ +/* Return true if FROM_REG is allowed to be eliminated into TO_REG. + Only the argument pointer and the frame pointer are accepted as FROM_REG, + each into either the stack pointer or the hard frame pointer. */ +static bool nds32_can_eliminate (const int from_reg, const int to_reg) +{ + if (from_reg == ARG_POINTER_REGNUM && to_reg == STACK_POINTER_REGNUM) + return true; + + if (from_reg == ARG_POINTER_REGNUM && to_reg == HARD_FRAME_POINTER_REGNUM) + return true; + + if (from_reg == FRAME_POINTER_REGNUM && to_reg == STACK_POINTER_REGNUM) + return true; + + if (from_reg == FRAME_POINTER_REGNUM && to_reg == HARD_FRAME_POINTER_REGNUM) + return true; + + return false; +} + +/* -- Passing Arguments in Registers. */ +/* Return the register rtx in which an argument of MODE/TYPE is passed, + or NULL_RTX if the argument is passed on the stack. */ +static rtx +nds32_function_arg (cumulative_args_t ca, enum machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (ca); + + /* The last time this hook is called, + it is called with MODE == VOIDmode. */ + if (mode == VOIDmode) + return NULL_RTX; + + /* For nameless arguments, they are passed on the stack. */ + if (!named) + return NULL_RTX; + + /* If there are still registers available, return it. */ + if (NDS32_ARG_PASS_IN_REG_P (cum->reg_offset, mode, type)) + { + /* Pick up the next available register number. */ + return gen_rtx_REG (mode, + NDS32_AVAILABLE_REGNUM_FOR_ARG (cum->reg_offset, + mode, + type)); + } + else + { + /* No register available, return NULL_RTX. + The compiler will use stack to pass argument instead. */ + return NULL_RTX; + } +} +/* Move the register offset recorded in CA past the current argument. */ +static void +nds32_function_arg_advance (cumulative_args_t ca, enum machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (ca); + + /* Advance next register for use. + Only named argument could be advanced. + The new offset is the offset of the register picked for this argument + plus the number of registers the argument needs. */ + if (named) + { + cum->reg_offset + = NDS32_AVAILABLE_REGNUM_FOR_ARG (cum->reg_offset, mode, type) + - NDS32_GPR_ARG_FIRST_REGNUM + + NDS32_NEED_N_REGS_FOR_ARG (mode, type); + } +} + +static unsigned int +nds32_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + return (nds32_needs_double_word_align (mode, type) + ? 
NDS32_DOUBLE_WORD_ALIGNMENT + : PARM_BOUNDARY); +} + +/* -- How Scalar Function Values Are Returned. */ +/* Return the rtx of a function value of type RET_TYPE: + register NDS32_GPR_RET_FIRST_REGNUM in the mode of RET_TYPE + after promote_mode has been applied to it. */ +static rtx +nds32_function_value (const_tree ret_type, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode; + int unsignedp; + + mode = TYPE_MODE (ret_type); + unsignedp = TYPE_UNSIGNED (ret_type); + + mode = promote_mode (ret_type, mode, &unsignedp); + + return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); +} +/* Return the rtx of a library call value: NDS32_GPR_RET_FIRST_REGNUM in MODE. */ +static rtx +nds32_libcall_value (enum machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, NDS32_GPR_RET_FIRST_REGNUM); +} +/* Return true if REGNO is NDS32_GPR_RET_FIRST_REGNUM. */ +static bool +nds32_function_value_regno_p (const unsigned int regno) +{ + return (regno == NDS32_GPR_RET_FIRST_REGNUM); +} + +/* -- Function Entry and Exit. */ + +/* The content produced from this function + will be placed before prologue body. + It only writes assembly comment lines (starting with "\t! ") to FILE. */ +static void +nds32_asm_function_prologue (FILE *file, + HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + int r; + const char *func_name; + tree attrs; + tree name; + + /* All stack frame information is supposed to be + already computed when expanding prologue. + The result is in cfun->machine. + DO NOT call nds32_compute_stack_frame() here + because it may corrupt the essential information. */ + + fprintf (file, "\t! BEGIN PROLOGUE\n"); + fprintf (file, "\t! fp needed: %d\n", frame_pointer_needed); + fprintf (file, "\t! pretend_args: %d\n", cfun->machine->va_args_size); + fprintf (file, "\t! local_size: %d\n", cfun->machine->local_size); + fprintf (file, "\t! out_args_size: %d\n", cfun->machine->out_args_size); + + /* Use df_regs_ever_live_p() to detect if the register + is ever used in the current function. + Registers 0 to 31 are examined. */ + fprintf (file, "\t! registers ever_live: "); + for (r = 0; r < 32; r++) + { + if (df_regs_ever_live_p (r)) + fprintf (file, "%s, ", reg_names[r]); + } + fputc ('\n', file); + + /* Display the attributes of this function. */ + fprintf (file, "\t! 
function attributes: "); + /* Read the attribute list as it is, without reordering it. + nreverse() reverses a list destructively, so applying it to + DECL_ATTRIBUTES would leave the decl pointing at the old head, + which is the last node after the reversal, and every later walk + from the decl would see only that single attribute. + As a result the attributes are printed in the order GCC stores them. */ + attrs = DECL_ATTRIBUTES (current_function_decl); + + /* If there is no attribute at all, print out "None". */ + if (!attrs) + fprintf (file, "None"); + + /* If there are some attributes, try if we need to + construct isr vector information. */ + func_name = IDENTIFIER_POINTER (DECL_NAME (current_function_decl)); + nds32_construct_isr_vectors_information (attrs, func_name); + + /* Display all attributes of this function. + The list itself is only read: 'attrs' is a local cursor. */ + while (attrs) + { + name = TREE_PURPOSE (attrs); + fprintf (file, "%s ", IDENTIFIER_POINTER (name)); + + /* Pick up the next attribute. */ + attrs = TREE_CHAIN (attrs); + } + fputc ('\n', file); +} + +/* After rtl prologue has been expanded, this function is used. */ +static void +nds32_asm_function_end_prologue (FILE *file) +{ + fprintf (file, "\t! END PROLOGUE\n"); + + /* If frame pointer is NOT needed and -mfp-as-gp is issued, + we can generate special directive: ".omit_fp_begin" + to guide linker doing fp-as-gp optimization. + However, for a naked function, which means + it should not have prologue/epilogue, + using fp-as-gp still requires saving $fp by push/pop behavior and + there is no benefit to use fp-as-gp on such small function. + So we need to make sure this function is NOT naked as well. */ + if (!frame_pointer_needed + && !cfun->machine->naked_p + && cfun->machine->fp_as_gp_p) + { + fprintf (file, "\t! ----------------------------------------\n"); + fprintf (file, "\t! Guide linker to do " + "link time optimization: fp-as-gp\n"); + fprintf (file, "\t! We add one more instruction to " + "initialize $fp near to $gp location.\n"); + fprintf (file, "\t! If linker fails to use fp-as-gp transformation,\n"); + fprintf (file, "\t! 
this extra instruction should be " + "eliminated at link stage.\n"); + fprintf (file, "\t.omit_fp_begin\n"); + fprintf (file, "\tla\t$fp,_FP_BASE_\n"); + fprintf (file, "\t! ----------------------------------------\n"); + } +} + +/* Before rtl epilogue has been expanded, this function is used. */ +static void +nds32_asm_function_begin_epilogue (FILE *file) +{ + /* If frame pointer is NOT needed and -mfp-as-gp is issued, + we can generate special directive: ".omit_fp_end" + to claim fp-as-gp optimization range. + However, for a naked function, + which means it should not have prologue/epilogue, + using fp-as-gp still requires saving $fp by push/pop behavior and + there is no benefit to use fp-as-gp on such small function. + So we need to make sure this function is NOT naked as well. + The condition below is the same one that guards ".omit_fp_begin". */ + if (!frame_pointer_needed + && !cfun->machine->naked_p + && cfun->machine->fp_as_gp_p) + { + fprintf (file, "\t! ----------------------------------------\n"); + fprintf (file, "\t! Claim the range of fp-as-gp " + "link time optimization\n"); + fprintf (file, "\t.omit_fp_end\n"); + fprintf (file, "\t! ----------------------------------------\n"); + } + + fprintf (file, "\t! BEGIN EPILOGUE\n"); +} + +/* The content produced from this function + will be placed after epilogue body. + It only writes one assembly comment line to FILE. */ +static void +nds32_asm_function_epilogue (FILE *file, + HOST_WIDE_INT size ATTRIBUTE_UNUSED) +{ + fprintf (file, "\t! END EPILOGUE\n"); +} +/* Output to FILE a thunk that adds DELTA to the register selected below + and then branches to FUNCTION. VCALL_OFFSET is not used. */ +static void +nds32_asm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, + tree function) +{ + int this_regno; + + /* Make sure unwind info is emitted for the thunk if needed. */ + final_start_function (emit_barrier (), file, 1); + + this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) + ? 
1 + : 0); + + /* DELTA is a HOST_WIDE_INT, whose width depends on the host, + so it must be printed with HOST_WIDE_INT_PRINT_DEC rather than "%ld". */ + if (delta != 0) + { + if (satisfies_constraint_Is15 (GEN_INT (delta))) + { + /* DELTA satisfies the Is15 constraint: a single addi is enough. */ + fprintf (file, "\taddi\t$r%d, $r%d, " HOST_WIDE_INT_PRINT_DEC "\n", + this_regno, this_regno, delta); + } + else if (satisfies_constraint_Is20 (GEN_INT (delta))) + { + /* DELTA satisfies the Is20 constraint: load it into $ta with movi. */ + fprintf (file, "\tmovi\t$ta, " HOST_WIDE_INT_PRINT_DEC "\n", delta); + fprintf (file, "\tadd\t$r%d, $r%d, $ta\n", this_regno, this_regno); + } + else + { + /* Otherwise build DELTA in $ta with a sethi/ori pair. */ + fprintf (file, "\tsethi\t$ta, hi20(" HOST_WIDE_INT_PRINT_DEC ")\n", + delta); + fprintf (file, "\tori\t$ta, $ta, lo12(" HOST_WIDE_INT_PRINT_DEC ")\n", + delta); + fprintf (file, "\tadd\t$r%d, $r%d, $ta\n", this_regno, this_regno); + } + } + + /* Branch to the real function. */ + fprintf (file, "\tb\t"); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fprintf (file, "\n"); + + final_end_function (); +} + +/* -- Permitting tail calls. */ + +/* Determine whether we need to enable warning for function return check. */ +static bool +nds32_warn_func_return (tree decl) +{ +/* Naked functions are implemented entirely in assembly, including the + return sequence, so suppress warnings about this. */ + return !nds32_naked_function_p (decl); +} + + +/* Implementing the Varargs Macros. */ + +static bool +nds32_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED) +{ + /* Return true so that all the named arguments for FUNCTION_ARG have named=1. + If return false, for the variadic function, all named arguments EXCEPT + the last are treated as named. */ + return true; +} + + +/* Trampolines for Nested Functions. */ + +static void +nds32_asm_trampoline_template (FILE *f) +{ + if (TARGET_REDUCED_REGS) + { + /* Trampoline is not supported on reduced-set registers yet. */ + sorry ("a nested function is not supported for reduced registers"); + } + else + { + asm_fprintf (f, "\t! Trampoline code template\n"); + asm_fprintf (f, "\t! This code fragment will be copied " + "into stack on demand\n"); + + asm_fprintf (f, "\tmfusr\t$r16,$pc\n"); + asm_fprintf (f, "\tlwi\t$r15,[$r16 + 20] " + "! load nested function address\n"); + asm_fprintf (f, "\tlwi\t$r16,[$r16 + 16] " + "! 
load chain_value\n"); + asm_fprintf (f, "\tjr\t$r15\n"); + } + + /* Preserve space ($pc + 16) for saving chain_value, + nds32_trampoline_init will fill the value in this slot. + The slot is one word, initialized to zero. */ + asm_fprintf (f, "\t! space for saving chain_value\n"); + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); + + /* Preserve space ($pc + 20) for saving nested function address, + nds32_trampoline_init will fill the value in this slot. + The slot is one word, initialized to zero. */ + asm_fprintf (f, "\t! space for saving nested function address\n"); + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline. + M_TRAMP is the memory rtx of the trampoline, FNDECL is the nested + function and CHAIN_VALUE is the value stored in the chain slot. */ +static void +nds32_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + int i; + + /* Nested function address. */ + rtx fnaddr; + /* The memory rtx that is going to + be filled with chain_value. */ + rtx chain_value_mem; + /* The memory rtx that is going to + be filled with nested function address. */ + rtx nested_func_mem; + + /* Start address of trampoline code in stack, for doing cache sync. */ + rtx sync_cache_addr; + /* Temporary register for sync instruction. */ + rtx tmp_reg; + /* Instruction-cache sync instruction, + requesting an argument as starting address. */ + rtx isync_insn; + /* TRAMPOLINE_ALIGNMENT expressed in bytes, + for comparison with nds32_cache_block_size. */ + int tramp_align_in_bytes; + + /* Trampoline is not supported on reduced-set registers yet. */ + if (TARGET_REDUCED_REGS) + sorry ("a nested function is not supported for reduced registers"); + + /* STEP 1: Copy trampoline code template into stack, + fill up essential data into stack. */ + + /* Extract nested function address rtx. */ + fnaddr = XEXP (DECL_RTL (fndecl), 0); + + /* m_tramp is memory rtx that is going to be filled with trampoline code. + We have nds32_asm_trampoline_template() to emit template pattern. 
*/ + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + /* After copying trampoline code into stack, + fill chain_value into stack (the slot at offset 16). */ + chain_value_mem = adjust_address (m_tramp, SImode, 16); + emit_move_insn (chain_value_mem, chain_value); + /* After copying trampoline code into stack, + fill nested function address into stack (the slot at offset 20). */ + nested_func_mem = adjust_address (m_tramp, SImode, 20); + emit_move_insn (nested_func_mem, fnaddr); + + /* STEP 2: Sync instruction-cache. */ + + /* We have successfully filled trampoline code into stack. + However, in order to execute code in stack correctly, + we must sync instruction cache. */ + sync_cache_addr = XEXP (m_tramp, 0); + tmp_reg = gen_reg_rtx (SImode); + isync_insn = gen_unspec_volatile_isync (tmp_reg); + + /* Because nds32_cache_block_size is in bytes, + we get trampoline alignment in bytes for convenient comparison. */ + tramp_align_in_bytes = TRAMPOLINE_ALIGNMENT / BITS_PER_UNIT; + + if (tramp_align_in_bytes >= nds32_cache_block_size + && (tramp_align_in_bytes % nds32_cache_block_size) == 0) + { + /* Under this condition, the starting address of trampoline + must be aligned to the starting address of each cache block + and we do not have to worry about cross-boundary issue. + One sync is emitted for every cache block the trampoline covers. */ + for (i = 0; + i < (TRAMPOLINE_SIZE + nds32_cache_block_size - 1) + / nds32_cache_block_size; + i++) + { + emit_move_insn (tmp_reg, + plus_constant (Pmode, sync_cache_addr, + nds32_cache_block_size * i)); + emit_insn (isync_insn); + } + } + else if (TRAMPOLINE_SIZE > nds32_cache_block_size) + { + /* The starting address of trampoline code + may not be aligned to the cache block, + so the trampoline code may be across two cache block. + We need to sync the last element, which is 4-byte size, + of trampoline template. 
*/ + for (i = 0; + i < (TRAMPOLINE_SIZE + nds32_cache_block_size - 1) + / nds32_cache_block_size; + i++) + { + emit_move_insn (tmp_reg, + plus_constant (Pmode, sync_cache_addr, + nds32_cache_block_size * i)); + emit_insn (isync_insn); + } + + /* The last element of trampoline template is 4-byte size. */ + emit_move_insn (tmp_reg, + plus_constant (Pmode, sync_cache_addr, + TRAMPOLINE_SIZE - 4)); + emit_insn (isync_insn); + } + else + { + /* This is the simplest case. + Because TRAMPOLINE_SIZE is less than or + equal to nds32_cache_block_size, + we can just sync start address and + the last element of trampoline code. */ + + /* Sync starting address of trampoline code. */ + emit_move_insn (tmp_reg, sync_cache_addr); + emit_insn (isync_insn); + /* Sync the last element, which is 4-byte size, + of trampoline template. */ + emit_move_insn (tmp_reg, + plus_constant (Pmode, sync_cache_addr, + TRAMPOLINE_SIZE - 4)); + emit_insn (isync_insn); + } + + /* Set instruction serialization barrier + to guarantee the correct operations. */ + emit_insn (gen_unspec_volatile_isb ()); +} + + +/* Addressing Modes. */ +/* Return true if X is a valid address for a memory access in MODE. + STRICT is passed on to the register and index checks. */ +static bool +nds32_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + /* For (mem:DI addr) or (mem:DF addr) case, + we only allow 'addr' to be [reg], [symbol_ref], + [const], or [reg + const_int] pattern. */ + if (mode == DImode || mode == DFmode) + { + /* Allow [Reg + const_int] addressing mode. + The const_int may be either operand of the PLUS. */ + if (GET_CODE (x) == PLUS) + { + if (nds32_address_register_rtx_p (XEXP (x, 0), strict) + && nds32_legitimate_index_p (mode, XEXP (x, 1), strict) + && CONST_INT_P (XEXP (x, 1))) + return true; + + else if (nds32_address_register_rtx_p (XEXP (x, 1), strict) + && nds32_legitimate_index_p (mode, XEXP (x, 0), strict) + && CONST_INT_P (XEXP (x, 0))) + return true; + } + + /* Now check [reg], [symbol_ref], and [const]. 
*/ + if (GET_CODE (x) != REG + && GET_CODE (x) != SYMBOL_REF + && GET_CODE (x) != CONST) + return false; + } + + /* Check if 'x' is a valid address, according to its rtx code. */ + switch (GET_CODE (x)) + { + case REG: + /* (mem (reg A)) => [Ra] */ + return nds32_address_register_rtx_p (x, strict); + + case SYMBOL_REF: + /* Without TARGET_GP_DIRECT, a symbol_ref is rejected + during and after reload (including LRA). */ + if (!TARGET_GP_DIRECT + && (reload_completed + || reload_in_progress + || lra_in_progress)) + return false; + + /* (mem (symbol_ref A)) => [symbol_ref] + It is not accepted while expanding to rtl. */ + return !currently_expanding_to_rtl; + + case CONST: + /* Without TARGET_GP_DIRECT, a const is rejected + during and after reload (including LRA). */ + if (!TARGET_GP_DIRECT + && (reload_completed + || reload_in_progress + || lra_in_progress)) + return false; + + /* (mem (const (...))) + => [ + const_addr ], where const_addr = symbol_ref + const_int */ + if (GET_CODE (XEXP (x, 0)) == PLUS) + { + rtx plus_op = XEXP (x, 0); + + rtx op0 = XEXP (plus_op, 0); + rtx op1 = XEXP (plus_op, 1); + + if (GET_CODE (op0) == SYMBOL_REF && CONST_INT_P (op1)) + return true; + else + return false; + } + + return false; + + case POST_MODIFY: + /* (mem (post_modify (reg) (plus (reg) (reg)))) + => [Ra], Rb */ + /* (mem (post_modify (reg) (plus (reg) (const_int)))) + => [Ra], const_int */ + if (GET_CODE (XEXP (x, 0)) == REG + && GET_CODE (XEXP (x, 1)) == PLUS) + { + rtx plus_op = XEXP (x, 1); + + rtx op0 = XEXP (plus_op, 0); + rtx op1 = XEXP (plus_op, 1); + + if (nds32_address_register_rtx_p (op0, strict) + && nds32_legitimate_index_p (mode, op1, strict)) + return true; + else + return false; + } + + return false; + + case POST_INC: + case POST_DEC: + /* (mem (post_inc reg)) => [Ra], 1/2/4 */ + /* (mem (post_dec reg)) => [Ra], -1/-2/-4 */ + /* The 1/2/4 or -1/-2/-4 have been displayed in nds32.md. + We only need to deal with register Ra. 
*/ + if (nds32_address_register_rtx_p (XEXP (x, 0), strict)) + return true; + else + return false; + + case PLUS: + /* (mem (plus reg const_int)) + => [Ra + imm] */ + /* (mem (plus reg reg)) + => [Ra + Rb] */ + /* (mem (plus (mult reg const_int) reg)) + => [Ra + Rb << sv] */ + /* The base register may be either operand of the PLUS. */ + if (nds32_address_register_rtx_p (XEXP (x, 0), strict) + && nds32_legitimate_index_p (mode, XEXP (x, 1), strict)) + return true; + else if (nds32_address_register_rtx_p (XEXP (x, 1), strict) + && nds32_legitimate_index_p (mode, XEXP (x, 0), strict)) + return true; + else + return false; + + case LO_SUM: + if (!TARGET_GP_DIRECT) + return true; + /* Otherwise fall through to 'default' and reject the address. */ + default: + return false; + } +} + + +/* Describing Relative Costs of Operations. */ +/* A move that involves HIGH_REGS is rated 6, any other register move 2. */ +static int nds32_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, + reg_class_t to) +{ + if (from == HIGH_REGS || to == HIGH_REGS) + return 6; + + return 2; +} +/* Every memory move is rated 8, whatever the mode, class and direction. */ +static int nds32_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + return 8; +} + +/* This target hook describes the relative costs of RTL expressions. + Return 'true' when all subexpressions of x have been processed. + Return 'false' to sum the costs of sub-rtx, plus cost of this operation. + Refer to gcc/rtlanal.c for more information. + The cost is stored in *TOTAL; SPEED selects the performance cost model + (true) or the size cost model (false). */ +static bool +nds32_rtx_costs (rtx x, + int code, + int outer_code, + int opno ATTRIBUTE_UNUSED, + int *total, + bool speed) +{ + /* According to 'speed', goto suitable cost model section. */ + if (speed) + goto performance_cost; + else + goto size_cost; + + +performance_cost: + /* This is section for performance cost model. */ + + /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. + We treat it as 4-cycle cost for each instruction + under performance consideration. */ + switch (code) + { + case SET: + /* For 'SET' rtx, we need to return false + so that it can recursively calculate costs. 
*/ + return false; + + case USE: + /* Used in combine.c as a marker. */ + *total = 0; + break; + + case MULT: + *total = COSTS_N_INSNS (5); + break; + + case DIV: + case UDIV: + case MOD: + case UMOD: + *total = COSTS_N_INSNS (7); + break; + + default: + *total = COSTS_N_INSNS (1); + break; + } + + return true; + + +size_cost: + /* This is section for size cost model. */ + + /* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4. + We treat it as 4-byte cost for each instruction + under code size consideration. + So 'COSTS_N_INSNS (1) - 2' below stands for a 2-byte instruction. */ + switch (code) + { + case SET: + /* For 'SET' rtx, we need to return false + so that it can recursively calculate costs. */ + return false; + + case USE: + /* Used in combine.c as a marker. */ + *total = 0; + break; + + case CONST_INT: + /* All instructions involving constant operation + need to be considered for cost evaluation. + The cost depends on 'outer_code', i.e. on the operation + that uses the constant. */ + if (outer_code == SET) + { + /* (set X imm5s), use movi55, 2-byte cost. + (set X imm20s), use movi, 4-byte cost. + (set X BIG_INT), use sethi/ori, 8-byte cost. */ + if (satisfies_constraint_Is05 (x)) + *total = COSTS_N_INSNS (1) - 2; + else if (satisfies_constraint_Is20 (x)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (2); + } + else if (outer_code == PLUS || outer_code == MINUS) + { + /* Possible addi333/subi333 or subi45/addi45, 2-byte cost. + General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu05 (x)) + *total = COSTS_N_INSNS (1) - 2; + else + *total = COSTS_N_INSNS (1); + } + else if (outer_code == ASHIFT) + { + /* Possible slli333, 2-byte cost. + General case, cost 1 instruction with 4-byte. */ + if (satisfies_constraint_Iu03 (x)) + *total = COSTS_N_INSNS (1) - 2; + else + *total = COSTS_N_INSNS (1); + } + else if (outer_code == ASHIFTRT || outer_code == LSHIFTRT) + { + /* Possible srai45 or srli45, 2-byte cost. + General case, cost 1 instruction with 4-byte. 
*/ + if (satisfies_constraint_Iu05 (x)) + *total = COSTS_N_INSNS (1) - 2; + else + *total = COSTS_N_INSNS (1); + } + else + { + /* For other cases, simply set it 4-byte cost. */ + *total = COSTS_N_INSNS (1); + } + break; + + case CONST_DOUBLE: + /* It requires high part and low part processing, set it 8-byte cost. */ + *total = COSTS_N_INSNS (2); + break; + + default: + /* For other cases, generally we set it 4-byte cost + and stop recursively traversing. */ + *total = COSTS_N_INSNS (1); + break; + } + + return true; +} +/* Return the cost of ADDRESS. Both cost models share the same code. */ +static int nds32_address_cost (rtx address, + enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed) +{ + rtx plus0, plus1; + enum rtx_code code; + + code = GET_CODE (address); + + /* According to 'speed', goto suitable cost model section. */ + if (speed) + goto performance_cost; + else + goto size_cost; + +performance_cost: + /* This is section for performance cost model. */ + + /* FALLTHRU, currently we use same cost model as size_cost. */ + +size_cost: + /* This is section for size cost model. */ + + switch (code) + { + case POST_MODIFY: + case POST_INC: + case POST_DEC: + /* We encourage that rtx contains + POST_MODIFY/POST_INC/POST_DEC behavior. */ + return 0; + + case SYMBOL_REF: + /* We can have gp-relative load/store for symbol_ref. + Have it 4-byte cost. */ + return COSTS_N_INSNS (1); + + case CONST: + /* It is supposed to be the pattern (const (plus symbol_ref const_int)). + Have it 4-byte cost. */ + return COSTS_N_INSNS (1); + + case REG: + /* Simply return 4-byte costs. */ + return COSTS_N_INSNS (1); + + case PLUS: + /* We do not need to check if the address is a legitimate address, + because this hook is never called with an invalid address. + But we better check the range of + const_int value for cost, if it exists. */ + plus0 = XEXP (address, 0); + plus1 = XEXP (address, 1); + + if (REG_P (plus0) && CONST_INT_P (plus1)) + { + /* If it is possible to be lwi333/swi333 form, + make it 2-byte cost. 
*/ + if (satisfies_constraint_Iu05 (plus1)) + return (COSTS_N_INSNS (1) - 2); + else + return COSTS_N_INSNS (1); + } + + /* For other 'plus' situation, make it cost 4-byte. */ + return COSTS_N_INSNS (1); + + default: + break; + } + /* Any other kind of address is rated as four instructions. */ + return COSTS_N_INSNS (4); +} + + +/* Defining the Output Assembler Language. */ + +/* -- The Overall Framework of an Assembler File. */ + +static void +nds32_asm_file_start (void) +{ + int i; + + default_file_start (); + + /* Tell assembler which ABI we are using. */ + fprintf (asm_out_file, "\t! ABI version\n"); + fprintf (asm_out_file, "\t.abi_2\n"); + + /* Tell assembler that this asm code is generated by compiler. */ + fprintf (asm_out_file, "\t! This asm file is generated by compiler\n"); + fprintf (asm_out_file, "\t.flag\tverbatim\n"); + /* Give assembler the size of each vector for interrupt handler. */ + fprintf (asm_out_file, "\t! This vector size directive is required " + "for checking inconsistency on interrupt handler\n"); + fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size); + + /* If user enables '-mforce-fp-as-gp' or compiles programs with -Os, + the compiler may produce 'la $fp,_FP_BASE_' instruction + at prologue for fp-as-gp optimization. + We should emit weak reference of _FP_BASE_ to avoid undefined reference + in case user does not pass '--relax' option to linker. */ + if (TARGET_FORCE_FP_AS_GP || optimize_size) + { + fprintf (asm_out_file, "\t! This weak reference is required to do " + "fp-as-gp link time optimization\n"); + fprintf (asm_out_file, "\t.weak\t_FP_BASE_\n"); + } + /* If user enables '-mex9', we should emit relaxation directive + to tell linker that this file is allowed to do ex9 optimization. */ + if (TARGET_EX9) + { + fprintf (asm_out_file, "\t! This relaxation directive is required " + "to do ex9 link time optimization\n"); + fprintf (asm_out_file, "\t.relax\tex9\n"); + } + + fprintf (asm_out_file, "\t! 
------------------------------------\n"); + /* Report the ISA family in use. */ + if (TARGET_ISA_V2) + fprintf (asm_out_file, "\t! ISA family\t\t: %s\n", "V2"); + if (TARGET_ISA_V3) + fprintf (asm_out_file, "\t! ISA family\t\t: %s\n", "V3"); + if (TARGET_ISA_V3M) + fprintf (asm_out_file, "\t! ISA family\t\t: %s\n", "V3M"); + + fprintf (asm_out_file, "\t! Endian setting\t: %s\n", + ((TARGET_BIG_ENDIAN) ? "big-endian" + : "little-endian")); + + fprintf (asm_out_file, "\t! ------------------------------------\n"); + /* Report the optional instruction set extensions. */ + fprintf (asm_out_file, "\t! Use conditional move\t\t: %s\n", + ((TARGET_CMOV) ? "Yes" + : "No")); + fprintf (asm_out_file, "\t! Use performance extension\t: %s\n", + ((TARGET_PERF_EXT) ? "Yes" + : "No")); + + fprintf (asm_out_file, "\t! ------------------------------------\n"); + /* Report the code generation settings. */ + fprintf (asm_out_file, "\t! V3PUSH instructions\t: %s\n", + ((TARGET_V3PUSH) ? "Yes" + : "No")); + fprintf (asm_out_file, "\t! 16-bit instructions\t: %s\n", + ((TARGET_16_BIT) ? "Yes" + : "No")); + fprintf (asm_out_file, "\t! GP base access\t: %s\n", + ((TARGET_GP_DIRECT) ? "Yes" + : "No")); + fprintf (asm_out_file, "\t! Reduced registers set\t: %s\n", + ((TARGET_REDUCED_REGS) ? "Yes" + : "No")); + + fprintf (asm_out_file, "\t! ------------------------------------\n"); + /* Report the optimization level; -Os is reported by name. */ + if (optimize_size) + fprintf (asm_out_file, "\t! Optimization level\t: -Os\n"); + else + fprintf (asm_out_file, "\t! Optimization level\t: -O%d\n", optimize); + + fprintf (asm_out_file, "\t! ------------------------------------\n"); + + fprintf (asm_out_file, "\t! Cache block size\t: %d\n", + nds32_cache_block_size); + + fprintf (asm_out_file, "\t! ------------------------------------\n"); + + /* Initialize isr vector information array before compiling functions. 
*/ + for (i = 0; i < NDS32_N_ISR_VECTORS; i++) + { + nds32_isr_vectors[i].category = NDS32_ISR_NONE; + strcpy (nds32_isr_vectors[i].func_name, ""); + nds32_isr_vectors[i].save_reg = NDS32_PARTIAL_SAVE; + nds32_isr_vectors[i].nested_type = NDS32_NOT_NESTED; + nds32_isr_vectors[i].total_n_vectors = 0; + strcpy (nds32_isr_vectors[i].nmi_name, ""); + strcpy (nds32_isr_vectors[i].warm_name, ""); + } +} +/* At the end of the file, emit the isr vector information for every vector whose category is not NDS32_ISR_NONE. Nothing is emitted when all vectors are NDS32_ISR_NONE. */ +static void +nds32_asm_file_end (void) +{ + int i; + + /* If all the vectors are NDS32_ISR_NONE, we can return immediately. */ + for (i = 0; i < NDS32_N_ISR_VECTORS; i++) + if (nds32_isr_vectors[i].category != NDS32_ISR_NONE) + break; + + if (i == NDS32_N_ISR_VECTORS) + return; + + /* At least one vector is NOT NDS32_ISR_NONE, + we should output isr vector information. */ + fprintf (asm_out_file, "\t! ------------------------------------\n"); + fprintf (asm_out_file, "\t! The isr vector information:\n"); + fprintf (asm_out_file, "\t! ------------------------------------\n"); + + /* Check reset handler first. Its vector number is always 0. */ + if (nds32_isr_vectors[0].category == NDS32_ISR_RESET) + { + nds32_emit_isr_reset_content (); + fprintf (asm_out_file, "\t! ------------------------------------\n"); + } + + /* Check other vectors, starting from vector number 1. */ + for (i = 1; i < NDS32_N_ISR_VECTORS; i++) + { + if (nds32_isr_vectors[i].category == NDS32_ISR_INTERRUPT + || nds32_isr_vectors[i].category == NDS32_ISR_EXCEPTION) + { + /* Found one vector which is interrupt or exception. + Output its jmptbl and vector section content. */ + fprintf (asm_out_file, "\t! interrupt/exception vector %02d\n", i); + fprintf (asm_out_file, "\t! ------------------------------------\n"); + nds32_emit_isr_jmptbl_section (i); + fprintf (asm_out_file, "\t! ....................................\n"); + nds32_emit_isr_vector_section (i); + fprintf (asm_out_file, "\t! ------------------------------------\n"); + } + } + + fprintf (asm_out_file, "\t! 
------------------------------------\n"); +} + +/* -- Output and Generation of Labels. */ +/* Emit a .global directive for NAME on STREAM. */ +static void +nds32_asm_globalize_label (FILE *stream, const char *name) +{ + fputs ("\t.global\t", stream); + assemble_name (stream, name); + fputs ("\n", stream); +} + +/* -- Output of Assembler Instructions. */ +/* Print operand X to STREAM. With CODE 0 the operand is printed according to its rtx code. With CODE V the operand must be a CONST_INT naming a system/user register and the function returns right after printing it. Any other CODE is reported through output_operand_lossage. */ +static void +nds32_print_operand (FILE *stream, rtx x, int code) +{ + int op_value; + + switch (code) + { + case 0 : + /* Do nothing special. */ + break; + + case 'V': + /* 'x' is supposed to be CONST_INT, get the value. */ + gcc_assert (CONST_INT_P (x)); + op_value = INTVAL (x); + + /* According to the Andes architecture, + the system/user register index range is 0 ~ 1023. + In order to avoid conflict between user-specified-integer value + and enum-specified-register value, + the 'enum nds32_intrinsic_registers' value + in nds32_intrinsic.h starts from 1024. */ + if (op_value < 1024 && op_value >= 0) + { + /* If user gives integer value directly (0~1023), + we just print out the value. */ + fprintf (stream, "%d", op_value); + } + else if (op_value < 0 + || op_value >= ((int) ARRAY_SIZE (nds32_intrinsic_register_names) + + 1024)) + { + /* The enum index value for array size is out of range. */ + error ("intrinsic register index is out of range"); + } + else + { + /* If user applies normal way with __NDS32_REG_XXX__ enum data, + we can print out register name. Remember to subtract 1024. */ + fprintf (stream, "%s", + nds32_intrinsic_register_names[op_value - 1024]); + } + + /* No need to handle following process, so return immediately. */ + return; + + default : + /* Unknown flag. */ + output_operand_lossage ("invalid operand output code"); + break; + } + + switch (GET_CODE (x)) + { + case LABEL_REF: + case SYMBOL_REF: + output_addr_const (stream, x); + break; + + case REG: + /* Forbid using static chain register ($r16) + on reduced-set registers configuration. 
*/ + if (TARGET_REDUCED_REGS + && REGNO (x) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + + /* Normal cases, print out register name. */ + fputs (reg_names[REGNO (x)], stream); + break; + + case MEM: + output_address (XEXP (x, 0)); + break; + + case CODE_LABEL: + case CONST_INT: + case CONST: + output_addr_const (stream, x); + break; + + default: + /* Generally, output_addr_const () is able to handle most cases. + We want to see what CODE could appear, + so we use gcc_unreachable() to stop it. */ + debug_rtx (x); + gcc_unreachable (); + break; + } +} +/* Print memory address X to STREAM in assembler syntax. The handled rtx codes are SYMBOL_REF, CONST, REG, PLUS, POST_MODIFY, POST_INC and POST_DEC; any other code is dumped with debug_rtx and stops at gcc_unreachable. */ +static void +nds32_print_operand_address (FILE *stream, rtx x) +{ + rtx op0, op1; + + switch (GET_CODE (x)) + { + case SYMBOL_REF: + case CONST: + /* [ + symbol_ref] */ + /* [ + const_addr], where const_addr = symbol_ref + const_int */ + fputs ("[ + ", stream); + output_addr_const (stream, x); + fputs ("]", stream); + break; + + case REG: + /* Forbid using static chain register ($r16) + on reduced-set registers configuration. */ + if (TARGET_REDUCED_REGS + && REGNO (x) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + + /* [Ra] */ + fprintf (stream, "[%s]", reg_names[REGNO (x)]); + break; + + case PLUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + /* Checking op0, forbid using static chain register ($r16) + on reduced-set registers configuration. */ + if (TARGET_REDUCED_REGS + && REG_P (op0) + && REGNO (op0) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + /* Checking op1, forbid using static chain register ($r16) + on reduced-set registers configuration. 
*/ + if (TARGET_REDUCED_REGS + && REG_P (op1) + && REGNO (op1) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + + if (REG_P (op0) && CONST_INT_P (op1)) + { + /* [Ra + imm] */ + fprintf (stream, "[%s + (%d)]", + reg_names[REGNO (op0)], (int)INTVAL (op1)); + } + else if (REG_P (op0) && REG_P (op1)) + { + /* [Ra + Rb] */ + fprintf (stream, "[%s + %s]", + reg_names[REGNO (op0)], reg_names[REGNO (op1)]); + } + else if (GET_CODE (op0) == MULT && REG_P (op1)) + { + /* [Ra + Rb << sv] + From observation, the pattern looks like: + (plus:SI (mult:SI (reg:SI 58) + (const_int 4 [0x4])) + (reg/f:SI 57)) */ + int sv; + + /* Convert the multiplier 1, 2 or 4 into the shift value 0, 1 or 2. + Any other multiplier is not expected here. */ + if (INTVAL (XEXP (op0, 1)) == 1) + sv = 0; + else if (INTVAL (XEXP (op0, 1)) == 2) + sv = 1; + else if (INTVAL (XEXP (op0, 1)) == 4) + sv = 2; + else + gcc_unreachable (); + + fprintf (stream, "[%s + %s << %d]", + reg_names[REGNO (op1)], + reg_names[REGNO (XEXP (op0, 0))], + sv); + } + else + { + /* The control flow is not supposed to be here. */ + debug_rtx (x); + gcc_unreachable (); + } + + break; + + case POST_MODIFY: + /* (post_modify (regA) (plus (regA) (regB))) + (post_modify (regA) (plus (regA) (const_int))) + We would like to extract + regA and regB (or const_int) from plus rtx. */ + op0 = XEXP (XEXP (x, 1), 0); + op1 = XEXP (XEXP (x, 1), 1); + + /* Checking op0, forbid using static chain register ($r16) + on reduced-set registers configuration. */ + if (TARGET_REDUCED_REGS + && REG_P (op0) + && REGNO (op0) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + /* Checking op1, forbid using static chain register ($r16) + on reduced-set registers configuration. 
*/ + if (TARGET_REDUCED_REGS + && REG_P (op1) + && REGNO (op1) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + + if (REG_P (op0) && REG_P (op1)) + { + /* [Ra], Rb */ + fprintf (stream, "[%s], %s", + reg_names[REGNO (op0)], reg_names[REGNO (op1)]); + } + else if (REG_P (op0) && CONST_INT_P (op1)) + { + /* [Ra], imm */ + fprintf (stream, "[%s], %d", + reg_names[REGNO (op0)], (int)INTVAL (op1)); + } + else + { + /* The control flow is not supposed to be here. */ + debug_rtx (x); + gcc_unreachable (); + } + + break; + + case POST_INC: + case POST_DEC: + op0 = XEXP (x, 0); + + /* Checking op0, forbid using static chain register ($r16) + on reduced-set registers configuration. */ + if (TARGET_REDUCED_REGS + && REG_P (op0) + && REGNO (op0) == STATIC_CHAIN_REGNUM) + sorry ("a nested function is not supported for reduced registers"); + + if (REG_P (op0)) + { + /* "[Ra], 1/2/4" or "[Ra], -1/-2/-4" + The 1/2/4 or -1/-2/-4 have been displayed in nds32.md. + We only need to deal with register Ra. */ + fprintf (stream, "[%s]", reg_names[REGNO (op0)]); + } + else + { + /* The control flow is not supposed to be here. */ + debug_rtx (x); + gcc_unreachable (); + } + + break; + + default : + /* Generally, output_addr_const () is able to handle most cases. + We want to see what CODE could appear, + so we use gcc_unreachable() to stop it. */ + debug_rtx (x); + gcc_unreachable (); + break; + } +} + + +/* Defining target-specific uses of __attribute__. */ + +/* Merge the attributes of OLDDECL and NEWDECL and return the merged list. + For a function declaration the merged list is also checked + for isr-specific attribute conflicts. */ +static tree +nds32_merge_decl_attributes (tree olddecl, tree newdecl) +{ + tree combined_attrs; + + /* Create combined attributes. */ + combined_attrs = merge_attributes (DECL_ATTRIBUTES (olddecl), + DECL_ATTRIBUTES (newdecl)); + + /* Since newdecl is actually a duplicate of olddecl, + we can take olddecl for some operations. */ + if (TREE_CODE (olddecl) == FUNCTION_DECL) + { + /* Check isr-specific attributes conflict. 
*/ + nds32_check_isr_attrs_conflict (olddecl, combined_attrs); + } + + return combined_attrs; +} + +/* Add some checking when inserting attributes. + Only function declarations are examined; the attribute list + in *ATTRIBUTES is read but not modified, and invalid values + are reported with error (). */ +static void +nds32_insert_attributes (tree decl, tree *attributes) +{ + /* For function declaration, we need to check isr-specific attributes: + 1. Call nds32_check_isr_attrs_conflict() to check any conflict. + 2. Check valid integer value for interrupt/exception. + 3. Check valid integer value for reset. + 4. Check valid function for nmi/warm. */ + if (TREE_CODE (decl) == FUNCTION_DECL) + { + tree func_attrs; + tree intr, excp, reset; + + /* Pick up function attributes. */ + func_attrs = *attributes; + + /* 1. Call nds32_check_isr_attrs_conflict() to check any conflict. */ + nds32_check_isr_attrs_conflict (decl, func_attrs); + + /* Now check that the id values + for interrupt/exception/reset are valid. + Note that we ONLY check validity here. + Constructing the isr vector information is still performed + by nds32_construct_isr_vectors_information(). */ + intr = lookup_attribute ("interrupt", func_attrs); + excp = lookup_attribute ("exception", func_attrs); + reset = lookup_attribute ("reset", func_attrs); + + if (intr || excp) + { + /* Deal with interrupt/exception. */ + tree id_list; + unsigned int lower_bound, upper_bound; + + /* The way to handle interrupt or exception is the same, + we just need to take care of actual vector number. + For interrupt(0..63), the actual vector number is (9..72). + For exception(1..8), the actual vector number is (1..8). */ + lower_bound = (intr) ? (0) : (1); + upper_bound = (intr) ? (63) : (8); + + /* Prepare id list so that we can traverse id value. */ + id_list = (intr) ? (TREE_VALUE (intr)) : (TREE_VALUE (excp)); + + /* 2. Check valid integer value for interrupt/exception. */ + while (id_list) + { + tree id; + + /* Pick up each vector id value. */ + id = TREE_VALUE (id_list); + /* Issue error if it is not a valid integer value. 
*/ + if (TREE_CODE (id) != INTEGER_CST + || TREE_INT_CST_LOW (id) < lower_bound + || TREE_INT_CST_LOW (id) > upper_bound) + error ("invalid id value for interrupt/exception attribute"); + + /* Advance to next id. */ + id_list = TREE_CHAIN (id_list); + } + } + else if (reset) + { + /* Deal with reset. */ + tree id_list; + tree id; + tree nmi, warm; + unsigned int lower_bound; + unsigned int upper_bound; + + /* Prepare id_list and identify id value so that + we can check if total number of vectors is valid. */ + id_list = TREE_VALUE (reset); + id = TREE_VALUE (id_list); + + /* The maximum number of user interrupts is 64. */ + lower_bound = 0; + upper_bound = 64; + + /* 3. Check valid integer value for reset. */ + if (TREE_CODE (id) != INTEGER_CST + || TREE_INT_CST_LOW (id) < lower_bound + || TREE_INT_CST_LOW (id) > upper_bound) + error ("invalid id value for reset attribute"); + + /* 4. Check valid function for nmi/warm. */ + nmi = lookup_attribute ("nmi", func_attrs); + warm = lookup_attribute ("warm", func_attrs); + + if (nmi != NULL_TREE) + { + tree nmi_func_list; + tree nmi_func; + + nmi_func_list = TREE_VALUE (nmi); + nmi_func = TREE_VALUE (nmi_func_list); + + /* Issue error if it is not a valid nmi function. */ + if (TREE_CODE (nmi_func) != IDENTIFIER_NODE) + error ("invalid nmi function for reset attribute"); + } + + if (warm != NULL_TREE) + { + tree warm_func_list; + tree warm_func; + + warm_func_list = TREE_VALUE (warm); + warm_func = TREE_VALUE (warm_func_list); + + /* Issue error if it is not a valid warm function. */ + if (TREE_CODE (warm_func) != IDENTIFIER_NODE) + error ("invalid warm function for reset attribute"); + } + } + else + { + /* No interrupt, exception, or reset attribute is set. */ + return; + } + } +} +/* Pragma target parsing stub: both arguments are unused and the result is always false. */ +static bool +nds32_option_pragma_parse (tree args ATTRIBUTE_UNUSED, + tree pop_target ATTRIBUTE_UNUSED) +{ + /* Currently, we do not parse any pragma target by ourself, + so just simply return false. 
*/ + return false; +} +/* Adjust target_flags, fixed_regs and call_used_regs according to the selected ISA family and options, and reject -fpic with sorry (). */ +static void +nds32_option_override (void) +{ + /* After all the command options have been parsed, + we shall deal with some flags for changing compiler settings. */ + + /* At first, we check if we have to strictly + set some flags based on ISA family. */ + if (TARGET_ISA_V2) + { + /* Under V2 ISA, we need to strictly disable TARGET_V3PUSH. */ + target_flags &= ~MASK_V3PUSH; + } + if (TARGET_ISA_V3) + { + /* Under V3 ISA, currently nothing should be strictly set. */ + } + if (TARGET_ISA_V3M) + { + /* Under V3M ISA, we need to strictly enable TARGET_REDUCED_REGS. */ + target_flags |= MASK_REDUCED_REGS; + /* Under V3M ISA, we need to strictly disable TARGET_PERF_EXT. */ + target_flags &= ~MASK_PERF_EXT; + } + + /* See if we are using reduced-set registers: + $r0~$r5, $r6~$r10, $r15, $r28, $r29, $r30, $r31 + If so, we must forbid using $r11~$r14, $r16~$r27. */ + if (TARGET_REDUCED_REGS) + { + int r; + + /* Mark these registers as fixed and call-used so that + the register allocator never chooses them. */ + for (r = 11; r <= 14; r++) + fixed_regs[r] = call_used_regs[r] = 1; + for (r = 16; r <= 27; r++) + fixed_regs[r] = call_used_regs[r] = 1; + } + + /* See if user explicitly would like to use fp-as-gp optimization. + If so, we must prevent $fp from being allocated + during register allocation. */ + if (TARGET_FORCE_FP_AS_GP) + fixed_regs[FP_REGNUM] = call_used_regs[FP_REGNUM] = 1; + + if (!TARGET_16_BIT) + { + /* Without the 16-bit ISA, we need to strictly disable TARGET_V3PUSH. */ + target_flags &= ~MASK_V3PUSH; + } + + /* Currently, we do not support PIC code generation. */ + if (flag_pic) + sorry ("not support -fpic"); +} + + +/* Miscellaneous Parameters. 
*/ +/* Register the nds32 builtin functions (isync, isb, mfsr, mfusr, mtsr, mtusr, setgie_en, setgie_dis) with add_builtin_function, all as BUILT_IN_MD. */ +static void +nds32_init_builtins (void) +{ + tree pointer_type_node = build_pointer_type (integer_type_node); + + tree void_ftype_void = build_function_type (void_type_node, + void_list_node); + + tree void_ftype_pint = build_function_type_list (void_type_node, + pointer_type_node, + NULL_TREE); + + tree int_ftype_int = build_function_type_list (integer_type_node, + integer_type_node, + NULL_TREE); + + tree void_ftype_int_int = build_function_type_list (void_type_node, + integer_type_node, + integer_type_node, + NULL_TREE); + + /* Cache. */ + add_builtin_function ("__builtin_nds32_isync", void_ftype_pint, + NDS32_BUILTIN_ISYNC, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_nds32_isb", void_ftype_void, + NDS32_BUILTIN_ISB, + BUILT_IN_MD, NULL, NULL_TREE); + + /* Register Transfer. */ + add_builtin_function ("__builtin_nds32_mfsr", int_ftype_int, + NDS32_BUILTIN_MFSR, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_nds32_mfusr", int_ftype_int, + NDS32_BUILTIN_MFUSR, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_nds32_mtsr", void_ftype_int_int, + NDS32_BUILTIN_MTSR, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_nds32_mtusr", void_ftype_int_int, + NDS32_BUILTIN_MTUSR, + BUILT_IN_MD, NULL, NULL_TREE); + + /* Interrupt. */ + add_builtin_function ("__builtin_nds32_setgie_en", void_ftype_void, + NDS32_BUILTIN_SETGIE_EN, + BUILT_IN_MD, NULL, NULL_TREE); + add_builtin_function ("__builtin_nds32_setgie_dis", void_ftype_void, + NDS32_BUILTIN_SETGIE_DIS, + BUILT_IN_MD, NULL, NULL_TREE); +} +/* Expand the builtin call EXP, dispatching on the function code of the called declaration. SUBTARGET, MODE and IGNORE are unused. An unknown function code stops at gcc_unreachable. */ +static rtx +nds32_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + + int fcode = DECL_FUNCTION_CODE (fndecl); + + switch (fcode) + { + /* Cache. 
*/ + case NDS32_BUILTIN_ISYNC: + return nds32_expand_builtin_null_ftype_reg + (CODE_FOR_unspec_volatile_isync, exp, target); + case NDS32_BUILTIN_ISB: + /* Since there are no result and operands for isb instruction, + we can simply emit this rtx. */ + emit_insn (gen_unspec_volatile_isb ()); + return target; + + /* Register Transfer. */ + case NDS32_BUILTIN_MFSR: + return nds32_expand_builtin_reg_ftype_imm + (CODE_FOR_unspec_volatile_mfsr, exp, target); + case NDS32_BUILTIN_MFUSR: + return nds32_expand_builtin_reg_ftype_imm + (CODE_FOR_unspec_volatile_mfusr, exp, target); + case NDS32_BUILTIN_MTSR: + return nds32_expand_builtin_null_ftype_reg_imm + (CODE_FOR_unspec_volatile_mtsr, exp, target); + case NDS32_BUILTIN_MTUSR: + return nds32_expand_builtin_null_ftype_reg_imm + (CODE_FOR_unspec_volatile_mtusr, exp, target); + + /* Interrupt. */ + case NDS32_BUILTIN_SETGIE_EN: + /* Since there are no result and operands for setgie.e instruction, + we can simply emit this rtx. */ + emit_insn (gen_unspec_volatile_setgie_en ()); + return target; + case NDS32_BUILTIN_SETGIE_DIS: + /* Since there are no result and operands for setgie.d instruction, + we can simply emit this rtx. */ + emit_insn (gen_unspec_volatile_setgie_dis ()); + return target; + + default: + gcc_unreachable (); + } + + return NULL_RTX; +} + + +/* ------------------------------------------------------------------------ */ + +/* PART 4: Implement extern function definitions, + the prototypes are in nds32-protos.h. */ + +/* Defining Data Structures for Per-function Information. */ + +void +nds32_init_expanders (void) +{ + /* Arrange to initialize and mark the machine per-function status. */ + init_machine_status = nds32_init_machine_status; +} + + +/* Register Usage. */ + +/* -- How Values Fit in Registers. 
*/ +/* Return the number of words needed to hold a value of MODE, rounding the mode size up to a multiple of UNITS_PER_WORD. REGNO is unused. */ +int +nds32_hard_regno_nregs (int regno ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD); +} +/* Return 1 if a value of MODE may be held starting at register REGNO, otherwise 0. */ +int +nds32_hard_regno_mode_ok (int regno, enum machine_mode mode) +{ + /* Restrict multi-register quantities to start at an even register number. + A mode that fits in one register is accepted in any register. */ + if (HARD_REGNO_NREGS (regno, mode) == 1 + || !((regno) & 1)) + return 1; + + return 0; +} + + +/* Register Classes. */ +/* Map register number REGNO to its register class. Numbers outside the range 0..33 map to NO_REGS. */ +enum reg_class +nds32_regno_reg_class (int regno) +{ + /* Refer to nds32.h for more register class details. */ + + if (regno >= 0 && regno <= 7) + return LOW_REGS; + else if (regno >= 8 && regno <= 11) + return MIDDLE_REGS; + else if (regno >= 12 && regno <= 14) + return HIGH_REGS; + else if (regno == 15) + return R15_TA_REG; + else if (regno >= 16 && regno <= 19) + return MIDDLE_REGS; + else if (regno >= 20 && regno <= 31) + return HIGH_REGS; + else if (regno == 32 || regno == 33) + return FRAME_REGS; + else + return NO_REGS; +} + + +/* Stack Layout and Calling Conventions. */ + +/* -- Basic Stack Layout. */ +/* Return an rtx for the return address of the current frame (COUNT == 0), or NULL_RTX for any outer frame. FRAMEADDR is unused. */ +rtx +nds32_return_addr_rtx (int count, + rtx frameaddr ATTRIBUTE_UNUSED) +{ + /* There is no way to determine the return address + if frameaddr is the frame that has 'count' steps + up from current frame. */ + if (count != 0) + return NULL_RTX; + + /* If count == 0, it means we are at current frame, + the return address is $r30 ($lp). */ + return get_hard_reg_initial_val (Pmode, LP_REGNUM); +} + +/* -- Eliminating Frame Pointer and Arg Pointer. */ +/* Return the offset to use when eliminating FROM_REG in favor of TO_REG. The stack frame is recomputed first; only the four register pairs tested below are supported. */ +HOST_WIDE_INT +nds32_initial_elimination_offset (unsigned int from_reg, unsigned int to_reg) +{ + HOST_WIDE_INT offset; + + /* Compute and setup stack frame size. + The result will be in cfun->machine. */ + nds32_compute_stack_frame (); + + /* Remember to consider + cfun->machine->callee_saved_area_padding_bytes + when calculating offset. 
*/ + if (from_reg == ARG_POINTER_REGNUM && to_reg == STACK_POINTER_REGNUM) + { + offset = (cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_regs_size + + cfun->machine->callee_saved_area_padding_bytes + + cfun->machine->local_size + + cfun->machine->out_args_size); + } + else if (from_reg == ARG_POINTER_REGNUM + && to_reg == HARD_FRAME_POINTER_REGNUM) + { + offset = 0; + } + else if (from_reg == FRAME_POINTER_REGNUM + && to_reg == STACK_POINTER_REGNUM) + { + offset = (cfun->machine->local_size + cfun->machine->out_args_size); + } + else if (from_reg == FRAME_POINTER_REGNUM + && to_reg == HARD_FRAME_POINTER_REGNUM) + { + offset = (-1) * (cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_regs_size + + cfun->machine->callee_saved_area_padding_bytes); + } + else + { + gcc_unreachable (); + } + + return offset; +} + +/* -- Passing Arguments in Registers. */ +/* Initialize CUM for a new argument scan by resetting its register offset to 0. All other parameters are unused. */ +void +nds32_init_cumulative_args (CUMULATIVE_ARGS *cum, + tree fntype ATTRIBUTE_UNUSED, + rtx libname ATTRIBUTE_UNUSED, + tree fndecl ATTRIBUTE_UNUSED, + int n_named_args ATTRIBUTE_UNUSED) +{ + /* Initial available registers + (in offset, corresponding to NDS32_GPR_ARG_FIRST_REGNUM) + for passing arguments. */ + cum->reg_offset = 0; +} + +/* -- Function Entry and Exit. */ + +/* Function for normal multiple push prologue. + It emits, in order, an optional multiple push, an optional $fp + adjustment, an optional $sp adjustment and a blockage. + Nothing is emitted for a naked function. */ +void +nds32_expand_prologue (void) +{ + int fp_adjust; + int sp_adjust; + int en4_const; + + rtx Rb, Re; + rtx push_insn; + rtx fp_adjust_insn, sp_adjust_insn; + + /* Before computing everything for stack frame size, + we check if it is still worth using fp_as_gp optimization. + If it is, the 'df_regs_ever_live_p (FP_REGNUM)' will be set + so that $fp will be saved on stack. */ + cfun->machine->fp_as_gp_p = nds32_fp_as_gp_check_available (); + + /* Compute and setup stack frame size. + The result will be in cfun->machine. 
*/ + nds32_compute_stack_frame (); + + /* If the function is 'naked', + we do not have to generate prologue code fragment. */ + if (cfun->machine->naked_p) + return; + + /* Get callee_first_regno and callee_last_regno. */ + Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_first_regno); + Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_last_regno); + + /* push_insn = gen_stack_push_multiple(first_regno, last_regno), + the pattern 'stack_push_multiple' is implemented in nds32.md. + For En4 field, we have to calculate its constant value: + add 8 if fp_size is nonzero, 4 if gp_size is nonzero + and 2 if lp_size is nonzero. + Refer to Andes ISA for more information. */ + en4_const = 0; + if (cfun->machine->fp_size) + en4_const += 8; + if (cfun->machine->gp_size) + en4_const += 4; + if (cfun->machine->lp_size) + en4_const += 2; + + /* If $fp, $gp, $lp, and all callee-save registers are NOT required + to be saved, we do not have to create multiple push instruction. + Otherwise, a multiple push instruction is needed. */ + if (!(REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM && en4_const == 0)) + { + /* Create multiple push instruction rtx. */ + push_insn = nds32_gen_stack_push_multiple (Rb, Re, GEN_INT (en4_const)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + push_insn = emit_insn (push_insn); + + /* The insn rtx 'push_insn' will change frame layout. + We need to use RTX_FRAME_RELATED_P so that GCC is able to + generate CFI (Call Frame Information) stuff. */ + RTX_FRAME_RELATED_P (push_insn) = 1; + } + + /* Check frame_pointer_needed to see + if we shall emit fp adjustment instruction. */ + if (frame_pointer_needed) + { + /* adjust $fp = $sp + ($fp size) + ($gp size) + ($lp size) + + (4 * callee-saved-registers) + Note: No need to adjust + cfun->machine->callee_saved_area_padding_bytes, + because, at this point, stack pointer is just + at the position after push instruction. 
*/ + fp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_regs_size; + fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, + GEN_INT (fp_adjust)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + fp_adjust_insn = emit_insn (fp_adjust_insn); + } + + /* Adjust $sp = $sp - local_size - out_args_size + - callee_saved_area_padding_bytes. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_padding_bytes; + /* sp_adjust value may be out of range of the addi instruction, + create alternative add behavior with TA_REGNUM if necessary, + using NEGATIVE value to tell that we are decreasing address. */ + sp_adjust = nds32_force_addi_stack_int ( (-1) * sp_adjust); + if (sp_adjust) + { + /* Generate sp adjustment instruction if and only if sp_adjust != 0. */ + sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-1 * sp_adjust)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + sp_adjust_insn = emit_insn (sp_adjust_insn); + + /* The insn rtx 'sp_adjust_insn' will change frame layout. + We need to use RTX_FRAME_RELATED_P so that GCC is able to + generate CFI (Call Frame Information) stuff. */ + RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; + } + + /* Prevent the instruction scheduler from + moving instructions across the boundary. */ + emit_insn (gen_blockage ()); +} + +/* Function for normal multiple pop epilogue. + It emits a blockage, restores $sp, emits an optional multiple pop + and ends with the func_return pattern. + A naked function gets only the blockage and the return. */ +void +nds32_expand_epilogue (void) +{ + int sp_adjust; + int en4_const; + + rtx Rb, Re; + rtx pop_insn; + rtx sp_adjust_insn; + + /* Compute and setup stack frame size. + The result will be in cfun->machine. */ + nds32_compute_stack_frame (); + + /* Prevent the instruction scheduler from + moving instructions across the boundary. 
*/ + emit_insn (gen_blockage ()); + + /* If the function is 'naked', we do not have to generate + any epilogue code except the 'ret' instruction. */ + if (cfun->machine->naked_p) + { + /* Generate return instruction by using + unspec_volatile_func_return pattern. + Make sure this instruction is after gen_blockage(). + NOTE that $lp will become 'live' + after this instruction has been emitted. */ + emit_insn (gen_unspec_volatile_func_return ()); + return; + } + + if (frame_pointer_needed) + { + /* adjust $sp = $fp - ($fp size) - ($gp size) - ($lp size) + - (4 * callee-saved-registers) + Note: No need to adjust + cfun->machine->callee_saved_area_padding_bytes, + because we want to adjust stack pointer + to the position for pop instruction. */ + sp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_regs_size; + sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (-1 * sp_adjust)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + sp_adjust_insn = emit_insn (sp_adjust_insn); + } + else + { + /* If frame pointer is NOT needed, + we cannot calculate the sp adjustment from frame pointer. + Instead, we calculate the adjustment by local_size, + out_args_size, and callee_saved_area_padding_bytes. + Notice that such sp adjustment value may be out of range, + so we have to deal with it as well. */ + + /* Adjust $sp = $sp + local_size + out_args_size + + callee_saved_area_padding_bytes. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_padding_bytes; + /* sp_adjust value may be out of range of the addi instruction, + create alternative add behavior with TA_REGNUM if necessary, + using POSITIVE value to tell that we are increasing address. */ + sp_adjust = nds32_force_addi_stack_int (sp_adjust); + if (sp_adjust) + { + /* Generate sp adjustment instruction + if and only if sp_adjust != 0. 
*/ + sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (sp_adjust)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + sp_adjust_insn = emit_insn (sp_adjust_insn); + } + } + + /* Get callee_first_regno and callee_last_regno. */ + Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_first_regno); + Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_last_regno); + + /* pop_insn = gen_stack_pop_multiple(first_regno, last_regno), + the pattern 'stack_pop_multiple' is implemented in nds32.md. + For En4 field, we have to calculate its constant value. + Refer to Andes ISA for more information. */ + en4_const = 0; + if (cfun->machine->fp_size) + en4_const += 8; + if (cfun->machine->gp_size) + en4_const += 4; + if (cfun->machine->lp_size) + en4_const += 2; + + /* If $fp, $gp, $lp, and all callee-save registers are NOT required + to be saved, we do not have to create multiple pop instruction. + Otherwise, a multiple pop instruction is needed. */ + if (!(REGNO (Rb) == SP_REGNUM && REGNO (Re) == SP_REGNUM && en4_const == 0)) + { + /* Create multiple pop instruction rtx. */ + pop_insn = nds32_gen_stack_pop_multiple (Rb, Re, GEN_INT (en4_const)); + /* Emit pop instruction. */ + emit_insn (pop_insn); + } + + /* Generate return instruction by using + unspec_volatile_func_return pattern. */ + emit_insn (gen_unspec_volatile_func_return ()); +} + +/* Function for v3push prologue. */ +void +nds32_expand_prologue_v3push (void) +{ + int fp_adjust; + int sp_adjust; + + rtx Rb, Re; + rtx push_insn; + rtx fp_adjust_insn, sp_adjust_insn; + + /* Before computing everything for stack frame size, + we check if it is still worth using fp_as_gp optimization. + If it is, the 'df_regs_ever_live_p (FP_REGNUM)' will be set + so that $fp will be saved on stack. */ + cfun->machine->fp_as_gp_p = nds32_fp_as_gp_check_available (); + + /* Compute and setup stack frame size. + The result will be in cfun->machine. 
*/ + nds32_compute_stack_frame (); + + /* If the function is 'naked', + we do not have to generate prologue code fragment. */ + if (cfun->machine->naked_p) + return; + + /* Get callee_first_regno and callee_last_regno. */ + Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_first_regno); + Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_last_regno); + + /* Calculate sp_adjust first to test if 'push25 Re,imm8u' is available, + where imm8u has to be 8-byte aligned. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_padding_bytes; + + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) + { + /* We can use 'push25 Re,imm8u'. */ + + /* push_insn = gen_stack_v3push(last_regno, sp_adjust), + the pattern 'stack_v3push' is implemented in nds32.md. + The (const_int 14) means v3push always pushes { $fp $gp $lp }. */ + push_insn = nds32_gen_stack_v3push (Rb, Re, + GEN_INT (14), GEN_INT (sp_adjust)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + push_insn = emit_insn (push_insn); + + /* The insn rtx 'push_insn' will change frame layout. + We need to use RTX_FRAME_RELATED_P so that GCC is able to + generate CFI (Call Frame Information) stuff. */ + RTX_FRAME_RELATED_P (push_insn) = 1; + + /* Check frame_pointer_needed to see + if we shall emit fp adjustment instruction. */ + if (frame_pointer_needed) + { + /* adjust $fp = $sp + 4 ($fp size) + + 4 ($gp size) + + 4 ($lp size) + + (4 * n) (callee-saved registers) + + sp_adjust ('push25 Re,imm8u') + Note: Since we use 'push25 Re,imm8u', + the position of stack pointer is further + changed after push instruction. + Hence, we need to take sp_adjust value + into consideration. 
*/ + fp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_regs_size + + sp_adjust; + fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, + GEN_INT (fp_adjust)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + fp_adjust_insn = emit_insn (fp_adjust_insn); + } + } + else + { + /* We have to use 'push25 Re,0' and + expand one more instruction to adjust $sp later. */ + + /* push_insn = gen_stack_v3push(last_regno, 0), + the pattern 'stack_v3push' is implemented in nds32.md. + The (const_int 14) means v3push always pushes { $fp $gp $lp }. */ + push_insn = nds32_gen_stack_v3push (Rb, Re, + GEN_INT (14), GEN_INT (0)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + push_insn = emit_insn (push_insn); + + /* The insn rtx 'push_insn' will change frame layout. + We need to use RTX_FRAME_RELATED_P so that GCC is able to + generate CFI (Call Frame Information) stuff. */ + RTX_FRAME_RELATED_P (push_insn) = 1; + + /* Check frame_pointer_needed to see + if we shall emit fp adjustment instruction. */ + if (frame_pointer_needed) + { + /* adjust $fp = $sp + 4 ($fp size) + + 4 ($gp size) + + 4 ($lp size) + + (4 * n) (callee-saved registers) + Note: Since we use 'push25 Re,0', + the stack pointer is just at the position + after push instruction. + No need to take sp_adjust into consideration. */ + fp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_regs_size; + fp_adjust_insn = gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, + GEN_INT (fp_adjust)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + fp_adjust_insn = emit_insn (fp_adjust_insn); + } + + /* Because we use 'push25 Re,0', + we need to expand one more instruction to adjust $sp. 
+ However, sp_adjust value may be out of range of the addi instruction, + create alternative add behavior with TA_REGNUM if necessary, + using NEGATIVE value to tell that we are decreasing address. */ + sp_adjust = nds32_force_addi_stack_int ( (-1) * sp_adjust); + if (sp_adjust) + { + /* Generate sp adjustment instruction + if and only if sp_adjust != 0. */ + sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-1 * sp_adjust)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + sp_adjust_insn = emit_insn (sp_adjust_insn); + + /* The insn rtx 'sp_adjust_insn' will change frame layout. + We need to use RTX_FRAME_RELATED_P so that GCC is able to + generate CFI (Call Frame Information) stuff. */ + RTX_FRAME_RELATED_P (sp_adjust_insn) = 1; + } + } + + /* Prevent the instruction scheduler from + moving instructions across the boundary. */ + emit_insn (gen_blockage ()); +} + +/* Function for v3pop epilogue. */ +void +nds32_expand_epilogue_v3pop (void) +{ + int sp_adjust; + + rtx Rb, Re; + rtx pop_insn; + rtx sp_adjust_insn; + + /* Compute and setup stack frame size. + The result will be in cfun->machine. */ + nds32_compute_stack_frame (); + + /* Prevent the instruction scheduler from + moving instructions across the boundary. */ + emit_insn (gen_blockage ()); + + /* If the function is 'naked', we do not have to generate + epilogue code fragment BUT 'ret' instruction. */ + if (cfun->machine->naked_p) + { + /* Generate return instruction by using + unspec_volatile_func_return pattern. + Make sure this instruction is after gen_blockage(). + NOTE that $lp will become 'live' + after this instruction has been emitted. */ + emit_insn (gen_unspec_volatile_func_return ()); + return; + } + + /* Get callee_first_regno and callee_last_regno. 
*/ + Rb = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_first_regno); + Re = gen_rtx_REG (SImode, cfun->machine->callee_saved_regs_last_regno); + + /* Calculate sp_adjust first to test if 'pop25 Re,imm8u' is available, + where imm8u has to be 8-byte alignment. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_padding_bytes; + + /* We have to consider alloca issue as well. + If the function does call alloca(), the stack pointer is not fixed. + In that case, we cannot use 'pop25 Re,imm8u' directly. + We have to caculate stack pointer from frame pointer + and then use 'pop25 Re,0'. + Of course, the frame_pointer_needed should be nonzero + if the function calls alloca(). */ + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) + && !cfun->calls_alloca) + { + /* We can use 'pop25 Re,imm8u'. */ + + /* pop_insn = gen_stack_v3pop(last_regno, sp_adjust), + the pattern 'stack_v3pop' is implementad in nds32.md. + The (const_int 14) means v3pop always pop { $fp $gp $lp }. */ + pop_insn = nds32_gen_stack_v3pop (Rb, Re, + GEN_INT (14), GEN_INT (sp_adjust)); + + /* Emit pop instruction. */ + emit_insn (pop_insn); + } + else + { + /* We have to use 'pop25 Re,0', and prior to it, + we must expand one more instruction to adjust $sp. */ + + if (frame_pointer_needed) + { + /* adjust $sp = $fp - 4 ($fp size) + - 4 ($gp size) + - 4 ($lp size) + - (4 * n) (callee-saved registers) + Note: No need to adjust + cfun->machine->callee_saved_area_padding_bytes, + because we want to adjust stack pointer + to the position for pop instruction. */ + sp_adjust = cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_regs_size; + sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (-1 * sp_adjust)); + /* Emit rtx into instructions list and receive INSN rtx form. 
*/ + sp_adjust_insn = emit_insn (sp_adjust_insn); + } + else + { + /* If frame pointer is NOT needed, + we cannot calculate the sp adjustment from frame pointer. + Instead, we calculate the adjustment by local_size, + out_args_size, and callee_saved_area_padding_bytes. + Notice that such sp adjustment value may be out of range, + so we have to deal with it as well. */ + + /* Adjust $sp = $sp + local_size + out_args_size + + callee_saved_area_padding_bytes. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_padding_bytes; + /* sp_adjust value may be out of range of the addi instruction, + create alternative add behavior with TA_REGNUM if necessary, + using POSITIVE value to tell that we are increasing address. */ + sp_adjust = nds32_force_addi_stack_int (sp_adjust); + if (sp_adjust) + { + /* Generate sp adjustment instruction + if and only if sp_adjust != 0. */ + sp_adjust_insn = gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (sp_adjust)); + /* Emit rtx into instructions list and receive INSN rtx form. */ + sp_adjust_insn = emit_insn (sp_adjust_insn); + } + } + + /* pop_insn = gen_stack_v3pop(last_regno, sp_adjust), + the pattern 'stack_v3pop' is implementad in nds32.md. */ + /* The (const_int 14) means v3pop always pop { $fp $gp $lp }. */ + pop_insn = nds32_gen_stack_v3pop (Rb, Re, + GEN_INT (14), GEN_INT (0)); + + /* Emit pop instruction. */ + emit_insn (pop_insn); + } +} + +/* ------------------------------------------------------------------------ */ + +/* Function to test 333-form for load/store instructions. + This is auxiliary extern function for auxiliary macro in nds32.h. + Because it is a little complicated, we use function instead of macro. 
*/ +bool +nds32_ls_333_p (rtx rt, rtx ra, rtx imm, enum machine_mode mode) +{ + if (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS + && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS) + { + if (GET_MODE_SIZE (mode) == 4) + return satisfies_constraint_Iu05 (imm); + + if (GET_MODE_SIZE (mode) == 2) + return satisfies_constraint_Iu04 (imm); + + if (GET_MODE_SIZE (mode) == 1) + return satisfies_constraint_Iu03 (imm); + } + + return false; +} + + +/* Functions to expand load_multiple and store_multiple. + They are auxiliary extern functions to help create rtx template. + Check nds32-multiple.md file for the patterns. */ +rtx +nds32_expand_load_multiple (int base_regno, int count, + rtx base_addr, rtx basemem) +{ + int par_index; + int offset; + rtx result; + rtx new_addr, mem, reg; + + /* Create the pattern that is presented in nds32-multiple.md. */ + + result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); + + for (par_index = 0; par_index < count; par_index++) + { + offset = par_index * 4; + /* 4-byte for loading data to each register. */ + new_addr = plus_constant (Pmode, base_addr, offset); + mem = adjust_automodify_address_nv (basemem, SImode, + new_addr, offset); + reg = gen_rtx_REG (SImode, base_regno + par_index); + + XVECEXP (result, 0, par_index) = gen_rtx_SET (VOIDmode, reg, mem); + } + + return result; +} + +rtx +nds32_expand_store_multiple (int base_regno, int count, + rtx base_addr, rtx basemem) +{ + int par_index; + int offset; + rtx result; + rtx new_addr, mem, reg; + + /* Create the pattern that is presented in nds32-multiple.md. */ + + result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); + + for (par_index = 0; par_index < count; par_index++) + { + offset = par_index * 4; + /* 4-byte for storing data to memory. 
*/ + new_addr = plus_constant (Pmode, base_addr, offset); + mem = adjust_automodify_address_nv (basemem, SImode, + new_addr, offset); + reg = gen_rtx_REG (SImode, base_regno + par_index); + + XVECEXP (result, 0, par_index) = gen_rtx_SET (VOIDmode, mem, reg); + } + + return result; +} + +/* Function to move block memory content by + using load_multiple and store_multiple. + This is auxiliary extern function to help create rtx template. + Check nds32-multiple.md file for the patterns. */ +int +nds32_expand_movmemqi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) +{ + HOST_WIDE_INT in_words, out_words; + rtx dst_base_reg, src_base_reg; + int maximum_bytes; + + /* Because reduced-set regsiters has few registers + (r0~r5, r6~10, r15, r28~r31, where 'r15' and 'r28~r31' + cannot be used for register allocation), + using 8 registers (32 bytes) for moving memory block + may easily consume all of them. + It makes register allocation/spilling hard to work. + So we only allow maximum=4 registers (16 bytes) for + moving memory block under reduced-set registers. */ + if (TARGET_REDUCED_REGS) + maximum_bytes = 16; + else + maximum_bytes = 32; + + /* 1. Total_bytes is integer for sure. + 2. Alignment is integer for sure. + 3. Maximum 4 or 8 registers, 4 * 4 = 16 bytes, 8 * 4 = 32 bytes. + 4. Requires (n * 4) block size. + 5. Requires 4-byte alignment. */ + if (GET_CODE (total_bytes) != CONST_INT + || GET_CODE (alignment) != CONST_INT + || INTVAL (total_bytes) > maximum_bytes + || INTVAL (total_bytes) & 3 + || INTVAL (alignment) & 3) + return 0; + + dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0)); + src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0)); + + out_words = in_words = INTVAL (total_bytes) / UNITS_PER_WORD; + + emit_insn (nds32_expand_load_multiple (0, in_words, src_base_reg, srcmem)); + emit_insn (nds32_expand_store_multiple (0, out_words, dst_base_reg, dstmem)); + + /* Successfully create patterns, return 1. 
*/ + return 1; +} + +/* Function to check whether the OP is a valid load/store operation. + This is a helper function for the predicates: + 'nds32_load_multiple_operation' and 'nds32_store_multiple_operation' + in predicates.md file. + + The OP is supposed to be a parallel rtx. + For each element within this parallel rtx: + (set (reg) (mem addr)) is the form for load operation. + (set (mem addr) (reg)) is the form for store operation. + We have to extract reg and mem of every element and + check if the information is valid for multiple load/store operation. */ +bool +nds32_valid_multiple_load_store (rtx op, bool load_p) +{ + int count; + int first_elt_regno; + rtx elt; + + /* Get the counts of elements in the parallel rtx. */ + count = XVECLEN (op, 0); + /* Pick up the first element. */ + elt = XVECEXP (op, 0, 0); + + /* Perform some quick check for the first element in the parallel rtx. */ + if (GET_CODE (elt) != SET + || count <= 1 + || count > 8) + return false; + + /* Pick up regno of first element for further detail checking. + Note that the form is different between load and store operation. */ + if (load_p) + { + if (GET_CODE (SET_DEST (elt)) != REG + || GET_CODE (SET_SRC (elt)) != MEM) + return false; + + first_elt_regno = REGNO (SET_DEST (elt)); + } + else + { + if (GET_CODE (SET_SRC (elt)) != REG + || GET_CODE (SET_DEST (elt)) != MEM) + return false; + + first_elt_regno = REGNO (SET_SRC (elt)); + } + + /* Perform detail check for each element. + Refer to nds32-multiple.md for more information + about following checking. + The starting element of parallel rtx is index 0. */ + if (!nds32_consecutive_registers_load_store_p (op, load_p, 0, + first_elt_regno, + count)) + return false; + + /* Pass all test, this is a valid rtx. */ + return true; +} + +/* Function to check whether the OP is a valid stack push/pop operation. + For a valid stack operation, it must satisfy following conditions: + 1. Consecutive registers push/pop operations. + 2. 
Valid $fp/$gp/$lp push/pop operations. + 3. The last element must be stack adjustment rtx. + See the prologue/epilogue implementation for details. */ +bool +nds32_valid_stack_push_pop (rtx op, bool push_p) +{ + int index; + int total_count; + int rest_count; + int first_regno; + rtx elt; + rtx elt_reg; + rtx elt_mem; + rtx elt_plus; + + /* Get the counts of elements in the parallel rtx. */ + total_count = XVECLEN (op, 0); + + /* Perform some quick check for that every element should be 'set'. */ + for (index = 0; index < total_count; index++) + { + elt = XVECEXP (op, 0, index); + if (GET_CODE (elt) != SET) + return false; + } + + /* For push operation, the parallel rtx looks like: + (parallel [(set (mem (plus (reg:SI SP_REGNUM) (const_int -32))) + (reg:SI Rb)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -28))) + (reg:SI Rb+1)) + ... + (set (mem (plus (reg:SI SP_REGNUM) (const_int -16))) + (reg:SI Re)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -12))) + (reg:SI FP_REGNUM)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -8))) + (reg:SI GP_REGNUM)) + (set (mem (plus (reg:SI SP_REGNUM) (const_int -4))) + (reg:SI LP_REGNUM)) + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int -32)))]) + + For pop operation, the parallel rtx looks like: + (parallel [(set (reg:SI Rb) + (mem (reg:SI SP_REGNUM))) + (set (reg:SI Rb+1) + (mem (plus (reg:SI SP_REGNUM) (const_int 4)))) + ... + (set (reg:SI Re) + (mem (plus (reg:SI SP_REGNUM) (const_int 16)))) + (set (reg:SI FP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 20)))) + (set (reg:SI GP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 24)))) + (set (reg:SI LP_REGNUM) + (mem (plus (reg:SI SP_REGNUM) (const_int 28)))) + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int 32)))]) */ + + /* 1. Consecutive registers push/pop operations. + We need to calculate how many registers should be consecutive. + The $sp adjustment rtx, $fp push rtx, $gp push rtx, + and $lp push rtx are excluded. 
*/ + + /* Exclude last $sp adjustment rtx. */ + rest_count = total_count - 1; + /* Exclude $fp, $gp, and $lp if they are in the parallel rtx. */ + if (cfun->machine->fp_size) + rest_count--; + if (cfun->machine->gp_size) + rest_count--; + if (cfun->machine->lp_size) + rest_count--; + + if (rest_count > 0) + { + elt = XVECEXP (op, 0, 0); + /* Pick up register element. */ + elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); + first_regno = REGNO (elt_reg); + + /* The 'push' operation is a kind of store operation. + The 'pop' operation is a kind of load operation. + Pass corresponding false/true as second argument (bool load_p). + The par_index is supposed to start with index 0. */ + if (!nds32_consecutive_registers_load_store_p (op, + !push_p ? true : false, + 0, + first_regno, + rest_count)) + return false; + } + + /* 2. Valid $fp/$gp/$lp push/pop operations. + Remember to set start index for checking them. */ + + /* The rest_count is the start index for checking $fp/$gp/$lp. */ + index = rest_count; + /* If index < 0, this parallel rtx is definitely + not a valid stack push/pop operation. */ + if (index < 0) + return false; + + /* Check $fp/$gp/$lp one by one. + We use 'push_p' to pick up reg rtx and mem rtx. */ + if (cfun->machine->fp_size) + { + elt = XVECEXP (op, 0, index); + elt_mem = push_p ? SET_DEST (elt) : SET_SRC (elt); + elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); + index++; + + if (GET_CODE (elt_mem) != MEM + || GET_CODE (elt_reg) != REG + || REGNO (elt_reg) != FP_REGNUM) + return false; + } + if (cfun->machine->gp_size) + { + elt = XVECEXP (op, 0, index); + elt_mem = push_p ? SET_DEST (elt) : SET_SRC (elt); + elt_reg = push_p ? SET_SRC (elt) : SET_DEST (elt); + index++; + + if (GET_CODE (elt_mem) != MEM + || GET_CODE (elt_reg) != REG + || REGNO (elt_reg) != GP_REGNUM) + return false; + } + if (cfun->machine->lp_size) + { + elt = XVECEXP (op, 0, index); + elt_mem = push_p ? SET_DEST (elt) : SET_SRC (elt); + elt_reg = push_p ? 
SET_SRC (elt) : SET_DEST (elt); + index++; + + if (GET_CODE (elt_mem) != MEM + || GET_CODE (elt_reg) != REG + || REGNO (elt_reg) != LP_REGNUM) + return false; + } + + /* 3. The last element must be stack adjustment rtx. + Its form of rtx should be: + (set (reg:SI SP_REGNUM) + (plus (reg:SI SP_REGNUM) (const_int X))) + The X could be positive or negative value. */ + + /* Pick up the last element. */ + elt = XVECEXP (op, 0, total_count - 1); + + /* Extract its destination and source rtx. */ + elt_reg = SET_DEST (elt); + elt_plus = SET_SRC (elt); + + /* Check this is (set (stack_reg) (plus stack_reg const)) pattern. */ + if (GET_CODE (elt_reg) != REG + || GET_CODE (elt_plus) != PLUS + || REGNO (elt_reg) != SP_REGNUM) + return false; + + /* Pass all test, this is a valid rtx. */ + return true; +} + +/* Computing the Length of an Insn. + Modifies the length assigned to instruction INSN. + LEN is the initially computed length of the insn. */ +int +nds32_adjust_insn_length (rtx insn, int length) +{ + rtx src, dst; + + switch (recog_memoized (insn)) + { + case CODE_FOR_move_df: + case CODE_FOR_move_di: + /* Adjust length of movd44 to 2. */ + src = XEXP (PATTERN (insn), 1); + dst = XEXP (PATTERN (insn), 0); + + if (REG_P (src) + && REG_P (dst) + && (REGNO (src) % 2) == 0 + && (REGNO (dst) % 2) == 0) + length = 2; + break; + + default: + break; + } + + return length; +} + + +/* Function to check if 'bclr' instruction can be used with IVAL. */ +int +nds32_can_use_bclr_p (int ival) +{ + int one_bit_count; + + /* Calculate the number of 1-bit of (~ival), if there is only one 1-bit, + it means the original ival has only one 0-bit, + So it is ok to perform 'bclr' operation. */ + + one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (~ival)); + + /* 'bclr' is a performance extension instruction. */ + return (TARGET_PERF_EXT && (one_bit_count == 1)); +} + +/* Function to check if 'bset' instruction can be used with IVAL. 
*/ +int +nds32_can_use_bset_p (int ival) +{ + int one_bit_count; + + /* Caculate the number of 1-bit of ival, if there is only one 1-bit, + it is ok to perform 'bset' operation. */ + + one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival)); + + /* 'bset' is a performance extension instruction. */ + return (TARGET_PERF_EXT && (one_bit_count == 1)); +} + +/* Function to check if 'btgl' instruction can be used with IVAL. */ +int +nds32_can_use_btgl_p (int ival) +{ + int one_bit_count; + + /* Caculate the number of 1-bit of ival, if there is only one 1-bit, + it is ok to perform 'btgl' operation. */ + + one_bit_count = popcount_hwi ((unsigned HOST_WIDE_INT) (ival)); + + /* 'btgl' is a performance extension instruction. */ + return (TARGET_PERF_EXT && (one_bit_count == 1)); +} + +/* Function to check if 'bitci' instruction can be used with IVAL. */ +int +nds32_can_use_bitci_p (int ival) +{ + /* If we are using V3 ISA, we have 'bitci' instruction. + Try to see if we can present 'andi' semantic with + such 'bit-clear-immediate' operation. + For example, 'andi $r0,$r0,0xfffffffc' can be + presented with 'bitci $r0,$r0,3'. */ + return (TARGET_ISA_V3 + && (ival < 0) + && satisfies_constraint_Iu15 (gen_int_mode (~ival, SImode))); +} + + +/* Return true if is load/store with SYMBOL_REF addressing mode + and memory mode is SImode. */ +bool +nds32_symbol_load_store_p (rtx insn) +{ + rtx mem_src = NULL_RTX; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + mem_src = SET_SRC (PATTERN (insn)); + break; + case TYPE_STORE: + mem_src = SET_DEST (PATTERN (insn)); + break; + default: + break; + } + + /* Find load/store insn with addressing mode is SYMBOL_REF. 
*/ + if (mem_src != NULL_RTX) + { + if ((GET_CODE (mem_src) == ZERO_EXTEND) + || (GET_CODE (mem_src) == SIGN_EXTEND)) + mem_src = XEXP (mem_src, 0); + + if ((GET_CODE (XEXP (mem_src, 0)) == SYMBOL_REF) + || (GET_CODE (XEXP (mem_src, 0)) == LO_SUM)) + return true; + } + + return false; +} + +/* Function to determine whether it is worth to do fp_as_gp optimization. + Return 0: It is NOT worth to do fp_as_gp optimization. + Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization. + Note that if it is worth to do fp_as_gp optimization, + we MUST set FP_REGNUM ever live in this function. */ +int +nds32_fp_as_gp_check_available (void) +{ + /* If there exists ANY of following conditions, + we DO NOT perform fp_as_gp optimization: + 1. TARGET_FORBID_FP_AS_GP is set + regardless of the TARGET_FORCE_FP_AS_GP. + 2. User explicitly uses 'naked' attribute. + 3. Not optimize for size. + 4. Need frame pointer. + 5. If $fp is already required to be saved, + it means $fp is already choosen by register allocator. + Thus we better not to use it for fp_as_gp optimization. + 6. This function is a vararg function. + DO NOT apply fp_as_gp optimization on this function + because it may change and break stack frame. + 7. The epilogue is empty. + This happens when the function uses exit() + or its attribute is no_return. + In that case, compiler will not expand epilogue + so that we have no chance to output .omit_fp_end directive. */ + if (TARGET_FORBID_FP_AS_GP + || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) + || !optimize_size + || frame_pointer_needed + || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) + || (cfun->stdarg == 1) + || (find_fallthru_edge (EXIT_BLOCK_PTR->preds) == NULL)) + return 0; + + /* Now we can check the possibility of using fp_as_gp optimization. */ + if (TARGET_FORCE_FP_AS_GP) + { + /* User explicitly issues -mforce-fp-as-gp option. 
*/ + df_set_regs_ever_live (FP_REGNUM, 1); + return 1; + } + else + { + /* In the following we are going to evaluate whether + it is worth to do fp_as_gp optimization. */ + int good_gain = 0; + int symbol_count = 0; + + int threshold; + rtx insn; + + /* We check if there already requires prologue. + Note that $gp will be saved in prologue for PIC code generation. + After that, we can set threshold by the existence of prologue. + Each fp-implied instruction will gain 2-byte code size + from gp-aware instruction, so we have following heuristics. */ + if (flag_pic + || nds32_have_prologue_p ()) + { + /* Have-prologue: + Compiler already intends to generate prologue content, + so the fp_as_gp optimization will only insert + 'la $fp,_FP_BASE_' instruction, which will be + converted into 4-byte instruction at link time. + The threshold is "3" symbol accesses, 2 + 2 + 2 > 4. */ + threshold = 3; + } + else + { + /* None-prologue: + Compiler originally does not generate prologue content, + so the fp_as_gp optimization will NOT ONLY insert + 'la $fp,_FP_BASE' instruction, but also causes + push/pop instructions. + If we are using v3push (push25/pop25), + the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2; + If we are using normal push (smw/lmw), + the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4. */ + threshold = 5 + (TARGET_V3PUSH ? 0 : 2); + } + + /* We would like to traverse every instruction in this function. + So we need to have push_topmost_sequence()/pop_topmost_sequence() + surrounding our for-loop evaluation. */ + push_topmost_sequence (); + /* Counting the insn number which the addressing mode is symbol. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (single_set (insn) && nds32_symbol_load_store_p (insn)) + symbol_count++; + + if (symbol_count == threshold) + { + good_gain = 1; + break; + } + } + pop_topmost_sequence (); + + /* Enable fp_as_gp optimization when potential gain is good enough. 
*/ + if (good_gain) + { + df_set_regs_ever_live (FP_REGNUM, 1); + return 1; + } + } + + /* By default we return 0. */ + return 0; +} + + +/* Function to generate PC relative jump table. + Refer to nds32.md for more details. + + The following is the sample for the case that diff value + can be presented in '.short' size. + + addi $r1, $r1, -(case_lower_bound) + slti $ta, $r1, (case_number) + beqz $ta, .L_skip_label + + la $ta, .L35 ! get jump table address + lh $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry + addi $ta, $r1, $ta + jr5 $ta + + ! jump table entry + L35: + .short .L25-.L35 + .short .L26-.L35 + .short .L27-.L35 + .short .L28-.L35 + .short .L29-.L35 + .short .L30-.L35 + .short .L31-.L35 + .short .L32-.L35 + .short .L33-.L35 + .short .L34-.L35 */ +const char * +nds32_output_casesi_pc_relative (rtx *operands) +{ + enum machine_mode mode; + rtx diff_vec; + + diff_vec = PATTERN (next_active_insn (operands[1])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + /* Step C: "t <-- operands[1]". */ + output_asm_insn ("la\t$ta, %l1", operands); + + /* Get the mode of each element in the difference vector. */ + mode = GET_MODE (diff_vec); + + /* Step D: "z <-- (mem (plus (operands[0] << m) t))", + where m is 0, 1, or 2 to load address-diff value from table. */ + switch (mode) + { + case QImode: + output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); + break; + case HImode: + output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); + break; + case SImode: + output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); + break; + default: + gcc_unreachable (); + } + + /* Step E: "t <-- z + t". + Add table label_ref with address-diff value to + obtain target case address. */ + output_asm_insn ("add\t$ta, %2, $ta", operands); + + /* Step F: jump to target with register t. */ + if (TARGET_16_BIT) + return "jr5\t$ta"; + else + return "jr\t$ta"; +} + +/* Function to generate normal jump table. 
*/ +const char * +nds32_output_casesi (rtx *operands) +{ + /* Step C: "t <-- operands[1]". */ + output_asm_insn ("la\t$ta, %l1", operands); + + /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". */ + output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); + + /* No need to perform Step E, which is only used for + pc relative jump table. */ + + /* Step F: jump to target with register z. */ + if (TARGET_16_BIT) + return "jr5\t%2"; + else + return "jr\t%2"; +} + + +/* Function to return memory format. */ +enum nds32_16bit_address_type +nds32_mem_format (rtx op) +{ + enum machine_mode mode_test; + int val; + int regno; + + if (!TARGET_16_BIT) + return ADDRESS_NOT_16BIT_FORMAT; + + mode_test = GET_MODE (op); + + op = XEXP (op, 0); + + /* 45 format. */ + if (GET_CODE (op) == REG && (mode_test == SImode)) + return ADDRESS_REG; + + /* 333 format for QI/HImode. */ + if (GET_CODE (op) == REG && (REGNO (op) < R8_REGNUM)) + return ADDRESS_LO_REG_IMM3U; + + /* post_inc 333 format. */ + if ((GET_CODE (op) == POST_INC) && (mode_test == SImode)) + { + regno = REGNO(XEXP (op, 0)); + + if (regno < 8) + return ADDRESS_POST_INC_LO_REG_IMM3U; + } + + /* post_inc 333 format. */ + if ((GET_CODE (op) == POST_MODIFY) + && (mode_test == SImode) + && (REG_P (XEXP (XEXP (op, 1), 0))) + && (CONST_INT_P (XEXP (XEXP (op, 1), 1)))) + { + regno = REGNO (XEXP (XEXP (op, 1), 0)); + val = INTVAL (XEXP (XEXP (op, 1), 1)); + if (regno < 8 && val < 32) + return ADDRESS_POST_INC_LO_REG_IMM3U; + } + + if ((GET_CODE (op) == PLUS) + && (GET_CODE (XEXP (op, 0)) == REG) + && (GET_CODE (XEXP (op, 1)) == CONST_INT)) + { + val = INTVAL (XEXP (op, 1)); + + regno = REGNO(XEXP (op, 0)); + + if (regno > 7 + && regno != SP_REGNUM + && regno != FP_REGNUM) + return ADDRESS_NOT_16BIT_FORMAT; + + switch (mode_test) + { + case QImode: + /* 333 format. */ + if (val >= 0 && val < 8 && regno < 8) + return ADDRESS_LO_REG_IMM3U; + break; + + case HImode: + /* 333 format. 
*/ + if (val >= 0 && val < 16 && (val % 2 == 0) && regno < 8) + return ADDRESS_LO_REG_IMM3U; + break; + + case SImode: + case SFmode: + case DFmode: + /* fp imply 37 format. */ + if ((regno == FP_REGNUM) && + (val >= 0 && val < 512 && (val % 4 == 0))) + return ADDRESS_FP_IMM7U; + /* sp imply 37 format. */ + else if ((regno == SP_REGNUM) && + (val >= 0 && val < 512 && (val % 4 == 0))) + return ADDRESS_SP_IMM7U; + /* 333 format. */ + else if (val >= 0 && val < 32 && (val % 4 == 0) && regno < 8) + return ADDRESS_LO_REG_IMM3U; + break; + + default: + break; + } + } + + return ADDRESS_NOT_16BIT_FORMAT; +} + +/* Output 16-bit store. */ +const char * +nds32_output_16bit_store (rtx *operands, int byte) +{ + char pattern[100]; + char size; + rtx code = XEXP (operands[0], 0); + + size = nds32_byte_to_size (byte); + + switch (nds32_mem_format (operands[0])) + { + case ADDRESS_REG: + operands[0] = code; + output_asm_insn ("swi450\t%1, [%0]", operands); + break; + case ADDRESS_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "s%ci333\t%%1, %%0", size); + output_asm_insn (pattern, operands); + break; + case ADDRESS_POST_INC_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "s%ci333.bi\t%%1, %%0", size); + output_asm_insn (pattern, operands); + break; + case ADDRESS_FP_IMM7U: + output_asm_insn ("swi37\t%1, %0", operands); + break; + case ADDRESS_SP_IMM7U: + /* Get immediate value and set back to operands[1]. */ + operands[0] = XEXP (code, 1); + output_asm_insn ("swi37.sp\t%1, [ + (%0)]", operands); + break; + default: + break; + } + + return ""; +} + +/* Output 16-bit load. 
*/ +const char * +nds32_output_16bit_load (rtx *operands, int byte) +{ + char pattern[100]; + unsigned char size; + rtx code = XEXP (operands[1], 0); + + size = nds32_byte_to_size (byte); + + switch (nds32_mem_format (operands[1])) + { + case ADDRESS_REG: + operands[1] = code; + output_asm_insn ("lwi450\t%0, [%1]", operands); + break; + case ADDRESS_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "l%ci333\t%%0, %%1", size); + output_asm_insn (pattern, operands); + break; + case ADDRESS_POST_INC_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "l%ci333.bi\t%%0, %%1", size); + output_asm_insn (pattern, operands); + break; + case ADDRESS_FP_IMM7U: + output_asm_insn ("lwi37\t%0, %1", operands); + break; + case ADDRESS_SP_IMM7U: + /* Get immediate value and set back to operands[0]. */ + operands[1] = XEXP (code, 1); + output_asm_insn ("lwi37.sp\t%0, [ + (%1)]", operands); + break; + default: + break; + } + + return ""; +} + +/* Output 32-bit store. */ +const char * +nds32_output_32bit_store (rtx *operands, int byte) +{ + char pattern[100]; + unsigned char size; + rtx code = XEXP (operands[0], 0); + + size = nds32_byte_to_size (byte); + + switch (GET_CODE (code)) + { + case REG: + /* (mem (reg X)) + => access location by using register, + use "sbi / shi / swi" */ + snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size); + break; + + case SYMBOL_REF: + case CONST: + /* (mem (symbol_ref X)) + (mem (const (...))) + => access global variables, + use "sbi.gp / shi.gp / swi.gp" */ + operands[0] = XEXP (operands[0], 0); + snprintf (pattern, sizeof (pattern), "s%ci.gp\t%%1, [ + %%0]", size); + break; + + case POST_INC: + /* (mem (post_inc reg)) + => access location by using register which will be post increment, + use "sbi.bi / shi.bi / swi.bi" */ + snprintf (pattern, sizeof (pattern), + "s%ci.bi\t%%1, %%0, %d", size, byte); + break; + + case POST_DEC: + /* (mem (post_dec reg)) + => access location by using register which will be post decrement, + use "sbi.bi / 
shi.bi / swi.bi" */ + snprintf (pattern, sizeof (pattern), + "s%ci.bi\t%%1, %%0, -%d", size, byte); + break; + + case POST_MODIFY: + switch (GET_CODE (XEXP (XEXP (code, 1), 1))) + { + case REG: + case SUBREG: + /* (mem (post_modify (reg) (plus (reg) (reg)))) + => access location by using register which will be + post modified with reg, + use "sb.bi/ sh.bi / sw.bi" */ + snprintf (pattern, sizeof (pattern), "s%c.bi\t%%1, %%0", size); + break; + case CONST_INT: + /* (mem (post_modify (reg) (plus (reg) (const_int)))) + => access location by using register which will be + post modified with const_int, + use "sbi.bi/ shi.bi / swi.bi" */ + snprintf (pattern, sizeof (pattern), "s%ci.bi\t%%1, %%0", size); + break; + default: + abort (); + } + break; + + case PLUS: + switch (GET_CODE (XEXP (code, 1))) + { + case REG: + case SUBREG: + /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) + => access location by adding two registers, + use "sb / sh / sw" */ + snprintf (pattern, sizeof (pattern), "s%c\t%%1, %%0", size); + break; + case CONST_INT: + /* (mem (plus reg const_int)) + => access location by adding one register with const_int, + use "sbi / shi / swi" */ + snprintf (pattern, sizeof (pattern), "s%ci\t%%1, %%0", size); + break; + default: + abort (); + } + break; + + case LO_SUM: + operands[2] = XEXP (code, 1); + operands[0] = XEXP (code, 0); + snprintf (pattern, sizeof (pattern), + "s%ci\t%%1, [%%0 + lo12(%%2)]", size); + break; + + default: + abort (); + } + + output_asm_insn (pattern, operands); + return ""; +} + +/* Output 32-bit load. 
*/ +const char * +nds32_output_32bit_load (rtx *operands, int byte) +{ + char pattern[100]; + unsigned char size; + rtx code; + + code = XEXP (operands[1], 0); + + size = nds32_byte_to_size (byte); + + switch (GET_CODE (code)) + { + case REG: + /* (mem (reg X)) + => access location by using register, + use "lbi / lhi / lwi" */ + snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size); + break; + + case SYMBOL_REF: + case CONST: + /* (mem (symbol_ref X)) + (mem (const (...))) + => access global variables, + use "lbi.gp / lhi.gp / lwi.gp" */ + operands[1] = XEXP (operands[1], 0); + snprintf (pattern, sizeof (pattern), "l%ci.gp\t%%0, [ + %%1]", size); + break; + + case POST_INC: + /* (mem (post_inc reg)) + => access location by using register which will be post increment, + use "lbi.bi / lhi.bi / lwi.bi" */ + snprintf (pattern, sizeof (pattern), + "l%ci.bi\t%%0, %%1, %d", size, byte); + break; + + case POST_DEC: + /* (mem (post_dec reg)) + => access location by using register which will be post decrement, + use "lbi.bi / lhi.bi / lwi.bi" */ + snprintf (pattern, sizeof (pattern), + "l%ci.bi\t%%0, %%1, -%d", size, byte); + break; + + case POST_MODIFY: + switch (GET_CODE (XEXP (XEXP (code, 1), 1))) + { + case REG: + case SUBREG: + /* (mem (post_modify (reg) (plus (reg) (reg)))) + => access location by using register which will be + post modified with reg, + use "lb.bi/ lh.bi / lw.bi" */ + snprintf (pattern, sizeof (pattern), "l%c.bi\t%%0, %%1", size); + break; + case CONST_INT: + /* (mem (post_modify (reg) (plus (reg) (const_int)))) + => access location by using register which will be + post modified with const_int, + use "lbi.bi/ lhi.bi / lwi.bi" */ + snprintf (pattern, sizeof (pattern), "l%ci.bi\t%%0, %%1", size); + break; + default: + abort (); + } + break; + + case PLUS: + switch (GET_CODE (XEXP (code, 1))) + { + case REG: + case SUBREG: + /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) + use "lb / lh / lw" */ + snprintf (pattern, sizeof 
(pattern), "l%c\t%%0, %%1", size); + break; + case CONST_INT: + /* (mem (plus reg const_int)) + => access location by adding one register with const_int, + use "lbi / lhi / lwi" */ + snprintf (pattern, sizeof (pattern), "l%ci\t%%0, %%1", size); + break; + default: + abort (); + } + break; + + case LO_SUM: + operands[2] = XEXP (code, 1); + operands[1] = XEXP (code, 0); + snprintf (pattern, sizeof (pattern), + "l%ci\t%%0, [%%1 + lo12(%%2)]", size); + break; + + default: + abort (); + } + + output_asm_insn (pattern, operands); + return ""; +} + +/* Output 32-bit load with signed extension. */ +const char * +nds32_output_32bit_load_s (rtx *operands, int byte) +{ + char pattern[100]; + unsigned char size; + rtx code; + + code = XEXP (operands[1], 0); + + size = nds32_byte_to_size (byte); + + switch (GET_CODE (code)) + { + case REG: + /* (mem (reg X)) + => access location by using register, + use "lbsi / lhsi" */ + snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size); + break; + + case SYMBOL_REF: + case CONST: + /* (mem (symbol_ref X)) + (mem (const (...))) + => access global variables, + use "lbsi.gp / lhsi.gp" */ + operands[1] = XEXP (operands[1], 0); + snprintf (pattern, sizeof (pattern), "l%csi.gp\t%%0, [ + %%1]", size); + break; + + case POST_INC: + /* (mem (post_inc reg)) + => access location by using register which will be post increment, + use "lbsi.bi / lhsi.bi" */ + snprintf (pattern, sizeof (pattern), + "l%csi.bi\t%%0, %%1, %d", size, byte); + break; + + case POST_DEC: + /* (mem (post_dec reg)) + => access location by using register which will be post decrement, + use "lbsi.bi / lhsi.bi" */ + snprintf (pattern, sizeof (pattern), + "l%csi.bi\t%%0, %%1, -%d", size, byte); + break; + + case POST_MODIFY: + switch (GET_CODE (XEXP (XEXP (code, 1), 1))) + { + case REG: + case SUBREG: + /* (mem (post_modify (reg) (plus (reg) (reg)))) + => access location by using register which will be + post modified with reg, + use "lbs.bi/ lhs.bi" */ + snprintf (pattern, 
sizeof (pattern), "l%cs.bi\t%%0, %%1", size); + break; + case CONST_INT: + /* (mem (post_modify (reg) (plus (reg) (const_int)))) + => access location by using register which will be + post modified with const_int, + use "lbsi.bi/ lhsi.bi" */ + snprintf (pattern, sizeof (pattern), "l%csi.bi\t%%0, %%1", size); + break; + default: + abort (); + } + break; + + case PLUS: + switch (GET_CODE (XEXP (code, 1))) + { + case REG: + case SUBREG: + /* (mem (plus reg reg)) or (mem (plus (mult reg const_int) reg)) + use "lbs / lhs" */ + snprintf (pattern, sizeof (pattern), "l%cs\t%%0, %%1", size); + break; + case CONST_INT: + /* (mem (plus reg const_int)) + => access location by adding one register with const_int, + use "lbsi / lhsi" */ + snprintf (pattern, sizeof (pattern), "l%csi\t%%0, %%1", size); + break; + default: + abort (); + } + break; + + case LO_SUM: + operands[2] = XEXP (code, 1); + operands[1] = XEXP (code, 0); + snprintf (pattern, sizeof (pattern), + "l%csi\t%%0, [%%1 + lo12(%%2)]", size); + break; + + default: + abort (); + } + + output_asm_insn (pattern, operands); + return ""; +} + +/* Function to output stack push operation. + We need to deal with normal stack push multiple or stack v3push. */ +const char * +nds32_output_stack_push (void) +{ + /* A string pattern for output_asm_insn(). */ + char pattern[100]; + /* The operands array which will be used in output_asm_insn(). */ + rtx operands[3]; + /* Pick up callee-saved first regno and last regno for further use. */ + int rb_regno = cfun->machine->callee_saved_regs_first_regno; + int re_regno = cfun->machine->callee_saved_regs_last_regno; + + if (TARGET_V3PUSH) + { + /* For stack v3push: + operands[0]: Re + operands[1]: imm8u */ + + /* This variable is to check if 'push25 Re,imm8u' is available. */ + int sp_adjust; + + /* Set operands[0]. */ + operands[0] = gen_rtx_REG (SImode, re_regno); + + /* Check if we can generate 'push25 Re,imm8u', + otherwise, generate 'push25 Re,0'. 
*/ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_padding_bytes; + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) + operands[1] = GEN_INT (sp_adjust); + else + operands[1] = GEN_INT (0); + + /* Create assembly code pattern. */ + snprintf (pattern, sizeof (pattern), "push25\t%%0, %%1"); + } + else + { + /* For normal stack push multiple: + operands[0]: Rb + operands[1]: Re + operands[2]: En4 (never assigned in this function; the En4 register + list is written straight into the pattern text below) */ + + /* This variable is used to check if we only need to generate En4 field. + As long as Rb==Re==SP_REGNUM, we set this variable to 1. */ + int push_en4_only_p = 0; + + /* Set operands[0] and operands[1]. */ + operands[0] = gen_rtx_REG (SImode, rb_regno); + operands[1] = gen_rtx_REG (SImode, re_regno); + + /* 'smw.adm $sp,[$sp],$sp,0' means push nothing. */ + if (!cfun->machine->fp_size + && !cfun->machine->gp_size + && !cfun->machine->lp_size + && REGNO (operands[0]) == SP_REGNUM + && REGNO (operands[1]) == SP_REGNUM) + { + /* No need to generate instruction. */ + return ""; + } + else + { + /* If Rb==Re==SP_REGNUM, we only need to generate En4 field. */ + if (REGNO (operands[0]) == SP_REGNUM + && REGNO (operands[1]) == SP_REGNUM) + push_en4_only_p = 1; + + /* Create assembly code pattern. + We need to handle the form: "Rb, Re, { $fp $gp $lp }". */ + snprintf (pattern, sizeof (pattern), + "push.s\t%s{%s%s%s }", + push_en4_only_p ? "" : "%0, %1, ", + cfun->machine->fp_size ? " $fp" : "", + cfun->machine->gp_size ? " $gp" : "", + cfun->machine->lp_size ? " $lp" : ""); + } + } + + /* We use output_asm_insn() to output assembly code by ourselves. */ + output_asm_insn (pattern, operands); + return ""; +} + +/* Function to output stack pop operation. + We need to deal with normal stack pop multiple or stack v3pop. */ +const char * +nds32_output_stack_pop (void) +{ + /* A string pattern for output_asm_insn(). 
*/ + char pattern[100]; + /* The operands array which will be used in output_asm_insn(). */ + rtx operands[3]; + /* Pick up callee-saved first regno and last regno for further use. */ + int rb_regno = cfun->machine->callee_saved_regs_first_regno; + int re_regno = cfun->machine->callee_saved_regs_last_regno; + + if (TARGET_V3PUSH) + { + /* For stack v3pop: + operands[0]: Re + operands[1]: imm8u */ + + /* This variable is to check if 'pop25 Re,imm8u' is available. */ + int sp_adjust; + + /* Set operands[0]. */ + operands[0] = gen_rtx_REG (SImode, re_regno); + + /* Check if we can generate 'pop25 Re,imm8u', + otherwise, generate 'pop25 Re,0'. + We have to consider the alloca issue as well. + If the function does call alloca(), the stack pointer is not fixed. + In that case, we cannot use 'pop25 Re,imm8u' directly. + We have to calculate the stack pointer from the frame pointer + and then use 'pop25 Re,0'. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_padding_bytes; + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) + && !cfun->calls_alloca) + operands[1] = GEN_INT (sp_adjust); + else + operands[1] = GEN_INT (0); + + /* Create assembly code pattern. */ + snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1"); + } + else + { + /* For normal stack pop multiple: + operands[0]: Rb + operands[1]: Re + operands[2]: En4 (never assigned in this function; the En4 register + list is written straight into the pattern text) */ + + /* This variable is used to check if we only need to generate En4 field. + As long as Rb==Re==SP_REGNUM, we set this variable to 1. */ + int pop_en4_only_p = 0; + + /* Set operands[0] and operands[1]. */ + operands[0] = gen_rtx_REG (SImode, rb_regno); + operands[1] = gen_rtx_REG (SImode, re_regno); + + /* 'lmw.bim $sp,[$sp],$sp,0' means pop nothing. */ + if (!cfun->machine->fp_size + && !cfun->machine->gp_size + && !cfun->machine->lp_size + && REGNO (operands[0]) == SP_REGNUM + && REGNO (operands[1]) == SP_REGNUM) + { + /* No need to generate instruction. 
*/ + return ""; + } + else + { + /* If Rb==Re==SP_REGNUM, we only need to generate En4 field. */ + if (REGNO (operands[0]) == SP_REGNUM + && REGNO (operands[1]) == SP_REGNUM) + pop_en4_only_p = 1; + + /* Create assembly code pattern. + We need to handle the form: "Rb, Re, { $fp $gp $lp }". */ + snprintf (pattern, sizeof (pattern), + "pop.s\t%s{%s%s%s }", + pop_en4_only_p ? "" : "%0, %1, ", + cfun->machine->fp_size ? " $fp" : "", + cfun->machine->gp_size ? " $gp" : "", + cfun->machine->lp_size ? " $lp" : ""); + } + } + + /* We use output_asm_insn() to output assembly code by ourselves. */ + output_asm_insn (pattern, operands); + return ""; +} + +/* Return the alignment (log base 2) to use for LABEL: + 2 when the length of the next active insn after LABEL is a multiple + of 4 bytes; 0 when optimizing for size, when there is no such insn, + or when its length is not a multiple of 4. */ +int +nds32_target_alignment (rtx label) +{ + rtx insn; + + if (optimize_size) + return 0; + + insn = next_active_insn (label); + + if (insn == 0) + return 0; + else if ((get_attr_length (insn) % 4) == 0) + return 2; + else + return 0; +} + +/* ------------------------------------------------------------------------ */ + +/* PART 5: Initialize target hook structure and definitions. */ + +/* Controlling the Compilation Driver. */ + + +/* Run-time Target Specification. */ + + +/* Defining Data Structures for Per-function Information. */ + + +/* Storage Layout. */ + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE \ + default_promote_function_mode_always_promote + + +/* Layout of Source Language Data Types. */ + + +/* Register Usage. */ + +/* -- Basic Characteristics of Registers. */ + +/* -- Order of Allocation of Registers. */ + +/* -- How Values Fit in Registers. */ + +/* -- Handling Leaf Functions. */ + +/* -- Registers That Form a Stack. */ + + +/* Register Classes. 
*/ + +#undef TARGET_CLASS_MAX_NREGS +#define TARGET_CLASS_MAX_NREGS nds32_class_max_nregs + +#undef TARGET_LRA_P +#define TARGET_LRA_P hook_bool_void_true + +#undef TARGET_REGISTER_PRIORITY +#define TARGET_REGISTER_PRIORITY nds32_register_priority + + +/* Obsolete Macros for Defining Constraints. */ + + +/* Stack Layout and Calling Conventions. */ + +/* -- Basic Stack Layout. */ + +/* -- Exception Handling Support. */ + +/* -- Specifying How Stack Checking is Done. */ + +/* -- Registers That Address the Stack Frame. */ + +/* -- Eliminating Frame Pointer and Arg Pointer. */ + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE nds32_can_eliminate + +/* -- Passing Function Arguments on the Stack. */ + +/* -- Passing Arguments in Registers. */ + +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG nds32_function_arg + +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE nds32_function_arg_advance + +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY nds32_function_arg_boundary + +/* -- How Scalar Function Values Are Returned. */ + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE nds32_function_value + +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE nds32_libcall_value + +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P nds32_function_value_regno_p + +/* -- How Large Values Are Returned. */ + +/* -- Caller-Saves Register Allocation. */ + +/* -- Function Entry and Exit. 
*/ + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE nds32_asm_function_prologue + +#undef TARGET_ASM_FUNCTION_END_PROLOGUE +#define TARGET_ASM_FUNCTION_END_PROLOGUE nds32_asm_function_end_prologue + +#undef TARGET_ASM_FUNCTION_BEGIN_EPILOGUE +#define TARGET_ASM_FUNCTION_BEGIN_EPILOGUE nds32_asm_function_begin_epilogue + +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE nds32_asm_function_epilogue + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK nds32_asm_output_mi_thunk + +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall + +/* -- Generating Code for Profiling. */ + +/* -- Permitting tail calls. */ + +#undef TARGET_WARN_FUNC_RETURN +#define TARGET_WARN_FUNC_RETURN nds32_warn_func_return + +/* Stack smashing protection. */ + + +/* Implementing the Varargs Macros. */ + +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING nds32_strict_argument_naming + + +/* Trampolines for Nested Functions. */ + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE nds32_asm_trampoline_template + +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT nds32_trampoline_init + + +/* Implicit Calls to Library Routines. */ + + +/* Addressing Modes. */ + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P nds32_legitimate_address_p + + +/* Anchored Addresses. */ + + +/* Condition Code Status. */ + +/* -- Representation of condition codes using (cc0). */ + +/* -- Representation of condition codes using registers. */ + +/* -- Macros to control conditional execution. */ + + +/* Describing Relative Costs of Operations. 
*/ + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST nds32_register_move_cost + +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST nds32_memory_move_cost + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS nds32_rtx_costs + +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST nds32_address_cost + + +/* Adjusting the Instruction Scheduler. */ + + +/* Dividing the Output into Sections (Texts, Data, . . . ). */ + + +/* Position Independent Code. */ + + +/* Defining the Output Assembler Language. */ + +/* -- The Overall Framework of an Assembler File. */ + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START nds32_asm_file_start +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END nds32_asm_file_end + +/* -- Output of Data. */ + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" + +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +/* -- Output of Uninitialized Variables. */ + +/* -- Output and Generation of Labels. */ + +#undef TARGET_ASM_GLOBALIZE_LABEL +#define TARGET_ASM_GLOBALIZE_LABEL nds32_asm_globalize_label + +/* -- How Initialization Functions Are Handled. */ + +/* -- Macros Controlling Initialization Routines. */ + +/* -- Output of Assembler Instructions. */ + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND nds32_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS nds32_print_operand_address + +/* -- Output of Dispatch Tables. */ + +/* -- Assembler Commands for Exception Regions. */ + +/* -- Assembler Commands for Alignment. */ + + +/* Controlling Debugging Information Format. */ + +/* -- Macros Affecting All Debugging Formats. */ + +/* -- Specific Options for DBX Output. */ + +/* -- Open-Ended Hooks for DBX Format. */ + +/* -- File Names in DBX Format. */ + +/* -- Macros for SDB and DWARF Output. */ + +/* -- Macros for VMS Debug Format. */ + + +/* Cross Compilation and Floating Point. 
*/ + + +/* Mode Switching Instructions. */ + + +/* Defining target-specific uses of __attribute__. */ + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE nds32_attribute_table + +#undef TARGET_MERGE_DECL_ATTRIBUTES +#define TARGET_MERGE_DECL_ATTRIBUTES nds32_merge_decl_attributes + +#undef TARGET_INSERT_ATTRIBUTES +#define TARGET_INSERT_ATTRIBUTES nds32_insert_attributes + +#undef TARGET_OPTION_PRAGMA_PARSE +#define TARGET_OPTION_PRAGMA_PARSE nds32_option_pragma_parse + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE nds32_option_override + + +/* Emulating TLS. */ + + +/* Defining coprocessor specifics for MIPS targets. */ + + +/* Parameters for Precompiled Header Validity Checking. */ + + +/* C++ ABI parameters. */ + + +/* Adding support for named address spaces. */ + + +/* Miscellaneous Parameters. */ + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS nds32_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN nds32_expand_builtin + + +/* ------------------------------------------------------------------------ */ + +/* Initialize the GCC target structure. */ + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h new file mode 100644 index 00000000000..74f126cecc4 --- /dev/null +++ b/gcc/config/nds32/nds32.h @@ -0,0 +1,982 @@ +/* Definitions of target machine of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2013 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + + +/* ------------------------------------------------------------------------ */ + +/* The following are auxiliary macros or structure declarations + that are used all over the nds32.c and nds32.h. */ + + +/* Computing the Length of an Insn. */ +#define ADJUST_INSN_LENGTH(INSN, LENGTH) \ + (LENGTH = nds32_adjust_insn_length (INSN, LENGTH)) + +/* Check instruction LS-37-FP-implied form. + Note: actually its immediate range is imm9u + since it is used for lwi37/swi37 instructions. */ +#define NDS32_LS_37_FP_P(rt, ra, imm) \ + (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ + && REGNO (ra) == FP_REGNUM \ + && satisfies_constraint_Iu09 (imm)) + +/* Check instruction LS-37-SP-implied form. + Note: actually its immediate range is imm9u + since it is used for lwi37/swi37 instructions. */ +#define NDS32_LS_37_SP_P(rt, ra, imm) \ + (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ + && REGNO (ra) == SP_REGNUM \ + && satisfies_constraint_Iu09 (imm)) + + +/* Check load/store instruction form : Rt3, Ra3, imm3u. */ +#define NDS32_LS_333_P(rt, ra, imm, mode) nds32_ls_333_p (rt, ra, imm, mode) + +/* Check load/store instruction form : Rt4, Ra5, const_int_0. + Note: no need to check ra because Ra5 means it covers all registers. + The imm argument is parenthesized so that any expression can be + passed without changing how the comparison is parsed. */ +#define NDS32_LS_450_P(rt, ra, imm) \ + (((imm) == const0_rtx) \ + && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ + || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS)) + +/* Check instruction RRI-333-form. 
*/ +#define NDS32_RRI_333_P(rt, ra, imm) \ + (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ + && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS \ + && satisfies_constraint_Iu03 (imm)) + +/* Check instruction RI-45-form. */ +#define NDS32_RI_45_P(rt, ra, imm) \ + (REGNO (rt) == REGNO (ra) \ + && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ + || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS) \ + && satisfies_constraint_Iu05 (imm)) + + +/* Check instruction RR-33-form. */ +#define NDS32_RR_33_P(rt, ra) \ + (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ + && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS) + +/* Check instruction RRR-333-form. */ +#define NDS32_RRR_333_P(rt, ra, rb) \ + (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ + && REGNO_REG_CLASS (REGNO (ra)) == LOW_REGS \ + && REGNO_REG_CLASS (REGNO (rb)) == LOW_REGS) + +/* Check instruction RR-45-form. + Note: no need to check rb because Rb5 means it covers all registers. */ +#define NDS32_RR_45_P(rt, ra, rb) \ + (REGNO (rt) == REGNO (ra) \ + && (REGNO_REG_CLASS (REGNO (rt)) == LOW_REGS \ + || REGNO_REG_CLASS (REGNO (rt)) == MIDDLE_REGS)) + +/* Classifies address type to distinguish 16-bit/32-bit format. */ +enum nds32_16bit_address_type +{ + /* [reg]: 45 format address. */ + ADDRESS_REG, + /* [lo_reg + imm3u]: 333 format address. */ + ADDRESS_LO_REG_IMM3U, + /* post_inc [lo_reg + imm3u]: 333 format address. */ + ADDRESS_POST_INC_LO_REG_IMM3U, + /* [$fp + imm7u]: fp imply address. */ + ADDRESS_FP_IMM7U, + /* [$sp + imm7u]: sp imply address. */ + ADDRESS_SP_IMM7U, + /* Other address format. */ + ADDRESS_NOT_16BIT_FORMAT +}; + + +/* ------------------------------------------------------------------------ */ + +/* Define maximum numbers of registers for passing arguments. */ +#define NDS32_MAX_REGS_FOR_ARGS 6 + +/* Define the register number for first argument. */ +#define NDS32_GPR_ARG_FIRST_REGNUM 0 + +/* Define the register number for return value. */ +#define NDS32_GPR_RET_FIRST_REGNUM 0 + + +/* Define double word alignment bits. 
*/ +#define NDS32_DOUBLE_WORD_ALIGNMENT 64 + +/* Define alignment checking macros for convenience. */ +#define NDS32_HALF_WORD_ALIGN_P(value) (((value) & 0x01) == 0) +#define NDS32_SINGLE_WORD_ALIGN_P(value) (((value) & 0x03) == 0) +#define NDS32_DOUBLE_WORD_ALIGN_P(value) (((value) & 0x07) == 0) + +/* Round VALUE up to the nearest double word (a multiple of 8). */ +#define NDS32_ROUND_UP_DOUBLE_WORD(value) (((value) + 7) & ~7) + + +/* This macro is used to calculate the number of registers needed + to hold an argument of the given mode and type. + For BLKmode the size comes from int_size_in_bytes (type), + otherwise from GET_MODE_SIZE (mode). + The size of a register is a word in nds32 target. + So we use UNITS_PER_WORD to do the calculation, rounding up. + The mode argument is parenthesized so that any expression can be + passed without changing how the comparison is parsed. */ +#define NDS32_NEED_N_REGS_FOR_ARG(mode, type) \ + (((mode) == BLKmode) \ + ? ((int_size_in_bytes (type) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) \ + : ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* This macro is used to return the register number for passing argument. + We need to obey the following rules: + 1. If MORE THAN one register is required, + make sure the register number is an even value. + 2. If ONLY one register is required, + the register number can be an odd or even value. */ +#define NDS32_AVAILABLE_REGNUM_FOR_ARG(reg_offset, mode, type) \ + ((NDS32_NEED_N_REGS_FOR_ARG (mode, type) > 1) \ + ? (((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM + 1) & ~1) \ + : ((reg_offset) + NDS32_GPR_ARG_FIRST_REGNUM)) + +/* This macro is to check if there are still available registers + for passing argument. */ +#define NDS32_ARG_PASS_IN_REG_P(reg_offset, mode, type) \ + (((reg_offset) < NDS32_MAX_REGS_FOR_ARGS) \ + && ((reg_offset) + NDS32_NEED_N_REGS_FOR_ARG (mode, type) \ + <= NDS32_MAX_REGS_FOR_ARGS)) + +/* This macro is to check if the register is required to be saved on stack. + If call_used_regs[regno] == 0, regno is the callee-saved register. + If df_regs_ever_live_p(regno) == true, it is used in the current function. + As long as the register satisfies both criteria above, + it is required to be saved. 
*/ +#define NDS32_REQUIRED_CALLEE_SAVED_P(regno) \ + ((!call_used_regs[regno]) && (df_regs_ever_live_p (regno))) + +/* ------------------------------------------------------------------------ */ + +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +struct GTY(()) machine_function +{ + /* Number of bytes allocated on the stack for variadic args + if we want to push them into stack as pretend arguments by ourselves. */ + int va_args_size; + /* Number of bytes reserved on the stack for + local and temporary variables. */ + int local_size; + /* Number of bytes allocated on the stack for outgoing arguments. */ + int out_args_size; + + /* Number of bytes on the stack for saving $fp. */ + int fp_size; + /* Number of bytes on the stack for saving $gp. */ + int gp_size; + /* Number of bytes on the stack for saving $lp. */ + int lp_size; + + /* Number of bytes on the stack for saving callee-saved registers. */ + int callee_saved_regs_size; + /* Number of padding bytes that may be required + in the callee-saved area. */ + int callee_saved_area_padding_bytes; + + /* The first required register that should be saved on stack + for va_args (one named argument + nameless arguments). */ + int va_args_first_regno; + /* The last required register that should be saved on stack + for va_args (one named argument + nameless arguments). */ + int va_args_last_regno; + + /* The first required callee-saved register. */ + int callee_saved_regs_first_regno; + /* The last required callee-saved register. */ + int callee_saved_regs_last_regno; + + /* Indicate whether this function needs + prologue/epilogue code generation. */ + int naked_p; + /* Indicate whether this function + uses fp_as_gp optimization. */ + int fp_as_gp_p; +}; + +/* A C structure that contains the arguments information. 
*/ +typedef struct +{ + unsigned int reg_offset; +} nds32_cumulative_args; + +/* ------------------------------------------------------------------------ */ + +/* The following we define C-ISR related stuff. + In nds32 architecture, we have 73 vectors for interrupt/exception. + For each vector (except for vector 0, which is used for reset behavior), + we allow users to set its register saving scheme and interrupt level. */ + +/* There are 73 vectors in nds32 architecture. + 0 for reset handler, + 1-8 for exception handler, + and 9-72 for interrupt handler. + We use an array, which is defined in nds32.c, to record + essential information for each vector. */ +#define NDS32_N_ISR_VECTORS 73 + +/* Define possible isr category. */ +enum nds32_isr_category +{ + NDS32_ISR_NONE, + NDS32_ISR_INTERRUPT, + NDS32_ISR_EXCEPTION, + NDS32_ISR_RESET +}; + +/* Define isr register saving scheme. */ +enum nds32_isr_save_reg +{ + NDS32_SAVE_ALL, + NDS32_PARTIAL_SAVE +}; + +/* Define isr nested type. */ +enum nds32_isr_nested_type +{ + NDS32_NESTED, + NDS32_NOT_NESTED, + NDS32_NESTED_READY +}; + +/* Define structure to record isr information. + The isr vector array 'isr_vectors[]' with this structure + is defined in nds32.c. */ +struct nds32_isr_info +{ + /* The field to identify isr category. + It should be set to NDS32_ISR_NONE by default. + If user specifies a function as isr by using attribute, + this field will be set accordingly. */ + enum nds32_isr_category category; + + /* A string for the applied function name. + It should be set to empty string by default. */ + char func_name[100]; + + /* The register saving scheme. + It should be set to NDS32_PARTIAL_SAVE by default + unless user specifies attribute to change it. */ + enum nds32_isr_save_reg save_reg; + + /* The nested type. + It should be set to NDS32_NOT_NESTED by default + unless user specifies attribute to change it. */ + enum nds32_isr_nested_type nested_type; + + /* Total vectors. 
+ The total vectors = interrupt + exception numbers + reset. + It should be set to 0 by default. + This field is ONLY used in NDS32_ISR_RESET category. */ + unsigned int total_n_vectors; + + /* A string for nmi handler name. + It should be set to empty string by default. + This field is ONLY used in NDS32_ISR_RESET category. */ + char nmi_name[100]; + + /* A string for warm handler name. + It should be set to empty string by default. + This field is ONLY used in NDS32_ISR_RESET category. */ + char warm_name[100]; +}; + +/* ------------------------------------------------------------------------ */ + +/* Define code for all nds32 builtins. */ +enum nds32_builtins +{ + NDS32_BUILTIN_ISYNC, + NDS32_BUILTIN_ISB, + NDS32_BUILTIN_MFSR, + NDS32_BUILTIN_MFUSR, + NDS32_BUILTIN_MTSR, + NDS32_BUILTIN_MTUSR, + NDS32_BUILTIN_SETGIE_EN, + NDS32_BUILTIN_SETGIE_DIS +}; + +/* ------------------------------------------------------------------------ */ + +#define TARGET_ISA_V2 (nds32_arch_option == ARCH_V2) +#define TARGET_ISA_V3 (nds32_arch_option == ARCH_V3) +#define TARGET_ISA_V3M (nds32_arch_option == ARCH_V3M) + +/* ------------------------------------------------------------------------ */ + +/* Controlling the Compilation Driver. */ + +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{!march=*:-march=%(VALUE)}" } + +#define CC1_SPEC \ + "" + +#define ASM_SPEC \ + " %{mbig-endian:-EB} %{mlittle-endian:-EL}" + +/* If user issues -mrelax, -mforce-fp-as-gp, or -mex9, + we need to pass '--relax' to linker. + Besides, for -mex9, we need to further pass '--mex9'. */ +#define LINK_SPEC \ + " %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ + " %{mrelax|mforce-fp-as-gp|mex9:--relax}" \ + " %{mex9:--mex9}" + +#define LIB_SPEC \ + " -lc -lgloss" + +/* The option -mno-ctor-dtor can disable constructor/destructor feature + by applying different crt stuff. 
By convention, crt0.o is the + startup file without constructor/destructor; + crt1.o, crti.o, crtbegin.o, crtend.o, and crtn.o are the + startup files with constructor/destructor. + Note that crt0.o, crt1.o, crti.o, and crtn.o are provided + by newlib/mculib/glibc/uclibc, while crtbegin.o and crtend.o are + currently provided by GCC for nds32 target. + + For nds32 target so far: + If -mno-ctor-dtor, we are going to link + "crt0.o [user objects]". + In the general case, we are going to link + "crt1.o crtbegin1.o [user objects] crtend1.o". */ +#define STARTFILE_SPEC \ + " %{!mno-ctor-dtor:crt1.o%s;:crt0.o%s}" \ + " %{!mno-ctor-dtor:crtbegin1.o%s}" +#define ENDFILE_SPEC \ + " %{!mno-ctor-dtor:crtend1.o%s}" + +/* The TARGET_BIG_ENDIAN_DEFAULT is defined if we configure gcc + with --target=nds32be-* setting. + Check gcc/config.gcc for more information. + In addition, currently we only have elf toolchain, + where mgp-direct is always the default. */ +#ifdef TARGET_BIG_ENDIAN_DEFAULT +#define MULTILIB_DEFAULTS { "mbig-endian", "mgp-direct" } +#else +#define MULTILIB_DEFAULTS { "mlittle-endian", "mgp-direct" } +#endif + + +/* Run-time Target Specification. 
*/ + +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__nds32__"); \ + \ + if (TARGET_ISA_V2) \ + builtin_define ("__NDS32_ISA_V2__"); \ + if (TARGET_ISA_V3) \ + builtin_define ("__NDS32_ISA_V3__"); \ + if (TARGET_ISA_V3M) \ + builtin_define ("__NDS32_ISA_V3M__"); \ + \ + if (TARGET_BIG_ENDIAN) \ + builtin_define ("__big_endian__"); \ + if (TARGET_REDUCED_REGS) \ + builtin_define ("__NDS32_REDUCED_REGS__"); \ + if (TARGET_CMOV) \ + builtin_define ("__NDS32_CMOV__"); \ + if (TARGET_PERF_EXT) \ + builtin_define ("__NDS32_PERF_EXT__"); \ + if (TARGET_16_BIT) \ + builtin_define ("__NDS32_16_BIT__"); \ + if (TARGET_GP_DIRECT) \ + builtin_define ("__NDS32_GP_DIRECT__"); \ + \ + builtin_assert ("cpu=nds32"); \ + builtin_assert ("machine=nds32"); \ + } while (0) + + +/* Defining Data Structures for Per-function Information. */ + +/* This macro is called once per function, + before generation of any RTL has begun. */ +#define INIT_EXPANDERS nds32_init_expanders () + + +/* Storage Layout. */ + +#define BITS_BIG_ENDIAN 0 + +#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN) + +#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN) + +#define UNITS_PER_WORD 4 + +#define PROMOTE_MODE(m, unsignedp, type) \ + if (GET_MODE_CLASS (m) == MODE_INT && GET_MODE_SIZE (m) < UNITS_PER_WORD) \ + { \ + (m) = SImode; \ + } + +#define PARM_BOUNDARY 32 + +#define STACK_BOUNDARY 64 + +#define FUNCTION_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT 64 + +#define EMPTY_FIELD_BOUNDARY 32 + +#define STRUCTURE_SIZE_BOUNDARY 8 + +#define STRICT_ALIGNMENT 1 + +#define PCC_BITFIELD_TYPE_MATTERS 1 + + +/* Layout of Source Language Data Types. 
*/ + +#define INT_TYPE_SIZE 32 +#define SHORT_TYPE_SIZE 16 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 + +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 1 + +#define SIZE_TYPE "long unsigned int" +#define PTRDIFF_TYPE "long int" +#define WCHAR_TYPE "short unsigned int" +#define WCHAR_TYPE_SIZE 16 + + +/* Register Usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. */ +#define FIRST_PSEUDO_REGISTER 34 + +/* An initializer that says which registers are used for fixed + purposes all throughout the compiled code and are therefore + not available for general allocation. + + $r28 : $fp + $r29 : $gp + $r30 : $lp + $r31 : $sp + + caller-save registers: $r0 ~ $r5, $r16 ~ $r23 + callee-save registers: $r6 ~ $r10, $r11 ~ $r14 + + reserved for assembler : $r15 + reserved for other use : $r24, $r25, $r26, $r27 */ +#define FIXED_REGISTERS \ +{ /* r0 r1 r2 r3 r4 r5 r6 r7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + /* r8 r9 r10 r11 r12 r13 r14 r15 */ \ + 0, 0, 0, 0, 0, 0, 0, 1, \ + /* r16 r17 r18 r19 r20 r21 r22 r23 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, \ + /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ + 1, 1, 1, 1, 0, 1, 0, 1, \ + /* ARG_POINTER:32 */ \ + 1, \ + /* FRAME_POINTER:33 */ \ + 1 \ +} + +/* Identifies the registers that are not available for + general allocation of values that must live across + function calls -- so they are caller-save registers. 
+ + 0 : callee-save registers + 1 : caller-save registers */ +#define CALL_USED_REGISTERS \ +{ /* r0 r1 r2 r3 r4 r5 r6 r7 */ \ + 1, 1, 1, 1, 1, 1, 0, 0, \ + /* r8 r9 r10 r11 r12 r13 r14 r15 */ \ + 0, 0, 0, 0, 0, 0, 0, 1, \ + /* r16 r17 r18 r19 r20 r21 r22 r23 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, \ + /* r24 r25 r26 r27 r28 r29 r30 r31 */ \ + 1, 1, 1, 1, 0, 1, 0, 1, \ + /* ARG_POINTER:32 */ \ + 1, \ + /* FRAME_POINTER:33 */ \ + 1 \ +} + +/* In nds32 target, we have three levels of registers: + LOW_COST_REGS : $r0 ~ $r7 + MIDDLE_COST_REGS : $r8 ~ $r11, $r16 ~ $r19 + HIGH_COST_REGS : $r12 ~ $r14, $r20 ~ $r31 */ +#define REG_ALLOC_ORDER \ +{ \ + 0, 1, 2, 3, 4, 5, 6, 7, \ + 8, 9, 10, 11, 16, 17, 18, 19, \ + 12, 13, 14, 15, 20, 21, 22, 23, \ + 24, 25, 26, 27, 28, 29, 30, 31, \ + 32, \ + 33 \ +} + +/* Tell IRA to use the order we define rather than messing it up with its + own cost calculations. */ +#define HONOR_REG_ALLOC_ORDER + +/* The number of consecutive hard regs needed starting at + reg "regno" for holding a value of mode "mode". */ +#define HARD_REGNO_NREGS(regno, mode) nds32_hard_regno_nregs (regno, mode) + +/* Value is 1 if hard register "regno" can hold a value + of machine-mode "mode". */ +#define HARD_REGNO_MODE_OK(regno, mode) nds32_hard_regno_mode_ok (regno, mode) + +/* A C expression that is nonzero if a value of mode1 + is accessible in mode2 without copying. + Define this macro to return nonzero in as many cases as possible + since doing so will allow GCC to perform better register allocation. + We can use general registers to tie QI/HI/SI modes together. */ +#define MODES_TIEABLE_P(mode1, mode2) \ + (GET_MODE_CLASS (mode1) == MODE_INT \ + && GET_MODE_CLASS (mode2) == MODE_INT \ + && GET_MODE_SIZE (mode1) <= UNITS_PER_WORD \ + && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD) + + +/* Register Classes. 
*/ + +/* In nds32 target, we have three levels of registers: + Low cost regsiters : $r0 ~ $r7 + Middle cost registers : $r8 ~ $r11, $r16 ~ $r19 + High cost registers : $r12 ~ $r14, $r20 ~ $r31 + + In practice, we have MIDDLE_REGS cover LOW_REGS register class contents + so that it provides more chance to use low cost registers. */ +enum reg_class +{ + NO_REGS, + R15_TA_REG, + STACK_REG, + LOW_REGS, + MIDDLE_REGS, + HIGH_REGS, + GENERAL_REGS, + FRAME_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "R15_TA_REG", \ + "STACK_REG", \ + "LOW_REGS", \ + "MIDDLE_REGS", \ + "HIGH_REGS", \ + "GENERAL_REGS", \ + "FRAME_REGS", \ + "ALL_REGS" \ +} + +#define REG_CLASS_CONTENTS \ +{ \ + {0x00000000, 0x00000000}, /* NO_REGS : */ \ + {0x00008000, 0x00000000}, /* R15_TA_REG : 15 */ \ + {0x80000000, 0x00000000}, /* STACK_REG : 31 */ \ + {0x000000ff, 0x00000000}, /* LOW_REGS : 0-7 */ \ + {0x000f0fff, 0x00000000}, /* MIDDLE_REGS : 0-11, 16-19 */ \ + {0xfff07000, 0x00000000}, /* HIGH_REGS : 12-14, 20-31 */ \ + {0xffffffff, 0x00000000}, /* GENERAL_REGS: 0-31 */ \ + {0x00000000, 0x00000003}, /* FRAME_REGS : 32, 33 */ \ + {0xffffffff, 0x00000003} /* ALL_REGS : 0-31, 32, 33 */ \ +} + +#define REGNO_REG_CLASS(regno) nds32_regno_reg_class (regno) + +#define BASE_REG_CLASS GENERAL_REGS +#define INDEX_REG_CLASS GENERAL_REGS + +/* Return nonzero if it is suitable for use as a + base register in operand addresses. + So far, we return nonzero only if "num" is a hard reg + of the suitable class or a pseudo register which is + allocated to a suitable hard reg. */ +#define REGNO_OK_FOR_BASE_P(num) \ + ((num) < 32 || (unsigned) reg_renumber[num] < 32) + +/* Return nonzero if it is suitable for use as a + index register in operand addresses. + So far, we return nonzero only if "num" is a hard reg + of the suitable class or a pseudo register which is + allocated to a suitable hard reg. 
+ The difference between an index register and a base register is that + the index register may be scaled. */ +#define REGNO_OK_FOR_INDEX_P(num) \ + ((num) < 32 || (unsigned) reg_renumber[num] < 32) + + +/* Obsolete Macros for Defining Constraints. */ + + +/* Stack Layout and Calling Conventions. */ + +#define STACK_GROWS_DOWNWARD + +#define FRAME_GROWS_DOWNWARD 1 + +#define STARTING_FRAME_OFFSET 0 + +#define STACK_POINTER_OFFSET 0 + +#define FIRST_PARM_OFFSET(fundecl) 0 + +#define RETURN_ADDR_RTX(count, frameaddr) \ + nds32_return_addr_rtx (count, frameaddr) + +/* A C expression whose value is RTL representing the location + of the incoming return address at the beginning of any function + before the prologue. + If this RTL is REG, you should also define + DWARF_FRAME_RETURN_COLUMN to DWARF_FRAME_REGNUM (REGNO). */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LP_REGNUM) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LP_REGNUM) + +#define STACK_POINTER_REGNUM SP_REGNUM + +#define FRAME_POINTER_REGNUM 33 + +#define HARD_FRAME_POINTER_REGNUM FP_REGNUM + +#define ARG_POINTER_REGNUM 32 + +#define STATIC_CHAIN_REGNUM 16 + +#define ELIMINABLE_REGS \ +{ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM } } + +#define INITIAL_ELIMINATION_OFFSET(from_reg, to_reg, offset_var) \ + (offset_var) = nds32_initial_elimination_offset (from_reg, to_reg) + +#define ACCUMULATE_OUTGOING_ARGS 1 + +#define OUTGOING_REG_PARM_STACK_SPACE(fntype) 1 + +#define CUMULATIVE_ARGS nds32_cumulative_args + +#define INIT_CUMULATIVE_ARGS(cum, fntype, libname, fndecl, n_named_args) \ + nds32_init_cumulative_args (&cum, fntype, libname, fndecl, n_named_args) + +/* The REGNO is an unsigned integer but NDS32_GPR_ARG_FIRST_REGNUM may be 0. 
+ We better cast REGNO into signed integer so that we can avoid + 'comparison of unsigned expression >= 0 is always true' warning. */ +#define FUNCTION_ARG_REGNO_P(regno) \ + (((int) regno - NDS32_GPR_ARG_FIRST_REGNUM >= 0) \ + && ((int) regno - NDS32_GPR_ARG_FIRST_REGNUM < NDS32_MAX_REGS_FOR_ARGS)) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* EXIT_IGNORE_STACK should be nonzero if, when returning + from a function, the stack pointer does not matter. + The value is tested only in functions that have frame pointers. + In nds32 target, the function epilogue recovers the + stack pointer from the frame. */ +#define EXIT_IGNORE_STACK 1 + +#define FUNCTION_PROFILER(file, labelno) \ + fprintf (file, "/* profiler %d */", (labelno)) + + +/* Implementing the Varargs Macros. */ + + +/* Trampolines for Nested Functions. */ + +/* Giving A-function and B-function, + if B-function wants to call A-function's nested function, + we need to fill trampoline code into A-function's stack + so that B-function can execute the code in stack to indirectly + jump to (like 'trampoline' action) desired nested function. + + The trampoline code for nds32 target must contains following parts: + + 1. instructions (4 * 4 = 16 bytes): + get $pc first + load chain_value to static chain register via $pc + load nested function address to $r15 via $pc + jump to desired nested function via $r15 + 2. data (4 * 2 = 8 bytes): + chain_value + nested function address + + Please check nds32.c implementation for more information. */ +#define TRAMPOLINE_SIZE 24 + +/* Because all instructions/data in trampoline template are 4-byte size, + we set trampoline alignment 8*4=32 bits. */ +#define TRAMPOLINE_ALIGNMENT 32 + + +/* Implicit Calls to Library Routines. */ + + +/* Addressing Modes. */ + +/* We can use "LWI.bi Rt, [Ra], 4" to support post increment. */ +#define HAVE_POST_INCREMENT 1 +/* We can use "LWI.bi Rt, [Ra], -4" to support post decrement. 
*/ +#define HAVE_POST_DECREMENT 1 + +/* We have "LWI.bi Rt, [Ra], imm" instruction form. */ +#define HAVE_POST_MODIFY_DISP 1 +/* We have "LW.bi Rt, [Ra], Rb" instruction form. */ +#define HAVE_POST_MODIFY_REG 1 + +#define CONSTANT_ADDRESS_P(x) (CONSTANT_P (x) && GET_CODE (x) != CONST_DOUBLE) + +#define MAX_REGS_PER_ADDRESS 2 + + +/* Anchored Addresses. */ + + +/* Condition Code Status. */ + + +/* Describing Relative Costs of Operations. */ + +/* A C expression for the cost of a branch instruction. + A value of 1 is the default; + other values are interpreted relative to that. */ +#define BRANCH_COST(speed_p, predictable_p) ((speed_p) ? 2 : 0) + +#define SLOW_BYTE_ACCESS 1 + +#define NO_FUNCTION_CSE + + +/* Adjusting the Instruction Scheduler. */ + + +/* Dividing the Output into Sections (Texts, Data, . . . ). */ + +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" + +/* Currently, nds32 assembler does NOT handle '.bss' pseudo-op. + So we use '.section .bss' alternatively. */ +#define BSS_SECTION_ASM_OP "\t.section\t.bss" + +/* Define this macro to be an expression with a nonzero value if jump tables + (for tablejump insns) should be output in the text section, + along with the assembler instructions. + Otherwise, the readonly data section is used. */ +#define JUMP_TABLES_IN_TEXT_SECTION 1 + + +/* Position Independent Code. */ + + +/* Defining the Output Assembler Language. */ + +#define ASM_COMMENT_START "!" + +#define ASM_APP_ON "! #APP" + +#define ASM_APP_OFF "! #NO_APP\n" + +#define ASM_OUTPUT_LABELREF(stream, name) \ + asm_fprintf (stream, "%U%s", (*targetm.strip_name_encoding) (name)) + +#define ASM_OUTPUT_SYMBOL_REF(stream, sym) \ + assemble_name (stream, XSTR (sym, 0)) + +#define ASM_OUTPUT_LABEL_REF(stream, buf) \ + assemble_name (stream, buf) + +#define LOCAL_LABEL_PREFIX "." 
+ +#define REGISTER_NAMES \ +{ \ + "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", \ + "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$ta", \ + "$r16", "$r17", "$r18", "$r19", "$r20", "$r21", "$r22", "$r23", \ + "$r24", "$r25", "$r26", "$r27", "$fp", "$gp", "$lp", "$sp", \ + "$AP", \ + "$SFP" \ +} + +/* Output normal jump table entry. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(stream, value) \ + asm_fprintf (stream, "\t.word\t%LL%d\n", value) + +/* Output pc relative jump table entry. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(stream, body, value, rel) \ + do \ + { \ + switch (GET_MODE (body)) \ + { \ + case QImode: \ + asm_fprintf (stream, "\t.byte\t.L%d-.L%d\n", value, rel); \ + break; \ + case HImode: \ + asm_fprintf (stream, "\t.short\t.L%d-.L%d\n", value, rel); \ + break; \ + case SImode: \ + asm_fprintf (stream, "\t.word\t.L%d-.L%d\n", value, rel); \ + break; \ + default: \ + gcc_unreachable(); \ + } \ + } while (0) + +/* We have to undef it first because elfos.h formerly define it + check gcc/config.gcc and gcc/config/elfos.h for more information. */ +#undef ASM_OUTPUT_CASE_LABEL +#define ASM_OUTPUT_CASE_LABEL(stream, prefix, num, table) \ + do \ + { \ + asm_fprintf (stream, "\t! Jump Table Begin\n"); \ + (*targetm.asm_out.internal_label) (stream, prefix, num); \ + } while (0) + +#define ASM_OUTPUT_CASE_END(stream, num, table) \ + do \ + { \ + /* Because our jump table is in text section, \ + we need to make sure 2-byte alignment after \ + the jump table for instructions fetch. */ \ + if (GET_MODE (PATTERN (table)) == QImode) \ + ASM_OUTPUT_ALIGN (stream, 1); \ + asm_fprintf (stream, "\t! Jump Table End\n"); \ + } while (0) + +/* This macro is not documented yet. + But we do need it to make jump table vector aligned. */ +#define ADDR_VEC_ALIGN(JUMPTABLE) 2 + +#define DWARF2_UNWIND_INFO 1 + +#define JUMP_ALIGN(x) \ + (align_jumps_log ? align_jumps_log : nds32_target_alignment (x)) + +#define LOOP_ALIGN(x) \ + (align_loops_log ? 
align_loops_log : nds32_target_alignment (x)) + +#define LABEL_ALIGN(x) \ + (align_labels_log ? align_labels_log : nds32_target_alignment (x)) + +#define ASM_OUTPUT_ALIGN(stream, power) \ + fprintf (stream, "\t.align\t%d\n", power) + + +/* Controlling Debugging Information Format. */ + +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +#define DWARF2_DEBUGGING_INFO 1 + +#define DWARF2_ASM_LINE_DEBUG_INFO 1 + + +/* Cross Compilation and Floating Point. */ + + +/* Mode Switching Instructions. */ + + +/* Defining target-specific uses of __attribute__. */ + + +/* Emulating TLS. */ + + +/* Defining coprocessor specifics for MIPS targets. */ + + +/* Parameters for Precompiled Header Validity Checking. */ + + +/* C++ ABI parameters. */ + + +/* Adding support for named address spaces. */ + + +/* Miscellaneous Parameters. */ + +/* This is the machine mode that elements of a jump-table should have. */ +#define CASE_VECTOR_MODE Pmode + +/* Return the preferred mode for and addr_diff_vec when the mininum + and maximum offset are known. */ +#define CASE_VECTOR_SHORTEN_MODE(min_offset, max_offset, body) \ + ((min_offset < 0 || max_offset >= 0x2000 ) ? SImode \ + : (max_offset >= 100) ? HImode \ + : QImode) + +/* Generate pc relative jump table when -fpic or -Os. */ +#define CASE_VECTOR_PC_RELATIVE (flag_pic || optimize_size) + +/* Define this macro if operations between registers with integral mode + smaller than a word are always performed on the entire register. */ +#define WORD_REGISTER_OPERATIONS + +/* A C expression indicating when insns that read memory in mem_mode, + an integral mode narrower than a word, set the bits outside of mem_mode + to be either the sign-extension or the zero-extension of the data read. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* The maximum number of bytes that a single instruction can move quickly + between memory and registers or between two memory locations. 
*/ +#define MOVE_MAX 4 + +/* A C expression that is nonzero if on this machine the number of bits + actually used for the count of a shift operation is equal to the number + of bits needed to represent the size of the object being shifted. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* A C expression which is nonzero if on this machine it is safe to "convert" + an integer of 'inprec' bits to one of 'outprec' bits by merely operating + on it as if it had only 'outprec' bits. */ +#define TRULY_NOOP_TRUNCATION(outprec, inprec) 1 + +/* A C expression describing the value returned by a comparison operator with + an integral mode and stored by a store-flag instruction ('cstoremode4') + when the condition is true. */ +#define STORE_FLAG_VALUE 1 + +/* An alias for the machine mode for pointers. */ +#define Pmode SImode + +/* An alias for the machine mode used for memory references to functions + being called, in call RTL expressions. */ +#define FUNCTION_MODE SImode + +/* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32.md b/gcc/config/nds32/nds32.md new file mode 100644 index 00000000000..4a832eaf86b --- /dev/null +++ b/gcc/config/nds32/nds32.md @@ -0,0 +1,2221 @@ +;; Machine description of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; Include predicates definition. +(include "predicates.md") + +;; Include constraints definition. +(include "constraints.md") + +;; Include iterators definition. +(include "iterators.md") + +;; Include pipelines definition. +(include "pipelines.md") + + +;; Include constants definition. +(include "constants.md") + + +;; Include intrinsic functions definition. +(include "nds32-intrinsic.md") + +;; Include block move for nds32 multiple load/store behavior. +(include "nds32-multiple.md") + +;; Include DImode/DFmode operations. +(include "nds32-doubleword.md") + +;; Include peephole patterns. +(include "nds32-peephole2.md") + + +;; Insn type, it is used to default other attribute values. +(define_attr "type" + "unknown,move,load,store,alu,compare,branch,call,misc" + (const_string "unknown")) + + +;; Length, in bytes, default is 4-bytes. +(define_attr "length" "" (const_int 4)) + + +;; Enabled, which is used to enable/disable insn alternatives. +;; Note that we use length and TARGET_16_BIT here as criteria. +;; If the instruction pattern already check TARGET_16_BIT to +;; determine the length by itself, its enabled attribute should be +;; always 1 to avoid the conflict with the settings here. +(define_attr "enabled" "" + (cond [(and (eq_attr "length" "2") + (match_test "!TARGET_16_BIT")) + (const_int 0)] + (const_int 1))) + + +;; ---------------------------------------------------------------------------- + + +;; Move instructions. + +;; For QImode and HImode, the immediate value can be fit in imm20s. +;; So there is no need to split rtx for QI and HI patterns. 
;; Expanders for QImode/HImode/SImode moves.
;; A store to memory needs its source in a register, so any non-register
;; source is first forced into one.

(define_expand "movqi"
  [(set (match_operand:QI 0 "general_operand" "")
	(match_operand:QI 1 "general_operand" ""))]
  ""
{
  /* Need to force register if mem <- !reg.  */
  if (MEM_P (operands[0]) && !REG_P (operands[1]))
    operands[1] = force_reg (QImode, operands[1]);
})

(define_expand "movhi"
  [(set (match_operand:HI 0 "general_operand" "")
	(match_operand:HI 1 "general_operand" ""))]
  ""
{
  /* Need to force register if mem <- !reg.  */
  if (MEM_P (operands[0]) && !REG_P (operands[1]))
    operands[1] = force_reg (HImode, operands[1]);
})

(define_expand "movsi"
  [(set (match_operand:SI 0 "general_operand" "")
	(match_operand:SI 1 "general_operand" ""))]
  ""
{
  /* Need to force register if mem <- !reg.  */
  if (MEM_P (operands[0]) && !REG_P (operands[1]))
    operands[1] = force_reg (SImode, operands[1]);

  /* If operands[1] is a large constant and cannot be performed
     by a single instruction, we need to split it into
     "high 20 bits" + "low 12 bits".  */
  if (CONST_INT_P (operands[1])
      && !satisfies_constraint_Is20 (operands[1])
      && !satisfies_constraint_Ihig (operands[1]))
    {
      HOST_WIDE_INT value = INTVAL (operands[1]);
      HOST_WIDE_INT low12_int = value & 0xfff;
      rtx high20_rtx;
      rtx tmp_rtx;

      tmp_rtx = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];

      /* Clear the low 12 bits with a mask rather than '(v >> 12) << 12':
	 the result is the same, but no negative value is left-shifted.  */
      high20_rtx = gen_int_mode (value & ~(HOST_WIDE_INT) 0xfff, SImode);

      emit_move_insn (tmp_rtx, high20_rtx);
      emit_move_insn (operands[0], plus_constant (SImode,
						  tmp_rtx,
						  low12_int));
      DONE;
    }
})

;; Generic QI/HI/SI move.  Alternatives, in order:
;;   0-1   register to register (16-bit mov55, 32-bit ori)
;;   2-6   stores (16-bit forms first, then the 32-bit form)
;;   7-11  loads  (16-bit forms first, then the 32-bit form)
;;   12-15 immediates (movpi45, movi55, movi, sethi)
(define_insn "*mov<mode>"
  [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m, l, l, l, d, r, d, r, r, r")
	(match_operand:QIHISI 1 "nds32_move_operand"   " r, r, l, l, l, d, r, U45, U33, U37, U45, m, Ip05, Is05, Is20, Ihig"))]
  ""
{
  switch (which_alternative)
    {
    case 0:
      return "mov55\t%0, %1";
    case 1:
      return "ori\t%0, %1, 0";
    case 2:
    case 3:
    case 4:
    case 5:
      return nds32_output_16bit_store (operands, <byte>);
    case 6:
      return nds32_output_32bit_store (operands, <byte>);
    case 7:
    case 8:
    case 9:
    case 10:
      return nds32_output_16bit_load (operands, <byte>);
    case 11:
      return nds32_output_32bit_load (operands, <byte>);
    case 12:
      return "movpi45\t%0, %1";
    case 13:
      return "movi55\t%0, %1";
    case 14:
      return "movi\t%0, %1";
    case 15:
      return "sethi\t%0, hi20(%1)";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type"   "alu,alu,store,store,store,store,store,load,load,load,load,load,alu,alu,alu,alu")
   (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 4, 2, 2, 4, 4")])


;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
;; are able to match such instruction template.
;; Load a symbolic address with the 'la' pseudo instruction.
;; Its length attribute is 8 bytes.
(define_insn "*move_addr"
  [(set (match_operand:SI 0 "register_operand"       "=l, r")
	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
  ""
  "la\t%0, %1"
  [(set_attr "type" "move")
   (set_attr "length"  "8")])


;; Upper 20 bits of a symbolic address.
(define_insn "*sethi"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(high:SI
	  (match_operand:SI 1 "nds32_symbolic_operand" " i")))]
  ""
  "sethi\t%0, hi20(%1)"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])


;; Merge the lower 12 bits of a symbolic address into a register.
(define_insn "*lo_sum"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(lo_sum:SI
	  (match_operand:SI 1 "register_operand"       " r")
	  (match_operand:SI 2 "nds32_symbolic_operand" " i")))]
  ""
  "ori\t%0, %1, lo12(%2)"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])


;; ----------------------------------------------------------------------------

;; Zero extension instructions.

;; Alternatives 0-1 extend a register; alternatives 2-3 are loads
;; whose output is produced by the load helpers.
(define_insn "zero_extend<mode>si2"
  [(set (match_operand:SI 0 "register_operand" "=l, r, l, *r")
	(zero_extend:SI
	  (match_operand:QIHI 1 "nonimmediate_operand" " l, r, U33, m")))]
  ""
{
  if (which_alternative == 0)
    return "ze<size>33\t%0, %1";

  if (which_alternative == 1)
    return "ze<size>\t%0, %1";

  if (which_alternative == 2)
    return nds32_output_16bit_load (operands, <byte>);

  if (which_alternative == 3)
    return nds32_output_32bit_load (operands, <byte>);

  gcc_unreachable ();
}
  [(set_attr "type"   "alu,alu,load,load")
   (set_attr "length" " 2, 4, 2, 4")])


;; Sign extension instructions.
;; Alternatives 0-1 extend a register; alternative 2 is a
;; sign-extending load.
(define_insn "extend<mode>si2"
  [(set (match_operand:SI 0 "register_operand" "=l, r, r")
	(sign_extend:SI (match_operand:QIHI 1 "nonimmediate_operand" " l, r, m")))]
  ""
{
  switch (which_alternative)
    {
    case 0:
      return "se<size>33\t%0, %1";
    case 1:
      return "se<size>\t%0, %1";
    case 2:
      return nds32_output_32bit_load_s (operands, <byte>);

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type"   "alu,alu,load")
   (set_attr "length" " 2, 4, 4")])


;; ----------------------------------------------------------------------------

;; Arithmetic instructions.

;; Note: the '%' commutativity modifier applies to the whole operand
;; (all alternatives), so it is written as the first character of the
;; constraint string for operand 1, as the GCC internals manual requires.
(define_insn "add<mode>3"
  [(set (match_operand:QIHISI 0 "register_operand"                   "= d, l, d, l, d, l, k, l, r, r")
	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"      "%0, l, 0, l, 0, l, 0, k, r, r")
		     (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03, r, l, Is10, Iu06, Is15, r")))]
  ""
{
  switch (which_alternative)
    {
    case 0:
      /* addi Rt4,Rt4,-x  ==>  subi45 Rt4,x
         where 0 <= x <= 31 */
      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
      return "subi45\t%0, %2";
    case 1:
      /* addi Rt3,Ra3,-x  ==>  subi333 Rt3,Ra3,x
         where 0 <= x <= 7 */
      operands[2] = gen_int_mode (-INTVAL (operands[2]), SImode);
      return "subi333\t%0, %1, %2";
    case 2:
      return "addi45\t%0, %2";
    case 3:
      return "addi333\t%0, %1, %2";
    case 4:
      return "add45\t%0, %2";
    case 5:
      return "add333\t%0, %1, %2";
    case 6:
      return "addi10.sp\t%2";
    case 7:
      return "addri36.sp\t%0, %2";
    case 8:
      return "addi\t%0, %1, %2";
    case 9:
      return "add\t%0, %1, %2";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
   (set_attr "length" " 2, 2, 2, 2, 2, 2, 2, 2, 4, 4")])

(define_insn "sub<mode>3"
  [(set (match_operand:QIHISI 0 "register_operand"                    "=d, l, r, r")
	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r")
		      (match_operand:QIHISI 2 "register_operand"      " r, l, r, r")))]
  ""
  "@
  sub45\t%0, %2
  sub333\t%0, %1, %2
  subri\t%0, %2, %1
  sub\t%0, %1, %2"
  [(set_attr "type"   "alu,alu,alu,alu")
   (set_attr "length" " 2, 2, 4, 4")])


;; GCC intends to simplify (plus (ashift ...) (reg))
;; into (plus (mult ...) (reg)), so our matching pattern takes 'mult'
;; and needs to ensure it is exact_log2 value.
;; The "i" constraint also admits symbolic constants, so the condition
;; checks CONST_INT_P before looking at INTVAL.
(define_insn "*add_slli"
  [(set (match_operand:SI 0 "register_operand"                    "=r")
        (plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
			  (match_operand:SI 2 "immediate_operand" " i"))
		 (match_operand:SI 3 "register_operand"           " r")))]
  "TARGET_ISA_V3
   && CONST_INT_P (operands[2])
   && (exact_log2 (INTVAL (operands[2])) != -1)
   && (exact_log2 (INTVAL (operands[2])) <= 31)"
{
  /* Get floor_log2 of the immediate value
     so that we can generate 'add_slli' instruction.  */
  operands[2] = GEN_INT (floor_log2 (INTVAL (operands[2])));

  return "add_slli\t%0, %3, %1, %2";
}
  [(set_attr "type" "alu")
   (set_attr "length" "4")])

(define_insn "*add_srli"
  [(set (match_operand:SI 0 "register_operand"                        "= r")
	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  " r")
			      (match_operand:SI 2 "immediate_operand" " Iu05"))
		 (match_operand:SI 3 "register_operand"               " r")))]
  "TARGET_ISA_V3"
  "add_srli\t%0, %3, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])


;; GCC intends to simplify (minus (reg) (ashift ...))
;; into (minus (reg) (mult ...)), so our matching pattern takes 'mult'
;; and needs to ensure it is exact_log2 value.
;; As with '*add_slli', CONST_INT_P is checked before INTVAL is used.
(define_insn "*sub_slli"
  [(set (match_operand:SI 0 "register_operand"                     "=r")
	(minus:SI (match_operand:SI 1 "register_operand"           " r")
		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
			   (match_operand:SI 3 "immediate_operand" " i"))))]
  "TARGET_ISA_V3
   && CONST_INT_P (operands[3])
   && (exact_log2 (INTVAL (operands[3])) != -1)
   && (exact_log2 (INTVAL (operands[3])) <= 31)"
{
  /* Get floor_log2 of the immediate value
     so that we can generate 'sub_slli' instruction.  */
  operands[3] = GEN_INT (floor_log2 (INTVAL (operands[3])));

  return "sub_slli\t%0, %1, %2, %3";
}
  [(set_attr "type" "alu")
   (set_attr "length" "4")])

(define_insn "*sub_srli"
  [(set (match_operand:SI 0 "register_operand"                         "= r")
	(minus:SI (match_operand:SI 1 "register_operand"               " r")
		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  " r")
			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
  "TARGET_ISA_V3"
  "sub_srli\t%0, %1, %2, %3"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])


;; Multiplication instructions.

(define_insn "mulsi3"
  [(set (match_operand:SI 0 "register_operand"          "= w, r")
	(mult:SI (match_operand:SI 1 "register_operand" " %0, r")
		 (match_operand:SI 2 "register_operand" " w, r")))]
  ""
  "@
  mul33\t%0, %2
  mul\t%0, %1, %2"
  [(set_attr "type"   "alu,alu")
   (set_attr "length" " 2, 4")])

;; Signed 32x32 -> 64 multiplication, enabled for V2 and V3 ISA.
(define_insn "mulsidi3"
  [(set (match_operand:DI 0 "register_operand"                          "=r")
	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" " r"))
		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
  "TARGET_ISA_V2 || TARGET_ISA_V3"
  "mulsr64\t%0, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])

;; Unsigned 32x32 -> 64 multiplication, enabled for V2 and V3 ISA.
(define_insn "umulsidi3"
  [(set (match_operand:DI 0 "register_operand"                          "=r")
	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" " r"))
		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
  "TARGET_ISA_V2 || TARGET_ISA_V3"
  "mulr64\t%0, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])


;; Multiply-accumulate instructions.
;; Accumulator first:  op0 = op3 + op1 * op2, with op3 tied to op0.
(define_insn "*maddr32_0"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI
	  (match_operand:SI 3 "register_operand" " 0")
	  (mult:SI
	    (match_operand:SI 1 "register_operand" " r")
	    (match_operand:SI 2 "register_operand" " r"))))]
  ""
  "maddr32\t%0, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])

;; Product first:  op0 = op1 * op2 + op3, with op3 tied to op0.
(define_insn "*maddr32_1"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI
	  (mult:SI
	    (match_operand:SI 1 "register_operand" " r")
	    (match_operand:SI 2 "register_operand" " r"))
	  (match_operand:SI 3 "register_operand" " 0")))]
  ""
  "maddr32\t%0, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])

;; op0 = op3 - op1 * op2, with op3 tied to op0.
(define_insn "*msubr32"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI
	  (match_operand:SI 3 "register_operand" " 0")
	  (mult:SI
	    (match_operand:SI 1 "register_operand" " r")
	    (match_operand:SI 2 "register_operand" " r"))))]
  ""
  "msubr32\t%0, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])


;; Div Instructions.

;; Signed division: quotient goes to operand 0, remainder to operand 3.
(define_insn "divmodsi4"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(div:SI
	  (match_operand:SI 1 "register_operand" " r")
	  (match_operand:SI 2 "register_operand" " r")))
   (set (match_operand:SI 3 "register_operand" "=r")
	(mod:SI (match_dup 1) (match_dup 2)))]
  ""
  "divsr\t%0, %3, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])

;; Unsigned division: quotient goes to operand 0, remainder to operand 3.
(define_insn "udivmodsi4"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(udiv:SI
	  (match_operand:SI 1 "register_operand" " r")
	  (match_operand:SI 2 "register_operand" " r")))
   (set (match_operand:SI 3 "register_operand" "=r")
	(umod:SI (match_dup 1) (match_dup 2)))]
  ""
  "divr\t%0, %3, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])


;; ----------------------------------------------------------------------------

;; Boolean instructions.
;; Note: We define the DImode versions in nds32-doubleword.md.
;; ----------------------------------------------------------------------------
;; 'AND' operation
;; ----------------------------------------------------------------------------

;; op0 = ~op1 & op2.  Note the swapped operand order in the template.
(define_insn "bitc"
  [(set (match_operand:SI 0 "register_operand"                 "=r")
	(and:SI (not:SI (match_operand:SI 1 "register_operand" " r"))
		(match_operand:SI 2 "register_operand"         " r")))]
  "TARGET_ISA_V3"
  "bitc\t%0, %2, %1"
  [(set_attr "type" "alu")
   (set_attr "length" "4")]
)

;; Alternatives 0-1 take a register as operand 2; alternatives 2-12 take
;; an immediate.  INTVAL is therefore only read inside the immediate
;; alternatives that need the value, never for a register operand.
(define_insn "andsi3"
  [(set (match_operand:SI 0 "register_operand"         "= w, r, l, l, l, l, l, l, r, r, r, r, r")
	(and:SI (match_operand:SI 1 "register_operand" " %0, r, l, l, l, l, 0, 0, r, r, r, r, r")
		(match_operand:SI 2 "general_operand"  " w, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))]
  ""
{
  HOST_WIDE_INT mask;
  int zero_position;

  /* 16-bit andi instructions:
     andi Rt3,Ra3,0xff   -> zeb33  Rt3,Ra3
     andi Rt3,Ra3,0xffff -> zeh33  Rt3,Ra3
     andi Rt3,Ra3,0x01   -> xlsb33 Rt3,Ra3
     andi Rt3,Ra3,0x7ff  -> x11b33 Rt3,Ra3
     andi Rt3,Rt3,2^imm3u          -> bmski33 Rt3,imm3u
     andi Rt3,Rt3,(2^(imm3u+1))-1  -> fexti33 Rt3,imm3u.  */

  switch (which_alternative)
    {
    case 0:
      return "and33\t%0, %2";
    case 1:
      return "and\t%0, %1, %2";
    case 2:
      return "zeb33\t%0, %1";
    case 3:
      return "zeh33\t%0, %1";
    case 4:
      return "xlsb33\t%0, %1";
    case 5:
      return "x11b33\t%0, %1";
    case 6:
      mask = INTVAL (operands[2]);
      operands[2] = GEN_INT (floor_log2 (mask));
      return "bmski33\t%0, %2";
    case 7:
      mask = INTVAL (operands[2]);
      operands[2] = GEN_INT (floor_log2 (mask + 1) - 1);
      return "fexti33\t%0, %2";
    case 8:
      return "zeb\t%0, %1";
    case 9:
      return "zeh\t%0, %1";
    case 10:
      return "andi\t%0, %1, %2";
    case 11:
      mask = INTVAL (operands[2]);
      operands[2] = GEN_INT (~mask);
      return "bitci\t%0, %1, %2";
    case 12:
      /* If we reach this alternative,
         it must pass the nds32_can_use_bclr_p() test,
         so that we can guarantee there is only one 0-bit
         within the immediate value.  */
      mask = INTVAL (operands[2]);
      for (zero_position = 31; zero_position >= 0; zero_position--)
	{
	  /* Shift an unsigned wide 1 so that probing bit 31 does not
	     overflow a signed int.  */
	  if ((mask & ((unsigned HOST_WIDE_INT) 1 << zero_position)) == 0)
	    {
	      /* Found the 0-bit position.  */
	      operands[2] = GEN_INT (zero_position);
	      break;
	    }
	}
      return "bclr\t%0, %1, %2";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
   (set_attr "length" " 2, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4")])

(define_insn "*and_slli"
  [(set (match_operand:SI 0 "register_operand"                      "= r")
	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   " r")
			    (match_operand:SI 2 "immediate_operand" " Iu05"))
		(match_operand:SI 3 "register_operand"              " r")))]
  "TARGET_ISA_V3"
  "and_slli\t%0, %3, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])

(define_insn "*and_srli"
  [(set (match_operand:SI 0 "register_operand"                       "= r")
	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  " r")
			     (match_operand:SI 2 "immediate_operand" " Iu05"))
		(match_operand:SI 3 "register_operand"               " r")))]
  "TARGET_ISA_V3"
  "and_srli\t%0, %3, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])


;; ----------------------------------------------------------------------------
;; 'OR' operation
;; ----------------------------------------------------------------------------

;; For V3/V3M ISA, we have 'or33' instruction.
;; So we can identify 'or Rt3,Rt3,Ra3' case and set its length to be 2.
;; 32-bit inclusive OR.  Alternative 3 (Ie15) is a single-bit mask that is
;; emitted as 'bset' with the bit index as its immediate.
(define_insn "iorsi3"
  [(set (match_operand:SI 0 "register_operand" "= w, r, r, r")
        (ior:SI (match_operand:SI 1 "register_operand" " %0, r, r, r")
                (match_operand:SI 2 "general_operand" " w, r, Iu15, Ie15")))]
  ""
{
  int one_position;

  switch (which_alternative)
    {
    case 0:
      return "or33\t%0, %2";
    case 1:
      return "or\t%0, %1, %2";
    case 2:
      return "ori\t%0, %1, %2";
    case 3:
      /* If we reach this alternative,
         it must pass the nds32_can_use_bset_p() test,
         so that we can guarantee there is only one 1-bit
         within the immediate value.  */
      /* Use exact_log2() to search the 1-bit position.
         A CONST_INT is kept sign-extended in HOST_WIDE_INT, so a mask with
         only bit 31 set has many 1-bits on a 64-bit host and exact_log2()
         would return -1.  Restrict the value to the 32 bits of SImode
         before looking for the bit.  */
      one_position = exact_log2 (INTVAL (operands[2])
                                 & GET_MODE_MASK (SImode));
      operands[2] = GEN_INT (one_position);
      return "bset\t%0, %1, %2";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "alu,alu,alu,alu")
   (set_attr "length" " 2, 4, 4, 4")])

;; operand 0 = (operand 1 << operand 2) | operand 3.
(define_insn "*or_slli"
  [(set (match_operand:SI 0 "register_operand" "= r")
        (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" " r")
                           (match_operand:SI 2 "immediate_operand" " Iu05"))
                (match_operand:SI 3 "register_operand" " r")))]
  "TARGET_ISA_V3"
  "or_slli\t%0, %3, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])

;; operand 0 = (operand 1 >> operand 2, logical) | operand 3.
(define_insn "*or_srli"
  [(set (match_operand:SI 0 "register_operand" "= r")
        (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r")
                             (match_operand:SI 2 "immediate_operand" " Iu05"))
                (match_operand:SI 3 "register_operand" " r")))]
  "TARGET_ISA_V3"
  "or_srli\t%0, %3, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])


;; ----------------------------------------------------------------------------
;; 'XOR' operation
;; ----------------------------------------------------------------------------

;; For V3/V3M ISA, we have 'xor33' instruction.
;; So we can identify 'xor Rt3,Rt3,Ra3' case and set its length to be 2.
;; 32-bit exclusive OR.  Alternative 3 (It15) is a single-bit mask that is
;; emitted as 'btgl' with the bit index as its immediate.
(define_insn "xorsi3"
  [(set (match_operand:SI 0 "register_operand" "= w, r, r, r")
        (xor:SI (match_operand:SI 1 "register_operand" " %0, r, r, r")
                (match_operand:SI 2 "general_operand" " w, r, Iu15, It15")))]
  ""
{
  int one_position;

  switch (which_alternative)
    {
    case 0:
      return "xor33\t%0, %2";
    case 1:
      return "xor\t%0, %1, %2";
    case 2:
      return "xori\t%0, %1, %2";
    case 3:
      /* If we reach this alternative,
         it must pass the nds32_can_use_btgl_p() test,
         so that we can guarantee there is only one 1-bit
         within the immediate value.  */
      /* Use exact_log2() to search the 1-bit position.
         A CONST_INT is kept sign-extended in HOST_WIDE_INT, so a mask with
         only bit 31 set has many 1-bits on a 64-bit host and exact_log2()
         would return -1.  Restrict the value to the 32 bits of SImode
         before looking for the bit.  */
      one_position = exact_log2 (INTVAL (operands[2])
                                 & GET_MODE_MASK (SImode));
      operands[2] = GEN_INT (one_position);
      return "btgl\t%0, %1, %2";

    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "alu,alu,alu,alu")
   (set_attr "length" " 2, 4, 4, 4")])

;; operand 0 = (operand 1 << operand 2) ^ operand 3.
(define_insn "*xor_slli"
  [(set (match_operand:SI 0 "register_operand" "= r")
        (xor:SI (ashift:SI (match_operand:SI 1 "register_operand" " r")
                           (match_operand:SI 2 "immediate_operand" " Iu05"))
                (match_operand:SI 3 "register_operand" " r")))]
  "TARGET_ISA_V3"
  "xor_slli\t%0, %3, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])

;; operand 0 = (operand 1 >> operand 2, logical) ^ operand 3.
(define_insn "*xor_srli"
  [(set (match_operand:SI 0 "register_operand" "= r")
        (xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r")
                             (match_operand:SI 2 "immediate_operand" " Iu05"))
                (match_operand:SI 3 "register_operand" " r")))]
  "TARGET_ISA_V3"
  "xor_srli\t%0, %3, %1, %2"
  [(set_attr "type" "alu")
   (set_attr "length" "4")])

;; Rotate Right Instructions.

;; Rotate right by a 5-bit unsigned immediate (rotri) or by a register (rotr).
(define_insn "rotrsi3"
  [(set (match_operand:SI 0 "register_operand" "= r, r")
        (rotatert:SI
          (match_operand:SI 1 "register_operand" " r, r")
          (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
  ""
  "@
  rotri\t%0, %1, %2
  rotr\t%0, %1, %2"
  [(set_attr "type" "alu,alu")
   (set_attr "length" " 4, 4")])


;; ----------------------------------------------------------------------------
;; 'NEG' operation
;; ----------------------------------------------------------------------------

;; For V3/V3M ISA, we have 'neg33' instruction.
;; So we can identify 'neg Rt3,Ra3' case and set its length to be 2.
;; And for V2 ISA, there is NO 'neg33' instruction.
;; The only option is to use 'subri A,B,0' (its semantic is 'A = 0 - B').
(define_insn "negsi2"
  [(set (match_operand:SI 0 "register_operand" "=w, r")
        (neg:SI
          (match_operand:SI 1 "register_operand" " w, r")))]
  ""
  "@
  neg33\t%0, %1
  subri\t%0, %1, 0"
  [(set_attr "type" "alu,alu")
   (set_attr "length" " 2, 4")])


;; ----------------------------------------------------------------------------
;; 'ONE_COMPLEMENT' operation
;; ----------------------------------------------------------------------------

;; For V3/V3M ISA, we have 'not33' instruction.
;; So we can identify 'not Rt3,Ra3' case and set its length to be 2.
;; The 32-bit alternative is emitted as 'nor' of the source with itself.
(define_insn "one_cmplsi2"
  [(set (match_operand:SI 0 "register_operand" "=w, r")
        (not:SI
          (match_operand:SI 1 "register_operand" " w, r")))]
  ""
  "@
  not33\t%0, %1
  nor\t%0, %1, %1"
  [(set_attr "type" "alu,alu")
   (set_attr "length" " 2, 4")])


;; ----------------------------------------------------------------------------

;; Shift instructions.

;; Shift left: 16-bit slli333 (3-bit immediate), 32-bit slli (5-bit
;; immediate), or sll with a register shift count.
(define_insn "ashlsi3"
  [(set (match_operand:SI 0 "register_operand" "= l, r, r")
        (ashift:SI (match_operand:SI 1 "register_operand" " l, r, r")
                   (match_operand:SI 2 "nonmemory_operand" " Iu03, Iu05, r")))]
  ""
  "@
  slli333\t%0, %1, %2
  slli\t%0, %1, %2
  sll\t%0, %1, %2"
  [(set_attr "type" "alu,alu,alu")
   (set_attr "length" " 2, 4, 4")])

;; Arithmetic shift right.  The 16-bit srai45 form shifts operand 0 in place.
(define_insn "ashrsi3"
  [(set (match_operand:SI 0 "register_operand" "= d, r, r")
        (ashiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r")
                     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
  ""
  "@
  srai45\t%0, %2
  srai\t%0, %1, %2
  sra\t%0, %1, %2"
  [(set_attr "type" "alu,alu,alu")
   (set_attr "length" " 2, 4, 4")])

;; Logical shift right.  The 16-bit srli45 form shifts operand 0 in place.
(define_insn "lshrsi3"
  [(set (match_operand:SI 0 "register_operand" "= d, r, r")
        (lshiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r")
                     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
  ""
  "@
  srli45\t%0, %2
  srli\t%0, %1, %2
  srl\t%0, %1, %2"
  [(set_attr "type" "alu,alu,alu")
   (set_attr "length" " 2, 4, 4")])


;; ----------------------------------------------------------------------------

;; ----------------------------------------------------------------------------
;; Conditional Move patterns
;; ----------------------------------------------------------------------------

;; Expand a conditional move.  Any comparison other than SImode (eq X 0) or
;; (ne X 0) is first reduced to an slt/slts result in a temporary, and
;; operands[1] is rewritten to test that temporary against zero so that the
;; cmovz/cmovn patterns below can match.
(define_expand "movsicc"
  [(set (match_operand:SI 0 "register_operand" "")
        (if_then_else:SI (match_operand 1 "comparison_operator" "")
                         (match_operand:SI 2 "register_operand" "")
                         (match_operand:SI 3 "register_operand" "")))]
  "TARGET_CMOV"
{
  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
      && GET_MODE (XEXP (operands[1], 0)) == SImode
      && XEXP (operands[1], 1) == const0_rtx)
    {
      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
         we have gcc generate original template rtx.  */
      goto create_template;
    }
  else
    {
      /* Since there is only 'slt'(Set when Less Than) instruction for
         comparison in Andes ISA, the major strategy we use here is to
         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
         We design constraints properly so that the reload phase will assist
         to make one source operand to use same register as result operand.
         Then we can use cmovz/cmovn to catch the other source operand
         which has different register.  */
      enum rtx_code code = GET_CODE (operands[1]);
      enum rtx_code new_code = code;
      rtx cmp_op0 = XEXP (operands[1], 0);
      rtx cmp_op1 = XEXP (operands[1], 1);
      rtx tmp;
      int reverse = 0;

      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
         Strategy : Reverse condition and swap comparison operands

         For example:

             a <= b ? P : Q   (LE or LEU)
         --> a >  b ? Q : P   (reverse condition)
         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')

             a >= b ? P : Q   (GE or GEU)
         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')

             a <  b ? P : Q   (LT or LTU)
         --> (NO NEED TO CHANGE, it is already 'LT/LTU')

             a >  b ? P : Q   (GT or GTU)
         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
      switch (code)
        {
        case NE:
          /*   (a != b ? P : Q)
             can be expressed as
               (a == b ? Q : P)
             so, fall through to reverse condition */
        case GE: case GEU: case LE: case LEU:
          new_code = reverse_condition (code);
          reverse = 1;
          break;
        case EQ: case GT: case GTU: case LT: case LTU:
          /* no need to reverse condition */
          break;
        default:
          FAIL;
        }

      /* For '>' comparison operator, we swap operands
         so that we can have 'LT/LTU' operator.  */
      if (new_code == GT || new_code == GTU)
        {
          tmp     = cmp_op0;
          cmp_op0 = cmp_op1;
          cmp_op1 = tmp;

          new_code = swap_condition (new_code);
        }

      /* Use a temporary register to store slt/slts result.  */
      tmp = gen_reg_rtx (SImode);

      /* Split EQ and NE because we don't have direct comparison of EQ and NE.
         If we don't split it, the conditional move transformation will fail
         when producing (SET A (EQ B C)) or (SET A (NE B C)).
         Note that the switch above always reverses NE into EQ, so the
         NE arm below is kept only for completeness.  */
      if (new_code == EQ)
        {
          emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
          emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
        }
      else if (new_code == NE)
        {
          emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
          emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
        }
      else
        /* This emit_insn will create corresponding 'slt/slts' instruction.  */
        emit_insn (gen_rtx_SET (VOIDmode, tmp,
                                gen_rtx_fmt_ee (new_code, SImode,
                                                cmp_op0, cmp_op1)));

      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
         so that cmovz or cmovn will be matched later.

         For reverse condition cases, we want to create a semantic that:
           (eq X 0) --> pick up "else" part
         For normal cases, we want to create a semantic that:
           (ne X 0) --> pick up "then" part

         Later we will have cmovz/cmovn instruction pattern to
         match corresponding behavior and output instruction.  */
      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
                                    VOIDmode, tmp, const0_rtx);
    }

create_template:
  do {} while(0); /* dummy line */
})

;; operand 0 = (operand 1 == 0) ? operand 2 : operand 3.
;; Whichever source is tied to operand 0 decides between cmovz and cmovn.
(define_insn "cmovz"
  [(set (match_operand:SI 0 "register_operand" "=r, r")
        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
                             (const_int 0))
                         (match_operand:SI 2 "register_operand" " r, 0")
                         (match_operand:SI 3 "register_operand" " 0, r")))]
  "TARGET_CMOV"
  "@
  cmovz\t%0, %2, %1
  cmovn\t%0, %3, %1"
  [(set_attr "type" "move")
   (set_attr "length" "4")])

;; operand 0 = (operand 1 != 0) ? operand 2 : operand 3.
(define_insn "cmovn"
  [(set (match_operand:SI 0 "register_operand" "=r, r")
        (if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
                             (const_int 0))
                         (match_operand:SI 2 "register_operand" " r, 0")
                         (match_operand:SI 3 "register_operand" " 0, r")))]
  "TARGET_CMOV"
  "@
  cmovn\t%0, %2, %1
  cmovz\t%0, %3, %1"
  [(set_attr "type" "move")
   (set_attr "length" "4")])


;; ----------------------------------------------------------------------------
;; Conditional Branch patterns
;; ----------------------------------------------------------------------------

;; Expand a compare-and-branch.  Signed compares against zero and all EQ/NE
;; compares keep the original template; everything else is lowered to an
;; slt/slts into $ta followed by a branch on ($ta == 0) or ($ta != 0).
(define_expand "cbranchsi4"
  [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
                        [(match_operand:SI 1 "register_operand" "")
                         (match_operand:SI 2 "nds32_reg_constant_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
{
  rtx tmp_reg;
  enum rtx_code code;
  /* Nonzero when operands[2] is a constant for which 'const + 1' would
     wrap around in SImode for this comparison's signedness.  */
  int plus1_wraps = 0;

  code = GET_CODE (operands[0]);

  /* If operands[2] is (const_int 0),
     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
     So we have gcc generate original template rtx.  */
  if (GET_CODE (operands[2]) == CONST_INT
      && INTVAL (operands[2]) == 0
      && code != GTU
      && code != GEU
      && code != LTU
      && code != LEU)
    goto create_template;

  /* 'X > C' and 'X <= C' are rewritten below in terms of 'X < C + 1'.
     That is only valid while C + 1 does not wrap: (le X INT_MAX) is not
     (lt X INT_MIN), and likewise for GT and for the unsigned maximum.
     Detect those constants here; they take the register-register path.  */
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      unsigned HOST_WIDE_INT uval
        = INTVAL (operands[2]) & GET_MODE_MASK (SImode);

      if (code == GT || code == LE)
        plus1_wraps = (uval == 0x7fffffff);
      else if (code == GTU || code == LEU)
        plus1_wraps = (uval == GET_MODE_MASK (SImode));
    }

  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
     behavior for the comparison, we might need to generate other
     rtx patterns to achieve same semantic.  */
  switch (code)
    {
    case GT:
    case GTU:
      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);

      if (GET_CODE (operands[2]) == CONST_INT && !plus1_wraps)
        {
          /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */

          /* We want to plus 1 into the integer value
             of operands[2] to create 'slt' instruction.
             This calculation is performed on the host machine,
             which may be 64-bit integer.
             So the meaning of calculation result may be
             different from the 32-bit nds32 target.

             For example:
               0x7fffffff + 0x1 -> 0x80000000,
               this value is POSITIVE on 64-bit machine,
               but the expected value on 32-bit nds32 target
               should be NEGATIVE value.

             Hence, instead of using GEN_INT(), we use gen_int_mode() to
             explicitly create SImode constant rtx.  */
          operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);

          /* GT uses slts, GTU uses slt.  */
          emit_insn (code == GT
                     ? gen_slts_compare (tmp_reg, operands[1], operands[2])
                     : gen_slt_compare (tmp_reg, operands[1], operands[2]));

          PUT_CODE (operands[0], EQ);
        }
      else
        {
          /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A
             A constant whose successor would wrap is loaded into a
             register and handled the same way.  */
          if (GET_CODE (operands[2]) == CONST_INT)
            operands[2] = force_reg (SImode, operands[2]);

          /* GT uses slts, GTU uses slt.  */
          emit_insn (code == GT
                     ? gen_slts_compare (tmp_reg, operands[2], operands[1])
                     : gen_slt_compare (tmp_reg, operands[2], operands[1]));

          PUT_CODE (operands[0], NE);
        }

      operands[1] = tmp_reg;
      operands[2] = const0_rtx;
      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
                                 operands[2], operands[3]));

      DONE;

    case GE:
    case GEU:
      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);

      /* GE uses slts, GEU uses slt.  */
      emit_insn (code == GE
                 ? gen_slts_compare (tmp_reg, operands[1], operands[2])
                 : gen_slt_compare (tmp_reg, operands[1], operands[2]));

      PUT_CODE (operands[0], EQ);
      operands[1] = tmp_reg;
      operands[2] = const0_rtx;
      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
                                 operands[2], operands[3]));

      DONE;

    case LT:
    case LTU:
      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);

      /* LT uses slts, LTU uses slt.  */
      emit_insn (code == LT
                 ? gen_slts_compare (tmp_reg, operands[1], operands[2])
                 : gen_slt_compare (tmp_reg, operands[1], operands[2]));

      PUT_CODE (operands[0], NE);
      operands[1] = tmp_reg;
      operands[2] = const0_rtx;
      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
                                 operands[2], operands[3]));

      DONE;

    case LE:
    case LEU:
      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);

      if (GET_CODE (operands[2]) == CONST_INT && !plus1_wraps)
        {
          /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
          operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);

          /* LE uses slts, LEU uses slt.  */
          emit_insn (code == LE
                     ? gen_slts_compare (tmp_reg, operands[1], operands[2])
                     : gen_slt_compare (tmp_reg, operands[1], operands[2]));

          PUT_CODE (operands[0], NE);
        }
      else
        {
          /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A)
             A constant whose successor would wrap (for example
             (le X INT_MAX), INT_MAX being 0x7fffffff for the target) is
             loaded into a register and handled the same way.  */
          if (GET_CODE (operands[2]) == CONST_INT)
            operands[2] = force_reg (SImode, operands[2]);

          /* LE uses slts, LEU uses slt.  */
          emit_insn (code == LE
                     ? gen_slts_compare (tmp_reg, operands[2], operands[1])
                     : gen_slt_compare (tmp_reg, operands[2], operands[1]));

          PUT_CODE (operands[0], EQ);
        }

      operands[1] = tmp_reg;
      operands[2] = const0_rtx;
      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
                                 operands[2], operands[3]));

      DONE;

    case EQ:
    case NE:
      /* NDS32 ISA has various form for eq/ne behavior no matter
         what kind of the operand is.
         So just generate original template rtx.  */
      goto create_template;

    default:
      FAIL;
    }

create_template:
  do {} while(0); /* dummy line */
})


;; Branch on (reg == 0) or (reg != 0).  The "length" attribute selects the
;; encoding: 2 = 16-bit branch, 4 = 32-bit branch, 6 / 8 = inverted 16-bit /
;; 32-bit branch over an unconditional 'j' for out-of-range targets.
(define_insn "*cbranchsi4_equality_zero"
  [(set (pc)
        (if_then_else (match_operator 0 "nds32_equality_comparison_operator"
                        [(match_operand:SI 1 "register_operand" "t, l, r")
                         (const_int 0)])
                      (label_ref (match_operand 2 "" ""))
                      (pc)))]
  ""
{
  enum rtx_code code;

  code = GET_CODE (operands[0]);

  /* This zero-comparison conditional branch has two forms:
       32-bit instruction =>          beqz/bnez           imm16s << 1
       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1

     For 32-bit case,
     we assume it is always reachable. (but check range -65500 ~ 65500)

     For 16-bit case,
     it must satisfy { 255 >= (label - pc) >= -256 } condition.
     However, since the $pc for nds32 is at the beginning of the instruction,
     we should leave some length space for current insn.
     So we use range -250 ~ 250.  */

  switch (get_attr_length (insn))
    {
    case 2:
      if (which_alternative == 0)
        {
          /* constraint: t */
          return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
        }
      else if (which_alternative == 1)
        {
          /* constraint: l */
          return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
        }
      else
        {
          /* constraint: r */
          /* For which_alternative==2, it should not be here.  */
          gcc_unreachable ();
        }
    case 4:
      /* including constraints: t, l, and r */
      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
    case 6:
      if (which_alternative == 0)
        {
          /* constraint: t */
          if (code == EQ)
            {
              /*    beqzs8  .L0
                  =>
                    bnezs8  .LCB0
                    j  .L0
                  .LCB0:
               */
              return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
            }
          else
            {
              /*    bnezs8  .L0
                  =>
                    beqzs8  .LCB0
                    j  .L0
                  .LCB0:
               */
              return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
            }
        }
      else if (which_alternative == 1)
        {
          /* constraint: l */
          if (code == EQ)
            {
              /*    beqz38  $r0, .L0
                  =>
                    bnez38  $r0, .LCB0
                    j  .L0
                  .LCB0:
               */
              return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
            }
          else
            {
              /*    bnez38  $r0, .L0
                  =>
                    beqz38  $r0, .LCB0
                    j  .L0
                  .LCB0:
               */
              return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
            }
        }
      else
        {
          /* constraint: r */
          /* For which_alternative==2, it should not be here.  */
          gcc_unreachable ();
        }
    case 8:
      /* constraint: t, l, r.  */
      if (code == EQ)
        {
          /*    beqz  $r8, .L0
              =>
                bnez  $r8, .LCB0
                j  .L0
              .LCB0:
           */
          return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
        }
      else
        {
          /*    bnez  $r8, .L0
              =>
                beqz  $r8, .LCB0
                j  .L0
              .LCB0:
           */
          return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
        }
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "branch")
   (set_attr "enabled" "1")
   (set_attr_alternative "length"
     [
       ;; Alternative 0
       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
                          (le (minus (match_dup 2) (pc)) (const_int 250)))
                     (if_then_else (match_test "TARGET_16_BIT")
                                   (const_int 2)
                                   (const_int 4))
                     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
                                        (le (minus (match_dup 2) (pc)) (const_int 65500)))
                                   (const_int 4)
                                   (if_then_else (match_test "TARGET_16_BIT")
                                                 (const_int 6)
                                                 (const_int 8))))
       ;; Alternative 1
       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
                          (le (minus (match_dup 2) (pc)) (const_int 250)))
                     (if_then_else (match_test "TARGET_16_BIT")
                                   (const_int 2)
                                   (const_int 4))
                     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
                                        (le (minus (match_dup 2) (pc)) (const_int 65500)))
                                   (const_int 4)
                                   (if_then_else (match_test "TARGET_16_BIT")
                                                 (const_int 6)
                                                 (const_int 8))))
       ;; Alternative 2
       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
                          (le (minus (match_dup 2) (pc)) (const_int 65500)))
                     (const_int 4)
                     (const_int 8))
     ])])


;; This pattern is dedicated to V2 ISA,
;; because V2 DOES NOT HAVE beqc/bnec instruction.
(define_insn "*cbranchsi4_equality_reg"
  [(set (pc)
        (if_then_else
          (match_operator 0 "nds32_equality_comparison_operator"
            [(match_operand:SI 1 "register_operand" "r")
             (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
          (label_ref (match_operand 3 "" ""))
          (pc)))]
  "TARGET_ISA_V2"
{
  /* Register-register equality branch.  There is a single encoding:
       32-bit instruction => beq/bne imm14s << 1

     The length attribute is 4 when the target lies within
     -16350 ~ 16350 of this insn, and 8 otherwise.  In the long case the
     condition is inverted to skip over an unconditional jump:

         beq  $r0, $r1, .L0            bne  $r0, $r1, .LCB0
                               =>      j    .L0
                                     .LCB0:

     and symmetrically for bne.  */
  int branch_if_equal;

  switch (GET_CODE (operands[0]))
    {
    case EQ:
      branch_if_equal = 1;
      break;
    case NE:
      branch_if_equal = 0;
      break;
    default:
      gcc_unreachable ();
    }

  switch (get_attr_length (insn))
    {
    case 4:
      /* Target in range: branch directly.  */
      return branch_if_equal ? "beq\t%1, %2, %3"
                             : "bne\t%1, %2, %3";
    case 8:
      /* Target out of range: inverted branch around a 'j'.  */
      return branch_if_equal ? "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"
                             : "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "branch")
   (set (attr "length")
        (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
                           (le (minus (match_dup 3) (pc)) (const_int 16350)))
                      (const_int 4)
                      (const_int 8)))])


;; This pattern is dedicated to V3/V3M,
;; because V3/V3M DO HAVE beqc/bnec instruction.
+(define_insn "*cbranchsi4_equality_reg_or_const_int" + [(set (pc) + (if_then_else (match_operator 0 "nds32_equality_comparison_operator" + [(match_operand:SI 1 "register_operand" "r, r") + (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_ISA_V3 || TARGET_ISA_V3M" +{ + enum rtx_code code; + + code = GET_CODE (operands[0]); + + /* This register-comparison conditional branch has one form: + 32-bit instruction => beq/bne imm14s << 1 + 32-bit instruction => beqc/bnec imm8s << 1 + + For 32-bit case, we assume it is always reachable. + (but check range -16350 ~ 16350 and -250 ~ 250). */ + + switch (code) + { + case EQ: + if (which_alternative == 0) + { + /* r, r */ + switch (get_attr_length (insn)) + { + case 4: + return "beq\t%1, %2, %3"; + case 8: + /* beq $r0, $r1, .L0 + => + bne $r0, $r1, .LCB0 + j .L0 + .LCB0: + */ + return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; + default: + gcc_unreachable (); + } + } + else + { + /* r, Is11 */ + switch (get_attr_length (insn)) + { + case 4: + return "beqc\t%1, %2, %3"; + case 8: + /* beqc $r0, constant, .L0 + => + bnec $r0, constant, .LCB0 + j .L0 + .LCB0: + */ + return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; + default: + gcc_unreachable (); + } + } + case NE: + if (which_alternative == 0) + { + /* r, r */ + switch (get_attr_length (insn)) + { + case 4: + return "bne\t%1, %2, %3"; + case 8: + /* bne $r0, $r1, .L0 + => + beq $r0, $r1, .LCB0 + j .L0 + .LCB0: + */ + return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; + default: + gcc_unreachable (); + } + } + else + { + /* r, Is11 */ + switch (get_attr_length (insn)) + { + case 4: + return "bnec\t%1, %2, %3"; + case 8: + /* bnec $r0, constant, .L0 + => + beqc $r0, constant, .LCB0 + j .L0 + .LCB0: + */ + return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; + default: + gcc_unreachable (); + } + } + default: + gcc_unreachable (); + } +} + [(set_attr "type" "branch") + (set_attr_alternative "length" + [ + ;; Alternative 0 + 
(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) + (le (minus (match_dup 3) (pc)) (const_int 16350))) + (const_int 4) + (const_int 8)) + ;; Alternative 1 + (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 250))) + (const_int 4) + (const_int 8)) + ])]) + + +(define_insn "*cbranchsi4_greater_less_zero" + [(set (pc) + (if_then_else (match_operator 0 "nds32_greater_less_comparison_operator" + [(match_operand:SI 1 "register_operand" "r") + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" +{ + enum rtx_code code; + + code = GET_CODE (operands[0]); + + /* This zero-greater-less-comparison conditional branch has one form: + 32-bit instruction => bgtz/bgez/bltz/blez imm16s << 1 + + For 32-bit case, we assume it is always reachable. + (but check range -65500 ~ 65500). */ + + if (get_attr_length (insn) == 8) + { + /* The branch target is too far to simply use one + bgtz/bgez/bltz/blez instruction. + We need to reverse condition and use 'j' to jump to the target. 
*/ + switch (code) + { + case GT: + /* bgtz $r8, .L0 + => + blez $r8, .LCB0 + j .L0 + .LCB0: + */ + return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; + case GE: + /* bgez $r8, .L0 + => + bltz $r8, .LCB0 + j .L0 + .LCB0: + */ + return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; + case LT: + /* bltz $r8, .L0 + => + bgez $r8, .LCB0 + j .L0 + .LCB0: + */ + return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; + case LE: + /* blez $r8, .L0 + => + bgtz $r8, .LCB0 + j .L0 + .LCB0: + */ + return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; + default: + gcc_unreachable (); + } + } + + switch (code) + { + case GT: + return "bgtz\t%1, %2"; + case GE: + return "bgez\t%1, %2"; + case LT: + return "bltz\t%1, %2"; + case LE: + return "blez\t%1, %2"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) + (le (minus (match_dup 2) (pc)) (const_int 65500))) + (const_int 4) + (const_int 8)))]) + + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "comparison_operator" + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "nonmemory_operand" "")]))] + "" +{ + rtx tmp_reg; + enum rtx_code code; + + code = GET_CODE (operands[1]); + + switch (code) + { + case EQ: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A == const_int_B) + --> addi reg_C, reg_A, -const_int_B + slti reg_R, reg_C, const_int_1 */ + tmp_reg = gen_reg_rtx (SImode); + operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode); + /* If the integer value is not in the range of imm15s, + we need to force register first because our addsi3 pattern + only accept nds32_rimm15s_operand predicate. 
*/ + if (!satisfies_constraint_Is15 (operands[3])) + operands[3] = force_reg (SImode, operands[3]); + emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3])); + emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx)); + + DONE; + } + else + { + /* reg_R = (reg_A == reg_B) + --> xor reg_C, reg_A, reg_B + slti reg_R, reg_C, const_int_1 */ + tmp_reg = gen_reg_rtx (SImode); + emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); + emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx)); + + DONE; + } + + case NE: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A != const_int_B) + --> addi reg_C, reg_A, -const_int_B + slti reg_R, const_int_0, reg_C */ + tmp_reg = gen_reg_rtx (SImode); + operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode); + /* If the integer value is not in the range of imm15s, + we need to force register first because our addsi3 pattern + only accept nds32_rimm15s_operand predicate. */ + if (!satisfies_constraint_Is15 (operands[3])) + operands[3] = force_reg (SImode, operands[3]); + emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3])); + emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); + + DONE; + } + else + { + /* reg_R = (reg_A != reg_B) + --> xor reg_C, reg_A, reg_B + slti reg_R, const_int_0, reg_C */ + tmp_reg = gen_reg_rtx (SImode); + emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); + emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); + + DONE; + } + + case GT: + case GTU: + /* reg_R = (reg_A > reg_B) --> slt reg_R, reg_B, reg_A */ + /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */ + if (code == GT) + { + /* GT, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], operands[3], operands[2])); + } + else + { + /* GTU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], operands[3], operands[2])); + } + + DONE; + + case GE: + case GEU: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A >= 
const_int_B) + --> movi reg_C, const_int_B - 1 + slt reg_R, reg_C, reg_A */ + tmp_reg = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp_reg, + gen_int_mode (INTVAL (operands[3]) - 1, + SImode))); + if (code == GE) + { + /* GE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2])); + } + else + { + /* GEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], tmp_reg, operands[2])); + } + + DONE; + } + else + { + /* reg_R = (reg_A >= reg_B) + --> slt reg_R, reg_A, reg_B + xori reg_R, reg_R, const_int_1 */ + if (code == GE) + { + /* GE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], + operands[2], operands[3])); + } + else + { + /* GEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], + operands[2], operands[3])); + } + + /* perform 'not' behavior */ + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + + DONE; + } + + case LT: + case LTU: + /* reg_R = (reg_A < reg_B) --> slt reg_R, reg_A, reg_B */ + /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */ + if (code == LT) + { + /* LT, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], operands[2], operands[3])); + } + else + { + /* LTU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], operands[2], operands[3])); + } + + DONE; + + case LE: + case LEU: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A <= const_int_B) + --> movi reg_C, const_int_B + 1 + slt reg_R, reg_A, reg_C */ + tmp_reg = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp_reg, + gen_int_mode (INTVAL (operands[3]) + 1, + SImode))); + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg)); + } + else + { + /* LEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], operands[2], tmp_reg)); + } + + DONE; + } + else + { + /* reg_R = (reg_A <= reg_B) --> slt reg_R, reg_B, reg_A + xori reg_R, reg_R, const_int_1 
*/ + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], + operands[3], operands[2])); + } + else + { + /* LEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], + operands[3], operands[2])); + } + + /* perform 'not' behavior */ + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + + DONE; + } + + + default: + gcc_unreachable (); + } +}) + + +(define_insn "slts_compare" + [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") + (lt:SI (match_operand:SI 1 "nonmemory_operand" " d, d, r, r") + (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))] + "" + "@ + slts45\t%1, %2 + sltsi45\t%1, %2 + slts\t%0, %1, %2 + sltsi\t%0, %1, %2" + [(set_attr "type" "compare,compare,compare,compare") + (set_attr "length" " 2, 2, 4, 4")]) + +(define_insn "slt_compare" + [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") + (ltu:SI (match_operand:SI 1 "nonmemory_operand" " d, d, r, r") + (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))] + "" + "@ + slt45\t%1, %2 + slti45\t%1, %2 + slt\t%0, %1, %2 + slti\t%0, %1, %2" + [(set_attr "type" "compare,compare,compare,compare") + (set_attr "length" " 2, 2, 4, 4")]) + + +;; ---------------------------------------------------------------------------- + +;; Unconditional and other jump instructions. + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" +{ + /* This unconditional jump has two forms: + 32-bit instruction => j imm24s << 1 + 16-bit instruction => j8 imm8s << 1 + + For 32-bit case, + we assume it is always reachable. + For 16-bit case, + it must satisfy { 255 >= (label - pc) >= -256 } condition. + However, since the $pc for nds32 is at the beginning of the instruction, + we should leave some length space for current insn. + So we use range -250 ~ 250. 
*/ + switch (get_attr_length (insn)) + { + case 2: + return "j8\t%0"; + case 4: + return "j\t%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "branch") + (set_attr "enabled" "1") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) + (le (minus (match_dup 0) (pc)) (const_int 250))) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)) + (const_int 4)))]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:SI 0 "register_operand" "r, r"))] + "" + "@ + jr5\t%0 + jr\t%0" + [(set_attr "type" "branch,branch") + (set_attr "length" " 2, 4")]) + +;; Subroutine call instruction returning no value. +;; operands[0]: It should be a mem RTX whose address is +;; the address of the function. +;; operands[1]: It is the number of bytes of arguments pushed as a const_int. +;; operands[2]: It is the number of registers used as operands. + +(define_expand "call" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1)) + (clobber (reg:SI LP_REGNUM))])] + "" + "" +) + +(define_insn "*call_register" + [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r")) + (match_operand 1)) + (clobber (reg:SI LP_REGNUM))])] + "" + "@ + jral5\t%0 + jral\t%0" + [(set_attr "type" "branch,branch") + (set_attr "length" " 2, 4")]) + +(define_insn "*call_immediate" + [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i")) + (match_operand 1)) + (clobber (reg:SI LP_REGNUM))])] + "" + "jal\t%0" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + + +;; Subroutine call instruction returning a value. +;; operands[0]: It is the hard register in which the value is returned. +;; The remaining three operands are the same as the +;; three operands of the 'call' instruction. 
+;; (but with numbers increased by one) + +(define_expand "call_value" + [(parallel [(set (match_operand 0) + (call (match_operand 1 "memory_operand" "") + (match_operand 2))) + (clobber (reg:SI LP_REGNUM))])] + "" + "" +) + +(define_insn "*call_value_register" + [(parallel [(set (match_operand 0) + (call (mem (match_operand:SI 1 "register_operand" "r, r")) + (match_operand 2))) + (clobber (reg:SI LP_REGNUM))])] + "" + "@ + jral5\t%1 + jral\t%1" + [(set_attr "type" "branch,branch") + (set_attr "length" " 2, 4")]) + +(define_insn "*call_value_immediate" + [(parallel [(set (match_operand 0) + (call (mem (match_operand:SI 1 "immediate_operand" "i")) + (match_operand 2))) + (clobber (reg:SI LP_REGNUM))])] + "" + "jal\t%1" + [(set_attr "type" "branch") + (set_attr "length" "4")]) + + +;; prologue and epilogue. + +(define_expand "prologue" [(const_int 0)] + "" +{ + /* Note that only under V3/V3M ISA, we could use v3push prologue. */ + if (TARGET_V3PUSH) + nds32_expand_prologue_v3push (); + else + nds32_expand_prologue (); + DONE; +}) + +(define_expand "epilogue" [(const_int 0)] + "" +{ + /* Note that only under V3/V3M ISA, we could use v3pop epilogue. */ + if (TARGET_V3PUSH) + nds32_expand_epilogue_v3pop (); + else + nds32_expand_epilogue (); + DONE; +}) + + +;; nop instruction. + +(define_insn "nop" + [(const_int 0)] + "" +{ + if (TARGET_16_BIT) + return "nop16"; + else + return "nop"; +} + [(set_attr "type" "misc") + (set_attr "enabled" "1") + (set (attr "length") + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)))]) + + +;; ---------------------------------------------------------------------------- +;; Stack push/pop operations +;; ---------------------------------------------------------------------------- + +;; The pattern for stack push. +;; Both stack_push_multiple and stack_v3push use the following pattern. +;; So we need to use TARGET_V3PUSH to determine the instruction length. 
+(define_insn "*stack_push" + [(match_parallel 0 "nds32_stack_push_operation" + [(set (mem:SI (plus:SI (reg:SI SP_REGNUM) + (match_operand:SI 1 "const_int_operand" ""))) + (match_operand:SI 2 "register_operand" "")) + ])] + "" +{ + return nds32_output_stack_push (); +} + [(set_attr "type" "misc") + (set_attr "enabled" "1") + (set (attr "length") + (if_then_else (match_test "TARGET_V3PUSH") + (const_int 2) + (const_int 4)))]) + + +;; The pattern for stack pop. +;; Both stack_pop_multiple and stack_v3pop use the following pattern. +;; So we need to use TARGET_V3PUSH to determine the instruction length. +(define_insn "*stack_pop" + [(match_parallel 0 "nds32_stack_pop_operation" + [(set (match_operand:SI 1 "register_operand" "") + (mem:SI (reg:SI SP_REGNUM))) + ])] + "" +{ + return nds32_output_stack_pop (); +} + [(set_attr "type" "misc") + (set_attr "enabled" "1") + (set (attr "length") + (if_then_else (match_test "TARGET_V3PUSH") + (const_int 2) + (const_int 4)))]) + + +;; ---------------------------------------------------------------------------- +;; unspec operation patterns +;; ---------------------------------------------------------------------------- + +;; In nds32 target, the 'ret5' instruction is actually 'jr5 $lp'. +;; This pattern is designed to distinguish function return +;; from general indirect_jump pattern so that we can directly +;; generate 'ret5' for readability. 
+ +(define_insn "unspec_volatile_func_return" + [(set (pc) + (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_FUNC_RETURN))] + "" +{ + if (TARGET_16_BIT) + return "ret5"; + else + return "ret"; +} + [(set_attr "type" "misc") + (set_attr "enabled" "1") + (set (attr "length") + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)))]) + + +;; ---------------------------------------------------------------------------- +;; Jump Table patterns +;; ---------------------------------------------------------------------------- +;; Need to implement ASM_OUTPUT_ADDR_VEC_ELT (for normal jump table) +;; or ASM_OUTPUT_ADDR_DIFF_ELT (for pc relative jump table) as well. +;; +;; operands[0]: The index to dispatch on. +;; operands[1]: The lower bound for indices in the table. +;; operands[2]: The total range of indices in the table. +;; i.e. The largest index minus the smallest one. +;; operands[3]: A label that precedes the table itself. +;; operands[4]: A label to jump to if the index has a value outside the bounds. +;; +;; We need to create the following sequences for jump table code generation: +;; A) k <-- (plus (operands[0]) (-operands[1])) +;; B) if (gtu k operands[2]) then goto operands[4] +;; C) t <-- operands[3] +;; D) z <-- (mem (plus (k << 0 or 1 or 2) t)) +;; E) z <-- t + z (NOTE: This is only required for pc relative jump table.) +;; F) jump to target with register t or z +;; +;; The steps C, D, E, and F are performed by casesi_internal pattern. +(define_expand "casesi" + [(match_operand:SI 0 "register_operand" "r") ; index to jump on + (match_operand:SI 1 "immediate_operand" "i") ; lower bound + (match_operand:SI 2 "immediate_operand" "i") ; total range + (match_operand:SI 3 "" "") ; table label + (match_operand:SI 4 "" "")] ; Out of range label + "" +{ + rtx add_tmp; + rtx reg, test; + + /* Step A: "k <-- (plus (operands[0]) (-operands[1]))". 
*/ + if (operands[1] != const0_rtx) + { + reg = gen_reg_rtx (SImode); + add_tmp = gen_int_mode (-INTVAL (operands[1]), SImode); + + /* If the integer value is not in the range of imm15s, + we need to force register first because our addsi3 pattern + only accept nds32_rimm15s_operand predicate. */ + add_tmp = force_reg (SImode, add_tmp); + + emit_insn (gen_addsi3 (reg, operands[0], add_tmp)); + operands[0] = reg; + } + + /* Step B: "if (gtu k operands[2]) then goto operands[4]". */ + test = gen_rtx_GTU (VOIDmode, operands[0], operands[2]); + emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], + operands[4])); + + operands[5] = gen_reg_rtx (SImode); + /* Step C, D, E, and F, using another temporary register operands[5]. */ + emit_jump_insn (gen_casesi_internal (operands[0], + operands[3], + operands[5])); + DONE; +}) + +;; We are receiving operands from casesi pattern: +;; +;; operands[0]: The index from which the lower bound has been subtracted. +;; operands[1]: A label that precedes the table itself. +;; operands[2]: A temporary register to retrieve value in table. +;; +;; We need to perform steps C, D, E, and F: +;; +;; C) t <-- operands[1] +;; D) z <-- (mem (plus (operands[0] << m) t)) +;; m is 2 for normal jump table. +;; m is 0, 1, or 2 for pc relative jump table based on diff size. +;; E) t <-- z + t (NOTE: This is only required for pc relative jump table.) +;; F) Jump to target with register t or z. +;; +;; The USE in this pattern is needed to tell flow analysis that this is +;; a CASESI insn. It has no other purpose. 
+(define_insn "casesi_internal" + [(parallel [(set (pc) + (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r") + (const_int 4)) + (label_ref (match_operand 1 "" ""))))) + (use (label_ref (match_dup 1))) + (clobber (match_operand:SI 2 "register_operand" "=r")) ; temporary for the table entry; "=r" forces a register + (clobber (reg:SI TA_REGNUM))])] + "" +{ + if (CASE_VECTOR_PC_RELATIVE) + return nds32_output_casesi_pc_relative (operands); + else + return nds32_output_casesi (operands); +} + [(set_attr "length" "20") + (set_attr "type" "alu")]) + +;; ---------------------------------------------------------------------------- + +;; Performance Extension + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (clz:SI (match_operand:SI 1 "register_operand" " r")))] + "TARGET_PERF_EXT" + "clz\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "smaxsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (smax:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")))] + "TARGET_PERF_EXT" + "max\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "sminsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (smin:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")))] + "TARGET_PERF_EXT" + "min\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +(define_insn "*btst" + [(set (match_operand:SI 0 "register_operand" "= r") + (zero_extract:SI (match_operand:SI 1 "register_operand" " r") + (const_int 1) + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")))] ; predicate admits only const_ints that satisfy Iu05 + "TARGET_PERF_EXT" + "btst\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + +;; ---------------------------------------------------------------------------- diff --git a/gcc/config/nds32/nds32.opt b/gcc/config/nds32/nds32.opt new file mode 100644 index 00000000000..b2b45bb3e02 --- /dev/null +++ b/gcc/config/nds32/nds32.opt @@ -0,0 +1,102 @@ +; Options of 
Andes NDS32 cpu for GNU compiler +; Copyright (C) 2012-2013 Free Software Foundation, Inc. +; Contributed by Andes Technology Corporation. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License as published +; by the Free Software Foundation; either version 3, or (at your +; option) any later version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +HeaderInclude +config/nds32/nds32-opts.h + +mbig-endian +Target Report RejectNegative Negative(mlittle-endian) Mask(BIG_ENDIAN) +Generate code in big-endian mode. + +mlittle-endian +Target Report RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN) +Generate code in little-endian mode. + +mreduced-regs +Target Report RejectNegative Negative(mfull-regs) Mask(REDUCED_REGS) +Use reduced-set registers for register allocation. + +mfull-regs +Target Report RejectNegative Negative(mreduced-regs) InverseMask(REDUCED_REGS) +Use full-set registers for register allocation. + +mcmov +Target Report Mask(CMOV) +Generate conditional move instructions. + +mperf-ext +Target Report Mask(PERF_EXT) +Generate performance extension instructions. + +mv3push +Target Report Mask(V3PUSH) +Generate v3 push25/pop25 instructions. + +m16-bit +Target Report Mask(16_BIT) +Generate 16-bit instructions. + +mgp-direct +Target Report Mask(GP_DIRECT) +Generate GP base instructions directly. + +misr-vector-size= +Target RejectNegative Joined UInteger Var(nds32_isr_vector_size) Init(NDS32_DEFAULT_ISR_VECTOR_SIZE) +Specify the size of each interrupt vector, which must be 4 or 16. 
+ +mcache-block-size= +Target RejectNegative Joined UInteger Var(nds32_cache_block_size) Init(NDS32_DEFAULT_CACHE_BLOCK_SIZE) +Specify the size of each cache block, which must be a power of 2 between 4 and 512. + +march= +Target RejectNegative Joined Enum(nds32_arch_type) Var(nds32_arch_option) Init(ARCH_V3) +Specify the name of the target architecture. + +Enum +Name(nds32_arch_type) Type(enum nds32_arch_type) + +EnumValue +Enum(nds32_arch_type) String(v2) Value(ARCH_V2) + +EnumValue +Enum(nds32_arch_type) String(v3) Value(ARCH_V3) + +EnumValue +Enum(nds32_arch_type) String(v3m) Value(ARCH_V3M) + +mforce-fp-as-gp +Target Report Mask(FORCE_FP_AS_GP) +Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization. + +mforbid-fp-as-gp +Target Report Mask(FORBID_FP_AS_GP) +Forbid using $fp to access static and global variables. This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'. + +mex9 +Target Report Mask(EX9) +Use special directives to guide linker doing ex9 optimization. + +mctor-dtor +Target Report +Enable constructor/destructor feature. + +mrelax +Target Report +Guide linker to relax instructions. diff --git a/gcc/config/nds32/nds32_intrinsic.h b/gcc/config/nds32/nds32_intrinsic.h new file mode 100644 index 00000000000..33064a966ed --- /dev/null +++ b/gcc/config/nds32/nds32_intrinsic.h @@ -0,0 +1,37 @@ +/* Intrinsic definitions of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2013 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _NDS32_INTRINSIC_H +#define _NDS32_INTRINSIC_H + +enum nds32_intrinsic_registers +{ + __NDS32_REG_PSW__ = 1024, + __NDS32_REG_IPSW__, + __NDS32_REG_ITYPE__, + __NDS32_REG_IPC__ +}; + +#endif /* nds32_intrinsic.h */ diff --git a/gcc/config/nds32/pipelines.md b/gcc/config/nds32/pipelines.md new file mode 100644 index 00000000000..9c8c56bab83 --- /dev/null +++ b/gcc/config/nds32/pipelines.md @@ -0,0 +1,29 @@ +;; Pipeline descriptions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +(define_automaton "nds32_machine") + +(define_cpu_unit "general_unit" "nds32_machine") + +(define_insn_reservation "simple_insn" 1 + (eq_attr "type" "unknown,load,store,move,alu,compare,branch,call,misc") + "general_unit") + +;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/predicates.md b/gcc/config/nds32/predicates.md new file mode 100644 index 00000000000..df4eccdb891 --- /dev/null +++ b/gcc/config/nds32/predicates.md @@ -0,0 +1,92 @@ +;; Predicate definitions of Andes NDS32 cpu for GNU compiler +;; Copyright (C) 2012-2013 Free Software Foundation, Inc. +;; Contributed by Andes Technology Corporation. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +(define_predicate "nds32_equality_comparison_operator" + (match_code "eq,ne")) + +(define_predicate "nds32_greater_less_comparison_operator" + (match_code "gt,ge,lt,le")) + +(define_special_predicate "nds32_logical_binary_operator" + (match_code "and,ior,xor")) + +(define_predicate "nds32_symbolic_operand" + (match_code "const,symbol_ref,label_ref")) + +(define_predicate "nds32_reg_constant_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_int_operand"))) + +(define_predicate "nds32_rimm15s_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Is15 (op)")))) + +(define_predicate "nds32_imm5u_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu05 (op)"))) + +(define_predicate "nds32_move_operand" + (and (match_operand 0 "general_operand") + (not (match_code "high,const,symbol_ref,label_ref"))) +{ + /* If the constant op does NOT satisfy Is20 nor Ihig, + we can not perform move behavior by a single instruction. */ + if (CONST_INT_P (op) + && !satisfies_constraint_Is20 (op) + && !satisfies_constraint_Ihig (op)) + return false; + + return true; +}) + +(define_special_predicate "nds32_load_multiple_operation" + (match_code "parallel") +{ + /* To verify 'load' operation, pass 'true' for the second argument. + See the implementation in nds32.c for details. */ + return nds32_valid_multiple_load_store (op, true); +}) + +(define_special_predicate "nds32_store_multiple_operation" + (match_code "parallel") +{ + /* To verify 'store' operation, pass 'false' for the second argument. + See the implementation in nds32.c for details. */ + return nds32_valid_multiple_load_store (op, false); +}) + +(define_special_predicate "nds32_stack_push_operation" + (match_code "parallel") +{ + /* To verify 'push' operation, pass 'true' for the second argument. + See the implementation in nds32.c for details. 
*/ + return nds32_valid_stack_push_pop (op, true); +}) + +(define_special_predicate "nds32_stack_pop_operation" + (match_code "parallel") +{ + /* To verify 'pop' operation, pass 'false' for the second argument. + See the implementation in nds32.c for details. */ + return nds32_valid_stack_push_pop (op, false); +}) + +;; ------------------------------------------------------------------------ diff --git a/gcc/config/nds32/t-mlibs b/gcc/config/nds32/t-mlibs new file mode 100644 index 00000000000..ec546e48c1b --- /dev/null +++ b/gcc/config/nds32/t-mlibs @@ -0,0 +1,38 @@ +# The multilib settings of Andes NDS32 cpu for GNU compiler +# Copyright (C) 2012-2013 Free Software Foundation, Inc. +# Contributed by Andes Technology Corporation. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# We need to build the following multilib combinations: +# +# 1. <No multilib options> +# 2. -mlittle-endian +# 3. -mbig-endian +# 4. -mgp-direct +# 5. -mno-gp-direct +# 6. -mlittle-endian -mgp-direct +# 7. -mlittle-endian -mno-gp-direct +# 8. -mbig-endian -mgp-direct +# 9. -mbig-endian -mno-gp-direct +# +# We also define a macro MULTILIB_DEFAULTS in nds32.h that tells the +# driver program which options are defaults for this target and thus +# do not need to be handled specially. 
+MULTILIB_OPTIONS = mlittle-endian/mbig-endian mgp-direct/mno-gp-direct + +# ------------------------------------------------------------------------ |