diff options
Diffstat (limited to 'gcc/config/arm/arm.md')
-rw-r--r-- | gcc/config/arm/arm.md | 176 |
1 files changed, 136 insertions, 40 deletions
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index a26550a476a..841c624d485 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -82,6 +82,9 @@ ;; Processor type. This is created automatically from arm-cores.def. (include "arm-tune.md") +;; Instruction classification types +(include "types.md") + ; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when ; generating ARM code. This is used to control the length of some insn ; patterns that share the same RTL in both ARM and Thumb code. @@ -191,6 +194,12 @@ (const_string "yes")] (const_string "no"))) +(define_attr "use_literal_pool" "no,yes" + (cond [(and (eq_attr "type" "f_loads,f_loadd") + (match_test "CONSTANT_P (operands[1])")) + (const_string "yes")] + (const_string "no"))) + ; Allows an insn to disable certain alternatives for reasons other than ; arch support. (define_attr "insn_enabled" "no,yes" @@ -210,6 +219,10 @@ (match_test "arm_restrict_it")) (const_string "no") + (and (eq_attr "use_literal_pool" "yes") + (match_test "arm_disable_literal_pool")) + (const_string "no") + (eq_attr "arch_enabled" "no") (const_string "no") @@ -245,9 +258,6 @@ (set_attr "length" "4") (set_attr "pool_range" "250")]) -;; Instruction classification types -(include "types.md") - ; Load scheduling, set from the arm_ld_sched variable ; initialized by arm_option_override() (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) @@ -3716,7 +3726,7 @@ [(match_operand:SI 1 "s_register_operand" "r") (match_operand:SI 2 "s_register_operand" "r")])) (clobber (reg:CC CC_REGNUM))] - "TARGET_32BIT && optimize_insn_for_size_p()" + "TARGET_32BIT && optimize_function_for_size_p (cfun)" "* operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, operands[1], operands[2]); @@ -4708,49 +4718,94 @@ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" "") +(define_insn_and_split "*zextendsidi_negsi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (zero_extend:DI (neg:SI 
(match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_32BIT" + "#" + "" + [(set (match_dup 2) + (neg:SI (match_dup 1))) + (set (match_dup 3) + (const_int 0))] + { + operands[2] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[0]); + } + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + ;; Negate an extended 32-bit value. (define_insn_and_split "*negdi_extendsidi" - [(set (match_operand:DI 0 "s_register_operand" "=r,&r,l,&l") - (neg:DI (sign_extend:DI (match_operand:SI 1 "s_register_operand" "0,r,0,l")))) + [(set (match_operand:DI 0 "s_register_operand" "=l,r") + (neg:DI (sign_extend:DI + (match_operand:SI 1 "s_register_operand" "l,r")))) (clobber (reg:CC CC_REGNUM))] "TARGET_32BIT" - "#" ; rsb\\t%Q0, %1, #0\;asr\\t%R0, %Q0, #31 + "#" "&& reload_completed" [(const_int 0)] { - operands[2] = gen_highpart (SImode, operands[0]); - operands[0] = gen_lowpart (SImode, operands[0]); - rtx tmp = gen_rtx_SET (VOIDmode, - operands[0], - gen_rtx_MINUS (SImode, - const0_rtx, - operands[1])); - if (TARGET_ARM) - { - emit_insn (tmp); - } - else - { - /* Set the flags, to emit the short encoding in Thumb2. */ - rtx flags = gen_rtx_SET (VOIDmode, - gen_rtx_REG (CCmode, CC_REGNUM), - gen_rtx_COMPARE (CCmode, - const0_rtx, - operands[1])); - emit_insn (gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (2, - flags, - tmp))); - } - emit_insn (gen_rtx_SET (VOIDmode, - operands[2], - gen_rtx_ASHIFTRT (SImode, - operands[0], - GEN_INT (31)))); - DONE; + rtx low = gen_lowpart (SImode, operands[0]); + rtx high = gen_highpart (SImode, operands[0]); + + if (reg_overlap_mentioned_p (low, operands[1])) + { + /* Input overlaps the low word of the output. Use: + asr Rhi, Rin, #31 + rsbs Rlo, Rin, #0 + rsc Rhi, Rhi, #0 (thumb2: sbc Rhi, Rhi, Rhi, lsl #1). 
*/ + rtx cc_reg = gen_rtx_REG (CC_Cmode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_ASHIFTRT (SImode, operands[1], + GEN_INT (31)))); + + emit_insn (gen_subsi3_compare (low, const0_rtx, operands[1])); + if (TARGET_ARM) + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_MINUS (SImode, + gen_rtx_MINUS (SImode, + const0_rtx, + high), + gen_rtx_LTU (SImode, + cc_reg, + const0_rtx)))); + else + { + rtx two_x = gen_rtx_ASHIFT (SImode, high, GEN_INT (1)); + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_MINUS (SImode, + gen_rtx_MINUS (SImode, + high, + two_x), + gen_rtx_LTU (SImode, + cc_reg, + const0_rtx)))); + } + } + else + { + /* No overlap, or overlap on high word. Use: + rsb Rlo, Rin, #0 + bic Rhi, Rlo, Rin + asr Rhi, Rhi, #31 + Flags not needed for this sequence. */ + emit_insn (gen_rtx_SET (VOIDmode, low, + gen_rtx_NEG (SImode, operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_AND (SImode, + gen_rtx_NOT (SImode, operands[1]), + low))); + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_ASHIFTRT (SImode, high, + GEN_INT (31)))); + } + DONE; } - [(set_attr "length" "8,8,4,4") - (set_attr "arch" "a,a,t2,t2") + [(set_attr "length" "12") + (set_attr "arch" "t2,*") (set_attr "type" "multiple")] ) @@ -6022,7 +6077,7 @@ "TARGET_32BIT && reload_completed && (arm_const_double_inline_cost (operands[1]) - <= ((optimize_size || arm_ld_sched) ? 3 : 4))" + <= arm_max_const_double_inline_cost ())" [(const_int 0)] " arm_split_constant (SET, SImode, curr_insn, @@ -6285,6 +6340,47 @@ " ) +;; A normal way to do (symbol + offset) requires three instructions at least +;; (depends on how big the offset is) as below: +;; movw r0, #:lower16:g +;; movt r0, #:upper16:g +;; adds r0, #4 +;; +;; A better way would be: +;; movw r0, #:lower16:g+4 +;; movt r0, #:upper16:g+4 +;; +;; The limitation of this way is that the length of offset should be a 16-bit +;; signed value, because current assembler only supports REL type relocation for +;; such case.
If the more powerful RELA type is supported in future, we should +;; update this pattern to go with better way. +(define_split + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (const:SI (plus:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" ""))))] + "TARGET_THUMB2 + && arm_disable_literal_pool + && reload_completed + && GET_CODE (operands[1]) == SYMBOL_REF" + [(clobber (const_int 0))] + " + int offset = INTVAL (operands[2]); + + if (offset < -0x8000 || offset > 0x7fff) + { + arm_emit_movpair (operands[0], operands[1]); + emit_insn (gen_rtx_SET (SImode, operands[0], + gen_rtx_PLUS (SImode, operands[0], operands[2]))); + } + else + { + rtx op = gen_rtx_CONST (SImode, + gen_rtx_PLUS (SImode, operands[1], operands[2])); + arm_emit_movpair (operands[0], op); + } + " +) + ;; Split symbol_refs at the later stage (after cprop), instead of generating ;; movt/movw pair directly at expand. Otherwise corresponding high_sum ;; and lo_sum would be merged back into memory load at cprop. However, |