diff options
author | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-12-21 13:59:01 +0000 |
---|---|---|
committer | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-12-21 13:59:01 +0000 |
commit | 2d5f6d3c1e2780698d60b53144be4d7eb7df2a02 (patch) | |
tree | 4ef5de50928d3637f7cf6e978a5fc2ea84332013 /gcc/config | |
parent | 89011aed2df43a3ea68ba208d8cacdb9d52a61c1 (diff) | |
download | gcc-2d5f6d3c1e2780698d60b53144be4d7eb7df2a02.tar.gz |
2012-12-21 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk rev 194659 using svnmerge.py
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@194661 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
47 files changed, 1725 insertions, 384 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 7bc2f6b896a..03b13613c6e 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -5865,6 +5865,14 @@ aarch64_preferred_simd_mode (enum machine_mode mode) return word_mode; } +/* Return the bitmask of possible vector sizes for the vectorizer + to iterate over. */ +static unsigned int +aarch64_autovectorize_vector_sizes (void) +{ + return (16 | 8); +} + /* A table to help perform AArch64-specific name mangling for AdvSIMD vector types in order to conform to the AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture", Appendix A). To @@ -6374,8 +6382,6 @@ aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest, int aarch64_simd_attr_length_move (rtx insn) { - rtx reg, mem, addr; - int load; enum machine_mode mode; extract_insn_cached (insn); @@ -6658,7 +6664,6 @@ aarch64_split_compare_and_swap (rtx operands[]) { rtx rval, mem, oldval, newval, scratch; enum machine_mode mode; - enum memmodel mod_s; bool is_weak; rtx label1, label2, x, cond; @@ -6667,7 +6672,6 @@ aarch64_split_compare_and_swap (rtx operands[]) oldval = operands[2]; newval = operands[3]; is_weak = (operands[4] != const0_rtx); - mod_s = (enum memmodel) INTVAL (operands[5]); scratch = operands[7]; mode = GET_MODE (mem); @@ -7519,6 +7523,10 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ aarch64_builtin_vectorized_function +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + aarch64_autovectorize_vector_sizes + /* Section anchor support. 
*/ #undef TARGET_MIN_ANCHOR_OFFSET diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index b59d53df86a..ec65b3c2a11 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -840,8 +840,8 @@ (define_insn "insv_imm<mode>" [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") (const_int 16) - (match_operand 1 "const_int_operand" "n")) - (match_operand 2 "const_int_operand" "n"))] + (match_operand:GPI 1 "const_int_operand" "n")) + (match_operand:GPI 2 "const_int_operand" "n"))] "INTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode) && INTVAL (operands[1]) % 16 == 0 && INTVAL (operands[2]) <= 0xffff" @@ -1215,6 +1215,22 @@ (set_attr "mode" "SI")] ) +;; zero_extend version of above +(define_insn "*addsi3_aarch64_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=rk,rk,rk") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk") + (match_operand:SI 2 "aarch64_plus_operand" "I,r,J"))))] + "" + "@ + add\\t%w0, %w1, %2 + add\\t%w0, %w1, %w2 + sub\\t%w0, %w1, #%n2" + [(set_attr "v8type" "alu") + (set_attr "mode" "SI")] +) + (define_insn "*adddi3_aarch64" [(set (match_operand:DI 0 "register_operand" "=rk,rk,rk,!w") @@ -1248,6 +1264,23 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*addsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:SI (match_operand:SI 1 "register_operand" "%r,r") + (match_operand:SI 2 "aarch64_plus_operand" "rI,J")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "" + "@ + adds\\t%w0, %w1, %w2 + subs\\t%w0, %w1, #%n2" + [(set_attr "v8type" "alus") + (set_attr "mode" "SI")] +) + (define_insn "*add<mode>3nr_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -1284,6 +1317,19 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*add_<shift>_si_uxtw" + [(set (match_operand:DI 0 
"register_operand" "=rk") + (zero_extend:DI + (plus:SI (ASHIFT:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "add\\t%w0, %w3, %w1, <shift> %2" + [(set_attr "v8type" "alu_shift") + (set_attr "mode" "SI")] +) + (define_insn "*add_mul_imm_<mode>" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") @@ -1305,6 +1351,18 @@ (set_attr "mode" "<GPI:MODE>")] ) +;; zero_extend version of above +(define_insn "*add_<optab><SHORT:mode>_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (ANY_EXTEND:SI (match_operand:SHORT 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r"))))] + "" + "add\\t%w0, %w2, %w1, <su>xt<SHORT:size>" + [(set_attr "v8type" "alu_ext") + (set_attr "mode" "SI")] +) + (define_insn "*add_<optab><ALLX:mode>_shft_<GPI:mode>" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (ashift:GPI (ANY_EXTEND:GPI @@ -1317,6 +1375,20 @@ (set_attr "mode" "<GPI:MODE>")] ) +;; zero_extend version of above +(define_insn "*add_<optab><SHORT:mode>_shft_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (ashift:SI (ANY_EXTEND:SI + (match_operand:SHORT 1 "register_operand" "r")) + (match_operand 2 "aarch64_imm3" "Ui3")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %2" + [(set_attr "v8type" "alu_ext") + (set_attr "mode" "SI")] +) + (define_insn "*add_<optab><ALLX:mode>_mult_<GPI:mode>" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (mult:GPI (ANY_EXTEND:GPI @@ -1329,6 +1401,19 @@ (set_attr "mode" "<GPI:MODE>")] ) +;; zero_extend version of above +(define_insn "*add_<optab><SHORT:mode>_mult_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI (plus:SI (mult:SI (ANY_EXTEND:SI + (match_operand:SHORT 1 
"register_operand" "r")) + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %p2" + [(set_attr "v8type" "alu_ext") + (set_attr "mode" "SI")] +) + (define_insn "*add_<optab><mode>_multp2" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (ANY_EXTRACT:GPI @@ -1343,6 +1428,22 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*add_<optab>si_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (ANY_EXTRACT:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)) + (match_operand:SI 4 "register_operand" "r"))))] + "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" + "add\\t%w0, %w4, %w1, <su>xt%e3 %p2" + [(set_attr "v8type" "alu_ext") + (set_attr "mode" "SI")] +) + (define_insn "*add<mode>3_carryin" [(set (match_operand:GPI 0 "register_operand" "=r") @@ -1356,6 +1457,21 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*addsi3_carryin_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (plus:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "v8type" "adc") + (set_attr "mode" "SI")] +) + (define_insn "*add<mode>3_carryin_alt1" [(set (match_operand:GPI 0 "register_operand" "=r") @@ -1369,6 +1485,21 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*addsi3_carryin_alt1_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (geu:SI (reg:CC CC_REGNUM) (const_int 0)))))] + "" + "adc\\t%w0, %w1, %w2" + 
[(set_attr "v8type" "adc") + (set_attr "mode" "SI")] +) + (define_insn "*add<mode>3_carryin_alt2" [(set (match_operand:GPI 0 "register_operand" "=r") @@ -1382,6 +1513,21 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*addsi3_carryin_alt2_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI + (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r"))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "v8type" "adc") + (set_attr "mode" "SI")] +) + (define_insn "*add<mode>3_carryin_alt3" [(set (match_operand:GPI 0 "register_operand" "=r") @@ -1395,6 +1541,21 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*addsi3_carryin_alt3_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI + (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_operand:SI 2 "register_operand" "r")) + (match_operand:SI 1 "register_operand" "r"))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "v8type" "adc") + (set_attr "mode" "SI")] +) + (define_insn "*add_uxt<mode>_multp2" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (and:GPI @@ -1411,6 +1572,24 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*add_uxtsi_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (and:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n")) + (match_operand:SI 4 "register_operand" "r"))))] + "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), INTVAL (operands[3])) != 0" + "* + operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), + INTVAL (operands[3]))); + return \"add\t%w0, %w4, %w1, uxt%e3 %p2\";" + [(set_attr "v8type" "alu_ext") + (set_attr "mode" "SI")] +) + (define_insn "subsi3" [(set 
(match_operand:SI 0 "register_operand" "=rk") (minus:SI (match_operand:SI 1 "register_operand" "r") @@ -1421,6 +1600,18 @@ (set_attr "mode" "SI")] ) +;; zero_extend version of above +(define_insn "*subsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))))] + "" + "sub\\t%w0, %w1, %w2" + [(set_attr "v8type" "alu") + (set_attr "mode" "SI")] +) + (define_insn "subdi3" [(set (match_operand:DI 0 "register_operand" "=rk,!w") (minus:DI (match_operand:DI 1 "register_operand" "r,!w") @@ -1448,6 +1639,20 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*subsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))] + "" + "subs\\t%w0, %w1, %w2" + [(set_attr "v8type" "alus") + (set_attr "mode" "SI")] +) + (define_insn "*sub_<shift>_<mode>" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 3 "register_operand" "r") @@ -1460,6 +1665,20 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*sub_<shift>_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 3 "register_operand" "r") + (ASHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))] + "" + "sub\\t%w0, %w3, %w1, <shift> %2" + [(set_attr "v8type" "alu_shift") + (set_attr "mode" "SI")] +) + (define_insn "*sub_mul_imm_<mode>" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 3 "register_operand" "r") @@ -1472,6 +1691,20 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*sub_mul_imm_si_uxtw" + [(set 
(match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 3 "register_operand" "r") + (mult:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))] + "" + "sub\\t%w0, %w3, %w1, lsl %p2" + [(set_attr "v8type" "alu_shift") + (set_attr "mode" "SI")] +) + (define_insn "*sub_<optab><ALLX:mode>_<GPI:mode>" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 1 "register_operand" "r") @@ -1483,6 +1716,19 @@ (set_attr "mode" "<GPI:MODE>")] ) +;; zero_extend version of above +(define_insn "*sub_<optab><SHORT:mode>_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "r") + (ANY_EXTEND:SI + (match_operand:SHORT 2 "register_operand" "r")))))] + "" + "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size>" + [(set_attr "v8type" "alu_ext") + (set_attr "mode" "SI")] +) + (define_insn "*sub_<optab><ALLX:mode>_shft_<GPI:mode>" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 1 "register_operand" "r") @@ -1495,6 +1741,20 @@ (set_attr "mode" "<GPI:MODE>")] ) +;; zero_extend version of above +(define_insn "*sub_<optab><SHORT:mode>_shft_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "r") + (ashift:SI (ANY_EXTEND:SI + (match_operand:SHORT 2 "register_operand" "r")) + (match_operand 3 "aarch64_imm3" "Ui3")))))] + "" + "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size> %3" + [(set_attr "v8type" "alu_ext") + (set_attr "mode" "SI")] +) + (define_insn "*sub_<optab><mode>_multp2" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "r") @@ -1509,6 +1769,22 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*sub_<optab>si_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 
4 "register_operand" "r") + (ANY_EXTRACT:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)))))] + "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" + "sub\\t%w0, %w4, %w1, <su>xt%e3 %p2" + [(set_attr "v8type" "alu_ext") + (set_attr "mode" "SI")] +) + (define_insn "*sub_uxt<mode>_multp2" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "r") @@ -1525,6 +1801,24 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*sub_uxtsi_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 4 "register_operand" "r") + (and:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n")))))] + "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),INTVAL (operands[3])) != 0" + "* + operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), + INTVAL (operands[3]))); + return \"sub\t%w0, %w4, %w1, uxt%e3 %p2\";" + [(set_attr "v8type" "alu_ext") + (set_attr "mode" "SI")] +) + (define_insn "neg<mode>2" [(set (match_operand:GPI 0 "register_operand" "=r") (neg:GPI (match_operand:GPI 1 "register_operand" "r")))] @@ -1534,6 +1828,16 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*negsi2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"))))] + "" + "neg\\t%w0, %w1" + [(set_attr "v8type" "alu") + (set_attr "mode" "SI")] +) + (define_insn "*neg<mode>2_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r")) @@ -1546,6 +1850,19 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*negsi2_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + 
(compare:CC_NZ (neg:SI (match_operand:SI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (neg:SI (match_dup 1))))] + "" + "negs\\t%w0, %w1" + [(set_attr "v8type" "alus") + (set_attr "mode" "SI")] +) + (define_insn "*neg_<shift>_<mode>2" [(set (match_operand:GPI 0 "register_operand" "=r") (neg:GPI (ASHIFT:GPI @@ -1557,6 +1874,19 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*neg_<shift>_si2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (neg:SI (ASHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))] + "" + "neg\\t%w0, %w1, <shift> %2" + [(set_attr "v8type" "alu_shift") + (set_attr "mode" "SI")] +) + (define_insn "*neg_mul_imm_<mode>2" [(set (match_operand:GPI 0 "register_operand" "=r") (neg:GPI (mult:GPI @@ -1568,6 +1898,19 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*neg_mul_imm_si2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (neg:SI (mult:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))] + "" + "neg\\t%w0, %w1, lsl %p2" + [(set_attr "v8type" "alu_shift") + (set_attr "mode" "SI")] +) + (define_insn "mul<mode>3" [(set (match_operand:GPI 0 "register_operand" "=r") (mult:GPI (match_operand:GPI 1 "register_operand" "r") @@ -1578,6 +1921,18 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*mulsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))))] + "" + "mul\\t%w0, %w1, %w2" + [(set_attr "v8type" "mult") + (set_attr "mode" "SI")] +) + (define_insn "*madd<mode>" [(set (match_operand:GPI 0 "register_operand" "=r") (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") @@ -1589,6 +1944,19 @@ 
(set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*maddsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "madd\\t%w0, %w1, %w2, %w3" + [(set_attr "v8type" "madd") + (set_attr "mode" "SI")] +) + (define_insn "*msub<mode>" [(set (match_operand:GPI 0 "register_operand" "=r") (minus:GPI (match_operand:GPI 3 "register_operand" "r") @@ -1601,6 +1969,20 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*msubsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 3 "register_operand" "r") + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))))] + + "" + "msub\\t%w0, %w1, %w2, %w3" + [(set_attr "v8type" "madd") + (set_attr "mode" "SI")] +) + (define_insn "*mul<mode>_neg" [(set (match_operand:GPI 0 "register_operand" "=r") (mult:GPI (neg:GPI (match_operand:GPI 1 "register_operand" "r")) @@ -1612,6 +1994,19 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*mulsi_neg_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (mult:SI (neg:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r"))))] + + "" + "mneg\\t%w0, %w1, %w2" + [(set_attr "v8type" "mult") + (set_attr "mode" "SI")] +) + (define_insn "<su_optab>mulsidi3" [(set (match_operand:DI 0 "register_operand" "=r") (mult:DI (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r")) @@ -1682,6 +2077,18 @@ (set_attr "mode" "<MODE>")] ) +;; zero_extend version of above +(define_insn "*<su_optab>divsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (ANY_DIV:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" 
"r"))))] + "" + "<su>div\\t%w0, %w1, %w2" + [(set_attr "v8type" "<su>div") + (set_attr "mode" "SI")] +) + ;; ------------------------------------------------------------------- ;; Comparison insns ;; ------------------------------------------------------------------- diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index 84112182426..fa2f7335881 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -9686,6 +9686,30 @@ alpha_conditional_register_usage (void) for (i = 32; i < 63; i++) fixed_regs[i] = call_used_regs[i] = 1; } + +/* Canonicalize a comparison from one we don't have to one we do have. */ + +static void +alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) +{ + if (!op0_preserve_value + && (*code == GE || *code == GT || *code == GEU || *code == GTU) + && (REG_P (*op1) || *op1 == const0_rtx)) + { + rtx tem = *op0; + *op0 = *op1; + *op1 = tem; + *code = (int)swap_condition ((enum rtx_code)*code); + } + + if ((*code == LT || *code == LTU) + && CONST_INT_P (*op1) && INTVAL (*op1) == 256) + { + *code = *code == LT ? LE : LEU; + *op1 = GEN_INT (255); + } +} /* Initialize the GCC target structure. */ #if TARGET_ABI_OPEN_VMS @@ -9853,6 +9877,9 @@ alpha_conditional_register_usage (void) #undef TARGET_CONDITIONAL_REGISTER_USAGE #define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison + struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h index bc14d84055b..a70c7f89f47 100644 --- a/gcc/config/alpha/alpha.h +++ b/gcc/config/alpha/alpha.h @@ -922,26 +922,6 @@ do { \ #define FLOAT_STORE_FLAG_VALUE(MODE) \ REAL_VALUE_ATOF ((TARGET_FLOAT_VAX ? "0.5" : "2.0"), (MODE)) -/* Canonicalize a comparison from one we don't have to one we do have. 
*/ - -#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \ - do { \ - if (((CODE) == GE || (CODE) == GT || (CODE) == GEU || (CODE) == GTU) \ - && (REG_P (OP1) || (OP1) == const0_rtx)) \ - { \ - rtx tem = (OP0); \ - (OP0) = (OP1); \ - (OP1) = tem; \ - (CODE) = swap_condition (CODE); \ - } \ - if (((CODE) == LT || (CODE) == LTU) \ - && CONST_INT_P (OP1) && INTVAL (OP1) == 256) \ - { \ - (CODE) = (CODE) == LT ? LE : LEU; \ - (OP1) = GEN_INT (255); \ - } \ - } while (0) - /* Specify the machine mode that pointers have. After generation of rtl, the compiler makes no further distinction between pointers and any other objects of this machine mode. */ diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index d942c5b07a1..4c61e35ea28 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -53,7 +53,6 @@ extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code); extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, HOST_WIDE_INT, rtx, rtx, int); -extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *); extern int legitimate_pic_operand_p (rtx); extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); extern rtx legitimize_tls_address (rtx, rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 84ce56fb84d..13d745fb973 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -132,6 +132,7 @@ static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); static int arm_comp_type_attributes (const_tree, const_tree); static void arm_set_default_type_attributes (tree); static int arm_adjust_cost (rtx, rtx, rtx, int); +static int arm_sched_reorder (FILE *, int, rtx *, int *, int); static int optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, struct four_ints *return_sequence); @@ -269,7 +270,8 @@ static int arm_cortex_a5_branch_cost (bool, bool); static bool arm_vectorize_vec_perm_const_ok (enum 
machine_mode vmode, const unsigned char *sel); - +static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value); /* Table of machine attributes. */ static const struct attribute_spec arm_attribute_table[] = @@ -366,6 +368,9 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST arm_adjust_cost +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER arm_sched_reorder + #undef TARGET_REGISTER_MOVE_COST #define TARGET_REGISTER_MOVE_COST arm_register_move_cost @@ -626,6 +631,10 @@ static const struct attribute_spec arm_attribute_table[] = #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ arm_vectorize_vec_perm_const_ok +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON \ + arm_canonicalize_comparison + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -3543,8 +3552,9 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, This can be done for a few constant compares, where we can make the immediate value easier to load. */ -enum rtx_code -arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) +static void +arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) { enum machine_mode mode; unsigned HOST_WIDE_INT i, maxval; @@ -3563,15 +3573,15 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) { rtx tem; - if (code == GT || code == LE - || (!TARGET_ARM && (code == GTU || code == LEU))) + if (*code == GT || *code == LE + || (!TARGET_ARM && (*code == GTU || *code == LEU))) { /* Missing comparison. First try to use an available comparison. 
*/ if (CONST_INT_P (*op1)) { i = INTVAL (*op1); - switch (code) + switch (*code) { case GT: case LE: @@ -3579,7 +3589,8 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) && arm_const_double_by_immediates (GEN_INT (i + 1))) { *op1 = GEN_INT (i + 1); - return code == GT ? GE : LT; + *code = *code == GT ? GE : LT; + return; } break; case GTU: @@ -3588,7 +3599,8 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) && arm_const_double_by_immediates (GEN_INT (i + 1))) { *op1 = GEN_INT (i + 1); - return code == GTU ? GEU : LTU; + *code = *code == GTU ? GEU : LTU; + return; } break; default: @@ -3597,13 +3609,15 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) } /* If that did not work, reverse the condition. */ - tem = *op0; - *op0 = *op1; - *op1 = tem; - return swap_condition (code); + if (!op0_preserve_value) + { + tem = *op0; + *op0 = *op1; + *op1 = tem; + *code = (int)swap_condition ((enum rtx_code)*code); + } } - - return code; + return; } /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing @@ -3624,15 +3638,15 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) if (!CONST_INT_P (*op1) || const_ok_for_arm (INTVAL (*op1)) || const_ok_for_arm (- INTVAL (*op1))) - return code; + return; i = INTVAL (*op1); - switch (code) + switch (*code) { case EQ: case NE: - return code; + return; case GT: case LE: @@ -3640,7 +3654,8 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) { *op1 = GEN_INT (i + 1); - return code == GT ? GE : LT; + *code = *code == GT ? GE : LT; + return; } break; @@ -3650,7 +3665,8 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) { *op1 = GEN_INT (i - 1); - return code == GE ? GT : LE; + *code = *code == GE ? 
GT : LE; + return; } break; @@ -3660,7 +3676,8 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) { *op1 = GEN_INT (i + 1); - return code == GTU ? GEU : LTU; + *code = *code == GTU ? GEU : LTU; + return; } break; @@ -3670,15 +3687,14 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) { *op1 = GEN_INT (i - 1); - return code == GEU ? GTU : LEU; + *code = *code == GEU ? GTU : LEU; + return; } break; default: gcc_unreachable (); } - - return code; } @@ -5566,7 +5582,9 @@ thumb_find_work_register (unsigned long pushed_regs_mask) if (! cfun->machine->uses_anonymous_args && crtl->args.size >= 0 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD) - && crtl->args.info.nregs < 4) + && (TARGET_AAPCS_BASED + ? crtl->args.info.aapcs_ncrn < 4 + : crtl->args.info.nregs < 4)) return LAST_ARG_REGNUM; /* Otherwise look for a call-saved register that is going to be pushed. */ @@ -8680,6 +8698,164 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass, } } + +/* Return true if and only if this insn can dual-issue only as older. 
*/ +static bool +cortexa7_older_only (rtx insn) +{ + if (recog_memoized (insn) < 0) + return false; + + if (get_attr_insn (insn) == INSN_MOV) + return false; + + switch (get_attr_type (insn)) + { + case TYPE_ALU_REG: + case TYPE_LOAD_BYTE: + case TYPE_LOAD1: + case TYPE_STORE1: + case TYPE_FFARITHS: + case TYPE_FADDS: + case TYPE_FFARITHD: + case TYPE_FADDD: + case TYPE_FCPYS: + case TYPE_F_CVT: + case TYPE_FCMPS: + case TYPE_FCMPD: + case TYPE_FCONSTS: + case TYPE_FCONSTD: + case TYPE_FMULS: + case TYPE_FMACS: + case TYPE_FMULD: + case TYPE_FMACD: + case TYPE_FDIVS: + case TYPE_FDIVD: + case TYPE_F_2_R: + case TYPE_F_FLAG: + case TYPE_F_LOADS: + case TYPE_F_STORES: + return true; + default: + return false; + } +} + +/* Return true if and only if this insn can dual-issue as younger. */ +static bool +cortexa7_younger (FILE *file, int verbose, rtx insn) +{ + if (recog_memoized (insn) < 0) + { + if (verbose > 5) + fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn)); + return false; + } + + if (get_attr_insn (insn) == INSN_MOV) + return true; + + switch (get_attr_type (insn)) + { + case TYPE_SIMPLE_ALU_IMM: + case TYPE_SIMPLE_ALU_SHIFT: + case TYPE_BRANCH: + return true; + default: + return false; + } +} + + +/* Look for an instruction that can dual issue only as an older + instruction, and move it in front of any instructions that can + dual-issue as younger, while preserving the relative order of all + other instructions in the ready list. This is a hueuristic to help + dual-issue in later cycles, by postponing issue of more flexible + instructions. This heuristic may affect dual issue opportunities + in the current cycle. 
*/ +static void +cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, + int clock) +{ + int i; + int first_older_only = -1, first_younger = -1; + + if (verbose > 5) + fprintf (file, + ";; sched_reorder for cycle %d with %d insns in ready list\n", + clock, + *n_readyp); + + /* Traverse the ready list from the head (the instruction to issue + first), and looking for the first instruction that can issue as + younger and the first instruction that can dual-issue only as + older. */ + for (i = *n_readyp - 1; i >= 0; i--) + { + rtx insn = ready[i]; + if (cortexa7_older_only (insn)) + { + first_older_only = i; + if (verbose > 5) + fprintf (file, ";; reorder older found %d\n", INSN_UID (insn)); + break; + } + else if (cortexa7_younger (file, verbose, insn) && first_younger == -1) + first_younger = i; + } + + /* Nothing to reorder because either no younger insn found or insn + that can dual-issue only as older appears before any insn that + can dual-issue as younger. */ + if (first_younger == -1) + { + if (verbose > 5) + fprintf (file, ";; sched_reorder nothing to reorder as no younger\n"); + return; + } + + /* Nothing to reorder because no older-only insn in the ready list. */ + if (first_older_only == -1) + { + if (verbose > 5) + fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n"); + return; + } + + /* Move first_older_only insn before first_younger. */ + if (verbose > 5) + fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n", + INSN_UID(ready [first_older_only]), + INSN_UID(ready [first_younger])); + rtx first_older_only_insn = ready [first_older_only]; + for (i = first_older_only; i < first_younger; i++) + { + ready[i] = ready[i+1]; + } + + ready[i] = first_older_only_insn; + return; +} + +/* Implement TARGET_SCHED_REORDER. 
*/ +static int +arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, + int clock) +{ + switch (arm_tune) + { + case cortexa7: + cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock); + break; + default: + /* Do nothing for other cores; their ready list is left as is. */ + break; + } + + return arm_issue_rate (); +} + /* This function implements the target macro TARGET_SCHED_ADJUST_COST. It corrects the value of COST based on the relationship between INSN and DEP through the dependence LINK. It returns the new @@ -25466,6 +25642,7 @@ arm_issue_rate (void) case cortexr5: case genericv7a: case cortexa5: + case cortexa7: case cortexa8: case cortexa9: case fa726te: @@ -26979,7 +27156,7 @@ bool arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) { enum rtx_code code = GET_CODE (*comparison); - enum rtx_code canonical_code; + int code_int; enum machine_mode mode = (GET_MODE (*op1) == VOIDmode) ? GET_MODE (*op2) : GET_MODE (*op1); @@ -26988,8 +27165,9 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) if (code == UNEQ || code == LTGT) return false; - canonical_code = arm_canonicalize_comparison (code, op1, op2); - PUT_CODE (*comparison, canonical_code); + code_int = (int)code; + arm_canonicalize_comparison (&code_int, op1, op2, 0); + PUT_CODE (*comparison, (enum rtx_code)code_int); switch (mode) { diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index d0f351d861e..2fa945c0c04 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -2078,9 +2078,6 @@ extern int making_const_table; ? reverse_condition_maybe_unordered (code) \ : reverse_condition (code)) -#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \ - (CODE) = arm_canonicalize_comparison (CODE, &(OP0), &(OP1)) - /* The arm5 clz instruction returns 32. 
*/ #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 7f38816a14a..1cb1515b1fa 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -240,6 +240,7 @@ ; regs or have a shifted source operand ; and does not have an immediate operand. This is ; also the default +; simple_alu_shift covers UXTH, UXTB, SXTH, SXTB ; alu_shift any data instruction that doesn't hit memory or fp ; regs, but has a source operand shifted by a constant ; alu_shift_reg any data instruction that doesn't hit memory or fp @@ -271,6 +272,7 @@ (define_attr "type" "simple_alu_imm,\ alu_reg,\ + simple_alu_shift,\ alu_shift,\ alu_shift_reg,\ mult,\ @@ -284,6 +286,8 @@ fmacd,\ f_rints,\ f_rintd,\ + f_minmaxs,\ + f_minmaxd,\ f_flag,\ f_loads,\ f_loadd,\ @@ -454,7 +458,9 @@ ; than one on the main cpu execution unit. (define_attr "core_cycles" "single,multi" (if_then_else (eq_attr "type" - "simple_alu_imm,alu_reg,alu_shift,float,fdivd,fdivs") + "simple_alu_imm,alu_reg,\ + simple_alu_shift,alu_shift,\ + float,fdivd,fdivs") (const_string "single") (const_string "multi"))) @@ -496,7 +502,7 @@ (define_attr "generic_sched" "yes,no" (const (if_then_else - (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4") + (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4") (eq_attr "tune_cortexr4" "yes")) (const_string "no") (const_string "yes")))) @@ -504,7 +510,7 @@ (define_attr "generic_vfp" "yes,no" (const (if_then_else (and (eq_attr "fpu" "vfp") - (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4") + (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4") (eq_attr "tune_cortexr4" "no")) 
(const_string "yes") (const_string "no")))) @@ -521,6 +527,7 @@ (include "fmp626.md") (include "fa726te.md") (include "cortex-a5.md") +(include "cortex-a7.md") (include "cortex-a8.md") (include "cortex-a9.md") (include "cortex-a15.md") @@ -4484,33 +4491,36 @@ ;; Zero and sign extension instructions. (define_insn "zero_extend<mode>di2" - [(set (match_operand:DI 0 "s_register_operand" "=r") + [(set (match_operand:DI 0 "s_register_operand" "=w,r,?r") (zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>" "<qhs_zextenddi_cstr>")))] "TARGET_32BIT <qhs_zextenddi_cond>" "#" - [(set_attr "length" "8") + [(set_attr "length" "8,4,8") (set_attr "ce_count" "2") (set_attr "predicable" "yes")] ) (define_insn "extend<mode>di2" - [(set (match_operand:DI 0 "s_register_operand" "=r") + [(set (match_operand:DI 0 "s_register_operand" "=w,r,?r,?r") (sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>" "<qhs_extenddi_cstr>")))] "TARGET_32BIT <qhs_sextenddi_cond>" "#" - [(set_attr "length" "8") + [(set_attr "length" "8,4,8,8") (set_attr "ce_count" "2") (set_attr "shift" "1") - (set_attr "predicable" "yes")] + (set_attr "predicable" "yes") + (set_attr "arch" "*,*,a,t")] ) ;; Splits for all extensions to DImode (define_split [(set (match_operand:DI 0 "s_register_operand" "") (zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))] - "TARGET_32BIT" + "TARGET_32BIT && (!TARGET_NEON + || (reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))))" [(set (match_dup 0) (match_dup 1))] { rtx lo_part = gen_lowpart (SImode, operands[0]); @@ -4536,7 +4546,9 @@ (define_split [(set (match_operand:DI 0 "s_register_operand" "") (sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))] - "TARGET_32BIT" + "TARGET_32BIT && (!TARGET_NEON + || (reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))))" [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))] { rtx lo_part = gen_lowpart (SImode, operands[0]); @@ -4629,11 +4641,7 @@ [(if_then_else (eq_attr "is_arch6" "yes") 
(const_int 2) (const_int 4)) (const_int 4)]) - (set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - (const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte")])] + (set_attr "type" "simple_alu_shift, load_byte")] ) (define_insn "*arm_zero_extendhisi2" @@ -4655,11 +4663,7 @@ uxth%?\\t%0, %1 ldr%(h%)\\t%0, %1" [(set_attr "predicable" "yes") - (set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - (const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte")])] + (set_attr "type" "simple_alu_shift,load_byte")] ) (define_insn "*arm_zero_extendhisi2addsi" @@ -4729,11 +4733,7 @@ uxtb\\t%0, %1 ldrb\\t%0, %1" [(set_attr "length" "2") - (set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - (const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte")])] + (set_attr "type" "simple_alu_shift,load_byte")] ) (define_insn "*arm_zero_extendqisi2" @@ -4755,11 +4755,7 @@ "@ uxtb%(%)\\t%0, %1 ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" - [(set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - (const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte")]) + [(set_attr "type" "simple_alu_shift,load_byte") (set_attr "predicable" "yes")] ) @@ -4933,11 +4929,7 @@ [(if_then_else (eq_attr "is_arch6" "yes") (const_int 2) (const_int 4)) (const_int 4)]) - (set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - (const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte")]) + (set_attr "type" "simple_alu_shift,load_byte") (set_attr "pool_range" "*,1018")] ) @@ -5010,11 +5002,7 @@ "@ sxth%?\\t%0, %1 ldr%(sh%)\\t%0, %1" - [(set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - (const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte")]) + [(set_attr "type" "simple_alu_shift,load_byte") (set_attr 
"predicable" "yes") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] @@ -5114,11 +5102,7 @@ "@ sxtb%?\\t%0, %1 ldr%(sb%)\\t%0, %1" - [(set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - (const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte")]) + [(set_attr "type" "simple_alu_shift,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,256") (set_attr "neg_pool_range" "*,244")] @@ -5231,12 +5215,7 @@ (const_int 2) (if_then_else (eq_attr "is_arch6" "yes") (const_int 4) (const_int 6))]) - (set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - (const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte") - (const_string "load_byte")])] + (set_attr "type" "simple_alu_shift,load_byte,load_byte")] ) (define_expand "extendsfdf2" diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md index 3d3ff23e7c6..9a41d305736 100644 --- a/gcc/config/arm/arm1020e.md +++ b/gcc/config/arm/arm1020e.md @@ -72,7 +72,7 @@ ;; ALU operations with a shift-by-constant operand (define_insn_reservation "1020alu_shift_op" 1 (and (eq_attr "tune" "arm1020e,arm1022e") - (eq_attr "type" "alu_shift")) + (eq_attr "type" "simple_alu_shift,alu_shift")) "1020a_e,1020a_m,1020a_w") ;; ALU operations with a shift-by-register operand diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md index d9ed858f861..52f6241d7dd 100644 --- a/gcc/config/arm/arm1026ejs.md +++ b/gcc/config/arm/arm1026ejs.md @@ -72,7 +72,7 @@ ;; ALU operations with a shift-by-constant operand (define_insn_reservation "alu_shift_op" 1 (and (eq_attr "tune" "arm1026ejs") - (eq_attr "type" "alu_shift")) + (eq_attr "type" "simple_alu_shift,alu_shift")) "a_e,a_m,a_w") ;; ALU operations with a shift-by-register operand diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md index ff5e614b37b..9e885586072 100644 --- a/gcc/config/arm/arm1136jfs.md +++ 
b/gcc/config/arm/arm1136jfs.md @@ -81,7 +81,7 @@ ;; ALU operations with a shift-by-constant operand (define_insn_reservation "11_alu_shift_op" 2 (and (eq_attr "tune" "arm1136js,arm1136jfs") - (eq_attr "type" "alu_shift")) + (eq_attr "type" "simple_alu_shift,alu_shift")) "e_1,e_2,e_3,e_wb") ;; ALU operations with a shift-by-register operand diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md index 656a90e41af..4c94e3337ab 100644 --- a/gcc/config/arm/arm926ejs.md +++ b/gcc/config/arm/arm926ejs.md @@ -58,7 +58,7 @@ ;; ALU operations with no shifted operand (define_insn_reservation "9_alu_op" 1 (and (eq_attr "tune" "arm926ejs") - (eq_attr "type" "alu_reg,simple_alu_imm,alu_shift")) + (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift")) "e,m,w") ;; ALU operations with a shift-by-register operand diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md index f25fcee9f01..33e53df2b55 100644 --- a/gcc/config/arm/cortex-a15.md +++ b/gcc/config/arm/cortex-a15.md @@ -68,7 +68,7 @@ ;; ALU ops with immediate shift (define_insn_reservation "cortex_a15_alu_shift" 3 (and (eq_attr "tune" "cortexa15") - (and (eq_attr "type" "alu_shift") + (and (eq_attr "type" "simple_alu_shift,alu_shift") (eq_attr "neon_type" "none"))) "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md index 1121c7effcf..2b5abe524a6 100644 --- a/gcc/config/arm/cortex-a5.md +++ b/gcc/config/arm/cortex-a5.md @@ -63,7 +63,7 @@ (define_insn_reservation "cortex_a5_alu_shift" 2 (and (eq_attr "tune" "cortexa5") - (eq_attr "type" "alu_shift,alu_shift_reg")) + (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg")) "cortex_a5_ex1") ;; Forwarding path for unshifted operands. 
diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md new file mode 100644 index 00000000000..74d4ca0bc3d --- /dev/null +++ b/gcc/config/arm/cortex-a7.md @@ -0,0 +1,353 @@ +;; ARM Cortex-A7 pipeline description +;; Copyright (C) 2012 Free Software Foundation, Inc. +;; +;; Contributed by ARM Ltd. +;; Based on cortex-a5.md which was originally contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "cortex_a7") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Functional units. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The Cortex-A7 pipeline integer and vfp pipeline. +;; The decode is the same for all instructions, so do not model it. +;; We only model the first execution stage because +;; instructions always advance one stage per cycle in order. +;; We model all of the LS, Branch, ALU, MAC and FPU pipelines together. + +(define_cpu_unit "cortex_a7_ex1, cortex_a7_ex2" "cortex_a7") + +(define_reservation "cortex_a7_both" "cortex_a7_ex1+cortex_a7_ex2") + +(define_cpu_unit "cortex_a7_branch" "cortex_a7") + +;; Cortex-A7 is in order and can dual-issue under limited circumstances. +;; ex2 can be reserved only after ex1 is reserved. 
+ +(final_presence_set "cortex_a7_ex2" "cortex_a7_ex1") + +;; Pseudo-unit for blocking the multiply pipeline when a double-precision +;; multiply is in progress. + +(define_cpu_unit "cortex_a7_fpmul_pipe" "cortex_a7") + +;; The floating-point add pipeline (ex1/f1 stage), used to model the usage +;; of the add pipeline by fmac instructions, etc. + +(define_cpu_unit "cortex_a7_fpadd_pipe" "cortex_a7") + +;; Floating-point div/sqrt (long latency, out-of-order completion). + +(define_cpu_unit "cortex_a7_fp_div_sqrt" "cortex_a7") + +;; Neon pipeline +(define_cpu_unit "cortex_a7_neon" "cortex_a7") + +(define_reservation "cortex_a7_all" "cortex_a7_both+\ + cortex_a7_fpmul_pipe+\ + cortex_a7_fpadd_pipe+\ + cortex_a7_fp_div_sqrt+\ + cortex_a7_neon") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; A direct branch can dual issue either as younger or older instruction, +;; but branches cannot dual issue with branches. +;; No latency as there is no result. + +(define_insn_reservation "cortex_a7_branch" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "branch") + (eq_attr "neon_type" "none"))) + "(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch") + +;; A call reserves all issue slots. The result is available the next cycle. +(define_insn_reservation "cortex_a7_call" 1 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "call") + (eq_attr "neon_type" "none"))) + "cortex_a7_all") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instruction with an immediate operand can dual-issue. 
+(define_insn_reservation "cortex_a7_alu_imm" 2 + (and (eq_attr "tune" "cortexa7") + (and (ior (eq_attr "type" "simple_alu_imm") + (ior (eq_attr "type" "simple_alu_shift") + (and (eq_attr "insn" "mov") + (not (eq_attr "length" "8"))))) + (eq_attr "neon_type" "none"))) + "cortex_a7_ex2|cortex_a7_ex1") + +;; ALU instruction with register operands can dual-issue +;; with a younger immediate-based instruction. +(define_insn_reservation "cortex_a7_alu_reg" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "alu_reg") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_alu_shift" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "alu_shift,alu_shift_reg") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +;; Forwarding path for unshifted operands. +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_mul") + +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_store*" + "arm_no_early_store_addr_dep") + +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_alu_shift" + "arm_no_early_alu_shift_dep") + +;; The multiplier pipeline can forward results from wr stage only so +;; there's no need to specify bypasses. +;; Multiply instructions cannot dual-issue. + +(define_insn_reservation "cortex_a7_mul" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "mult") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +;; The latency depends on the operands, so we use an estimate here. +(define_insn_reservation "cortex_a7_idiv" 5 + (and (eq_attr "tune" "cortexa7") + (eq_attr "insn" "udiv,sdiv")) + "cortex_a7_all*5") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address-generation happens in the issue stage. 
+;; Double-word accesses can be issued in a single cycle, +;; and occupy only one pipeline stage. Three- and four-word accesses also reserve units in a second cycle. + +(define_insn_reservation "cortex_a7_load1" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "load_byte,load1") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_store1" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "store1") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_load2" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "load2") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_store2" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "store2") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_load3" 3 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "load3") + (eq_attr "neon_type" "none"))) + "cortex_a7_both, cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_store3" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "store3") + (eq_attr "neon_type" "none"))) + "cortex_a7_both, cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_load4" 3 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "load4") + (eq_attr "neon_type" "none"))) + "cortex_a7_both, cortex_a7_both") + +(define_insn_reservation "cortex_a7_store4" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "store4") + (eq_attr "neon_type" "none"))) + "cortex_a7_both, cortex_a7_both") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point arithmetic. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a7_fpalu" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys,\ + f_cvt, fcmps, fcmpd") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpadd_pipe") + +;; For fconsts and fconstd, 8-bit immediate data is passed directly from +;; f1 to f3 (which I think reduces the latency by one cycle). + +(define_insn_reservation "cortex_a7_fconst" 3 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fconsts,fconstd") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpadd_pipe") + +;; We should try not to attempt to issue a single-precision multiplication in +;; the middle of a double-precision multiplication operation (the usage of +;; cortex_a7_fpmul_pipe). + +(define_insn_reservation "cortex_a7_fpmuls" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fmuls") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpmul_pipe") + +;; For single-precision multiply-accumulate, the add (accumulate) is issued +;; whilst the multiply is in F4. The multiply result can then be forwarded +;; from F5 to F1. The issue unit is only used once (when we first start +;; processing the instruction), but the usage of the FP add pipeline could +;; block other instructions attempting to use it simultaneously. We try to +;; avoid that using cortex_a7_fpadd_pipe. + +(define_insn_reservation "cortex_a7_fpmacs" 8 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fmacs") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe") + +;; Non-multiply instructions can issue between two cycles of a +;; double-precision multiply. 
+ +(define_insn_reservation "cortex_a7_fpmuld" 7 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fmuld") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\ + cortex_a7_ex1+cortex_a7_fpmul_pipe") + +(define_insn_reservation "cortex_a7_fpmacd" 11 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fmacd") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\ + cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point divide/square root instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a7_fdivs" 16 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fdivs") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 14") + +(define_insn_reservation "cortex_a7_fdivd" 29 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "fdivd") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 28") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP to/from core transfers. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Core-to-VFP transfers. + +(define_insn_reservation "cortex_a7_r2f" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "r_2_f") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_f2r" 2 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_2_r") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP flag transfer. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; FIXME: The flag forwarding from fmstat to the second instruction is +;; not modeled at present. 
+ +(define_insn_reservation "cortex_a7_f_flags" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_flag") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP load/store. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a7_f_loads" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_loads") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_f_loadd" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_loadd") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_f_stores" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_stores") + (eq_attr "neon_type" "none"))) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_f_stored" 0 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "type" "f_stored") + (eq_attr "neon_type" "none"))) + "cortex_a7_both") + +;; Load-to-use for floating-point values has a penalty of one cycle, +;; i.e. a latency of two. + +(define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd" + "cortex_a7_fpalu, cortex_a7_fpmacs, cortex_a7_fpmuld,\ + cortex_a7_fpmacd, cortex_a7_fdivs, cortex_a7_fdivd,\ + cortex_a7_f2r") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; NEON load/store. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +(define_insn_reservation "cortex_a7_neon" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "neon_type" "!none")) + "cortex_a7_both*2") diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md index 7c266d3b71f..73c61b15d0b 100644 --- a/gcc/config/arm/cortex-a8.md +++ b/gcc/config/arm/cortex-a8.md @@ -93,7 +93,7 @@ (define_insn_reservation "cortex_a8_alu_shift" 2 (and (eq_attr "tune" "cortexa8") - (and (eq_attr "type" "alu_shift") + (and (eq_attr "type" "simple_alu_shift,alu_shift") (not (eq_attr "insn" "mov,mvn")))) "cortex_a8_default") @@ -107,7 +107,7 @@ (define_insn_reservation "cortex_a8_mov" 1 (and (eq_attr "tune" "cortexa8") - (and (eq_attr "type" "alu_reg,simple_alu_imm,alu_shift,alu_shift_reg") + (and (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg") (eq_attr "insn" "mov,mvn"))) "cortex_a8_default") diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md index 336c4fcefae..f1bd7cfa91a 100644 --- a/gcc/config/arm/cortex-a9.md +++ b/gcc/config/arm/cortex-a9.md @@ -82,7 +82,7 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") (and (eq_attr "tune" "cortexa9") (ior (and (eq_attr "type" "alu_reg,simple_alu_imm") (eq_attr "neon_type" "none")) - (and (and (eq_attr "type" "alu_shift_reg, alu_shift") + (and (and (eq_attr "type" "alu_shift_reg, simple_alu_shift,alu_shift") (eq_attr "insn" "mov")) (eq_attr "neon_type" "none")))) "cortex_a9_p0_default|cortex_a9_p1_default") @@ -90,7 +90,7 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") ;; An instruction using the shifter will go down E1. 
(define_insn_reservation "cortex_a9_dp_shift" 3 (and (eq_attr "tune" "cortexa9") - (and (eq_attr "type" "alu_shift_reg, alu_shift") + (and (eq_attr "type" "alu_shift_reg, simple_alu_shift,alu_shift") (not (eq_attr "insn" "mov")))) "cortex_a9_p0_shift | cortex_a9_p1_shift") diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md index bff17dd77fb..063fe5fabdd 100644 --- a/gcc/config/arm/cortex-m4.md +++ b/gcc/config/arm/cortex-m4.md @@ -31,7 +31,7 @@ ;; ALU and multiply is one cycle. (define_insn_reservation "cortex_m4_alu" 1 (and (eq_attr "tune" "cortexm4") - (eq_attr "type" "alu_reg,simple_alu_imm,alu_shift,alu_shift_reg,mult")) + (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg,mult")) "cortex_m4_ex") ;; Byte, half-word and word load is two cycles. diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md index 26de65aa1b3..a870dc06f51 100644 --- a/gcc/config/arm/cortex-r4.md +++ b/gcc/config/arm/cortex-r4.md @@ -90,7 +90,7 @@ (define_insn_reservation "cortex_r4_alu_shift" 2 (and (eq_attr "tune_cortexr4" "yes") - (eq_attr "type" "alu_shift")) + (eq_attr "type" "simple_alu_shift,alu_shift")) "cortex_r4_alu") (define_insn_reservation "cortex_r4_alu_shift_reg" 2 diff --git a/gcc/config/arm/driver-arm.c b/gcc/config/arm/driver-arm.c index 3e14b14593c..3a17e104aca 100644 --- a/gcc/config/arm/driver-arm.c +++ b/gcc/config/arm/driver-arm.c @@ -37,6 +37,7 @@ static struct vendor_cpu arm_cpu_table[] = { {"0xb56", "armv6t2", "arm1156t2-s"}, {"0xb76", "armv6zk", "arm1176jz-s"}, {"0xc05", "armv7-a", "cortex-a5"}, + {"0xc07", "armv7-a", "cortex-a7"}, {"0xc08", "armv7-a", "cortex-a8"}, {"0xc09", "armv7-a", "cortex-a9"}, {"0xc0f", "armv7-a", "cortex-a15"}, diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md index 2b89bb5429b..81085225753 100644 --- a/gcc/config/arm/fa526.md +++ b/gcc/config/arm/fa526.md @@ -67,7 +67,7 @@ (define_insn_reservation "526_alu_shift_op" 2 (and (eq_attr "tune" "fa526") - (eq_attr 
"type" "alu_shift,alu_shift_reg")) + (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg")) "fa526_core") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md index 4725b93b6cc..d995b1d9663 100644 --- a/gcc/config/arm/fa606te.md +++ b/gcc/config/arm/fa606te.md @@ -62,7 +62,7 @@ ;; ALU operations (define_insn_reservation "606te_alu_op" 1 (and (eq_attr "tune" "fa606te") - (eq_attr "type" "alu_reg,simple_alu_imm,alu_shift,alu_shift_reg")) + (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg")) "fa606te_core") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md index bed3995a5e2..6b01b06aaaf 100644 --- a/gcc/config/arm/fa626te.md +++ b/gcc/config/arm/fa626te.md @@ -73,7 +73,7 @@ (define_insn_reservation "626te_alu_shift_op" 2 (and (eq_attr "tune" "fa626,fa626te") - (eq_attr "type" "alu_shift,alu_shift_reg")) + (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg")) "fa626te_core") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md index a4c256ce22e..7c898ab3b17 100644 --- a/gcc/config/arm/fa726te.md +++ b/gcc/config/arm/fa726te.md @@ -95,7 +95,7 @@ ;; it takes 3 cycles. 
(define_insn_reservation "726te_alu_shift_op" 3 (and (eq_attr "tune" "fa726te") - (and (eq_attr "type" "alu_shift") + (and (eq_attr "type" "simple_alu_shift,alu_shift") (not (eq_attr "insn" "mov,mvn")))) "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") diff --git a/gcc/config/arm/fmp626.md b/gcc/config/arm/fmp626.md index 228817c85e5..f63b6bf54a2 100644 --- a/gcc/config/arm/fmp626.md +++ b/gcc/config/arm/fmp626.md @@ -68,7 +68,7 @@ (define_insn_reservation "mp626_alu_shift_op" 2 (and (eq_attr "tune" "fmp626") - (eq_attr "type" "alu_shift,alu_shift_reg")) + (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg")) "fmp626_core") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 5ae1aefe2b9..3a20f5fea83 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -429,8 +429,8 @@ (define_mode_attr qhs_extenddi_op [(SI "s_register_operand") (HI "nonimmediate_operand") (QI "arm_reg_or_extendqisi_mem_op")]) -(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")]) -(define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")]) +(define_mode_attr qhs_extenddi_cstr [(SI "r,0,r,r") (HI "r,0,rm,rm") (QI "r,0,rUq,rm")]) +(define_mode_attr qhs_zextenddi_cstr [(SI "r,0,r") (HI "r,0,rm") (QI "r,0,rm")]) ;; Mode attributes used for fixed-point support. 
(define_mode_attr qaddsub_suf [(V4UQQ "8") (V2UHQ "16") (UQQ "8") (UHQ "16") diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index fc382698a0d..c3f14bb8edf 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5932,3 +5932,65 @@ (const_string "neon_fp_vadd_qqq_vabs_qq")) (const_string "neon_int_5")))] ) + +;; Copy from core-to-neon regs, then extend, not vice-versa + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))] + { + operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))] + { + operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))] + { + operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))] + { + operands[2] = gen_rtx_REG (V2SImode, REGNO 
(operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) + (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))] + { + operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) + (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))] + { + operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); + }) diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index 731b6146e76..2ceb938199f 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -32,6 +32,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \ $(srcdir)/config/arm/constraints.md \ $(srcdir)/config/arm/cortex-a15.md \ $(srcdir)/config/arm/cortex-a5.md \ + $(srcdir)/config/arm/cortex-a7.md \ $(srcdir)/config/arm/cortex-a8.md \ $(srcdir)/config/arm/cortex-a8-neon.md \ $(srcdir)/config/arm/cortex-a9.md \ diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index f22666cf9a9..d4dd1b9364c 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -570,11 +570,7 @@ "@ sxtb%?\\t%0, %1 ldr%(sb%)\\t%0, %1" - [(set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - (const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte")]) + [(set_attr "type" "simple_alu_shift,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,4094") (set_attr "neg_pool_range" "*,250")] @@ -587,11 +583,7 @@ "@ uxth%?\\t%0, %1 ldr%(h%)\\t%0, %1" - [(set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - 
(const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte")]) + [(set_attr "type" "simple_alu_shift,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,4094") (set_attr "neg_pool_range" "*,250")] @@ -604,11 +596,7 @@ "@ uxtb%(%)\\t%0, %1 ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" - [(set_attr_alternative "type" - [(if_then_else (eq_attr "tune" "cortexa7") - (const_string "simple_alu_imm") - (const_string "alu_shift")) - (const_string "load_byte")]) + [(set_attr "type" "simple_alu_shift,load_byte") (set_attr "predicable" "yes") (set_attr "pool_range" "*,4094") (set_attr "neg_pool_range" "*,250")] diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index a5c461df36f..3f2975ff546 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -1265,6 +1265,31 @@ (set_attr "type" "f_rint<vfp_type>")] ) +;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. +;; The 'smax' and 'smin' RTL standard pattern names do not specify which +;; operand will be returned when both operands are zero (i.e. they may not +;; honour signed zeroes), or when either operand is NaN. Therefore GCC +;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring +;; NaNs. 
+ +(define_insn "smax<mode>3" + [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>") + (smax:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>") + (match_operand:SDF 2 "register_operand" "<F_constraint>")))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" + "vmaxnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "type" "f_minmax<vfp_type>")] +) + +(define_insn "smin<mode>3" + [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>") + (smin:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>") + (match_operand:SDF 2 "register_operand" "<F_constraint>")))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>" + "vminnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "type" "f_minmax<vfp_type>")] +) + ;; Unimplemented insns: ;; fldm* ;; fstm* diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 69f44aa6086..b466a4fbbdf 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2026,7 +2026,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE regs instead of memory. */ - m_COREI7 | m_CORE2I7 + m_COREI7 | m_CORE2I7, + + /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for + a conditional move. */ + m_ATOM }; /* Feature tests against the various architecture variations. 
*/ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 3ac345172c8..d2f535a7566 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -331,6 +331,7 @@ enum ix86_tune_indices { X86_TUNE_REASSOC_INT_TO_PARALLEL, X86_TUNE_REASSOC_FP_TO_PARALLEL, X86_TUNE_GENERAL_REGS_SSE_SPILL, + X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, X86_TUNE_LAST }; @@ -436,6 +437,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_REASSOC_FP_TO_PARALLEL] #define TARGET_GENERAL_REGS_SSE_SPILL \ ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL] +#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \ + ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3846065066b..95a52cdd1a7 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16093,6 +16093,28 @@ [(set_attr "type" "icmov") (set_attr "mode" "<MODE>")]) +;; Don't do conditional moves with memory inputs. This splitter helps +;; register starved x86_32 by forcing inputs into registers before reload. 
+(define_split + [(set (match_operand:SWI248 0 "register_operand") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI248 2 "nonimmediate_operand") + (match_operand:SWI248 3 "nonimmediate_operand")))] + "!TARGET_64BIT && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && (MEM_P (operands[2]) || MEM_P (operands[3])) + && can_create_pseudo_p () + && optimize_insn_for_speed_p ()" + [(set (match_dup 0) + (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))] +{ + if (MEM_P (operands[2])) + operands[2] = force_reg (<MODE>mode, operands[2]); + if (MEM_P (operands[3])) + operands[3] = force_reg (<MODE>mode, operands[3]); +}) + (define_insn "*movqicc_noc" [(set (match_operand:QI 0 "register_operand" "=r,r") (if_then_else:QI (match_operator 1 "ix86_comparison_operator" @@ -16105,14 +16127,12 @@ (set_attr "mode" "QI")]) (define_split - [(set (match_operand 0 "register_operand") - (if_then_else (match_operator 1 "ix86_comparison_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand 2 "register_operand") - (match_operand 3 "register_operand")))] + [(set (match_operand:SWI12 0 "register_operand") + (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI12 2 "register_operand") + (match_operand:SWI12 3 "register_operand")))] "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL - && (GET_MODE (operands[0]) == QImode - || GET_MODE (operands[0]) == HImode) && reload_completed" [(set (match_dup 0) (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))] @@ -16122,6 +16142,33 @@ operands[3] = gen_lowpart (SImode, operands[3]); }) +;; Don't do conditional moves with memory inputs +(define_peephole2 + [(match_scratch:SWI248 2 "r") + (set (match_operand:SWI248 0 "register_operand") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_dup 0) + (match_operand:SWI248 3 
"memory_operand")))] + "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (if_then_else:SWI248 (match_dup 1) (match_dup 0) (match_dup 2)))]) + +(define_peephole2 + [(match_scratch:SWI248 2 "r") + (set (match_operand:SWI248 0 "register_operand") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI248 3 "memory_operand") + (match_dup 0)))] + "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 0)))]) + (define_expand "mov<mode>cc" [(set (match_operand:X87MODEF 0 "register_operand") (if_then_else:X87MODEF @@ -16209,6 +16256,59 @@ [(set_attr "type" "fcmov,fcmov,icmov,icmov") (set_attr "mode" "SF,SF,SI,SI")]) +;; Don't do conditional moves with memory inputs. This splitter helps +;; register starved x86_32 by forcing inputs into registers before reload. 
+(define_split + [(set (match_operand:MODEF 0 "register_operand") + (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:MODEF 2 "nonimmediate_operand") + (match_operand:MODEF 3 "nonimmediate_operand")))] + "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && (MEM_P (operands[2]) || MEM_P (operands[3])) + && can_create_pseudo_p () + && optimize_insn_for_speed_p ()" + [(set (match_dup 0) + (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))] +{ + if (MEM_P (operands[2])) + operands[2] = force_reg (<MODE>mode, operands[2]); + if (MEM_P (operands[3])) + operands[3] = force_reg (<MODE>mode, operands[3]); +}) + +;; Don't do conditional moves with memory inputs +(define_peephole2 + [(match_scratch:MODEF 2 "r") + (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand") + (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_dup 0) + (match_operand:MODEF 3 "memory_operand")))] + "(<MODE>mode != DFmode || TARGET_64BIT) + && TARGET_80387 && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (if_then_else:MODEF (match_dup 1) (match_dup 0) (match_dup 2)))]) + +(define_peephole2 + [(match_scratch:MODEF 2 "r") + (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand") + (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:MODEF 3 "memory_operand") + (match_dup 0)))] + "(<MODE>mode != DFmode || TARGET_64BIT) + && TARGET_80387 && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 0)))]) + ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict ;; the scalar versions to 
have only XMM registers as operands. diff --git a/gcc/config/i386/t-kfreebsd b/gcc/config/i386/t-kfreebsd index b8338a9f9fc..762d520fae9 100644 --- a/gcc/config/i386/t-kfreebsd +++ b/gcc/config/i386/t-kfreebsd @@ -2,4 +2,4 @@ MULTIARCH_DIRNAME = $(call if_multiarch,i386-kfreebsd-gnu) # MULTILIB_OSDIRNAMES are set in t-linux64. KFREEBSD_OS = $(filter kfreebsd%, $(word 3, $(subst -, ,$(target)))) -MULTILIB_OSDIRNAMES := $(filter-out mx32=% $(subst linux,$(KFREEBSD_OS),$(MULTILIB_OSDIRNAMES))) +MULTILIB_OSDIRNAMES := $(filter-out mx32=%,$(subst linux,$(KFREEBSD_OS),$(MULTILIB_OSDIRNAMES))) diff --git a/gcc/config/rs6000/t-linux b/gcc/config/rs6000/t-linux index 3611027ae27..017a293cde3 100644 --- a/gcc/config/rs6000/t-linux +++ b/gcc/config/rs6000/t-linux @@ -1,5 +1,9 @@ # do not define the multiarch name if configured for a soft-float cpu # or soft-float. ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT_CPUS))$(findstring soft,$(with_float))) +ifneq (,$(findstring spe,$(target))) +MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file_list)),,v1) +else MULTIARCH_DIRNAME = powerpc-linux-gnu endif +endif diff --git a/gcc/config/rs6000/t-spe b/gcc/config/rs6000/t-spe index 90eb802a495..1bed1e32b0e 100644 --- a/gcc/config/rs6000/t-spe +++ b/gcc/config/rs6000/t-spe @@ -71,7 +71,3 @@ MULTILIB_EXCEPTIONS = maltivec mabi=altivec mno-spe mabi=no-spe mno-isel \ mabi=altivec/mlittle \ maltivec/mlittle \ maltivec/mabi=altivec/mlittle - -ifneq (,$(findstring linux, $(target))) -MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file)),,v1) -endif diff --git a/gcc/config/s390/constraints.md b/gcc/config/s390/constraints.md index 8564b6619a5..9d416adb165 100644 --- a/gcc/config/s390/constraints.md +++ b/gcc/config/s390/constraints.md @@ -45,6 +45,8 @@ ;; H,Q: mode of the part ;; D,S,H: mode of the containing operand ;; 0,F: value of the other parts (F - all bits set) +;; -- +;; xx[DS]q satisfies s390_contiguous_bitmask_p 
for DImode or SImode ;; ;; The constraint matches if the specified part of a constant ;; has a value different from its other parts. If the letter x @@ -330,8 +332,15 @@ (and (match_code "const_int") (match_test "s390_N_constraint_str (\"xQH0\", ival)"))) +(define_constraint "NxxDq" + "@internal" + (and (match_code "const_int") + (match_test "s390_contiguous_bitmask_p (ival, 64, NULL, NULL)"))) - +(define_constraint "NxxSq" + "@internal" + (and (match_code "const_int") + (match_test "s390_contiguous_bitmask_p (ival, 32, NULL, NULL)"))) ;; ;; Double-letter constraints starting with O follow. diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md index 9ba85bf387c..d5e185d5ac7 100644 --- a/gcc/config/s390/predicates.md +++ b/gcc/config/s390/predicates.md @@ -101,6 +101,10 @@ return true; }) +(define_predicate "nonzero_shift_count_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, GET_MODE_BITSIZE (mode) - 1)"))) + ;; Return true if OP a valid operand for the LARL instruction. 
(define_predicate "larl_operand" @@ -154,6 +158,12 @@ return false; }) +(define_predicate "contiguous_bitmask_operand" + (match_code "const_int") +{ + return s390_contiguous_bitmask_p (INTVAL (op), GET_MODE_BITSIZE (mode), NULL, NULL); +}) + ;; operators -------------------------------------------------------------- ;; Return nonzero if OP is a valid comparison operator diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index 79673d6d835..a494ba22893 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -58,7 +58,6 @@ extern int tls_symbolic_operand (rtx); extern bool s390_match_ccmode (rtx, enum machine_mode); extern enum machine_mode s390_tm_ccmode (rtx, rtx, bool); extern enum machine_mode s390_select_ccmode (enum rtx_code, rtx, rtx); -extern void s390_canonicalize_comparison (enum rtx_code *, rtx *, rtx *); extern rtx s390_emit_compare (enum rtx_code, rtx, rtx); extern void s390_emit_jump (rtx, rtx); extern bool symbolic_reference_mentioned_p (rtx); @@ -110,5 +109,6 @@ extern bool s390_legitimate_address_without_index_p (rtx); extern bool s390_decompose_shift_count (rtx, rtx *, HOST_WIDE_INT *); extern int s390_branch_condition_mask (rtx); extern int s390_compare_and_branch_condition_mask (rtx); +extern bool s390_extzv_shift_ok (int, int, unsigned HOST_WIDE_INT); #endif /* RTX_CODE */ diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 6517bce15e0..2edc8ab78f2 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -745,9 +745,13 @@ s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1) /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one that we can implement more efficiently. */ -void -s390_canonicalize_comparison (enum rtx_code *code, rtx *op0, rtx *op1) +static void +s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) { + if (op0_preserve_value) + return; + /* Convert ZERO_EXTRACT back to AND to enable TM patterns. 
*/ if ((*code == EQ || *code == NE) && *op1 == const0_rtx @@ -894,7 +898,7 @@ s390_canonicalize_comparison (enum rtx_code *code, rtx *op0, rtx *op1) if (MEM_P (*op0) && REG_P (*op1)) { rtx tem = *op0; *op0 = *op1; *op1 = tem; - *code = swap_condition (*code); + *code = (int)swap_condition ((enum rtx_code)*code); } } @@ -1343,6 +1347,24 @@ s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size, return true; } +/* Check whether a rotate of ROTL followed by an AND of CONTIG is + equivalent to a shift followed by the AND. In particular, CONTIG + should not overlap the (rotated) bit 0/bit 63 gap. Negative values + for ROTL indicate a rotate to the right. */ + +bool +s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig) +{ + int pos, len; + bool ok; + + ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len); + gcc_assert (ok); + + return ((rotl >= 0 && rotl <= pos) + || (rotl < 0 && -rotl <= bitsize - len - pos)); +} + /* Check whether we can (and want to) split a double-word move in mode MODE from SRC to DST into two single-word moves, moving the subword FIRST_SUBWORD first. */ @@ -5364,28 +5386,35 @@ print_operand_address (FILE *file, rtx addr) 'C': print opcode suffix for branch condition. 'D': print opcode suffix for inverse branch condition. 'E': print opcode suffix for branch on index instruction. - 'J': print tls_load/tls_gdcall/tls_ldcall suffix 'G': print the size of the operand in bytes. + 'J': print tls_load/tls_gdcall/tls_ldcall suffix + 'M': print the second word of a TImode operand. + 'N': print the second word of a DImode operand. 'O': print only the displacement of a memory reference. 'R': print only the base register of a memory reference. 'S': print S-type memory reference (base+displacement). - 'N': print the second word of a DImode operand. - 'M': print the second word of a TImode operand. 'Y': print shift count operand. 'b': print integer X as if it's an unsigned byte. 'c': print integer X as if it's an signed byte. 
- 'x': print integer X as if it's an unsigned halfword. + 'e': "end" of DImode contiguous bitmask X. + 'f': "end" of SImode contiguous bitmask X. 'h': print integer X as if it's a signed halfword. 'i': print the first nonzero HImode part of X. 'j': print the first HImode part unequal to -1 of X. 'k': print the first nonzero SImode part of X. 'm': print the first SImode part unequal to -1 of X. - 'o': print integer X as if it's an unsigned 32bit word. */ + 'o': print integer X as if it's an unsigned 32bit word. + 's': "start" of DImode contiguous bitmask X. + 't': "start" of SImode contiguous bitmask X. + 'x': print integer X as if it's an unsigned halfword. +*/ void print_operand (FILE *file, rtx x, int code) { + HOST_WIDE_INT ival; + switch (code) { case 'C': @@ -5564,30 +5593,57 @@ print_operand (FILE *file, rtx x, int code) break; case CONST_INT: - if (code == 'b') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xff); - else if (code == 'c') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((INTVAL (x) & 0xff) ^ 0x80) - 0x80); - else if (code == 'x') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff); - else if (code == 'h') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000); - else if (code == 'i') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, - s390_extract_part (x, HImode, 0)); - else if (code == 'j') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, - s390_extract_part (x, HImode, -1)); - else if (code == 'k') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, - s390_extract_part (x, SImode, 0)); - else if (code == 'm') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, - s390_extract_part (x, SImode, -1)); - else if (code == 'o') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffffffff); - else - fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + ival = INTVAL (x); + switch (code) + { + case 0: + break; + case 'b': + ival &= 0xff; + break; + case 'c': + ival = ((ival & 0xff) ^ 0x80) - 0x80; + break; + case 'x': + ival &= 
0xffff; + break; + case 'h': + ival = ((ival & 0xffff) ^ 0x8000) - 0x8000; + break; + case 'i': + ival = s390_extract_part (x, HImode, 0); + break; + case 'j': + ival = s390_extract_part (x, HImode, -1); + break; + case 'k': + ival = s390_extract_part (x, SImode, 0); + break; + case 'm': + ival = s390_extract_part (x, SImode, -1); + break; + case 'o': + ival &= 0xffffffff; + break; + case 'e': case 'f': + case 's': case 't': + { + int pos, len; + bool ok; + + len = (code == 's' || code == 'e' ? 64 : 32); + ok = s390_contiguous_bitmask_p (ival, len, &pos, &len); + gcc_assert (ok); + if (code == 's' || code == 't') + ival = 64 - pos - len; + else + ival = 64 - 1 - pos; + } + break; + default: + output_operand_lossage ("invalid constant for output modifier '%c'", code); + } + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival); break; case CONST_DOUBLE: @@ -11071,6 +11127,9 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop) #undef TARGET_UNWIND_WORD_MODE #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-s390.h" diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 30408f4ac28..286046abdff 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -720,10 +720,6 @@ do { \ return the mode to be used for the comparison. */ #define SELECT_CC_MODE(OP, X, Y) s390_select_ccmode ((OP), (X), (Y)) -/* Canonicalize a comparison from one we don't have to one we do have. */ -#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \ - s390_canonicalize_comparison (&(CODE), &(OP0), &(OP1)) - /* Relative costs of operations. */ /* A C expression for the cost of a branch instruction. 
A value of 1 diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index bea58cd7a83..4666122ecef 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -393,6 +393,9 @@ ;; the same template. (define_code_iterator SHIFT [ashift lshiftrt]) +;; This iterator allow r[ox]sbg to be defined with the same template +(define_code_iterator IXOR [ior xor]) + ;; This iterator and attribute allow to combine most atomic operations. (define_code_iterator ATOMIC [and ior xor plus minus mult]) (define_code_iterator ATOMIC_Z196 [and ior xor plus]) @@ -527,6 +530,13 @@ ;; Maximum unsigned integer that fits in MODE. (define_mode_attr max_uint [(HI "65535") (QI "255")]) +;; Start and end field computations for RISBG et al. +(define_mode_attr bfstart [(DI "s") (SI "t")]) +(define_mode_attr bfend [(DI "e") (SI "f")]) + +;; In place of GET_MODE_BITSIZE (<MODE>mode) +(define_mode_attr bitsize [(DI "64") (SI "32") (HI "16") (QI "8")]) + ;; ;;- Compare instructions. ;; @@ -3300,15 +3310,64 @@ [(set_attr "op_type" "RS,RSY") (set_attr "z10prop" "z10_super_E1,z10_super_E1")]) +; +; extv instruction patterns +; + +; FIXME: This expander needs to be converted from DI to GPR as well +; after resolving some issues with it. + +(define_expand "extzv" + [(parallel + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extract:DI + (match_operand:DI 1 "register_operand" "d") + (match_operand 2 "const_int_operand" "") ; size + (match_operand 3 "const_int_operand" ""))) ; start + (clobber (reg:CC CC_REGNUM))])] + "TARGET_Z10" +{ + /* Starting with zEC12 there is risbgn not clobbering CC. 
*/ + if (TARGET_ZEC12) + { + emit_move_insn (operands[0], + gen_rtx_ZERO_EXTRACT (DImode, + operands[1], + operands[2], + operands[3])); + DONE; + } +}) -(define_insn_and_split "*extzv<mode>" +(define_insn "*extzv<mode>_zEC12" + [(set (match_operand:GPR 0 "register_operand" "=d") + (zero_extract:GPR + (match_operand:GPR 1 "register_operand" "d") + (match_operand 2 "const_int_operand" "") ; size + (match_operand 3 "const_int_operand" "")))] ; start] + "TARGET_ZEC12" + "risbgn\t%0,%1,64-%2,128+63,<bitsize>+%3+%2" ; dst, src, start, end, shift + [(set_attr "op_type" "RIE")]) + +(define_insn "*extzv<mode>_z10" + [(set (match_operand:GPR 0 "register_operand" "=d") + (zero_extract:GPR + (match_operand:GPR 1 "register_operand" "d") + (match_operand 2 "const_int_operand" "") ; size + (match_operand 3 "const_int_operand" ""))) ; start + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10" + "risbg\t%0,%1,64-%2,128+63,<bitsize>+%3+%2" ; dst, src, start, end, shift + [(set_attr "op_type" "RIE") + (set_attr "z10prop" "z10_super_E1")]) + +(define_insn_and_split "*pre_z10_extzv<mode>" [(set (match_operand:GPR 0 "register_operand" "=d") (zero_extract:GPR (match_operand:QI 1 "s_operand" "QS") - (match_operand 2 "const_int_operand" "n") + (match_operand 2 "nonzero_shift_count_operand" "") (const_int 0))) (clobber (reg:CC CC_REGNUM))] - "INTVAL (operands[2]) > 0 - && INTVAL (operands[2]) <= GET_MODE_BITSIZE (SImode)" + "!TARGET_Z10" "#" "&& reload_completed" [(parallel @@ -3322,18 +3381,17 @@ operands[1] = adjust_address (operands[1], BLKmode, 0); set_mem_size (operands[1], size); - operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - bitsize); + operands[2] = GEN_INT (<GPR:bitsize> - bitsize); operands[3] = GEN_INT (mask); }) -(define_insn_and_split "*extv<mode>" +(define_insn_and_split "*pre_z10_extv<mode>" [(set (match_operand:GPR 0 "register_operand" "=d") (sign_extract:GPR (match_operand:QI 1 "s_operand" "QS") - (match_operand 2 "const_int_operand" "n") + (match_operand 2 
"nonzero_shift_count_operand" "") (const_int 0))) (clobber (reg:CC CC_REGNUM))] - "INTVAL (operands[2]) > 0 - && INTVAL (operands[2]) <= GET_MODE_BITSIZE (SImode)" + "" "#" "&& reload_completed" [(parallel @@ -3349,7 +3407,7 @@ operands[1] = adjust_address (operands[1], BLKmode, 0); set_mem_size (operands[1], size); - operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - bitsize); + operands[2] = GEN_INT (<GPR:bitsize> - bitsize); operands[3] = GEN_INT (mask); }) @@ -3369,49 +3427,29 @@ FAIL; }) + +; The normal RTL expansion will never generate a zero_extract where +; the location operand isn't word mode. However, we do this in the +; back-end when generating atomic operations. See s390_two_part_insv. (define_insn "*insv<mode>_zEC12" [(set (zero_extract:GPR (match_operand:GPR 0 "nonimmediate_operand" "+d") - (match_operand 1 "const_int_operand" "I") - (match_operand 2 "const_int_operand" "I")) + (match_operand 1 "const_int_operand" "I") ; size + (match_operand 2 "const_int_operand" "I")) ; pos (match_operand:GPR 3 "nonimmediate_operand" "d"))] "TARGET_ZEC12 - && (INTVAL (operands[1]) + INTVAL (operands[2])) <= - GET_MODE_BITSIZE (<MODE>mode)" -{ - int start = INTVAL (operands[2]); - int size = INTVAL (operands[1]); - int offset = 64 - GET_MODE_BITSIZE (<MODE>mode); - - operands[2] = GEN_INT (offset + start); /* start bit position */ - operands[1] = GEN_INT (offset + start + size - 1); /* end bit position */ - operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - - start - size); /* left shift count */ - - return "risbgn\t%0,%3,%b2,%b1,%b4"; -} + && (INTVAL (operands[1]) + INTVAL (operands[2])) <= <bitsize>" + "risbgn\t%0,%3,64-<bitsize>+%2,64-<bitsize>+%2+%1-1,<bitsize>-%2-%1" [(set_attr "op_type" "RIE")]) (define_insn "*insv<mode>_z10" [(set (zero_extract:GPR (match_operand:GPR 0 "nonimmediate_operand" "+d") - (match_operand 1 "const_int_operand" "I") - (match_operand 2 "const_int_operand" "I")) + (match_operand 1 "const_int_operand" "I") ; size + (match_operand 
2 "const_int_operand" "I")) ; pos (match_operand:GPR 3 "nonimmediate_operand" "d")) (clobber (reg:CC CC_REGNUM))] "TARGET_Z10 - && (INTVAL (operands[1]) + INTVAL (operands[2])) <= - GET_MODE_BITSIZE (<MODE>mode)" -{ - int start = INTVAL (operands[2]); - int size = INTVAL (operands[1]); - int offset = 64 - GET_MODE_BITSIZE (<MODE>mode); - - operands[2] = GEN_INT (offset + start); /* start bit position */ - operands[1] = GEN_INT (offset + start + size - 1); /* end bit position */ - operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - - start - size); /* left shift count */ - - return "risbg\t%0,%3,%b2,%b1,%b4"; -} + && (INTVAL (operands[1]) + INTVAL (operands[2])) <= <bitsize>" + "risbg\t%0,%3,64-<bitsize>+%2,64-<bitsize>+%2+%1-1,<bitsize>-%2-%1" [(set_attr "op_type" "RIE") (set_attr "z10prop" "z10_super_E1")]) @@ -3420,82 +3458,135 @@ (define_insn "*insv<mode>_zEC12_noshift" [(set (match_operand:GPR 0 "nonimmediate_operand" "=d") (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "d") - (match_operand 2 "const_int_operand" "n")) + (match_operand:GPR 2 "contiguous_bitmask_operand" "")) (and:GPR (match_operand:GPR 3 "nonimmediate_operand" "0") - (match_operand 4 "const_int_operand" "n"))))] - "TARGET_ZEC12 - && s390_contiguous_bitmask_p (INTVAL (operands[2]), - GET_MODE_BITSIZE (<MODE>mode), NULL, NULL) - && INTVAL (operands[2]) == ~(INTVAL (operands[4]))" - -{ - int start; - int size; - - s390_contiguous_bitmask_p (INTVAL (operands[2]), - GET_MODE_BITSIZE (<MODE>mode), &start, &size); - - operands[5] = GEN_INT (64 - start - size); /* start bit position */ - operands[6] = GEN_INT (64 - 1 - start); /* end bit position */ - operands[7] = const0_rtx; /* left shift count */ - - return "risbgn\t%0,%1,%b5,%b6,%b7"; -} + (match_operand:GPR 4 "const_int_operand" ""))))] + "TARGET_ZEC12 && INTVAL (operands[2]) == ~INTVAL (operands[4])" + "risbgn\t%0,%1,%<bfstart>2,%<bfend>2,0" [(set_attr "op_type" "RIE")]) -; and op1 with a mask being 1 for the selected bits and 0 
for the rest -; and op3=op0 with a mask being 0 for the selected bits and 1 for the rest (define_insn "*insv<mode>_z10_noshift" [(set (match_operand:GPR 0 "nonimmediate_operand" "=d") (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "d") - (match_operand 2 "const_int_operand" "n")) + (match_operand:GPR 2 "contiguous_bitmask_operand" "")) (and:GPR (match_operand:GPR 3 "nonimmediate_operand" "0") - (match_operand 4 "const_int_operand" "n")))) + (match_operand:GPR 4 "const_int_operand" "")))) (clobber (reg:CC CC_REGNUM))] - "TARGET_Z10 - && s390_contiguous_bitmask_p (INTVAL (operands[2]), - GET_MODE_BITSIZE (<MODE>mode), NULL, NULL) - && INTVAL (operands[2]) == ~(INTVAL (operands[4]))" - -{ - int start; - int size; - - s390_contiguous_bitmask_p (INTVAL (operands[2]), - GET_MODE_BITSIZE (<MODE>mode), &start, &size); - - operands[5] = GEN_INT (64 - start - size); /* start bit position */ - operands[6] = GEN_INT (64 - 1 - start); /* end bit position */ - operands[7] = const0_rtx; /* left shift count */ - - return "risbg\t%0,%1,%b5,%b6,%b7"; -} + "TARGET_Z10 && INTVAL (operands[2]) == ~INTVAL (operands[4])" + "risbg\t%0,%1,%<bfstart>2,%<bfend>2,0" [(set_attr "op_type" "RIE") (set_attr "z10prop" "z10_super_E1")]) -; and op1 with a mask being 1 for the selected bits and 0 for the rest -(define_insn "*insv<mode>_or_z10_noshift" +(define_insn "*r<noxa>sbg_<mode>_noshift" [(set (match_operand:GPR 0 "nonimmediate_operand" "=d") - (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "d") - (match_operand 2 "const_int_operand" "n")) - (match_operand:GPR 3 "nonimmediate_operand" "0"))) + (IXOR:GPR + (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "d") + (match_operand:GPR 2 "contiguous_bitmask_operand" "")) + (match_operand:GPR 3 "nonimmediate_operand" "0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10" + "r<noxa>sbg\t%0,%1,%<bfstart>2,%<bfend>2,0" + [(set_attr "op_type" "RIE")]) + +(define_insn "*r<noxa>sbg_di_rotl" + [(set (match_operand:DI 0 
"nonimmediate_operand" "=d") + (IXOR:DI + (and:DI + (rotate:DI + (match_operand:DI 1 "nonimmediate_operand" "d") + (match_operand:DI 3 "const_int_operand" "")) + (match_operand:DI 2 "contiguous_bitmask_operand" "")) + (match_operand:DI 4 "nonimmediate_operand" "0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10" + "r<noxa>sbg\t%0,%1,%<bfstart>2,%<bfend>2,%b3" + [(set_attr "op_type" "RIE")]) + +(define_insn "*r<noxa>sbg_<mode>_srl" + [(set (match_operand:GPR 0 "nonimmediate_operand" "=d") + (IXOR:GPR + (and:GPR + (lshiftrt:GPR + (match_operand:GPR 1 "nonimmediate_operand" "d") + (match_operand:GPR 3 "nonzero_shift_count_operand" "")) + (match_operand:GPR 2 "contiguous_bitmask_operand" "")) + (match_operand:GPR 4 "nonimmediate_operand" "0"))) (clobber (reg:CC CC_REGNUM))] "TARGET_Z10 - && s390_contiguous_bitmask_p (INTVAL (operands[2]), - GET_MODE_BITSIZE (<MODE>mode), NULL, NULL)" -{ - int start; - int size; + && s390_extzv_shift_ok (<bitsize>, 64 - INTVAL (operands[3]), + INTVAL (operands[2]))" + "r<noxa>sbg\t%0,%1,%<bfstart>2,%<bfend>2,64-%3" + [(set_attr "op_type" "RIE")]) - s390_contiguous_bitmask_p (INTVAL (operands[2]), - GET_MODE_BITSIZE (<MODE>mode), &start, &size); +(define_insn "*r<noxa>sbg_<mode>_sll" + [(set (match_operand:GPR 0 "nonimmediate_operand" "=d") + (IXOR:GPR + (and:GPR + (ashift:GPR + (match_operand:GPR 1 "nonimmediate_operand" "d") + (match_operand:GPR 3 "nonzero_shift_count_operand" "")) + (match_operand:GPR 2 "contiguous_bitmask_operand" "")) + (match_operand:GPR 4 "nonimmediate_operand" "0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10 + && s390_extzv_shift_ok (<bitsize>, INTVAL (operands[3]), + INTVAL (operands[2]))" + "r<noxa>sbg\t%0,%1,%<bfstart>2,%<bfend>2,%3" + [(set_attr "op_type" "RIE")]) - operands[4] = GEN_INT (64 - start - size); /* start bit position */ - operands[5] = GEN_INT (64 - 1 - start); /* end bit position */ - operands[6] = const0_rtx; /* left shift count */ +;; These two are generated by combine for s.bf &= val. 
+;; ??? For bitfields smaller than 32-bits, we wind up with SImode +;; shifts and ands, which results in some truly awful patterns +;; including subregs of operations. Rather unnecessarily, IMO. +;; Instead of +;; +;; (set (zero_extract:DI (reg/v:DI 50 [ s ]) +;; (const_int 24 [0x18]) +;; (const_int 0 [0])) +;; (subreg:DI (and:SI (subreg:SI (lshiftrt:DI (reg/v:DI 50 [ s ]) +;; (const_int 40 [0x28])) 4) +;; (reg:SI 4 %r4 [ y+4 ])) 0)) +;; +;; we should instead generate +;; +;; (set (zero_extract:DI (reg/v:DI 50 [ s ]) +;; (const_int 24 [0x18]) +;; (const_int 0 [0])) +;; (and:DI (lshiftrt:DI (reg/v:DI 50 [ s ]) +;; (const_int 40 [0x28])) +;; (subreg:DI (reg:SI 4 %r4 [ y+4 ]) 0))) +;; +;; by noticing that we can push down the outer paradoxical subreg +;; into the operation. + +(define_insn "*insv_rnsbg_noshift" + [(set (zero_extract:DI + (match_operand:DI 0 "nonimmediate_operand" "+d") + (match_operand 1 "const_int_operand" "") + (match_operand 2 "const_int_operand" "")) + (and:DI + (match_dup 0) + (match_operand:DI 3 "nonimmediate_operand" "d"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10 + && INTVAL (operands[1]) + INTVAL (operands[2]) == 64" + "rnsbg\t%0,%3,%2,63,0" + [(set_attr "op_type" "RIE")]) - return "rosbg\t%0,%1,%b4,%b5,%b6"; -} +(define_insn "*insv_rnsbg_srl" + [(set (zero_extract:DI + (match_operand:DI 0 "nonimmediate_operand" "+d") + (match_operand 1 "const_int_operand" "") + (match_operand 2 "const_int_operand" "")) + (and:DI + (lshiftrt:DI + (match_dup 0) + (match_operand 3 "const_int_operand" "")) + (match_operand:DI 4 "nonimmediate_operand" "d"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10 + && INTVAL (operands[3]) == 64 - INTVAL (operands[1]) - INTVAL (operands[2])" + "rnsbg\t%0,%4,%2,%2+%1-1,%3" [(set_attr "op_type" "RIE")]) (define_insn "*insv<mode>_mem_reg" @@ -3568,15 +3659,14 @@ [(set_attr "op_type" "RIL") (set_attr "z10prop" "z10_fwd_E1")]) -; Update the right-most 32 bit of a DI, or the whole of a SI. 
-(define_insn "*insv_l<mode>_reg_extimm" - [(set (zero_extract:P (match_operand:P 0 "register_operand" "+d") - (const_int 32) - (match_operand 1 "const_int_operand" "n")) - (match_operand:P 2 "const_int_operand" "n"))] - "TARGET_EXTIMM - && BITS_PER_WORD - INTVAL (operands[1]) == 32" - "iilf\t%0,%o2" +; Update the right-most 32 bit of a DI. +(define_insn "*insv_l_di_reg_extimm" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+d") + (const_int 32) + (const_int 32)) + (match_operand:DI 1 "const_int_operand" "n"))] + "TARGET_EXTIMM" + "iilf\t%0,%o1" [(set_attr "op_type" "RIL") (set_attr "z10prop" "z10_fwd_A1")]) @@ -3630,8 +3720,7 @@ } else if (!TARGET_EXTIMM) { - rtx bitcount = GEN_INT (GET_MODE_BITSIZE (<DSI:MODE>mode) - - GET_MODE_BITSIZE (<HQI:MODE>mode)); + rtx bitcount = GEN_INT (<DSI:bitsize> - <HQI:bitsize>); operands[1] = gen_lowpart (<DSI:MODE>mode, operands[1]); emit_insn (gen_ashl<DSI:mode>3 (operands[0], operands[1], bitcount)); @@ -3733,8 +3822,7 @@ { operands[1] = adjust_address (operands[1], BLKmode, 0); set_mem_size (operands[1], GET_MODE_SIZE (QImode)); - operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - - GET_MODE_BITSIZE (QImode)); + operands[2] = GEN_INT (<GPR:bitsize> - BITS_PER_UNIT); }) ; @@ -3845,8 +3933,7 @@ } else if (!TARGET_EXTIMM) { - rtx bitcount = GEN_INT (GET_MODE_BITSIZE(DImode) - - GET_MODE_BITSIZE(<MODE>mode)); + rtx bitcount = GEN_INT (64 - <HQI:bitsize>); operands[1] = gen_lowpart (DImode, operands[1]); emit_insn (gen_ashldi3 (operands[0], operands[1], bitcount)); emit_insn (gen_lshrdi3 (operands[0], operands[0], bitcount)); @@ -3863,7 +3950,7 @@ { operands[1] = gen_lowpart (SImode, operands[1]); emit_insn (gen_andsi3 (operands[0], operands[1], - GEN_INT ((1 << GET_MODE_BITSIZE(<MODE>mode)) - 1))); + GEN_INT ((1 << <HQI:bitsize>) - 1))); DONE; } }) @@ -4056,8 +4143,8 @@ REAL_VALUE_TYPE cmp, sub; operands[1] = force_reg (<BFP:MODE>mode, operands[1]); - real_2expN (&cmp, GET_MODE_BITSIZE(<GPR:MODE>mode) - 1, 
<BFP:MODE>mode); - real_2expN (&sub, GET_MODE_BITSIZE(<GPR:MODE>mode), <BFP:MODE>mode); + real_2expN (&cmp, <GPR:bitsize> - 1, <BFP:MODE>mode); + real_2expN (&sub, <GPR:bitsize>, <BFP:MODE>mode); emit_cmp_and_jump_insns (operands[1], CONST_DOUBLE_FROM_REAL_VALUE (cmp, <BFP:MODE>mode), @@ -4772,9 +4859,9 @@ (plus:GPR (match_dup 1) (match_dup 2)))] "s390_match_ccmode (insn, CCAmode) && (CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'K', \"K\") - || CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'O', \"Os\") - || CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'C', \"C\")) - && INTVAL (operands[2]) != -((HOST_WIDE_INT)1 << (GET_MODE_BITSIZE(<MODE>mode) - 1))" + || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'O', \"Os\") + /* Avoid INT32_MIN on 32 bit. */ + && (!TARGET_ZARCH || INTVAL (operands[2]) != -0x7fffffff - 1)))" "@ a<g>hi\t%0,%h2 a<g>hik\t%0,%1,%h2 @@ -6044,44 +6131,50 @@ (define_insn "*anddi3_cc" [(set (reg CC_REGNUM) - (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0") - (match_operand:DI 2 "general_operand" " d,d,RT")) - (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=d,d, d") + (compare + (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0, d") + (match_operand:DI 2 "general_operand" " d,d,RT,NxxDq")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=d,d, d, d") (and:DI (match_dup 1) (match_dup 2)))] - "s390_match_ccmode(insn, CCTmode) && TARGET_ZARCH" + "TARGET_ZARCH && s390_match_ccmode(insn, CCTmode)" "@ ngr\t%0,%2 ngrk\t%0,%1,%2 - ng\t%0,%2" - [(set_attr "op_type" "RRE,RRF,RXY") - (set_attr "cpu_facility" "*,z196,*") - (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")]) + ng\t%0,%2 + risbg\t%0,%1,%s2,128+%e2,0" + [(set_attr "op_type" "RRE,RRF,RXY,RIE") + (set_attr "cpu_facility" "*,z196,*,z10") + (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1")]) (define_insn "*anddi3_cconly" [(set (reg CC_REGNUM) - (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 
0") - (match_operand:DI 2 "general_operand" " d,d,RT")) + (compare + (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0, d") + (match_operand:DI 2 "general_operand" " d,d,RT,NxxDq")) (const_int 0))) - (clobber (match_scratch:DI 0 "=d,d, d"))] - "s390_match_ccmode(insn, CCTmode) && TARGET_ZARCH + (clobber (match_scratch:DI 0 "=d,d, d, d"))] + "TARGET_ZARCH + && s390_match_ccmode(insn, CCTmode) /* Do not steal TM patterns. */ && s390_single_part (operands[2], DImode, HImode, 0) < 0" "@ ngr\t%0,%2 ngrk\t%0,%1,%2 - ng\t%0,%2" - [(set_attr "op_type" "RRE,RRF,RXY") - (set_attr "cpu_facility" "*,z196,*") - (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")]) + ng\t%0,%2 + risbg\t%0,%1,%s2,128+%e2,0" + [(set_attr "op_type" "RRE,RRF,RXY,RIE") + (set_attr "cpu_facility" "*,z196,*,z10") + (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1")]) (define_insn "*anddi3" [(set (match_operand:DI 0 "nonimmediate_operand" - "=d,d, d, d, d, d, d, d,d,d, d, AQ,Q") - (and:DI (match_operand:DI 1 "nonimmediate_operand" - "%d,o, 0, 0, 0, 0, 0, 0,0,d, 0, 0,0") - (match_operand:DI 2 "general_operand" - "M, M,N0HDF,N1HDF,N2HDF,N3HDF,N0SDF,N1SDF,d,d,RT,NxQDF,Q"))) + "=d,d, d, d, d, d, d, d,d,d, d, d, AQ,Q") + (and:DI + (match_operand:DI 1 "nonimmediate_operand" + "%d,o, 0, 0, 0, 0, 0, 0,0,d, 0, d, 0,0") + (match_operand:DI 2 "general_operand" + "M, M,N0HDF,N1HDF,N2HDF,N3HDF,N0SDF,N1SDF,d,d,RT,NxxDq,NxQDF,Q"))) (clobber (reg:CC CC_REGNUM))] "TARGET_ZARCH && s390_logical_operator_ok_p (operands)" "@ @@ -6096,10 +6189,11 @@ ngr\t%0,%2 ngrk\t%0,%1,%2 ng\t%0,%2 + risbg\t%0,%1,%s2,128+%e2,0 # #" - [(set_attr "op_type" "RRE,RXE,RI,RI,RI,RI,RIL,RIL,RRE,RRF,RXY,SI,SS") - (set_attr "cpu_facility" "*,*,*,*,*,*,extimm,extimm,*,z196,*,*,*") + [(set_attr "op_type" "RRE,RXE,RI,RI,RI,RI,RIL,RIL,RRE,RRF,RXY,RIE,SI,SS") + (set_attr "cpu_facility" "*,*,*,*,*,*,extimm,extimm,*,z196,*,z10,*,*") (set_attr "z10prop" "*, *, z10_super_E1, @@ -6111,6 +6205,7 @@ z10_super_E1, *, z10_super_E1, + 
z10_super_E1, *, *")]) @@ -6124,6 +6219,36 @@ (clobber (reg:CC CC_REGNUM))])] "s390_narrow_logical_operator (AND, &operands[0], &operands[1]);") +;; These two are what combine generates for (ashift (zero_extract)). +(define_insn "*extzv_<mode>_srl" + [(set (match_operand:GPR 0 "register_operand" "=d") + (and:GPR (lshiftrt:GPR + (match_operand:GPR 1 "register_operand" "d") + (match_operand:GPR 2 "nonzero_shift_count_operand" "")) + (match_operand:GPR 3 "contiguous_bitmask_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10 + /* Note that even for the SImode pattern, the rotate is always DImode. */ + && s390_extzv_shift_ok (<bitsize>, -INTVAL (operands[2]), + INTVAL (operands[3]))" + "risbg\t%0,%1,%<bfstart>3,128+%<bfend>3,64-%2" + [(set_attr "op_type" "RIE") + (set_attr "z10prop" "z10_super_E1")]) + +(define_insn "*extzv_<mode>_sll" + [(set (match_operand:GPR 0 "register_operand" "=d") + (and:GPR (ashift:GPR + (match_operand:GPR 1 "register_operand" "d") + (match_operand:GPR 2 "nonzero_shift_count_operand" "")) + (match_operand:GPR 3 "contiguous_bitmask_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_Z10 + && s390_extzv_shift_ok (<bitsize>, INTVAL (operands[2]), + INTVAL (operands[3]))" + "risbg\t%0,%1,%<bfstart>3,128+%<bfend>3,%2" + [(set_attr "op_type" "RIE") + (set_attr "z10prop" "z10_super_E1")]) + ; ; andsi3 instruction pattern(s). 
@@ -6131,10 +6256,12 @@ (define_insn "*andsi3_cc" [(set (reg CC_REGNUM) - (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0") - (match_operand:SI 2 "general_operand" "Os,d,d,R,T")) - (const_int 0))) - (set (match_operand:SI 0 "register_operand" "=d,d,d,d,d") + (compare + (and:SI + (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0, d") + (match_operand:SI 2 "general_operand" "Os,d,d,R,T,NxxSq")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=d,d,d,d,d, d") (and:SI (match_dup 1) (match_dup 2)))] "s390_match_ccmode(insn, CCTmode)" "@ @@ -6142,17 +6269,21 @@ nr\t%0,%2 nrk\t%0,%1,%2 n\t%0,%2 - ny\t%0,%2" - [(set_attr "op_type" "RIL,RR,RRF,RX,RXY") - (set_attr "cpu_facility" "*,*,z196,*,*") - (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,z10_super_E1,z10_super_E1")]) + ny\t%0,%2 + risbg\t%0,%1,%t2,128+%f2,0" + [(set_attr "op_type" "RIL,RR,RRF,RX,RXY,RIE") + (set_attr "cpu_facility" "*,*,z196,*,*,z10") + (set_attr "z10prop" "z10_super_E1,z10_super_E1,*, + z10_super_E1,z10_super_E1,z10_super_E1")]) (define_insn "*andsi3_cconly" [(set (reg CC_REGNUM) - (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0") - (match_operand:SI 2 "general_operand" "Os,d,d,R,T")) - (const_int 0))) - (clobber (match_scratch:SI 0 "=d,d,d,d,d"))] + (compare + (and:SI + (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0, d") + (match_operand:SI 2 "general_operand" "Os,d,d,R,T,NxxSq")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=d,d,d,d,d, d"))] "s390_match_ccmode(insn, CCTmode) /* Do not steal TM patterns. 
*/ && s390_single_part (operands[2], SImode, HImode, 0) < 0" @@ -6161,19 +6292,20 @@ nr\t%0,%2 nrk\t%0,%1,%2 n\t%0,%2 - ny\t%0,%2" - [(set_attr "op_type" "RIL,RR,RRF,RX,RXY") - (set_attr "cpu_facility" "*,*,z196,*,*") + ny\t%0,%2 + risbg\t%0,%1,%t2,128+%f2,0" + [(set_attr "op_type" "RIL,RR,RRF,RX,RXY,RIE") + (set_attr "cpu_facility" "*,*,z196,*,*,z10") (set_attr "z10prop" "z10_super_E1,z10_super_E1,*, - z10_super_E1,z10_super_E1")]) + z10_super_E1,z10_super_E1,z10_super_E1")]) (define_insn "*andsi3_zarch" [(set (match_operand:SI 0 "nonimmediate_operand" - "=d,d, d, d, d,d,d,d,d, AQ,Q") + "=d,d, d, d, d,d,d,d,d, d, AQ,Q") (and:SI (match_operand:SI 1 "nonimmediate_operand" - "%d,o, 0, 0, 0,0,d,0,0, 0,0") + "%d,o, 0, 0, 0,0,d,0,0, d, 0,0") (match_operand:SI 2 "general_operand" - " M,M,N0HSF,N1HSF,Os,d,d,R,T,NxQSF,Q"))) + " M,M,N0HSF,N1HSF,Os,d,d,R,T,NxxSq,NxQSF,Q"))) (clobber (reg:CC CC_REGNUM))] "TARGET_ZARCH && s390_logical_operator_ok_p (operands)" "@ @@ -6186,10 +6318,11 @@ nrk\t%0,%1,%2 n\t%0,%2 ny\t%0,%2 + risbg\t%0,%1,%t2,128+%f2,0 # #" - [(set_attr "op_type" "RRE,RXE,RI,RI,RIL,RR,RRF,RX,RXY,SI,SS") - (set_attr "cpu_facility" "*,*,*,*,*,*,z196,*,*,*,*") + [(set_attr "op_type" "RRE,RXE,RI,RI,RIL,RR,RRF,RX,RXY,RIE,SI,SS") + (set_attr "cpu_facility" "*,*,*,*,*,*,z196,*,*,z10,*,*") (set_attr "z10prop" "*, *, z10_super_E1, @@ -6199,6 +6332,7 @@ *, z10_super_E1, z10_super_E1, + z10_super_E1, *, *")]) diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index d4e97db8902..793aadace95 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -159,8 +159,6 @@ extern bool sh_expand_t_scc (rtx *); extern rtx sh_gen_truncate (enum machine_mode, rtx, int); extern bool sh_vector_mode_supported_p (enum machine_mode); extern bool sh_cfun_trap_exit_p (void); -extern void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&, - enum machine_mode mode = VOIDmode); extern rtx sh_find_equiv_gbr_addr (rtx cur_insn, rtx mem); extern int 
sh_eval_treg_value (rtx op); diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index cf0abb474be..4a42d7eeb5c 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -314,6 +314,9 @@ static int max_mov_insn_displacement (enum machine_mode, bool); static int mov_insn_alignment_mask (enum machine_mode, bool); static HOST_WIDE_INT disp_addr_displacement (rtx); static bool sequence_insn_p (rtx); +static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool); +static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&, + enum machine_mode, bool); static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED; @@ -586,6 +589,9 @@ static const struct attribute_spec sh_attribute_table[] = #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison + /* Machine-specific symbol_ref flags. */ #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0) @@ -1909,12 +1915,14 @@ prepare_move_operands (rtx operands[], enum machine_mode mode) } } -/* Implement the CANONICALIZE_COMPARISON macro for the combine pass. - This function is also re-used to canonicalize comparisons in cbranch - pattern expanders. */ -void +/* Implement the canonicalize_comparison target hook for the combine + pass. For the target hook this function is invoked via + sh_canonicalize_comparison. This function is also re-used to + canonicalize comparisons in cbranch pattern expanders. */ +static void sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1, - enum machine_mode mode) + enum machine_mode mode, + bool op0_preserve_value ATTRIBUTE_UNUSED) { /* When invoked from within the combine pass the mode is not specified, so try to get it from one of the operands. */ @@ -2008,6 +2016,19 @@ sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1, } } +/* This function implements the canonicalize_comparison target hook. 
+ This wrapper around the internally used sh_canonicalize_comparison + function is needed to do the enum rtx_code <-> int conversion. + Target hooks cannot use enum rtx_code in their definitions. */ +static void +sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) +{ + enum rtx_code tmp_code = (enum rtx_code)*code; + sh_canonicalize_comparison (tmp_code, *op0, *op1, + VOIDmode, op0_preserve_value); + *code = (int)tmp_code; +} enum rtx_code prepare_cbranch_operands (rtx *operands, enum machine_mode mode, enum rtx_code comparison) @@ -2021,7 +2042,8 @@ prepare_cbranch_operands (rtx *operands, enum machine_mode mode, else scratch = operands[4]; - sh_canonicalize_comparison (comparison, operands[1], operands[2], mode); + sh_canonicalize_comparison (comparison, operands[1], operands[2], + mode, false); /* Notice that this function is also invoked after reload by the cbranchdi4_i pattern, through expand_cbranchdi4. */ diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index 212b97d9c10..76a5cb1edda 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -1873,10 +1873,6 @@ struct sh_args { more compact code. */ #define SHIFT_COUNT_TRUNCATED (0) -/* CANONICALIZE_COMPARISON macro for the combine pass. */ -#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \ - sh_canonicalize_comparison ((CODE), (OP0), (OP1)) - /* All integers have the same format so truncation is easy. */ /* But SHmedia must sign-extend DImode when truncating to SImode. */ #define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) \ diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index 82fb4340cf5..15e7c2e2bba 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -7095,6 +7095,20 @@ spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, final_end_function (); } +/* Canonicalize a comparison from one we don't have to one we do have. 
*/ +static void +spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) +{ + if (!op0_preserve_value + && (*code == LE || *code == LT || *code == LEU || *code == LTU)) + { + rtx tem = *op0; + *op0 = *op1; + *op1 = tem; + *code = (int)swap_condition ((enum rtx_code)*code); + } +} /* Table of machine attributes. */ static const struct attribute_spec spu_attribute_table[] = @@ -7308,6 +7322,9 @@ static const struct attribute_spec spu_attribute_table[] = #undef TARGET_DELAY_VARTRACK #define TARGET_DELAY_VARTRACK true +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-spu.h" diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h index ce0bc8edb5f..031b80e1602 100644 --- a/gcc/config/spu/spu.h +++ b/gcc/config/spu/spu.h @@ -520,18 +520,6 @@ do { \ #define NO_IMPLICIT_EXTERN_C 1 -/* Canonicalize a comparison from one we don't have to one we do have. */ -#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \ - do { \ - if (((CODE) == LE || (CODE) == LT || (CODE) == LEU || (CODE) == LTU)) \ - { \ - rtx tem = (OP0); \ - (OP0) = (OP1); \ - (OP1) = tem; \ - (CODE) = swap_condition (CODE); \ - } \ - } while (0) - /* Address spaces. */ #define ADDR_SPACE_EA 1 |