path: root/gcc/config
author     bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2012-12-21 13:59:01 +0000
committer  bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2012-12-21 13:59:01 +0000
commit     2d5f6d3c1e2780698d60b53144be4d7eb7df2a02 (patch)
tree       4ef5de50928d3637f7cf6e978a5fc2ea84332013 /gcc/config
parent     89011aed2df43a3ea68ba208d8cacdb9d52a61c1 (diff)
download   gcc-2d5f6d3c1e2780698d60b53144be4d7eb7df2a02.tar.gz
2012-12-21 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk rev 194659 using svnmerge.py

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@194661 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64.c   |  16
-rw-r--r--  gcc/config/aarch64/aarch64.md  | 411
-rw-r--r--  gcc/config/alpha/alpha.c       |  27
-rw-r--r--  gcc/config/alpha/alpha.h       |  20
-rw-r--r--  gcc/config/arm/arm-protos.h    |   1
-rw-r--r--  gcc/config/arm/arm.c           | 232
-rw-r--r--  gcc/config/arm/arm.h           |   3
-rw-r--r--  gcc/config/arm/arm.md          |  81
-rw-r--r--  gcc/config/arm/arm1020e.md     |   2
-rw-r--r--  gcc/config/arm/arm1026ejs.md   |   2
-rw-r--r--  gcc/config/arm/arm1136jfs.md   |   2
-rw-r--r--  gcc/config/arm/arm926ejs.md    |   2
-rw-r--r--  gcc/config/arm/cortex-a15.md   |   2
-rw-r--r--  gcc/config/arm/cortex-a5.md    |   2
-rw-r--r--  gcc/config/arm/cortex-a7.md    | 353
-rw-r--r--  gcc/config/arm/cortex-a8.md    |   4
-rw-r--r--  gcc/config/arm/cortex-a9.md    |   4
-rw-r--r--  gcc/config/arm/cortex-m4.md    |   2
-rw-r--r--  gcc/config/arm/cortex-r4.md    |   2
-rw-r--r--  gcc/config/arm/driver-arm.c    |   1
-rw-r--r--  gcc/config/arm/fa526.md        |   2
-rw-r--r--  gcc/config/arm/fa606te.md      |   2
-rw-r--r--  gcc/config/arm/fa626te.md      |   2
-rw-r--r--  gcc/config/arm/fa726te.md      |   2
-rw-r--r--  gcc/config/arm/fmp626.md       |   2
-rw-r--r--  gcc/config/arm/iterators.md    |   4
-rw-r--r--  gcc/config/arm/neon.md         |  62
-rw-r--r--  gcc/config/arm/t-arm           |   1
-rw-r--r--  gcc/config/arm/thumb2.md       |  18
-rw-r--r--  gcc/config/arm/vfp.md          |  25
-rw-r--r--  gcc/config/i386/i386.c         |   6
-rw-r--r--  gcc/config/i386/i386.h         |   3
-rw-r--r--  gcc/config/i386/i386.md        | 114
-rw-r--r--  gcc/config/i386/t-kfreebsd     |   2
-rw-r--r--  gcc/config/rs6000/t-linux      |   4
-rw-r--r--  gcc/config/rs6000/t-spe        |   4
-rw-r--r--  gcc/config/s390/constraints.md |  11
-rw-r--r--  gcc/config/s390/predicates.md  |  10
-rw-r--r--  gcc/config/s390/s390-protos.h  |   2
-rw-r--r--  gcc/config/s390/s390.c         | 123
-rw-r--r--  gcc/config/s390/s390.h         |   4
-rw-r--r--  gcc/config/s390/s390.md        | 468
-rw-r--r--  gcc/config/sh/sh-protos.h      |   2
-rw-r--r--  gcc/config/sh/sh.c             |  34
-rw-r--r--  gcc/config/sh/sh.h             |   4
-rw-r--r--  gcc/config/spu/spu.c           |  17
-rw-r--r--  gcc/config/spu/spu.h           |  12
47 files changed, 1725 insertions, 384 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 7bc2f6b896a..03b13613c6e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5865,6 +5865,14 @@ aarch64_preferred_simd_mode (enum machine_mode mode)
return word_mode;
}
+/* Return the bitmask of possible vector sizes for the vectorizer
+ to iterate over. */
+static unsigned int
+aarch64_autovectorize_vector_sizes (void)
+{
+ return (16 | 8);
+}
+
/* A table to help perform AArch64-specific name mangling for AdvSIMD
vector types in order to conform to the AAPCS64 (see "Procedure
Call Standard for the ARM 64-bit Architecture", Appendix A). To
@@ -6374,8 +6382,6 @@ aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
int
aarch64_simd_attr_length_move (rtx insn)
{
- rtx reg, mem, addr;
- int load;
enum machine_mode mode;
extract_insn_cached (insn);
@@ -6658,7 +6664,6 @@ aarch64_split_compare_and_swap (rtx operands[])
{
rtx rval, mem, oldval, newval, scratch;
enum machine_mode mode;
- enum memmodel mod_s;
bool is_weak;
rtx label1, label2, x, cond;
@@ -6667,7 +6672,6 @@ aarch64_split_compare_and_swap (rtx operands[])
oldval = operands[2];
newval = operands[3];
is_weak = (operands[4] != const0_rtx);
- mod_s = (enum memmodel) INTVAL (operands[5]);
scratch = operands[7];
mode = GET_MODE (mem);
@@ -7519,6 +7523,10 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
aarch64_builtin_vectorized_function
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
+ aarch64_autovectorize_vector_sizes
+
/* Section anchor support. */
#undef TARGET_MIN_ANCHOR_OFFSET
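
The TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES hook added above returns a bitmask of vector sizes in bytes (16 | 8, i.e. both 128-bit and 64-bit AdvSIMD vectors) for the vectorizer to try. As a minimal standalone sketch of how such a bitmask can be decoded -- illustrative only, not part of the patch; the loop below is a hypothetical consumer, not the vectorizer's real code:

/* Hypothetical consumer of the (16 | 8) size bitmask; not GCC code.  */
#include <stdio.h>

static unsigned int
autovectorize_vector_sizes (void)
{
  return 16 | 8;   /* 16-byte vectors first, then 8-byte ones */
}

int
main (void)
{
  unsigned int sizes = autovectorize_vector_sizes ();
  while (sizes)
    {
      unsigned int size = 1u << (31 - __builtin_clz (sizes));  /* largest remaining bit */
      printf ("try %u-byte vectors\n", size);
      sizes &= ~size;
    }
  return 0;
}
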
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b59d53df86a..ec65b3c2a11 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -840,8 +840,8 @@
(define_insn "insv_imm<mode>"
[(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
(const_int 16)
- (match_operand 1 "const_int_operand" "n"))
- (match_operand 2 "const_int_operand" "n"))]
+ (match_operand:GPI 1 "const_int_operand" "n"))
+ (match_operand:GPI 2 "const_int_operand" "n"))]
"INTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode)
&& INTVAL (operands[1]) % 16 == 0
&& INTVAL (operands[2]) <= 0xffff"
@@ -1215,6 +1215,22 @@
(set_attr "mode" "SI")]
)
+;; zero_extend version of above
+(define_insn "*addsi3_aarch64_uxtw"
+ [(set
+ (match_operand:DI 0 "register_operand" "=rk,rk,rk")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk")
+ (match_operand:SI 2 "aarch64_plus_operand" "I,r,J"))))]
+ ""
+ "@
+ add\\t%w0, %w1, %2
+ add\\t%w0, %w1, %w2
+ sub\\t%w0, %w1, #%n2"
+ [(set_attr "v8type" "alu")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*adddi3_aarch64"
[(set
(match_operand:DI 0 "register_operand" "=rk,rk,rk,!w")
@@ -1248,6 +1264,23 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*addsi3_compare0_uxtw"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (plus:SI (match_operand:SI 1 "register_operand" "%r,r")
+ (match_operand:SI 2 "aarch64_plus_operand" "rI,J"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ ""
+ "@
+ adds\\t%w0, %w1, %w2
+ subs\\t%w0, %w1, #%n2"
+ [(set_attr "v8type" "alus")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*add<mode>3nr_compare0"
[(set (reg:CC_NZ CC_REGNUM)
(compare:CC_NZ
@@ -1284,6 +1317,19 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*add_<shift>_si_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (plus:SI (ASHIFT:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
+ (match_operand:SI 3 "register_operand" "r"))))]
+ ""
+ "add\\t%w0, %w3, %w1, <shift> %2"
+ [(set_attr "v8type" "alu_shift")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*add_mul_imm_<mode>"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1305,6 +1351,18 @@
(set_attr "mode" "<GPI:MODE>")]
)
+;; zero_extend version of above
+(define_insn "*add_<optab><SHORT:mode>_si_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (plus:SI (ANY_EXTEND:SI (match_operand:SHORT 1 "register_operand" "r"))
+ (match_operand:GPI 2 "register_operand" "r"))))]
+ ""
+ "add\\t%w0, %w2, %w1, <su>xt<SHORT:size>"
+ [(set_attr "v8type" "alu_ext")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*add_<optab><ALLX:mode>_shft_<GPI:mode>"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(plus:GPI (ashift:GPI (ANY_EXTEND:GPI
@@ -1317,6 +1375,20 @@
(set_attr "mode" "<GPI:MODE>")]
)
+;; zero_extend version of above
+(define_insn "*add_<optab><SHORT:mode>_shft_si_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (plus:SI (ashift:SI (ANY_EXTEND:SI
+ (match_operand:SHORT 1 "register_operand" "r"))
+ (match_operand 2 "aarch64_imm3" "Ui3"))
+ (match_operand:SI 3 "register_operand" "r"))))]
+ ""
+ "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %2"
+ [(set_attr "v8type" "alu_ext")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*add_<optab><ALLX:mode>_mult_<GPI:mode>"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(plus:GPI (mult:GPI (ANY_EXTEND:GPI
@@ -1329,6 +1401,19 @@
(set_attr "mode" "<GPI:MODE>")]
)
+;; zero_extend version of above
+(define_insn "*add_<optab><SHORT:mode>_mult_si_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI (plus:SI (mult:SI (ANY_EXTEND:SI
+ (match_operand:SHORT 1 "register_operand" "r"))
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+ (match_operand:SI 3 "register_operand" "r"))))]
+ ""
+ "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %p2"
+ [(set_attr "v8type" "alu_ext")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*add_<optab><mode>_multp2"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(plus:GPI (ANY_EXTRACT:GPI
@@ -1343,6 +1428,22 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*add_<optab>si_multp2_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (plus:SI (ANY_EXTRACT:SI
+ (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+ (match_operand 3 "const_int_operand" "n")
+ (const_int 0))
+ (match_operand:SI 4 "register_operand" "r"))))]
+ "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])"
+ "add\\t%w0, %w4, %w1, <su>xt%e3 %p2"
+ [(set_attr "v8type" "alu_ext")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*add<mode>3_carryin"
[(set
(match_operand:GPI 0 "register_operand" "=r")
@@ -1356,6 +1457,21 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_uxtw"
+ [(set
+ (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
+ (plus:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))))]
+ ""
+ "adc\\t%w0, %w1, %w2"
+ [(set_attr "v8type" "adc")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*add<mode>3_carryin_alt1"
[(set
(match_operand:GPI 0 "register_operand" "=r")
@@ -1369,6 +1485,21 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_alt1_uxtw"
+ [(set
+ (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r"))
+ (geu:SI (reg:CC CC_REGNUM) (const_int 0)))))]
+ ""
+ "adc\\t%w0, %w1, %w2"
+ [(set_attr "v8type" "adc")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*add<mode>3_carryin_alt2"
[(set
(match_operand:GPI 0 "register_operand" "=r")
@@ -1382,6 +1513,21 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_alt2_uxtw"
+ [(set
+ (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI
+ (geu:SI (reg:CC CC_REGNUM) (const_int 0))
+ (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:SI 2 "register_operand" "r"))))]
+ ""
+ "adc\\t%w0, %w1, %w2"
+ [(set_attr "v8type" "adc")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*add<mode>3_carryin_alt3"
[(set
(match_operand:GPI 0 "register_operand" "=r")
@@ -1395,6 +1541,21 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_alt3_uxtw"
+ [(set
+ (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI
+ (geu:SI (reg:CC CC_REGNUM) (const_int 0))
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:SI 1 "register_operand" "r"))))]
+ ""
+ "adc\\t%w0, %w1, %w2"
+ [(set_attr "v8type" "adc")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*add_uxt<mode>_multp2"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(plus:GPI (and:GPI
@@ -1411,6 +1572,24 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*add_uxtsi_multp2_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (plus:SI (and:SI
+ (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+ (match_operand 3 "const_int_operand" "n"))
+ (match_operand:SI 4 "register_operand" "r"))))]
+ "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), INTVAL (operands[3])) != 0"
+ "*
+ operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),
+ INTVAL (operands[3])));
+ return \"add\t%w0, %w4, %w1, uxt%e3 %p2\";"
+ [(set_attr "v8type" "alu_ext")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "subsi3"
[(set (match_operand:SI 0 "register_operand" "=rk")
(minus:SI (match_operand:SI 1 "register_operand" "r")
@@ -1421,6 +1600,18 @@
(set_attr "mode" "SI")]
)
+;; zero_extend version of above
+(define_insn "*subsi3_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r"))))]
+ ""
+ "sub\\t%w0, %w1, %w2"
+ [(set_attr "v8type" "alu")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "subdi3"
[(set (match_operand:DI 0 "register_operand" "=rk,!w")
(minus:DI (match_operand:DI 1 "register_operand" "r,!w")
@@ -1448,6 +1639,20 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*subsi3_compare0_uxtw"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))]
+ ""
+ "subs\\t%w0, %w1, %w2"
+ [(set_attr "v8type" "alus")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*sub_<shift>_<mode>"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -1460,6 +1665,20 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*sub_<shift>_si_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 3 "register_operand" "r")
+ (ASHIFT:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))]
+ ""
+ "sub\\t%w0, %w3, %w1, <shift> %2"
+ [(set_attr "v8type" "alu_shift")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*sub_mul_imm_<mode>"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -1472,6 +1691,20 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*sub_mul_imm_si_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 3 "register_operand" "r")
+ (mult:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))]
+ ""
+ "sub\\t%w0, %w3, %w1, lsl %p2"
+ [(set_attr "v8type" "alu_shift")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*sub_<optab><ALLX:mode>_<GPI:mode>"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(minus:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1483,6 +1716,19 @@
(set_attr "mode" "<GPI:MODE>")]
)
+;; zero_extend version of above
+(define_insn "*sub_<optab><SHORT:mode>_si_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (ANY_EXTEND:SI
+ (match_operand:SHORT 2 "register_operand" "r")))))]
+ ""
+ "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size>"
+ [(set_attr "v8type" "alu_ext")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*sub_<optab><ALLX:mode>_shft_<GPI:mode>"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(minus:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1495,6 +1741,20 @@
(set_attr "mode" "<GPI:MODE>")]
)
+;; zero_extend version of above
+(define_insn "*sub_<optab><SHORT:mode>_shft_si_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand" "r")
+ (ashift:SI (ANY_EXTEND:SI
+ (match_operand:SHORT 2 "register_operand" "r"))
+ (match_operand 3 "aarch64_imm3" "Ui3")))))]
+ ""
+ "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size> %3"
+ [(set_attr "v8type" "alu_ext")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*sub_<optab><mode>_multp2"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(minus:GPI (match_operand:GPI 4 "register_operand" "r")
@@ -1509,6 +1769,22 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*sub_<optab>si_multp2_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 4 "register_operand" "r")
+ (ANY_EXTRACT:SI
+ (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+ (match_operand 3 "const_int_operand" "n")
+ (const_int 0)))))]
+ "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])"
+ "sub\\t%w0, %w4, %w1, <su>xt%e3 %p2"
+ [(set_attr "v8type" "alu_ext")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*sub_uxt<mode>_multp2"
[(set (match_operand:GPI 0 "register_operand" "=rk")
(minus:GPI (match_operand:GPI 4 "register_operand" "r")
@@ -1525,6 +1801,24 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*sub_uxtsi_multp2_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=rk")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 4 "register_operand" "r")
+ (and:SI
+ (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+ (match_operand 3 "const_int_operand" "n")))))]
+ "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),INTVAL (operands[3])) != 0"
+ "*
+ operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),
+ INTVAL (operands[3])));
+ return \"sub\t%w0, %w4, %w1, uxt%e3 %p2\";"
+ [(set_attr "v8type" "alu_ext")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "neg<mode>2"
[(set (match_operand:GPI 0 "register_operand" "=r")
(neg:GPI (match_operand:GPI 1 "register_operand" "r")))]
@@ -1534,6 +1828,16 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*negsi2_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"))))]
+ ""
+ "neg\\t%w0, %w1"
+ [(set_attr "v8type" "alu")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*neg<mode>2_compare0"
[(set (reg:CC_NZ CC_REGNUM)
(compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r"))
@@ -1546,6 +1850,19 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*negsi2_compare0_uxtw"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ (neg:SI (match_operand:SI 1 "register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (neg:SI (match_dup 1))))]
+ ""
+ "negs\\t%w0, %w1"
+ [(set_attr "v8type" "alus")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*neg_<shift>_<mode>2"
[(set (match_operand:GPI 0 "register_operand" "=r")
(neg:GPI (ASHIFT:GPI
@@ -1557,6 +1874,19 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*neg_<shift>_si2_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (neg:SI (ASHIFT:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))]
+ ""
+ "neg\\t%w0, %w1, <shift> %2"
+ [(set_attr "v8type" "alu_shift")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*neg_mul_imm_<mode>2"
[(set (match_operand:GPI 0 "register_operand" "=r")
(neg:GPI (mult:GPI
@@ -1568,6 +1898,19 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*neg_mul_imm_si2_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (neg:SI (mult:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))]
+ ""
+ "neg\\t%w0, %w1, lsl %p2"
+ [(set_attr "v8type" "alu_shift")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "mul<mode>3"
[(set (match_operand:GPI 0 "register_operand" "=r")
(mult:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1578,6 +1921,18 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*mulsi3_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r"))))]
+ ""
+ "mul\\t%w0, %w1, %w2"
+ [(set_attr "v8type" "mult")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*madd<mode>"
[(set (match_operand:GPI 0 "register_operand" "=r")
(plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1589,6 +1944,19 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*maddsi_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:SI 3 "register_operand" "r"))))]
+ ""
+ "madd\\t%w0, %w1, %w2, %w3"
+ [(set_attr "v8type" "madd")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*msub<mode>"
[(set (match_operand:GPI 0 "register_operand" "=r")
(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -1601,6 +1969,20 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*msubsi_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 3 "register_operand" "r")
+ (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))))]
+
+ ""
+ "msub\\t%w0, %w1, %w2, %w3"
+ [(set_attr "v8type" "madd")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "*mul<mode>_neg"
[(set (match_operand:GPI 0 "register_operand" "=r")
(mult:GPI (neg:GPI (match_operand:GPI 1 "register_operand" "r"))
@@ -1612,6 +1994,19 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*mulsi_neg_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (mult:SI (neg:SI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:SI 2 "register_operand" "r"))))]
+
+ ""
+ "mneg\\t%w0, %w1, %w2"
+ [(set_attr "v8type" "mult")
+ (set_attr "mode" "SI")]
+)
+
(define_insn "<su_optab>mulsidi3"
[(set (match_operand:DI 0 "register_operand" "=r")
(mult:DI (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r"))
@@ -1682,6 +2077,18 @@
(set_attr "mode" "<MODE>")]
)
+;; zero_extend version of above
+(define_insn "*<su_optab>divsi3_uxtw"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (ANY_DIV:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r"))))]
+ ""
+ "<su>div\\t%w0, %w1, %w2"
+ [(set_attr "v8type" "<su>div")
+ (set_attr "mode" "SI")]
+)
+
;; -------------------------------------------------------------------
;; Comparison insns
;; -------------------------------------------------------------------
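
The *_uxtw patterns added throughout this file wrap a 32-bit (SImode) operation in (zero_extend:DI ...). They rely on the AArch64 rule that writing a W register zeroes the upper 32 bits of the corresponding X register, so the extension costs no extra instruction. A minimal C illustration (not part of the patch):

/* Illustrative only: a 32-bit add whose result is widened to 64 bits has
   the RTL shape (zero_extend:DI (plus:SI a b)), which *addsi3_aarch64_uxtw
   matches and emits as a single "add w0, w1, w2" -- no separate uxtw.  */
unsigned long long
add_u32 (unsigned int a, unsigned int b)
{
  return a + b;
}
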
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index 84112182426..fa2f7335881 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -9686,6 +9686,30 @@ alpha_conditional_register_usage (void)
for (i = 32; i < 63; i++)
fixed_regs[i] = call_used_regs[i] = 1;
}
+
+/* Canonicalize a comparison from one we don't have to one we do have. */
+
+static void
+alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
+ bool op0_preserve_value)
+{
+ if (!op0_preserve_value
+ && (*code == GE || *code == GT || *code == GEU || *code == GTU)
+ && (REG_P (*op1) || *op1 == const0_rtx))
+ {
+ rtx tem = *op0;
+ *op0 = *op1;
+ *op1 = tem;
+ *code = (int)swap_condition ((enum rtx_code)*code);
+ }
+
+ if ((*code == LT || *code == LTU)
+ && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
+ {
+ *code = *code == LT ? LE : LEU;
+ *op1 = GEN_INT (255);
+ }
+}
/* Initialize the GCC target structure. */
#if TARGET_ABI_OPEN_VMS
@@ -9853,6 +9877,9 @@ alpha_conditional_register_usage (void)
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
+#undef TARGET_CANONICALIZE_COMPARISON
+#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
+
struct gcc_target targetm = TARGET_INITIALIZER;
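
The new alpha_canonicalize_comparison hook carries over the rewrite previously done by the CANONICALIZE_COMPARISON macro that the alpha.h hunk below removes: swap the operands of GE/GT/GEU/GTU comparisons when that is allowed, and turn "x < 256" into "x <= 255" so the constant fits Alpha's 8-bit literal field. A standalone model of the constant rewrite -- illustrative only; the enum and driver below are made up, not GCC code:

#include <stdio.h>

/* Toy comparison codes mirroring the rtx codes the hook handles.  */
enum cmp_code { LT, LE, LTU, LEU };

static void
canonicalize (enum cmp_code *code, long *op1)
{
  /* "x < 256" has no 8-bit literal form on Alpha; "x <= 255" does.  */
  if ((*code == LT || *code == LTU) && *op1 == 256)
    {
      *code = (*code == LT) ? LE : LEU;
      *op1 = 255;
    }
}

int
main (void)
{
  enum cmp_code code = LT;
  long imm = 256;
  canonicalize (&code, &imm);
  printf ("code=%s imm=%ld\n", code == LE ? "LE" : "?", imm);  /* code=LE imm=255 */
  return 0;
}
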
diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h
index bc14d84055b..a70c7f89f47 100644
--- a/gcc/config/alpha/alpha.h
+++ b/gcc/config/alpha/alpha.h
@@ -922,26 +922,6 @@ do { \
#define FLOAT_STORE_FLAG_VALUE(MODE) \
REAL_VALUE_ATOF ((TARGET_FLOAT_VAX ? "0.5" : "2.0"), (MODE))
-/* Canonicalize a comparison from one we don't have to one we do have. */
-
-#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \
- do { \
- if (((CODE) == GE || (CODE) == GT || (CODE) == GEU || (CODE) == GTU) \
- && (REG_P (OP1) || (OP1) == const0_rtx)) \
- { \
- rtx tem = (OP0); \
- (OP0) = (OP1); \
- (OP1) = tem; \
- (CODE) = swap_condition (CODE); \
- } \
- if (((CODE) == LT || (CODE) == LTU) \
- && CONST_INT_P (OP1) && INTVAL (OP1) == 256) \
- { \
- (CODE) = (CODE) == LT ? LE : LEU; \
- (OP1) = GEN_INT (255); \
- } \
- } while (0)
-
/* Specify the machine mode that pointers have.
After generation of rtl, the compiler makes no further distinction
between pointers and any other objects of this machine mode. */
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index d942c5b07a1..4c61e35ea28 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -53,7 +53,6 @@ extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int);
-extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
extern int legitimate_pic_operand_p (rtx);
extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx);
extern rtx legitimize_tls_address (rtx, rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 84ce56fb84d..13d745fb973 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -132,6 +132,7 @@ static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
+static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
unsigned HOST_WIDE_INT val,
struct four_ints *return_sequence);
@@ -269,7 +270,8 @@ static int arm_cortex_a5_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel);
-
+static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
+ bool op0_preserve_value);
/* Table of machine attributes. */
static const struct attribute_spec arm_attribute_table[] =
@@ -366,6 +368,9 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER arm_sched_reorder
+
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost
@@ -626,6 +631,10 @@ static const struct attribute_spec arm_attribute_table[] =
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
arm_vectorize_vec_perm_const_ok
+#undef TARGET_CANONICALIZE_COMPARISON
+#define TARGET_CANONICALIZE_COMPARISON \
+ arm_canonicalize_comparison
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling. */
@@ -3543,8 +3552,9 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
This can be done for a few constant compares, where we can make the
immediate value easier to load. */
-enum rtx_code
-arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
+static void
+arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
+ bool op0_preserve_value)
{
enum machine_mode mode;
unsigned HOST_WIDE_INT i, maxval;
@@ -3563,15 +3573,15 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
{
rtx tem;
- if (code == GT || code == LE
- || (!TARGET_ARM && (code == GTU || code == LEU)))
+ if (*code == GT || *code == LE
+ || (!TARGET_ARM && (*code == GTU || *code == LEU)))
{
/* Missing comparison. First try to use an available
comparison. */
if (CONST_INT_P (*op1))
{
i = INTVAL (*op1);
- switch (code)
+ switch (*code)
{
case GT:
case LE:
@@ -3579,7 +3589,8 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
&& arm_const_double_by_immediates (GEN_INT (i + 1)))
{
*op1 = GEN_INT (i + 1);
- return code == GT ? GE : LT;
+ *code = *code == GT ? GE : LT;
+ return;
}
break;
case GTU:
@@ -3588,7 +3599,8 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
&& arm_const_double_by_immediates (GEN_INT (i + 1)))
{
*op1 = GEN_INT (i + 1);
- return code == GTU ? GEU : LTU;
+ *code = *code == GTU ? GEU : LTU;
+ return;
}
break;
default:
@@ -3597,13 +3609,15 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
}
/* If that did not work, reverse the condition. */
- tem = *op0;
- *op0 = *op1;
- *op1 = tem;
- return swap_condition (code);
+ if (!op0_preserve_value)
+ {
+ tem = *op0;
+ *op0 = *op1;
+ *op1 = tem;
+ *code = (int)swap_condition ((enum rtx_code)*code);
+ }
}
-
- return code;
+ return;
}
/* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
@@ -3624,15 +3638,15 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
if (!CONST_INT_P (*op1)
|| const_ok_for_arm (INTVAL (*op1))
|| const_ok_for_arm (- INTVAL (*op1)))
- return code;
+ return;
i = INTVAL (*op1);
- switch (code)
+ switch (*code)
{
case EQ:
case NE:
- return code;
+ return;
case GT:
case LE:
@@ -3640,7 +3654,8 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
&& (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
{
*op1 = GEN_INT (i + 1);
- return code == GT ? GE : LT;
+ *code = *code == GT ? GE : LT;
+ return;
}
break;
@@ -3650,7 +3665,8 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
&& (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
{
*op1 = GEN_INT (i - 1);
- return code == GE ? GT : LE;
+ *code = *code == GE ? GT : LE;
+ return;
}
break;
@@ -3660,7 +3676,8 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
&& (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
{
*op1 = GEN_INT (i + 1);
- return code == GTU ? GEU : LTU;
+ *code = *code == GTU ? GEU : LTU;
+ return;
}
break;
@@ -3670,15 +3687,14 @@ arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
&& (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
{
*op1 = GEN_INT (i - 1);
- return code == GEU ? GTU : LEU;
+ *code = *code == GEU ? GTU : LEU;
+ return;
}
break;
default:
gcc_unreachable ();
}
-
- return code;
}
@@ -5566,7 +5582,9 @@ thumb_find_work_register (unsigned long pushed_regs_mask)
if (! cfun->machine->uses_anonymous_args
&& crtl->args.size >= 0
&& crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
- && crtl->args.info.nregs < 4)
+ && (TARGET_AAPCS_BASED
+ ? crtl->args.info.aapcs_ncrn < 4
+ : crtl->args.info.nregs < 4))
return LAST_ARG_REGNUM;
/* Otherwise look for a call-saved register that is going to be pushed. */
@@ -8680,6 +8698,164 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
}
}
+
+/* Return true if and only if this insn can dual-issue only as older. */
+static bool
+cortexa7_older_only (rtx insn)
+{
+ if (recog_memoized (insn) < 0)
+ return false;
+
+ if (get_attr_insn (insn) == INSN_MOV)
+ return false;
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU_REG:
+ case TYPE_LOAD_BYTE:
+ case TYPE_LOAD1:
+ case TYPE_STORE1:
+ case TYPE_FFARITHS:
+ case TYPE_FADDS:
+ case TYPE_FFARITHD:
+ case TYPE_FADDD:
+ case TYPE_FCPYS:
+ case TYPE_F_CVT:
+ case TYPE_FCMPS:
+ case TYPE_FCMPD:
+ case TYPE_FCONSTS:
+ case TYPE_FCONSTD:
+ case TYPE_FMULS:
+ case TYPE_FMACS:
+ case TYPE_FMULD:
+ case TYPE_FMACD:
+ case TYPE_FDIVS:
+ case TYPE_FDIVD:
+ case TYPE_F_2_R:
+ case TYPE_F_FLAG:
+ case TYPE_F_LOADS:
+ case TYPE_F_STORES:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Return true if and only if this insn can dual-issue as younger. */
+static bool
+cortexa7_younger (FILE *file, int verbose, rtx insn)
+{
+ if (recog_memoized (insn) < 0)
+ {
+ if (verbose > 5)
+ fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
+ return false;
+ }
+
+ if (get_attr_insn (insn) == INSN_MOV)
+ return true;
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_SIMPLE_ALU_IMM:
+ case TYPE_SIMPLE_ALU_SHIFT:
+ case TYPE_BRANCH:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+/* Look for an instruction that can dual issue only as an older
+ instruction, and move it in front of any instructions that can
+ dual-issue as younger, while preserving the relative order of all
+ other instructions in the ready list. This is a heuristic to help
+ dual-issue in later cycles, by postponing issue of more flexible
+ instructions. This heuristic may affect dual issue opportunities
+ in the current cycle. */
+static void
+cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
+ int clock)
+{
+ int i;
+ int first_older_only = -1, first_younger = -1;
+
+ if (verbose > 5)
+ fprintf (file,
+ ";; sched_reorder for cycle %d with %d insns in ready list\n",
+ clock,
+ *n_readyp);
+
+ /* Traverse the ready list from the head (the instruction to issue
+ first), looking for the first instruction that can issue as
+ younger and the first instruction that can dual-issue only as
+ older. */
+ for (i = *n_readyp - 1; i >= 0; i--)
+ {
+ rtx insn = ready[i];
+ if (cortexa7_older_only (insn))
+ {
+ first_older_only = i;
+ if (verbose > 5)
+ fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
+ break;
+ }
+ else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
+ first_younger = i;
+ }
+
+ /* Nothing to reorder because either no younger insn found or insn
+ that can dual-issue only as older appears before any insn that
+ can dual-issue as younger. */
+ if (first_younger == -1)
+ {
+ if (verbose > 5)
+ fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
+ return;
+ }
+
+ /* Nothing to reorder because no older-only insn in the ready list. */
+ if (first_older_only == -1)
+ {
+ if (verbose > 5)
+ fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
+ return;
+ }
+
+ /* Move first_older_only insn before first_younger. */
+ if (verbose > 5)
+ fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
+ INSN_UID(ready [first_older_only]),
+ INSN_UID(ready [first_younger]));
+ rtx first_older_only_insn = ready [first_older_only];
+ for (i = first_older_only; i < first_younger; i++)
+ {
+ ready[i] = ready[i+1];
+ }
+
+ ready[i] = first_older_only_insn;
+ return;
+}
+
+/* Implement TARGET_SCHED_REORDER. */
+static int
+arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
+ int clock)
+{
+ switch (arm_tune)
+ {
+ case cortexa7:
+ cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
+ break;
+ default:
+ /* Do nothing for other cores. */
+ break;
+ }
+
+ return arm_issue_rate ();
+}
+
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
It corrects the value of COST based on the relationship between
INSN and DEP through the dependence LINK. It returns the new
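
The cortexa7_sched_reorder heuristic above moves an instruction that can dual-issue only as "older" in front of the "younger"-capable instructions ahead of it in the scheduler's ready list (where the last array element is issued first), keeping the relative order of everything else. A standalone model of the array manipulation -- illustrative only, not GCC code:

#include <stdio.h>

/* Each entry is 1 for an insn that dual-issues only as older, 0 for one
   that can dual-issue as younger; ready[n - 1] is issued first.  */
static void
reorder (int *ready, int n)
{
  int i, first_older_only = -1, first_younger = -1;

  for (i = n - 1; i >= 0; i--)
    {
      if (ready[i] == 1)
        {
          first_older_only = i;
          break;
        }
      else if (first_younger == -1)
        first_younger = i;
    }

  if (first_younger == -1 || first_older_only == -1)
    return;                      /* nothing to reorder */

  int older = ready[first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];     /* slide intervening insns down one slot */
  ready[i] = older;              /* older-only insn now issues first */
}

int
main (void)
{
  int ready[3] = { 1, 0, 0 };    /* older-only insn stuck behind two younger ones */
  reorder (ready, 3);
  printf ("%d %d %d\n", ready[0], ready[1], ready[2]);  /* prints: 0 0 1 */
  return 0;
}
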
@@ -25466,6 +25642,7 @@ arm_issue_rate (void)
case cortexr5:
case genericv7a:
case cortexa5:
+ case cortexa7:
case cortexa8:
case cortexa9:
case fa726te:
@@ -26979,7 +27156,7 @@ bool
arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
{
enum rtx_code code = GET_CODE (*comparison);
- enum rtx_code canonical_code;
+ int code_int;
enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
? GET_MODE (*op2) : GET_MODE (*op1);
@@ -26988,8 +27165,9 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
if (code == UNEQ || code == LTGT)
return false;
- canonical_code = arm_canonicalize_comparison (code, op1, op2);
- PUT_CODE (*comparison, canonical_code);
+ code_int = (int)code;
+ arm_canonicalize_comparison (&code_int, op1, op2, 0);
+ PUT_CODE (*comparison, (enum rtx_code)code_int);
switch (mode)
{
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index d0f351d861e..2fa945c0c04 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2078,9 +2078,6 @@ extern int making_const_table;
? reverse_condition_maybe_unordered (code) \
: reverse_condition (code))
-#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \
- (CODE) = arm_canonicalize_comparison (CODE, &(OP0), &(OP1))
-
/* The arm5 clz instruction returns 32. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 7f38816a14a..1cb1515b1fa 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -240,6 +240,7 @@
; regs or have a shifted source operand
; and does not have an immediate operand. This is
; also the default
+; simple_alu_shift covers UXTH, UXTB, SXTH, SXTB
; alu_shift any data instruction that doesn't hit memory or fp
; regs, but has a source operand shifted by a constant
; alu_shift_reg any data instruction that doesn't hit memory or fp
@@ -271,6 +272,7 @@
(define_attr "type"
"simple_alu_imm,\
alu_reg,\
+ simple_alu_shift,\
alu_shift,\
alu_shift_reg,\
mult,\
@@ -284,6 +286,8 @@
fmacd,\
f_rints,\
f_rintd,\
+ f_minmaxs,\
+ f_minmaxd,\
f_flag,\
f_loads,\
f_loadd,\
@@ -454,7 +458,9 @@
; than one on the main cpu execution unit.
(define_attr "core_cycles" "single,multi"
(if_then_else (eq_attr "type"
- "simple_alu_imm,alu_reg,alu_shift,float,fdivd,fdivs")
+ "simple_alu_imm,alu_reg,\
+ simple_alu_shift,alu_shift,\
+ float,fdivd,fdivs")
(const_string "single")
(const_string "multi")))
@@ -496,7 +502,7 @@
(define_attr "generic_sched" "yes,no"
(const (if_then_else
- (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4")
+ (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4")
(eq_attr "tune_cortexr4" "yes"))
(const_string "no")
(const_string "yes"))))
@@ -504,7 +510,7 @@
(define_attr "generic_vfp" "yes,no"
(const (if_then_else
(and (eq_attr "fpu" "vfp")
- (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4")
+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4")
(eq_attr "tune_cortexr4" "no"))
(const_string "yes")
(const_string "no"))))
@@ -521,6 +527,7 @@
(include "fmp626.md")
(include "fa726te.md")
(include "cortex-a5.md")
+(include "cortex-a7.md")
(include "cortex-a8.md")
(include "cortex-a9.md")
(include "cortex-a15.md")
@@ -4484,33 +4491,36 @@
;; Zero and sign extension instructions.
(define_insn "zero_extend<mode>di2"
- [(set (match_operand:DI 0 "s_register_operand" "=r")
+ [(set (match_operand:DI 0 "s_register_operand" "=w,r,?r")
(zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
"<qhs_zextenddi_cstr>")))]
"TARGET_32BIT <qhs_zextenddi_cond>"
"#"
- [(set_attr "length" "8")
+ [(set_attr "length" "8,4,8")
(set_attr "ce_count" "2")
(set_attr "predicable" "yes")]
)
(define_insn "extend<mode>di2"
- [(set (match_operand:DI 0 "s_register_operand" "=r")
+ [(set (match_operand:DI 0 "s_register_operand" "=w,r,?r,?r")
(sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
"<qhs_extenddi_cstr>")))]
"TARGET_32BIT <qhs_sextenddi_cond>"
"#"
- [(set_attr "length" "8")
+ [(set_attr "length" "8,4,8,8")
(set_attr "ce_count" "2")
(set_attr "shift" "1")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "arch" "*,*,a,t")]
)
;; Splits for all extensions to DImode
(define_split
[(set (match_operand:DI 0 "s_register_operand" "")
(zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
- "TARGET_32BIT"
+ "TARGET_32BIT && (!TARGET_NEON
+ || (reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))))"
[(set (match_dup 0) (match_dup 1))]
{
rtx lo_part = gen_lowpart (SImode, operands[0]);
@@ -4536,7 +4546,9 @@
(define_split
[(set (match_operand:DI 0 "s_register_operand" "")
(sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
- "TARGET_32BIT"
+ "TARGET_32BIT && (!TARGET_NEON
+ || (reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))))"
[(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))]
{
rtx lo_part = gen_lowpart (SImode, operands[0]);
@@ -4629,11 +4641,7 @@
[(if_then_else (eq_attr "is_arch6" "yes")
(const_int 2) (const_int 4))
(const_int 4)])
- (set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")])]
+ (set_attr "type" "simple_alu_shift, load_byte")]
)
(define_insn "*arm_zero_extendhisi2"
@@ -4655,11 +4663,7 @@
uxth%?\\t%0, %1
ldr%(h%)\\t%0, %1"
[(set_attr "predicable" "yes")
- (set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")])]
+ (set_attr "type" "simple_alu_shift,load_byte")]
)
(define_insn "*arm_zero_extendhisi2addsi"
@@ -4729,11 +4733,7 @@
uxtb\\t%0, %1
ldrb\\t%0, %1"
[(set_attr "length" "2")
- (set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")])]
+ (set_attr "type" "simple_alu_shift,load_byte")]
)
(define_insn "*arm_zero_extendqisi2"
@@ -4755,11 +4755,7 @@
"@
uxtb%(%)\\t%0, %1
ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2"
- [(set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")])
+ [(set_attr "type" "simple_alu_shift,load_byte")
(set_attr "predicable" "yes")]
)
@@ -4933,11 +4929,7 @@
[(if_then_else (eq_attr "is_arch6" "yes")
(const_int 2) (const_int 4))
(const_int 4)])
- (set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")])
+ (set_attr "type" "simple_alu_shift,load_byte")
(set_attr "pool_range" "*,1018")]
)
@@ -5010,11 +5002,7 @@
"@
sxth%?\\t%0, %1
ldr%(sh%)\\t%0, %1"
- [(set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")])
+ [(set_attr "type" "simple_alu_shift,load_byte")
(set_attr "predicable" "yes")
(set_attr "pool_range" "*,256")
(set_attr "neg_pool_range" "*,244")]
@@ -5114,11 +5102,7 @@
"@
sxtb%?\\t%0, %1
ldr%(sb%)\\t%0, %1"
- [(set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")])
+ [(set_attr "type" "simple_alu_shift,load_byte")
(set_attr "predicable" "yes")
(set_attr "pool_range" "*,256")
(set_attr "neg_pool_range" "*,244")]
@@ -5231,12 +5215,7 @@
(const_int 2)
(if_then_else (eq_attr "is_arch6" "yes")
(const_int 4) (const_int 6))])
- (set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")
- (const_string "load_byte")])]
+ (set_attr "type" "simple_alu_shift,load_byte,load_byte")]
)
(define_expand "extendsfdf2"
diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md
index 3d3ff23e7c6..9a41d305736 100644
--- a/gcc/config/arm/arm1020e.md
+++ b/gcc/config/arm/arm1020e.md
@@ -72,7 +72,7 @@
;; ALU operations with a shift-by-constant operand
(define_insn_reservation "1020alu_shift_op" 1
(and (eq_attr "tune" "arm1020e,arm1022e")
- (eq_attr "type" "alu_shift"))
+ (eq_attr "type" "simple_alu_shift,alu_shift"))
"1020a_e,1020a_m,1020a_w")
;; ALU operations with a shift-by-register operand
diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md
index d9ed858f861..52f6241d7dd 100644
--- a/gcc/config/arm/arm1026ejs.md
+++ b/gcc/config/arm/arm1026ejs.md
@@ -72,7 +72,7 @@
;; ALU operations with a shift-by-constant operand
(define_insn_reservation "alu_shift_op" 1
(and (eq_attr "tune" "arm1026ejs")
- (eq_attr "type" "alu_shift"))
+ (eq_attr "type" "simple_alu_shift,alu_shift"))
"a_e,a_m,a_w")
;; ALU operations with a shift-by-register operand
diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md
index ff5e614b37b..9e885586072 100644
--- a/gcc/config/arm/arm1136jfs.md
+++ b/gcc/config/arm/arm1136jfs.md
@@ -81,7 +81,7 @@
;; ALU operations with a shift-by-constant operand
(define_insn_reservation "11_alu_shift_op" 2
(and (eq_attr "tune" "arm1136js,arm1136jfs")
- (eq_attr "type" "alu_shift"))
+ (eq_attr "type" "simple_alu_shift,alu_shift"))
"e_1,e_2,e_3,e_wb")
;; ALU operations with a shift-by-register operand
diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md
index 656a90e41af..4c94e3337ab 100644
--- a/gcc/config/arm/arm926ejs.md
+++ b/gcc/config/arm/arm926ejs.md
@@ -58,7 +58,7 @@
;; ALU operations with no shifted operand
(define_insn_reservation "9_alu_op" 1
(and (eq_attr "tune" "arm926ejs")
- (eq_attr "type" "alu_reg,simple_alu_imm,alu_shift"))
+ (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift"))
"e,m,w")
;; ALU operations with a shift-by-register operand
diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md
index f25fcee9f01..33e53df2b55 100644
--- a/gcc/config/arm/cortex-a15.md
+++ b/gcc/config/arm/cortex-a15.md
@@ -68,7 +68,7 @@
;; ALU ops with immediate shift
(define_insn_reservation "cortex_a15_alu_shift" 3
(and (eq_attr "tune" "cortexa15")
- (and (eq_attr "type" "alu_shift")
+ (and (eq_attr "type" "simple_alu_shift,alu_shift")
(eq_attr "neon_type" "none")))
"ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\
|(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)")
diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md
index 1121c7effcf..2b5abe524a6 100644
--- a/gcc/config/arm/cortex-a5.md
+++ b/gcc/config/arm/cortex-a5.md
@@ -63,7 +63,7 @@
(define_insn_reservation "cortex_a5_alu_shift" 2
(and (eq_attr "tune" "cortexa5")
- (eq_attr "type" "alu_shift,alu_shift_reg"))
+ (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg"))
"cortex_a5_ex1")
;; Forwarding path for unshifted operands.
diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md
new file mode 100644
index 00000000000..74d4ca0bc3d
--- /dev/null
+++ b/gcc/config/arm/cortex-a7.md
@@ -0,0 +1,353 @@
+;; ARM Cortex-A7 pipeline description
+;; Copyright (C) 2012 Free Software Foundation, Inc.
+;;
+;; Contributed by ARM Ltd.
+;; Based on cortex-a5.md which was originally contributed by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_a7")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Functional units.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The Cortex-A7 pipeline integer and vfp pipeline.
+;; The decode is the same for all instructions, so do not model it.
+;; We only model the first execution stage because
+;; instructions always advance one stage per cycle in order.
+;; We model all of the LS, Branch, ALU, MAC and FPU pipelines together.
+
+(define_cpu_unit "cortex_a7_ex1, cortex_a7_ex2" "cortex_a7")
+
+(define_reservation "cortex_a7_both" "cortex_a7_ex1+cortex_a7_ex2")
+
+(define_cpu_unit "cortex_a7_branch" "cortex_a7")
+
+;; Cortex-A7 is in order and can dual-issue under limited circumstances.
+;; ex2 can be reserved only after ex1 is reserved.
+
+(final_presence_set "cortex_a7_ex2" "cortex_a7_ex1")
+
+;; Pseudo-unit for blocking the multiply pipeline when a double-precision
+;; multiply is in progress.
+
+(define_cpu_unit "cortex_a7_fpmul_pipe" "cortex_a7")
+
+;; The floating-point add pipeline (ex1/f1 stage), used to model the usage
+;; of the add pipeline by fmac instructions, etc.
+
+(define_cpu_unit "cortex_a7_fpadd_pipe" "cortex_a7")
+
+;; Floating-point div/sqrt (long latency, out-of-order completion).
+
+(define_cpu_unit "cortex_a7_fp_div_sqrt" "cortex_a7")
+
+;; Neon pipeline
+(define_cpu_unit "cortex_a7_neon" "cortex_a7")
+
+(define_reservation "cortex_a7_all" "cortex_a7_both+\
+ cortex_a7_fpmul_pipe+\
+ cortex_a7_fpadd_pipe+\
+ cortex_a7_fp_div_sqrt+\
+ cortex_a7_neon")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Branches.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; A direct branch can dual issue either as younger or older instruction,
+;; but branches cannot dual issue with branches.
+;; No latency as there is no result.
+
+(define_insn_reservation "cortex_a7_branch" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "branch")
+ (eq_attr "neon_type" "none")))
+ "(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch")
+
+;; A call reserves all issue slots. The result is available the next cycle.
+(define_insn_reservation "cortex_a7_call" 1
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "call")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_all")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ALU instructions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; ALU instruction with an immediate operand can dual-issue.
+(define_insn_reservation "cortex_a7_alu_imm" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (ior (eq_attr "type" "simple_alu_imm")
+ (ior (eq_attr "type" "simple_alu_shift")
+ (and (eq_attr "insn" "mov")
+ (not (eq_attr "length" "8")))))
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex2|cortex_a7_ex1")
+
+;; ALU instruction with register operands can dual-issue
+;; with a younger immediate-based instruction.
+(define_insn_reservation "cortex_a7_alu_reg" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "alu_reg")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_alu_shift" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "alu_shift,alu_shift_reg")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+;; Forwarding path for unshifted operands.
+(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
+ "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_mul")
+
+(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
+ "cortex_a7_store*"
+ "arm_no_early_store_addr_dep")
+
+(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
+ "cortex_a7_alu_shift"
+ "arm_no_early_alu_shift_dep")
+
+;; The multiplier pipeline can forward results from wr stage only so
+;; there's no need to specify bypasses.
+;; Multiply instructions cannot dual-issue.
+
+(define_insn_reservation "cortex_a7_mul" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "mult")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+;; The latency depends on the operands, so we use an estimate here.
+(define_insn_reservation "cortex_a7_idiv" 5
+ (and (eq_attr "tune" "cortexa7")
+ (eq_attr "insn" "udiv,sdiv"))
+ "cortex_a7_all*5")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/store instructions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Address-generation happens in the issue stage.
+;; Double-word accesses can be issued in a single cycle,
+;; and occupy only one pipeline stage.
+
+(define_insn_reservation "cortex_a7_load1" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "load_byte,load1")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_store1" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "store1")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_load2" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "load2")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+(define_insn_reservation "cortex_a7_store2" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "store2")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+(define_insn_reservation "cortex_a7_load3" 3
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "load3")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both, cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_store3" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "store4")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both, cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_load4" 3
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "load4")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both, cortex_a7_both")
+
+(define_insn_reservation "cortex_a7_store4" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "store3")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both, cortex_a7_both")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Floating-point arithmetic.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a7_fpalu" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys,\
+ f_cvt, fcmps, fcmpd")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpadd_pipe")
+
+;; For fconsts and fconstd, 8-bit immediate data is passed directly from
+;; f1 to f3 (which I think reduces the latency by one cycle).
+
+(define_insn_reservation "cortex_a7_fconst" 3
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fconsts,fconstd")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpadd_pipe")
+
+;; We should try not to attempt to issue a single-precision multiplication in
+;; the middle of a double-precision multiplication operation (the usage of
+;; cortex_a7_fpmul_pipe).
+
+(define_insn_reservation "cortex_a7_fpmuls" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fmuls")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpmul_pipe")
+
+;; For single-precision multiply-accumulate, the add (accumulate) is issued
+;; whilst the multiply is in F4. The multiply result can then be forwarded
+;; from F5 to F1. The issue unit is only used once (when we first start
+;; processing the instruction), but the usage of the FP add pipeline could
+;; block other instructions attempting to use it simultaneously. We try to
+;; avoid that using cortex_a7_fpadd_pipe.
+
+(define_insn_reservation "cortex_a7_fpmacs" 8
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fmacs")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
+
+;; Non-multiply instructions can issue between two cycles of a
+;; double-precision multiply.
+
+(define_insn_reservation "cortex_a7_fpmuld" 7
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fmuld")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
+ cortex_a7_ex1+cortex_a7_fpmul_pipe")
+
+(define_insn_reservation "cortex_a7_fpmacd" 11
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fmacd")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
+ cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Floating-point divide/square root instructions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a7_fdivs" 16
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fdivs")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 14")
+
+(define_insn_reservation "cortex_a7_fdivd" 29
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "fdivd")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1, cortex_a7_fp_div_sqrt * 28")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; VFP to/from core transfers.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Core-to-VFP transfers.
+
+(define_insn_reservation "cortex_a7_r2f" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "r_2_f")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+(define_insn_reservation "cortex_a7_f2r" 2
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_2_r")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; VFP flag transfer.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Fixme: The flag forwarding from fmstat to the second instruction is
+;; not modeled at present.
+
+(define_insn_reservation "cortex_a7_f_flags" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_flag")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; VFP load/store.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a7_f_loads" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_loads")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_f_loadd" 4
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_loadd")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+(define_insn_reservation "cortex_a7_f_stores" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_stores")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_ex1")
+
+(define_insn_reservation "cortex_a7_f_stored" 0
+ (and (eq_attr "tune" "cortexa7")
+ (and (eq_attr "type" "f_stored")
+ (eq_attr "neon_type" "none")))
+ "cortex_a7_both")
+
+;; Load-to-use for floating-point values has a penalty of one cycle,
+;; i.e. a latency of two.
+
+(define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd"
+ "cortex_a7_fpalu, cortex_a7_fpmacs, cortex_a7_fpmuld,\
+ cortex_a7_fpmacd, cortex_a7_fdivs, cortex_a7_fdivd,\
+ cortex_a7_f2r")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; NEON load/store.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+(define_insn_reservation "cortex_a7_neon" 4
+ (and (eq_attr "tune" "cortexa7")
+ (eq_attr "neon_type" "!none"))
+ "cortex_a7_both*2")
diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md
index 7c266d3b71f..73c61b15d0b 100644
--- a/gcc/config/arm/cortex-a8.md
+++ b/gcc/config/arm/cortex-a8.md
@@ -93,7 +93,7 @@
(define_insn_reservation "cortex_a8_alu_shift" 2
(and (eq_attr "tune" "cortexa8")
- (and (eq_attr "type" "alu_shift")
+ (and (eq_attr "type" "simple_alu_shift,alu_shift")
(not (eq_attr "insn" "mov,mvn"))))
"cortex_a8_default")
@@ -107,7 +107,7 @@
(define_insn_reservation "cortex_a8_mov" 1
(and (eq_attr "tune" "cortexa8")
- (and (eq_attr "type" "alu_reg,simple_alu_imm,alu_shift,alu_shift_reg")
+ (and (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg")
(eq_attr "insn" "mov,mvn")))
"cortex_a8_default")
diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md
index 336c4fcefae..f1bd7cfa91a 100644
--- a/gcc/config/arm/cortex-a9.md
+++ b/gcc/config/arm/cortex-a9.md
@@ -82,7 +82,7 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1")
(and (eq_attr "tune" "cortexa9")
(ior (and (eq_attr "type" "alu_reg,simple_alu_imm")
(eq_attr "neon_type" "none"))
- (and (and (eq_attr "type" "alu_shift_reg, alu_shift")
+ (and (and (eq_attr "type" "alu_shift_reg, simple_alu_shift,alu_shift")
(eq_attr "insn" "mov"))
(eq_attr "neon_type" "none"))))
"cortex_a9_p0_default|cortex_a9_p1_default")
@@ -90,7 +90,7 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1")
;; An instruction using the shifter will go down E1.
(define_insn_reservation "cortex_a9_dp_shift" 3
(and (eq_attr "tune" "cortexa9")
- (and (eq_attr "type" "alu_shift_reg, alu_shift")
+ (and (eq_attr "type" "alu_shift_reg, simple_alu_shift,alu_shift")
(not (eq_attr "insn" "mov"))))
"cortex_a9_p0_shift | cortex_a9_p1_shift")
diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md
index bff17dd77fb..063fe5fabdd 100644
--- a/gcc/config/arm/cortex-m4.md
+++ b/gcc/config/arm/cortex-m4.md
@@ -31,7 +31,7 @@
;; ALU and multiply is one cycle.
(define_insn_reservation "cortex_m4_alu" 1
(and (eq_attr "tune" "cortexm4")
- (eq_attr "type" "alu_reg,simple_alu_imm,alu_shift,alu_shift_reg,mult"))
+ (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg,mult"))
"cortex_m4_ex")
;; Byte, half-word and word load is two cycles.
diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md
index 26de65aa1b3..a870dc06f51 100644
--- a/gcc/config/arm/cortex-r4.md
+++ b/gcc/config/arm/cortex-r4.md
@@ -90,7 +90,7 @@
(define_insn_reservation "cortex_r4_alu_shift" 2
(and (eq_attr "tune_cortexr4" "yes")
- (eq_attr "type" "alu_shift"))
+ (eq_attr "type" "simple_alu_shift,alu_shift"))
"cortex_r4_alu")
(define_insn_reservation "cortex_r4_alu_shift_reg" 2
diff --git a/gcc/config/arm/driver-arm.c b/gcc/config/arm/driver-arm.c
index 3e14b14593c..3a17e104aca 100644
--- a/gcc/config/arm/driver-arm.c
+++ b/gcc/config/arm/driver-arm.c
@@ -37,6 +37,7 @@ static struct vendor_cpu arm_cpu_table[] = {
{"0xb56", "armv6t2", "arm1156t2-s"},
{"0xb76", "armv6zk", "arm1176jz-s"},
{"0xc05", "armv7-a", "cortex-a5"},
+ {"0xc07", "armv7-a", "cortex-a7"},
{"0xc08", "armv7-a", "cortex-a8"},
{"0xc09", "armv7-a", "cortex-a9"},
{"0xc0f", "armv7-a", "cortex-a15"},
diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md
index 2b89bb5429b..81085225753 100644
--- a/gcc/config/arm/fa526.md
+++ b/gcc/config/arm/fa526.md
@@ -67,7 +67,7 @@
(define_insn_reservation "526_alu_shift_op" 2
(and (eq_attr "tune" "fa526")
- (eq_attr "type" "alu_shift,alu_shift_reg"))
+ (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg"))
"fa526_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md
index 4725b93b6cc..d995b1d9663 100644
--- a/gcc/config/arm/fa606te.md
+++ b/gcc/config/arm/fa606te.md
@@ -62,7 +62,7 @@
;; ALU operations
(define_insn_reservation "606te_alu_op" 1
(and (eq_attr "tune" "fa606te")
- (eq_attr "type" "alu_reg,simple_alu_imm,alu_shift,alu_shift_reg"))
+ (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg"))
"fa606te_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md
index bed3995a5e2..6b01b06aaaf 100644
--- a/gcc/config/arm/fa626te.md
+++ b/gcc/config/arm/fa626te.md
@@ -73,7 +73,7 @@
(define_insn_reservation "626te_alu_shift_op" 2
(and (eq_attr "tune" "fa626,fa626te")
- (eq_attr "type" "alu_shift,alu_shift_reg"))
+ (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg"))
"fa626te_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md
index a4c256ce22e..7c898ab3b17 100644
--- a/gcc/config/arm/fa726te.md
+++ b/gcc/config/arm/fa726te.md
@@ -95,7 +95,7 @@
;; it takes 3 cycles.
(define_insn_reservation "726te_alu_shift_op" 3
(and (eq_attr "tune" "fa726te")
- (and (eq_attr "type" "alu_shift")
+ (and (eq_attr "type" "simple_alu_shift,alu_shift")
(not (eq_attr "insn" "mov,mvn"))))
"fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
diff --git a/gcc/config/arm/fmp626.md b/gcc/config/arm/fmp626.md
index 228817c85e5..f63b6bf54a2 100644
--- a/gcc/config/arm/fmp626.md
+++ b/gcc/config/arm/fmp626.md
@@ -68,7 +68,7 @@
(define_insn_reservation "mp626_alu_shift_op" 2
(and (eq_attr "tune" "fmp626")
- (eq_attr "type" "alu_shift,alu_shift_reg"))
+ (eq_attr "type" "simple_alu_shift,alu_shift,alu_shift_reg"))
"fmp626_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 5ae1aefe2b9..3a20f5fea83 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -429,8 +429,8 @@
(define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
(HI "nonimmediate_operand")
(QI "arm_reg_or_extendqisi_mem_op")])
-(define_mode_attr qhs_extenddi_cstr [(SI "r") (HI "rm") (QI "rUq")])
-(define_mode_attr qhs_zextenddi_cstr [(SI "r") (HI "rm") (QI "rm")])
+(define_mode_attr qhs_extenddi_cstr [(SI "r,0,r,r") (HI "r,0,rm,rm") (QI "r,0,rUq,rm")])
+(define_mode_attr qhs_zextenddi_cstr [(SI "r,0,r") (HI "r,0,rm") (QI "r,0,rm")])
;; Mode attributes used for fixed-point support.
(define_mode_attr qaddsub_suf [(V4UQQ "8") (V2UHQ "16") (UQQ "8") (UHQ "16")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index fc382698a0d..c3f14bb8edf 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -5932,3 +5932,65 @@
(const_string "neon_fp_vadd_qqq_vabs_qq"))
(const_string "neon_int_5")))]
)
+
+;; Copy from core registers to NEON registers, then extend, not vice versa.
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
+ (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
+ {
+ operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
+ })
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
+ "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
+ (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
+ {
+ operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
+ })
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
+ "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
+ (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
+ {
+ operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
+ })
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
+ "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
+ (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
+ {
+ operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
+ })
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
+ "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
+ (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
+ {
+ operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
+ })
+
+(define_split
+ [(set (match_operand:DI 0 "s_register_operand" "")
+ (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
+ "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
+ [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
+ (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
+ {
+ operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
+ })
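
These splits apply when the DImode result of a sign or zero extension has been allocated to a NEON (VFP) register: the narrow value is duplicated into the vector register first and the extension is finished there with a 64-bit shift, instead of extending in the core registers and transferring afterwards. A minimal C sketch of the kind of source involved; whether the result actually lands in a NEON register is an assumption that depends on register pressure, -mfpu and surrounding NEON code:

    /* Illustrative only: a widening conversion whose 64-bit result may be
       register-allocated to a NEON D register.  If it is, the splits above
       turn the sign_extend into a vdup of the SImode input followed by an
       arithmetic shift right by 32 within the D register.  */
    long long
    widen (int x)
    {
      return (long long) x;
    }
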
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm
index 731b6146e76..2ceb938199f 100644
--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -32,6 +32,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \
$(srcdir)/config/arm/constraints.md \
$(srcdir)/config/arm/cortex-a15.md \
$(srcdir)/config/arm/cortex-a5.md \
+ $(srcdir)/config/arm/cortex-a7.md \
$(srcdir)/config/arm/cortex-a8.md \
$(srcdir)/config/arm/cortex-a8-neon.md \
$(srcdir)/config/arm/cortex-a9.md \
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index f22666cf9a9..d4dd1b9364c 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -570,11 +570,7 @@
"@
sxtb%?\\t%0, %1
ldr%(sb%)\\t%0, %1"
- [(set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")])
+ [(set_attr "type" "simple_alu_shift,load_byte")
(set_attr "predicable" "yes")
(set_attr "pool_range" "*,4094")
(set_attr "neg_pool_range" "*,250")]
@@ -587,11 +583,7 @@
"@
uxth%?\\t%0, %1
ldr%(h%)\\t%0, %1"
- [(set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")])
+ [(set_attr "type" "simple_alu_shift,load_byte")
(set_attr "predicable" "yes")
(set_attr "pool_range" "*,4094")
(set_attr "neg_pool_range" "*,250")]
@@ -604,11 +596,7 @@
"@
uxtb%(%)\\t%0, %1
ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2"
- [(set_attr_alternative "type"
- [(if_then_else (eq_attr "tune" "cortexa7")
- (const_string "simple_alu_imm")
- (const_string "alu_shift"))
- (const_string "load_byte")])
+ [(set_attr "type" "simple_alu_shift,load_byte")
(set_attr "predicable" "yes")
(set_attr "pool_range" "*,4094")
(set_attr "neg_pool_range" "*,250")]
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index a5c461df36f..3f2975ff546 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -1265,6 +1265,31 @@
(set_attr "type" "f_rint<vfp_type>")]
)
+;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
+;; The 'smax' and 'smin' RTL standard pattern names do not specify which
+;; operand will be returned when both operands are zero (i.e. they may not
+;; honour signed zeroes), or when either operand is NaN. Therefore GCC
+;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
+;; NaNs.
+
+(define_insn "smax<mode>3"
+ [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
+ (smax:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
+ (match_operand:SDF 2 "register_operand" "<F_constraint>")))]
+ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
+ "vmaxnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "f_minmax<vfp_type>")]
+)
+
+(define_insn "smin<mode>3"
+ [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
+ (smin:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
+ (match_operand:SDF 2 "register_operand" "<F_constraint>")))]
+ "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
+ "vminnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "type" "f_minmax<vfp_type>")]
+)
+
;; Unimplemented insns:
;; fldm*
;; fstm*
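
As the comment above notes, GCC only forms MIN_EXPR/MAX_EXPR for floating point when it may ignore NaNs and signed zeros, so these patterns are typically reached under -ffast-math (or -ffinite-math-only with -fno-signed-zeros) on an ARMv8 VFP. A minimal C sketch; the exact option set and the mapping to a single instruction are assumptions about typical behaviour rather than a guarantee:

    /* Illustrative only: compiled with e.g.
         -O2 -ffast-math -mfpu=fp-armv8 -mfloat-abi=hard
       the selects below may be recognised as MAX_EXPR/MIN_EXPR and matched
       by the smaxsf3/sminsf3 patterns, i.e. emitted as vmaxnm.f32 and
       vminnm.f32.  With default FP semantics GCC keeps a compare-and-select
       sequence instead, since NaNs and signed zeros must be honoured.  */
    float fmax32 (float a, float b) { return a > b ? a : b; }
    float fmin32 (float a, float b) { return a < b ? a : b; }
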
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 69f44aa6086..b466a4fbbdf 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2026,7 +2026,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
regs instead of memory. */
- m_COREI7 | m_CORE2I7
+ m_COREI7 | m_CORE2I7,
+
+ /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
+ a conditional move. */
+ m_ATOM
};
/* Feature tests against the various architecture variations. */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 3ac345172c8..d2f535a7566 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -331,6 +331,7 @@ enum ix86_tune_indices {
X86_TUNE_REASSOC_INT_TO_PARALLEL,
X86_TUNE_REASSOC_FP_TO_PARALLEL,
X86_TUNE_GENERAL_REGS_SSE_SPILL,
+ X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE,
X86_TUNE_LAST
};
@@ -436,6 +437,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_REASSOC_FP_TO_PARALLEL]
#define TARGET_GENERAL_REGS_SSE_SPILL \
ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
+#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
+ ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3846065066b..95a52cdd1a7 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16093,6 +16093,28 @@
[(set_attr "type" "icmov")
(set_attr "mode" "<MODE>")])
+;; Don't do conditional moves with memory inputs. This splitter helps
+;; register starved x86_32 by forcing inputs into registers before reload.
+(define_split
+ [(set (match_operand:SWI248 0 "register_operand")
+ (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SWI248 2 "nonimmediate_operand")
+ (match_operand:SWI248 3 "nonimmediate_operand")))]
+ "!TARGET_64BIT && TARGET_CMOVE
+ && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+ && (MEM_P (operands[2]) || MEM_P (operands[3]))
+ && can_create_pseudo_p ()
+ && optimize_insn_for_speed_p ()"
+ [(set (match_dup 0)
+ (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
+{
+ if (MEM_P (operands[2]))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ if (MEM_P (operands[3]))
+ operands[3] = force_reg (<MODE>mode, operands[3]);
+})
+
(define_insn "*movqicc_noc"
[(set (match_operand:QI 0 "register_operand" "=r,r")
(if_then_else:QI (match_operator 1 "ix86_comparison_operator"
@@ -16105,14 +16127,12 @@
(set_attr "mode" "QI")])
(define_split
- [(set (match_operand 0 "register_operand")
- (if_then_else (match_operator 1 "ix86_comparison_operator"
- [(reg FLAGS_REG) (const_int 0)])
- (match_operand 2 "register_operand")
- (match_operand 3 "register_operand")))]
+ [(set (match_operand:SWI12 0 "register_operand")
+ (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SWI12 2 "register_operand")
+ (match_operand:SWI12 3 "register_operand")))]
"TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL
- && (GET_MODE (operands[0]) == QImode
- || GET_MODE (operands[0]) == HImode)
&& reload_completed"
[(set (match_dup 0)
(if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
@@ -16122,6 +16142,33 @@
operands[3] = gen_lowpart (SImode, operands[3]);
})
+;; Don't do conditional moves with memory inputs
+(define_peephole2
+ [(match_scratch:SWI248 2 "r")
+ (set (match_operand:SWI248 0 "register_operand")
+ (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_dup 0)
+ (match_operand:SWI248 3 "memory_operand")))]
+ "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+ && optimize_insn_for_speed_p ()"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0)
+ (if_then_else:SWI248 (match_dup 1) (match_dup 0) (match_dup 2)))])
+
+(define_peephole2
+ [(match_scratch:SWI248 2 "r")
+ (set (match_operand:SWI248 0 "register_operand")
+ (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SWI248 3 "memory_operand")
+ (match_dup 0)))]
+ "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+ && optimize_insn_for_speed_p ()"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0)
+ (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 0)))])
+
(define_expand "mov<mode>cc"
[(set (match_operand:X87MODEF 0 "register_operand")
(if_then_else:X87MODEF
@@ -16209,6 +16256,59 @@
[(set_attr "type" "fcmov,fcmov,icmov,icmov")
(set_attr "mode" "SF,SF,SI,SI")])
+;; Don't do conditional moves with memory inputs. This splitter helps
+;; register starved x86_32 by forcing inputs into registers before reload.
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand")
+ (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:MODEF 2 "nonimmediate_operand")
+ (match_operand:MODEF 3 "nonimmediate_operand")))]
+ "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
+ && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+ && (MEM_P (operands[2]) || MEM_P (operands[3]))
+ && can_create_pseudo_p ()
+ && optimize_insn_for_speed_p ()"
+ [(set (match_dup 0)
+ (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))]
+{
+ if (MEM_P (operands[2]))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ if (MEM_P (operands[3]))
+ operands[3] = force_reg (<MODE>mode, operands[3]);
+})
+
+;; Don't do conditional moves with memory inputs
+(define_peephole2
+ [(match_scratch:MODEF 2 "r")
+ (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand")
+ (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_dup 0)
+ (match_operand:MODEF 3 "memory_operand")))]
+ "(<MODE>mode != DFmode || TARGET_64BIT)
+ && TARGET_80387 && TARGET_CMOVE
+ && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+ && optimize_insn_for_speed_p ()"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0)
+ (if_then_else:MODEF (match_dup 1) (match_dup 0) (match_dup 2)))])
+
+(define_peephole2
+ [(match_scratch:MODEF 2 "r")
+ (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand")
+ (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:MODEF 3 "memory_operand")
+ (match_dup 0)))]
+ "(<MODE>mode != DFmode || TARGET_64BIT)
+ && TARGET_80387 && TARGET_CMOVE
+ && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+ && optimize_insn_for_speed_p ()"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0)
+ (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 0)))])
+
;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
;; the scalar versions to have only XMM registers as operands.
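
The new splitter and peephole2 patterns implement X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE (enabled for Atom in i386.c earlier in this patch): before reload, memory arms of a conditional move are forced into registers, and after reload the peepholes load the memory operand into a scratch register so the cmov itself sees only registers. A minimal C sketch of an affected shape; names, options and the exact generated code are illustrative assumptions:

    /* Illustrative only: with -O2 and a cmov-friendly condition, GCC may
       fold the load of "m" into a cmov memory operand on generic tuning.
       With -mtune=atom (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE) the new
       splitter/peepholes keep the load as a separate mov into a register
       and only then issue the register-register cmov.  */
    int
    select_mem (int a, int b, const int *p)
    {
      int m = *p;               /* unconditional load                     */
      return a > b ? m : b;     /* candidate for cmov after if-conversion */
    }
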
diff --git a/gcc/config/i386/t-kfreebsd b/gcc/config/i386/t-kfreebsd
index b8338a9f9fc..762d520fae9 100644
--- a/gcc/config/i386/t-kfreebsd
+++ b/gcc/config/i386/t-kfreebsd
@@ -2,4 +2,4 @@ MULTIARCH_DIRNAME = $(call if_multiarch,i386-kfreebsd-gnu)
# MULTILIB_OSDIRNAMES are set in t-linux64.
KFREEBSD_OS = $(filter kfreebsd%, $(word 3, $(subst -, ,$(target))))
-MULTILIB_OSDIRNAMES := $(filter-out mx32=% $(subst linux,$(KFREEBSD_OS),$(MULTILIB_OSDIRNAMES)))
+MULTILIB_OSDIRNAMES := $(filter-out mx32=%,$(subst linux,$(KFREEBSD_OS),$(MULTILIB_OSDIRNAMES)))
diff --git a/gcc/config/rs6000/t-linux b/gcc/config/rs6000/t-linux
index 3611027ae27..017a293cde3 100644
--- a/gcc/config/rs6000/t-linux
+++ b/gcc/config/rs6000/t-linux
@@ -1,5 +1,9 @@
# do not define the multiarch name if configured for a soft-float cpu
# or soft-float.
ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT_CPUS))$(findstring soft,$(with_float)))
+ifneq (,$(findstring spe,$(target)))
+MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file_list)),,v1)
+else
MULTIARCH_DIRNAME = powerpc-linux-gnu
endif
+endif
diff --git a/gcc/config/rs6000/t-spe b/gcc/config/rs6000/t-spe
index 90eb802a495..1bed1e32b0e 100644
--- a/gcc/config/rs6000/t-spe
+++ b/gcc/config/rs6000/t-spe
@@ -71,7 +71,3 @@ MULTILIB_EXCEPTIONS = maltivec mabi=altivec mno-spe mabi=no-spe mno-isel \
mabi=altivec/mlittle \
maltivec/mlittle \
maltivec/mabi=altivec/mlittle
-
-ifneq (,$(findstring linux, $(target)))
-MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file)),,v1)
-endif
diff --git a/gcc/config/s390/constraints.md b/gcc/config/s390/constraints.md
index 8564b6619a5..9d416adb165 100644
--- a/gcc/config/s390/constraints.md
+++ b/gcc/config/s390/constraints.md
@@ -45,6 +45,8 @@
;; H,Q: mode of the part
;; D,S,H: mode of the containing operand
;; 0,F: value of the other parts (F - all bits set)
+;; --
+;; xx[DS]q satisfies s390_contiguous_bitmask_p for DImode or SImode
;;
;; The constraint matches if the specified part of a constant
;; has a value different from its other parts. If the letter x
@@ -330,8 +332,15 @@
(and (match_code "const_int")
(match_test "s390_N_constraint_str (\"xQH0\", ival)")))
+(define_constraint "NxxDq"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_contiguous_bitmask_p (ival, 64, NULL, NULL)")))
-
+(define_constraint "NxxSq"
+ "@internal"
+ (and (match_code "const_int")
+ (match_test "s390_contiguous_bitmask_p (ival, 32, NULL, NULL)")))
;;
;; Double-letter constraints starting with O follow.
diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
index 9ba85bf387c..d5e185d5ac7 100644
--- a/gcc/config/s390/predicates.md
+++ b/gcc/config/s390/predicates.md
@@ -101,6 +101,10 @@
return true;
})
+(define_predicate "nonzero_shift_count_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 1, GET_MODE_BITSIZE (mode) - 1)")))
+
;; Return true if OP a valid operand for the LARL instruction.
(define_predicate "larl_operand"
@@ -154,6 +158,12 @@
return false;
})
+(define_predicate "contiguous_bitmask_operand"
+ (match_code "const_int")
+{
+ return s390_contiguous_bitmask_p (INTVAL (op), GET_MODE_BITSIZE (mode), NULL, NULL);
+})
+
;; operators --------------------------------------------------------------
;; Return nonzero if OP is a valid comparison operator
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 79673d6d835..a494ba22893 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -58,7 +58,6 @@ extern int tls_symbolic_operand (rtx);
extern bool s390_match_ccmode (rtx, enum machine_mode);
extern enum machine_mode s390_tm_ccmode (rtx, rtx, bool);
extern enum machine_mode s390_select_ccmode (enum rtx_code, rtx, rtx);
-extern void s390_canonicalize_comparison (enum rtx_code *, rtx *, rtx *);
extern rtx s390_emit_compare (enum rtx_code, rtx, rtx);
extern void s390_emit_jump (rtx, rtx);
extern bool symbolic_reference_mentioned_p (rtx);
@@ -110,5 +109,6 @@ extern bool s390_legitimate_address_without_index_p (rtx);
extern bool s390_decompose_shift_count (rtx, rtx *, HOST_WIDE_INT *);
extern int s390_branch_condition_mask (rtx);
extern int s390_compare_and_branch_condition_mask (rtx);
+extern bool s390_extzv_shift_ok (int, int, unsigned HOST_WIDE_INT);
#endif /* RTX_CODE */
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 6517bce15e0..2edc8ab78f2 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -745,9 +745,13 @@ s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
that we can implement more efficiently. */
-void
-s390_canonicalize_comparison (enum rtx_code *code, rtx *op0, rtx *op1)
+static void
+s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
+ bool op0_preserve_value)
{
+ if (op0_preserve_value)
+ return;
+
/* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
if ((*code == EQ || *code == NE)
&& *op1 == const0_rtx
@@ -894,7 +898,7 @@ s390_canonicalize_comparison (enum rtx_code *code, rtx *op0, rtx *op1)
if (MEM_P (*op0) && REG_P (*op1))
{
rtx tem = *op0; *op0 = *op1; *op1 = tem;
- *code = swap_condition (*code);
+ *code = (int)swap_condition ((enum rtx_code)*code);
}
}
@@ -1343,6 +1347,24 @@ s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
return true;
}
+/* Check whether a rotate of ROTL followed by an AND of CONTIG is
+ equivalent to a shift followed by the AND. In particular, CONTIG
+ should not overlap the (rotated) bit 0/bit 63 gap. Negative values
+ for ROTL indicate a rotate to the right. */
+
+bool
+s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
+{
+ int pos, len;
+ bool ok;
+
+ ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
+ gcc_assert (ok);
+
+ return ((rotl >= 0 && rotl <= pos)
+ || (rotl < 0 && -rotl <= bitsize - len - pos));
+}
+
/* Check whether we can (and want to) split a double-word
move in mode MODE from SRC to DST into two single-word
moves, moving the subword FIRST_SUBWORD first. */
@@ -5364,28 +5386,35 @@ print_operand_address (FILE *file, rtx addr)
'C': print opcode suffix for branch condition.
'D': print opcode suffix for inverse branch condition.
'E': print opcode suffix for branch on index instruction.
- 'J': print tls_load/tls_gdcall/tls_ldcall suffix
'G': print the size of the operand in bytes.
+ 'J': print tls_load/tls_gdcall/tls_ldcall suffix
+ 'M': print the second word of a TImode operand.
+ 'N': print the second word of a DImode operand.
'O': print only the displacement of a memory reference.
'R': print only the base register of a memory reference.
'S': print S-type memory reference (base+displacement).
- 'N': print the second word of a DImode operand.
- 'M': print the second word of a TImode operand.
'Y': print shift count operand.
'b': print integer X as if it's an unsigned byte.
'c': print integer X as if it's a signed byte.
- 'x': print integer X as if it's an unsigned halfword.
+ 'e': "end" of DImode contiguous bitmask X.
+ 'f': "end" of SImode contiguous bitmask X.
'h': print integer X as if it's a signed halfword.
'i': print the first nonzero HImode part of X.
'j': print the first HImode part unequal to -1 of X.
'k': print the first nonzero SImode part of X.
'm': print the first SImode part unequal to -1 of X.
- 'o': print integer X as if it's an unsigned 32bit word. */
+ 'o': print integer X as if it's an unsigned 32bit word.
+ 's': "start" of DImode contiguous bitmask X.
+ 't': "start" of SImode contiguous bitmask X.
+ 'x': print integer X as if it's an unsigned halfword.
+*/
void
print_operand (FILE *file, rtx x, int code)
{
+ HOST_WIDE_INT ival;
+
switch (code)
{
case 'C':
@@ -5564,30 +5593,57 @@ print_operand (FILE *file, rtx x, int code)
break;
case CONST_INT:
- if (code == 'b')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xff);
- else if (code == 'c')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((INTVAL (x) & 0xff) ^ 0x80) - 0x80);
- else if (code == 'x')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
- else if (code == 'h')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
- else if (code == 'i')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC,
- s390_extract_part (x, HImode, 0));
- else if (code == 'j')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC,
- s390_extract_part (x, HImode, -1));
- else if (code == 'k')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC,
- s390_extract_part (x, SImode, 0));
- else if (code == 'm')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC,
- s390_extract_part (x, SImode, -1));
- else if (code == 'o')
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffffffff);
- else
- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ ival = INTVAL (x);
+ switch (code)
+ {
+ case 0:
+ break;
+ case 'b':
+ ival &= 0xff;
+ break;
+ case 'c':
+ ival = ((ival & 0xff) ^ 0x80) - 0x80;
+ break;
+ case 'x':
+ ival &= 0xffff;
+ break;
+ case 'h':
+ ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
+ break;
+ case 'i':
+ ival = s390_extract_part (x, HImode, 0);
+ break;
+ case 'j':
+ ival = s390_extract_part (x, HImode, -1);
+ break;
+ case 'k':
+ ival = s390_extract_part (x, SImode, 0);
+ break;
+ case 'm':
+ ival = s390_extract_part (x, SImode, -1);
+ break;
+ case 'o':
+ ival &= 0xffffffff;
+ break;
+ case 'e': case 'f':
+ case 's': case 't':
+ {
+ int pos, len;
+ bool ok;
+
+ len = (code == 's' || code == 'e' ? 64 : 32);
+ ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
+ gcc_assert (ok);
+ if (code == 's' || code == 't')
+ ival = 64 - pos - len;
+ else
+ ival = 64 - 1 - pos;
+ }
+ break;
+ default:
+ output_operand_lossage ("invalid constant for output modifier '%c'", code);
+ }
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
break;
case CONST_DOUBLE:
@@ -11071,6 +11127,9 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
+#undef TARGET_CANONICALIZE_COMPARISON
+#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 30408f4ac28..286046abdff 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -720,10 +720,6 @@ do { \
return the mode to be used for the comparison. */
#define SELECT_CC_MODE(OP, X, Y) s390_select_ccmode ((OP), (X), (Y))
-/* Canonicalize a comparison from one we don't have to one we do have. */
-#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \
- s390_canonicalize_comparison (&(CODE), &(OP0), &(OP1))
-
/* Relative costs of operations. */
/* A C expression for the cost of a branch instruction. A value of 1
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index bea58cd7a83..4666122ecef 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -393,6 +393,9 @@
;; the same template.
(define_code_iterator SHIFT [ashift lshiftrt])
+;; This iterator allows r[ox]sbg to be defined with the same template.
+(define_code_iterator IXOR [ior xor])
+
;; This iterator and attribute allow to combine most atomic operations.
(define_code_iterator ATOMIC [and ior xor plus minus mult])
(define_code_iterator ATOMIC_Z196 [and ior xor plus])
@@ -527,6 +530,13 @@
;; Maximum unsigned integer that fits in MODE.
(define_mode_attr max_uint [(HI "65535") (QI "255")])
+;; Start and end field computations for RISBG et al.
+(define_mode_attr bfstart [(DI "s") (SI "t")])
+(define_mode_attr bfend [(DI "e") (SI "f")])
+
+;; In place of GET_MODE_BITSIZE (<MODE>mode)
+(define_mode_attr bitsize [(DI "64") (SI "32") (HI "16") (QI "8")])
+
;;
;;- Compare instructions.
;;
@@ -3300,15 +3310,64 @@
[(set_attr "op_type" "RS,RSY")
(set_attr "z10prop" "z10_super_E1,z10_super_E1")])
+;
+; extv instruction patterns
+;
+
+; FIXME: This expander needs to be converted from DI to GPR as well
+; after resolving some issues with it.
+
+(define_expand "extzv"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extract:DI
+ (match_operand:DI 1 "register_operand" "d")
+ (match_operand 2 "const_int_operand" "") ; size
+ (match_operand 3 "const_int_operand" ""))) ; start
+ (clobber (reg:CC CC_REGNUM))])]
+ "TARGET_Z10"
+{
+ /* Starting with zEC12 there is the risbgn instruction, which does not clobber CC. */
+ if (TARGET_ZEC12)
+ {
+ emit_move_insn (operands[0],
+ gen_rtx_ZERO_EXTRACT (DImode,
+ operands[1],
+ operands[2],
+ operands[3]));
+ DONE;
+ }
+})
-(define_insn_and_split "*extzv<mode>"
+(define_insn "*extzv<mode>_zEC12"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (zero_extract:GPR
+ (match_operand:GPR 1 "register_operand" "d")
+ (match_operand 2 "const_int_operand" "") ; size
+ (match_operand 3 "const_int_operand" "")))] ; start
+ "TARGET_ZEC12"
+ "risbgn\t%0,%1,64-%2,128+63,<bitsize>+%3+%2" ; dst, src, start, end, shift
+ [(set_attr "op_type" "RIE")])
+
+(define_insn "*extzv<mode>_z10"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (zero_extract:GPR
+ (match_operand:GPR 1 "register_operand" "d")
+ (match_operand 2 "const_int_operand" "") ; size
+ (match_operand 3 "const_int_operand" ""))) ; start
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10"
+ "risbg\t%0,%1,64-%2,128+63,<bitsize>+%3+%2" ; dst, src, start, end, shift
+ [(set_attr "op_type" "RIE")
+ (set_attr "z10prop" "z10_super_E1")])
+
+(define_insn_and_split "*pre_z10_extzv<mode>"
[(set (match_operand:GPR 0 "register_operand" "=d")
(zero_extract:GPR (match_operand:QI 1 "s_operand" "QS")
- (match_operand 2 "const_int_operand" "n")
+ (match_operand 2 "nonzero_shift_count_operand" "")
(const_int 0)))
(clobber (reg:CC CC_REGNUM))]
- "INTVAL (operands[2]) > 0
- && INTVAL (operands[2]) <= GET_MODE_BITSIZE (SImode)"
+ "!TARGET_Z10"
"#"
"&& reload_completed"
[(parallel
@@ -3322,18 +3381,17 @@
operands[1] = adjust_address (operands[1], BLKmode, 0);
set_mem_size (operands[1], size);
- operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - bitsize);
+ operands[2] = GEN_INT (<GPR:bitsize> - bitsize);
operands[3] = GEN_INT (mask);
})
-(define_insn_and_split "*extv<mode>"
+(define_insn_and_split "*pre_z10_extv<mode>"
[(set (match_operand:GPR 0 "register_operand" "=d")
(sign_extract:GPR (match_operand:QI 1 "s_operand" "QS")
- (match_operand 2 "const_int_operand" "n")
+ (match_operand 2 "nonzero_shift_count_operand" "")
(const_int 0)))
(clobber (reg:CC CC_REGNUM))]
- "INTVAL (operands[2]) > 0
- && INTVAL (operands[2]) <= GET_MODE_BITSIZE (SImode)"
+ ""
"#"
"&& reload_completed"
[(parallel
@@ -3349,7 +3407,7 @@
operands[1] = adjust_address (operands[1], BLKmode, 0);
set_mem_size (operands[1], size);
- operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - bitsize);
+ operands[2] = GEN_INT (<GPR:bitsize> - bitsize);
operands[3] = GEN_INT (mask);
})
@@ -3369,49 +3427,29 @@
FAIL;
})
+
+; The normal RTL expansion will never generate a zero_extract where
+; the location operand isn't word mode. However, we do this in the
+; back-end when generating atomic operations. See s390_two_part_insv.
(define_insn "*insv<mode>_zEC12"
[(set (zero_extract:GPR (match_operand:GPR 0 "nonimmediate_operand" "+d")
- (match_operand 1 "const_int_operand" "I")
- (match_operand 2 "const_int_operand" "I"))
+ (match_operand 1 "const_int_operand" "I") ; size
+ (match_operand 2 "const_int_operand" "I")) ; pos
(match_operand:GPR 3 "nonimmediate_operand" "d"))]
"TARGET_ZEC12
- && (INTVAL (operands[1]) + INTVAL (operands[2])) <=
- GET_MODE_BITSIZE (<MODE>mode)"
-{
- int start = INTVAL (operands[2]);
- int size = INTVAL (operands[1]);
- int offset = 64 - GET_MODE_BITSIZE (<MODE>mode);
-
- operands[2] = GEN_INT (offset + start); /* start bit position */
- operands[1] = GEN_INT (offset + start + size - 1); /* end bit position */
- operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) -
- start - size); /* left shift count */
-
- return "risbgn\t%0,%3,%b2,%b1,%b4";
-}
+ && (INTVAL (operands[1]) + INTVAL (operands[2])) <= <bitsize>"
+ "risbgn\t%0,%3,64-<bitsize>+%2,64-<bitsize>+%2+%1-1,<bitsize>-%2-%1"
[(set_attr "op_type" "RIE")])
(define_insn "*insv<mode>_z10"
[(set (zero_extract:GPR (match_operand:GPR 0 "nonimmediate_operand" "+d")
- (match_operand 1 "const_int_operand" "I")
- (match_operand 2 "const_int_operand" "I"))
+ (match_operand 1 "const_int_operand" "I") ; size
+ (match_operand 2 "const_int_operand" "I")) ; pos
(match_operand:GPR 3 "nonimmediate_operand" "d"))
(clobber (reg:CC CC_REGNUM))]
"TARGET_Z10
- && (INTVAL (operands[1]) + INTVAL (operands[2])) <=
- GET_MODE_BITSIZE (<MODE>mode)"
-{
- int start = INTVAL (operands[2]);
- int size = INTVAL (operands[1]);
- int offset = 64 - GET_MODE_BITSIZE (<MODE>mode);
-
- operands[2] = GEN_INT (offset + start); /* start bit position */
- operands[1] = GEN_INT (offset + start + size - 1); /* end bit position */
- operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) -
- start - size); /* left shift count */
-
- return "risbg\t%0,%3,%b2,%b1,%b4";
-}
+ && (INTVAL (operands[1]) + INTVAL (operands[2])) <= <bitsize>"
+ "risbg\t%0,%3,64-<bitsize>+%2,64-<bitsize>+%2+%1-1,<bitsize>-%2-%1"
[(set_attr "op_type" "RIE")
(set_attr "z10prop" "z10_super_E1")])
@@ -3420,82 +3458,135 @@
(define_insn "*insv<mode>_zEC12_noshift"
[(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
(ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "d")
- (match_operand 2 "const_int_operand" "n"))
+ (match_operand:GPR 2 "contiguous_bitmask_operand" ""))
(and:GPR (match_operand:GPR 3 "nonimmediate_operand" "0")
- (match_operand 4 "const_int_operand" "n"))))]
- "TARGET_ZEC12
- && s390_contiguous_bitmask_p (INTVAL (operands[2]),
- GET_MODE_BITSIZE (<MODE>mode), NULL, NULL)
- && INTVAL (operands[2]) == ~(INTVAL (operands[4]))"
-
-{
- int start;
- int size;
-
- s390_contiguous_bitmask_p (INTVAL (operands[2]),
- GET_MODE_BITSIZE (<MODE>mode), &start, &size);
-
- operands[5] = GEN_INT (64 - start - size); /* start bit position */
- operands[6] = GEN_INT (64 - 1 - start); /* end bit position */
- operands[7] = const0_rtx; /* left shift count */
-
- return "risbgn\t%0,%1,%b5,%b6,%b7";
-}
+ (match_operand:GPR 4 "const_int_operand" ""))))]
+ "TARGET_ZEC12 && INTVAL (operands[2]) == ~INTVAL (operands[4])"
+ "risbgn\t%0,%1,%<bfstart>2,%<bfend>2,0"
[(set_attr "op_type" "RIE")])
-; and op1 with a mask being 1 for the selected bits and 0 for the rest
-; and op3=op0 with a mask being 0 for the selected bits and 1 for the rest
(define_insn "*insv<mode>_z10_noshift"
[(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
(ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "d")
- (match_operand 2 "const_int_operand" "n"))
+ (match_operand:GPR 2 "contiguous_bitmask_operand" ""))
(and:GPR (match_operand:GPR 3 "nonimmediate_operand" "0")
- (match_operand 4 "const_int_operand" "n"))))
+ (match_operand:GPR 4 "const_int_operand" ""))))
(clobber (reg:CC CC_REGNUM))]
- "TARGET_Z10
- && s390_contiguous_bitmask_p (INTVAL (operands[2]),
- GET_MODE_BITSIZE (<MODE>mode), NULL, NULL)
- && INTVAL (operands[2]) == ~(INTVAL (operands[4]))"
-
-{
- int start;
- int size;
-
- s390_contiguous_bitmask_p (INTVAL (operands[2]),
- GET_MODE_BITSIZE (<MODE>mode), &start, &size);
-
- operands[5] = GEN_INT (64 - start - size); /* start bit position */
- operands[6] = GEN_INT (64 - 1 - start); /* end bit position */
- operands[7] = const0_rtx; /* left shift count */
-
- return "risbg\t%0,%1,%b5,%b6,%b7";
-}
+ "TARGET_Z10 && INTVAL (operands[2]) == ~INTVAL (operands[4])"
+ "risbg\t%0,%1,%<bfstart>2,%<bfend>2,0"
[(set_attr "op_type" "RIE")
(set_attr "z10prop" "z10_super_E1")])
-; and op1 with a mask being 1 for the selected bits and 0 for the rest
-(define_insn "*insv<mode>_or_z10_noshift"
+(define_insn "*r<noxa>sbg_<mode>_noshift"
[(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
- (ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "d")
- (match_operand 2 "const_int_operand" "n"))
- (match_operand:GPR 3 "nonimmediate_operand" "0")))
+ (IXOR:GPR
+ (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "d")
+ (match_operand:GPR 2 "contiguous_bitmask_operand" ""))
+ (match_operand:GPR 3 "nonimmediate_operand" "0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10"
+ "r<noxa>sbg\t%0,%1,%<bfstart>2,%<bfend>2,0"
+ [(set_attr "op_type" "RIE")])
+
+(define_insn "*r<noxa>sbg_di_rotl"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=d")
+ (IXOR:DI
+ (and:DI
+ (rotate:DI
+ (match_operand:DI 1 "nonimmediate_operand" "d")
+ (match_operand:DI 3 "const_int_operand" ""))
+ (match_operand:DI 2 "contiguous_bitmask_operand" ""))
+ (match_operand:DI 4 "nonimmediate_operand" "0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10"
+ "r<noxa>sbg\t%0,%1,%<bfstart>2,%<bfend>2,%b3"
+ [(set_attr "op_type" "RIE")])
+
+(define_insn "*r<noxa>sbg_<mode>_srl"
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+ (IXOR:GPR
+ (and:GPR
+ (lshiftrt:GPR
+ (match_operand:GPR 1 "nonimmediate_operand" "d")
+ (match_operand:GPR 3 "nonzero_shift_count_operand" ""))
+ (match_operand:GPR 2 "contiguous_bitmask_operand" ""))
+ (match_operand:GPR 4 "nonimmediate_operand" "0")))
(clobber (reg:CC CC_REGNUM))]
"TARGET_Z10
- && s390_contiguous_bitmask_p (INTVAL (operands[2]),
- GET_MODE_BITSIZE (<MODE>mode), NULL, NULL)"
-{
- int start;
- int size;
+ && s390_extzv_shift_ok (<bitsize>, 64 - INTVAL (operands[3]),
+ INTVAL (operands[2]))"
+ "r<noxa>sbg\t%0,%1,%<bfstart>2,%<bfend>2,64-%3"
+ [(set_attr "op_type" "RIE")])
- s390_contiguous_bitmask_p (INTVAL (operands[2]),
- GET_MODE_BITSIZE (<MODE>mode), &start, &size);
+(define_insn "*r<noxa>sbg_<mode>_sll"
+ [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+ (IXOR:GPR
+ (and:GPR
+ (ashift:GPR
+ (match_operand:GPR 1 "nonimmediate_operand" "d")
+ (match_operand:GPR 3 "nonzero_shift_count_operand" ""))
+ (match_operand:GPR 2 "contiguous_bitmask_operand" ""))
+ (match_operand:GPR 4 "nonimmediate_operand" "0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10
+ && s390_extzv_shift_ok (<bitsize>, INTVAL (operands[3]),
+ INTVAL (operands[2]))"
+ "r<noxa>sbg\t%0,%1,%<bfstart>2,%<bfend>2,%3"
+ [(set_attr "op_type" "RIE")])
- operands[4] = GEN_INT (64 - start - size); /* start bit position */
- operands[5] = GEN_INT (64 - 1 - start); /* end bit position */
- operands[6] = const0_rtx; /* left shift count */
+;; These two are generated by combine for s.bf &= val.
+;; ??? For bitfields smaller than 32 bits, we wind up with SImode
+;; shifts and ands, which results in some truly awful patterns
+;; including subregs of operations. Rather unnecessarily, IMO.
+;; Instead of
+;;
+;; (set (zero_extract:DI (reg/v:DI 50 [ s ])
+;; (const_int 24 [0x18])
+;; (const_int 0 [0]))
+;; (subreg:DI (and:SI (subreg:SI (lshiftrt:DI (reg/v:DI 50 [ s ])
+;; (const_int 40 [0x28])) 4)
+;; (reg:SI 4 %r4 [ y+4 ])) 0))
+;;
+;; we should instead generate
+;;
+;; (set (zero_extract:DI (reg/v:DI 50 [ s ])
+;; (const_int 24 [0x18])
+;; (const_int 0 [0]))
+;; (and:DI (lshiftrt:DI (reg/v:DI 50 [ s ])
+;; (const_int 40 [0x28]))
+;; (subreg:DI (reg:SI 4 %r4 [ y+4 ]) 0)))
+;;
+;; by noticing that we can push down the outer paradoxical subreg
+;; into the operation.
+
+(define_insn "*insv_rnsbg_noshift"
+ [(set (zero_extract:DI
+ (match_operand:DI 0 "nonimmediate_operand" "+d")
+ (match_operand 1 "const_int_operand" "")
+ (match_operand 2 "const_int_operand" ""))
+ (and:DI
+ (match_dup 0)
+ (match_operand:DI 3 "nonimmediate_operand" "d")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10
+ && INTVAL (operands[1]) + INTVAL (operands[2]) == 64"
+ "rnsbg\t%0,%3,%2,63,0"
+ [(set_attr "op_type" "RIE")])
- return "rosbg\t%0,%1,%b4,%b5,%b6";
-}
+(define_insn "*insv_rnsbg_srl"
+ [(set (zero_extract:DI
+ (match_operand:DI 0 "nonimmediate_operand" "+d")
+ (match_operand 1 "const_int_operand" "")
+ (match_operand 2 "const_int_operand" ""))
+ (and:DI
+ (lshiftrt:DI
+ (match_dup 0)
+ (match_operand 3 "const_int_operand" ""))
+ (match_operand:DI 4 "nonimmediate_operand" "d")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10
+ && INTVAL (operands[3]) == 64 - INTVAL (operands[1]) - INTVAL (operands[2])"
+ "rnsbg\t%0,%4,%2,%2+%1-1,%3"
[(set_attr "op_type" "RIE")])
(define_insn "*insv<mode>_mem_reg"
@@ -3568,15 +3659,14 @@
[(set_attr "op_type" "RIL")
(set_attr "z10prop" "z10_fwd_E1")])
-; Update the right-most 32 bit of a DI, or the whole of a SI.
-(define_insn "*insv_l<mode>_reg_extimm"
- [(set (zero_extract:P (match_operand:P 0 "register_operand" "+d")
- (const_int 32)
- (match_operand 1 "const_int_operand" "n"))
- (match_operand:P 2 "const_int_operand" "n"))]
- "TARGET_EXTIMM
- && BITS_PER_WORD - INTVAL (operands[1]) == 32"
- "iilf\t%0,%o2"
+; Update the right-most 32 bits of a DI.
+(define_insn "*insv_l_di_reg_extimm"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+d")
+ (const_int 32)
+ (const_int 32))
+ (match_operand:DI 1 "const_int_operand" "n"))]
+ "TARGET_EXTIMM"
+ "iilf\t%0,%o1"
[(set_attr "op_type" "RIL")
(set_attr "z10prop" "z10_fwd_A1")])
@@ -3630,8 +3720,7 @@
}
else if (!TARGET_EXTIMM)
{
- rtx bitcount = GEN_INT (GET_MODE_BITSIZE (<DSI:MODE>mode) -
- GET_MODE_BITSIZE (<HQI:MODE>mode));
+ rtx bitcount = GEN_INT (<DSI:bitsize> - <HQI:bitsize>);
operands[1] = gen_lowpart (<DSI:MODE>mode, operands[1]);
emit_insn (gen_ashl<DSI:mode>3 (operands[0], operands[1], bitcount));
@@ -3733,8 +3822,7 @@
{
operands[1] = adjust_address (operands[1], BLKmode, 0);
set_mem_size (operands[1], GET_MODE_SIZE (QImode));
- operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)
- - GET_MODE_BITSIZE (QImode));
+ operands[2] = GEN_INT (<GPR:bitsize> - BITS_PER_UNIT);
})
;
@@ -3845,8 +3933,7 @@
}
else if (!TARGET_EXTIMM)
{
- rtx bitcount = GEN_INT (GET_MODE_BITSIZE(DImode) -
- GET_MODE_BITSIZE(<MODE>mode));
+ rtx bitcount = GEN_INT (64 - <HQI:bitsize>);
operands[1] = gen_lowpart (DImode, operands[1]);
emit_insn (gen_ashldi3 (operands[0], operands[1], bitcount));
emit_insn (gen_lshrdi3 (operands[0], operands[0], bitcount));
@@ -3863,7 +3950,7 @@
{
operands[1] = gen_lowpart (SImode, operands[1]);
emit_insn (gen_andsi3 (operands[0], operands[1],
- GEN_INT ((1 << GET_MODE_BITSIZE(<MODE>mode)) - 1)));
+ GEN_INT ((1 << <HQI:bitsize>) - 1)));
DONE;
}
})
@@ -4056,8 +4143,8 @@
REAL_VALUE_TYPE cmp, sub;
operands[1] = force_reg (<BFP:MODE>mode, operands[1]);
- real_2expN (&cmp, GET_MODE_BITSIZE(<GPR:MODE>mode) - 1, <BFP:MODE>mode);
- real_2expN (&sub, GET_MODE_BITSIZE(<GPR:MODE>mode), <BFP:MODE>mode);
+ real_2expN (&cmp, <GPR:bitsize> - 1, <BFP:MODE>mode);
+ real_2expN (&sub, <GPR:bitsize>, <BFP:MODE>mode);
emit_cmp_and_jump_insns (operands[1],
CONST_DOUBLE_FROM_REAL_VALUE (cmp, <BFP:MODE>mode),
@@ -4772,9 +4859,9 @@
(plus:GPR (match_dup 1) (match_dup 2)))]
"s390_match_ccmode (insn, CCAmode)
&& (CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'K', \"K\")
- || CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'O', \"Os\")
- || CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'C', \"C\"))
- && INTVAL (operands[2]) != -((HOST_WIDE_INT)1 << (GET_MODE_BITSIZE(<MODE>mode) - 1))"
+ || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (operands[2]), 'O', \"Os\")
+ /* Avoid INT32_MIN on 32 bit. */
+ && (!TARGET_ZARCH || INTVAL (operands[2]) != -0x7fffffff - 1)))"
"@
a<g>hi\t%0,%h2
a<g>hik\t%0,%1,%h2
@@ -6044,44 +6131,50 @@
(define_insn "*anddi3_cc"
[(set (reg CC_REGNUM)
- (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0")
- (match_operand:DI 2 "general_operand" " d,d,RT"))
- (const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=d,d, d")
+ (compare
+ (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0, d")
+ (match_operand:DI 2 "general_operand" " d,d,RT,NxxDq"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=d,d, d, d")
(and:DI (match_dup 1) (match_dup 2)))]
- "s390_match_ccmode(insn, CCTmode) && TARGET_ZARCH"
+ "TARGET_ZARCH && s390_match_ccmode(insn, CCTmode)"
"@
ngr\t%0,%2
ngrk\t%0,%1,%2
- ng\t%0,%2"
- [(set_attr "op_type" "RRE,RRF,RXY")
- (set_attr "cpu_facility" "*,z196,*")
- (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")])
+ ng\t%0,%2
+ risbg\t%0,%1,%s2,128+%e2,0"
+ [(set_attr "op_type" "RRE,RRF,RXY,RIE")
+ (set_attr "cpu_facility" "*,z196,*,z10")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1")])
(define_insn "*anddi3_cconly"
[(set (reg CC_REGNUM)
- (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0")
- (match_operand:DI 2 "general_operand" " d,d,RT"))
+ (compare
+ (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,d, 0, d")
+ (match_operand:DI 2 "general_operand" " d,d,RT,NxxDq"))
(const_int 0)))
- (clobber (match_scratch:DI 0 "=d,d, d"))]
- "s390_match_ccmode(insn, CCTmode) && TARGET_ZARCH
+ (clobber (match_scratch:DI 0 "=d,d, d, d"))]
+ "TARGET_ZARCH
+ && s390_match_ccmode(insn, CCTmode)
/* Do not steal TM patterns. */
&& s390_single_part (operands[2], DImode, HImode, 0) < 0"
"@
ngr\t%0,%2
ngrk\t%0,%1,%2
- ng\t%0,%2"
- [(set_attr "op_type" "RRE,RRF,RXY")
- (set_attr "cpu_facility" "*,z196,*")
- (set_attr "z10prop" "z10_super_E1,*,z10_super_E1")])
+ ng\t%0,%2
+ risbg\t%0,%1,%s2,128+%e2,0"
+ [(set_attr "op_type" "RRE,RRF,RXY,RIE")
+ (set_attr "cpu_facility" "*,z196,*,z10")
+ (set_attr "z10prop" "z10_super_E1,*,z10_super_E1,z10_super_E1")])
(define_insn "*anddi3"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=d,d, d, d, d, d, d, d,d,d, d, AQ,Q")
- (and:DI (match_operand:DI 1 "nonimmediate_operand"
- "%d,o, 0, 0, 0, 0, 0, 0,0,d, 0, 0,0")
- (match_operand:DI 2 "general_operand"
- "M, M,N0HDF,N1HDF,N2HDF,N3HDF,N0SDF,N1SDF,d,d,RT,NxQDF,Q")))
+ "=d,d, d, d, d, d, d, d,d,d, d, d, AQ,Q")
+ (and:DI
+ (match_operand:DI 1 "nonimmediate_operand"
+ "%d,o, 0, 0, 0, 0, 0, 0,0,d, 0, d, 0,0")
+ (match_operand:DI 2 "general_operand"
+ "M, M,N0HDF,N1HDF,N2HDF,N3HDF,N0SDF,N1SDF,d,d,RT,NxxDq,NxQDF,Q")))
(clobber (reg:CC CC_REGNUM))]
"TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
"@
@@ -6096,10 +6189,11 @@
ngr\t%0,%2
ngrk\t%0,%1,%2
ng\t%0,%2
+ risbg\t%0,%1,%s2,128+%e2,0
#
#"
- [(set_attr "op_type" "RRE,RXE,RI,RI,RI,RI,RIL,RIL,RRE,RRF,RXY,SI,SS")
- (set_attr "cpu_facility" "*,*,*,*,*,*,extimm,extimm,*,z196,*,*,*")
+ [(set_attr "op_type" "RRE,RXE,RI,RI,RI,RI,RIL,RIL,RRE,RRF,RXY,RIE,SI,SS")
+ (set_attr "cpu_facility" "*,*,*,*,*,*,extimm,extimm,*,z196,*,z10,*,*")
(set_attr "z10prop" "*,
*,
z10_super_E1,
@@ -6111,6 +6205,7 @@
z10_super_E1,
*,
z10_super_E1,
+ z10_super_E1,
*,
*")])
@@ -6124,6 +6219,36 @@
(clobber (reg:CC CC_REGNUM))])]
"s390_narrow_logical_operator (AND, &operands[0], &operands[1]);")
+;; These two are what combine generates for (ashift (zero_extract)).
+(define_insn "*extzv_<mode>_srl"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (and:GPR (lshiftrt:GPR
+ (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
+ (match_operand:GPR 3 "contiguous_bitmask_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10
+ /* Note that even for the SImode pattern, the rotate is always DImode. */
+ && s390_extzv_shift_ok (<bitsize>, -INTVAL (operands[2]),
+ INTVAL (operands[3]))"
+ "risbg\t%0,%1,%<bfstart>3,128+%<bfend>3,64-%2"
+ [(set_attr "op_type" "RIE")
+ (set_attr "z10prop" "z10_super_E1")])
+
+(define_insn "*extzv_<mode>_sll"
+ [(set (match_operand:GPR 0 "register_operand" "=d")
+ (and:GPR (ashift:GPR
+ (match_operand:GPR 1 "register_operand" "d")
+ (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
+ (match_operand:GPR 3 "contiguous_bitmask_operand" "")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_Z10
+ && s390_extzv_shift_ok (<bitsize>, INTVAL (operands[2]),
+ INTVAL (operands[3]))"
+ "risbg\t%0,%1,%<bfstart>3,128+%<bfend>3,%2"
+ [(set_attr "op_type" "RIE")
+ (set_attr "z10prop" "z10_super_E1")])
+
;
; andsi3 instruction pattern(s).
@@ -6131,10 +6256,12 @@
(define_insn "*andsi3_cc"
[(set (reg CC_REGNUM)
- (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0")
- (match_operand:SI 2 "general_operand" "Os,d,d,R,T"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=d,d,d,d,d")
+ (compare
+ (and:SI
+ (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0, d")
+ (match_operand:SI 2 "general_operand" "Os,d,d,R,T,NxxSq"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=d,d,d,d,d, d")
(and:SI (match_dup 1) (match_dup 2)))]
"s390_match_ccmode(insn, CCTmode)"
"@
@@ -6142,17 +6269,21 @@
nr\t%0,%2
nrk\t%0,%1,%2
n\t%0,%2
- ny\t%0,%2"
- [(set_attr "op_type" "RIL,RR,RRF,RX,RXY")
- (set_attr "cpu_facility" "*,*,z196,*,*")
- (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,z10_super_E1,z10_super_E1")])
+ ny\t%0,%2
+ risbg\t%0,%1,%t2,128+%f2,0"
+ [(set_attr "op_type" "RIL,RR,RRF,RX,RXY,RIE")
+ (set_attr "cpu_facility" "*,*,z196,*,*,z10")
+ (set_attr "z10prop" "z10_super_E1,z10_super_E1,*,
+ z10_super_E1,z10_super_E1,z10_super_E1")])
(define_insn "*andsi3_cconly"
[(set (reg CC_REGNUM)
- (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0")
- (match_operand:SI 2 "general_operand" "Os,d,d,R,T"))
- (const_int 0)))
- (clobber (match_scratch:SI 0 "=d,d,d,d,d"))]
+ (compare
+ (and:SI
+ (match_operand:SI 1 "nonimmediate_operand" "%0,0,d,0,0, d")
+ (match_operand:SI 2 "general_operand" "Os,d,d,R,T,NxxSq"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=d,d,d,d,d, d"))]
"s390_match_ccmode(insn, CCTmode)
/* Do not steal TM patterns. */
&& s390_single_part (operands[2], SImode, HImode, 0) < 0"
@@ -6161,19 +6292,20 @@
nr\t%0,%2
nrk\t%0,%1,%2
n\t%0,%2
- ny\t%0,%2"
- [(set_attr "op_type" "RIL,RR,RRF,RX,RXY")
- (set_attr "cpu_facility" "*,*,z196,*,*")
+ ny\t%0,%2
+ risbg\t%0,%1,%t2,128+%f2,0"
+ [(set_attr "op_type" "RIL,RR,RRF,RX,RXY,RIE")
+ (set_attr "cpu_facility" "*,*,z196,*,*,z10")
(set_attr "z10prop" "z10_super_E1,z10_super_E1,*,
- z10_super_E1,z10_super_E1")])
+ z10_super_E1,z10_super_E1,z10_super_E1")])
(define_insn "*andsi3_zarch"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=d,d, d, d, d,d,d,d,d, AQ,Q")
+ "=d,d, d, d, d,d,d,d,d, d, AQ,Q")
(and:SI (match_operand:SI 1 "nonimmediate_operand"
- "%d,o, 0, 0, 0,0,d,0,0, 0,0")
+ "%d,o, 0, 0, 0,0,d,0,0, d, 0,0")
(match_operand:SI 2 "general_operand"
- " M,M,N0HSF,N1HSF,Os,d,d,R,T,NxQSF,Q")))
+ " M,M,N0HSF,N1HSF,Os,d,d,R,T,NxxSq,NxQSF,Q")))
(clobber (reg:CC CC_REGNUM))]
"TARGET_ZARCH && s390_logical_operator_ok_p (operands)"
"@
@@ -6186,10 +6318,11 @@
nrk\t%0,%1,%2
n\t%0,%2
ny\t%0,%2
+ risbg\t%0,%1,%t2,128+%f2,0
#
#"
- [(set_attr "op_type" "RRE,RXE,RI,RI,RIL,RR,RRF,RX,RXY,SI,SS")
- (set_attr "cpu_facility" "*,*,*,*,*,*,z196,*,*,*,*")
+ [(set_attr "op_type" "RRE,RXE,RI,RI,RIL,RR,RRF,RX,RXY,RIE,SI,SS")
+ (set_attr "cpu_facility" "*,*,*,*,*,*,z196,*,*,z10,*,*")
(set_attr "z10prop" "*,
*,
z10_super_E1,
@@ -6199,6 +6332,7 @@
*,
z10_super_E1,
z10_super_E1,
+ z10_super_E1,
*,
*")])
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index d4e97db8902..793aadace95 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -159,8 +159,6 @@ extern bool sh_expand_t_scc (rtx *);
extern rtx sh_gen_truncate (enum machine_mode, rtx, int);
extern bool sh_vector_mode_supported_p (enum machine_mode);
extern bool sh_cfun_trap_exit_p (void);
-extern void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
- enum machine_mode mode = VOIDmode);
extern rtx sh_find_equiv_gbr_addr (rtx cur_insn, rtx mem);
extern int sh_eval_treg_value (rtx op);
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index cf0abb474be..4a42d7eeb5c 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -314,6 +314,9 @@ static int max_mov_insn_displacement (enum machine_mode, bool);
static int mov_insn_alignment_mask (enum machine_mode, bool);
static HOST_WIDE_INT disp_addr_displacement (rtx);
static bool sequence_insn_p (rtx);
+static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
+static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
+ enum machine_mode, bool);
static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
@@ -586,6 +589,9 @@ static const struct attribute_spec sh_attribute_table[] =
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
+#undef TARGET_CANONICALIZE_COMPARISON
+#define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
+
/* Machine-specific symbol_ref flags. */
#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
@@ -1909,12 +1915,14 @@ prepare_move_operands (rtx operands[], enum machine_mode mode)
}
}
-/* Implement the CANONICALIZE_COMPARISON macro for the combine pass.
- This function is also re-used to canonicalize comparisons in cbranch
- pattern expanders. */
-void
+/* Implement the canonicalize_comparison target hook for the combine
+   pass.  For the target hook, this function is reached via the
+   int-based sh_canonicalize_comparison wrapper below.  It is also
+   re-used to canonicalize comparisons in cbranch pattern expanders.  */
+static void
sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
- enum machine_mode mode)
+ enum machine_mode mode,
+ bool op0_preserve_value ATTRIBUTE_UNUSED)
{
/* When invoked from within the combine pass the mode is not specified,
so try to get it from one of the operands. */
@@ -2008,6 +2016,19 @@ sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
}
}
+/* This function implements the canonicalize_comparison target hook.
+ This wrapper around the internally used sh_canonicalize_comparison
+ function is needed to do the enum rtx_code <-> int conversion.
+   Target hooks cannot use enum rtx_code in their definitions.  */
+static void
+sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
+ bool op0_preserve_value)
+{
+ enum rtx_code tmp_code = (enum rtx_code)*code;
+ sh_canonicalize_comparison (tmp_code, *op0, *op1,
+ VOIDmode, op0_preserve_value);
+ *code = (int)tmp_code;
+}
enum rtx_code
prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
enum rtx_code comparison)
@@ -2021,7 +2042,8 @@ prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
else
scratch = operands[4];
- sh_canonicalize_comparison (comparison, operands[1], operands[2], mode);
+ sh_canonicalize_comparison (comparison, operands[1], operands[2],
+ mode, false);
/* Notice that this function is also invoked after reload by
the cbranchdi4_i pattern, through expand_cbranchdi4. */
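
[Editor's note] The sh.c hunk above splits the former CANONICALIZE_COMPARISON entry point in two: a reference-based worker that keeps the old logic, and an int*-based wrapper with the shape the hook table expects, since the hook interface is declared in terms of plain int rather than enum rtx_code.  The following is a minimal, self-contained sketch of that conversion pattern; the names (cmp_code, my_canonicalize_comparison) and the "fold <= into <" transformation are purely hypothetical stand-ins for the real SH logic.

#include <cstdio>

enum cmp_code { CMP_LT, CMP_LE, CMP_GT, CMP_GE };

/* Reference-based worker, analogous to the enum rtx_code& overload in the
   hunk above.  The rewrite here ("op0 <= N" becomes "op0 < N + 1") is only
   a stand-in for the real SH-specific canonicalization.  */
static void
my_canonicalize_comparison (cmp_code &code, int &op0, int &op1, bool)
{
  if (code == CMP_LE)
    {
      code = CMP_LT;
      op1 = op1 + 1;
    }
}

/* int*-based wrapper with a signature a hook table can hold: convert to the
   enum, delegate to the worker, convert back.  */
static void
my_canonicalize_comparison (int *code, int *op0, int *op1,
                            bool op0_preserve_value)
{
  cmp_code tmp = (cmp_code) *code;
  my_canonicalize_comparison (tmp, *op0, *op1, op0_preserve_value);
  *code = (int) tmp;
}

int
main ()
{
  int code = CMP_LE, lhs = 0, rhs = 4;          /* "lhs <= 4"  */
  my_canonicalize_comparison (&code, &lhs, &rhs, false);
  std::printf ("code=%d rhs=%d\n", code, rhs);  /* now "lhs < 5"  */
  return 0;
}
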
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index 212b97d9c10..76a5cb1edda 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -1873,10 +1873,6 @@ struct sh_args {
more compact code. */
#define SHIFT_COUNT_TRUNCATED (0)
-/* CANONICALIZE_COMPARISON macro for the combine pass. */
-#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \
- sh_canonicalize_comparison ((CODE), (OP0), (OP1))
-
/* All integers have the same format so truncation is easy. */
/* But SHmedia must sign-extend DImode when truncating to SImode. */
#define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) \
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 82fb4340cf5..15e7c2e2bba 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -7095,6 +7095,20 @@ spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
final_end_function ();
}
+/* Canonicalize a comparison from one we don't have to one we do have. */
+static void
+spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
+ bool op0_preserve_value)
+{
+ if (!op0_preserve_value
+ && (*code == LE || *code == LT || *code == LEU || *code == LTU))
+ {
+ rtx tem = *op0;
+ *op0 = *op1;
+ *op1 = tem;
+ *code = (int)swap_condition ((enum rtx_code)*code);
+ }
+}
/* Table of machine attributes. */
static const struct attribute_spec spu_attribute_table[] =
@@ -7308,6 +7322,9 @@ static const struct attribute_spec spu_attribute_table[] =
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true
+#undef TARGET_CANONICALIZE_COMPARISON
+#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-spu.h"
diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h
index ce0bc8edb5f..031b80e1602 100644
--- a/gcc/config/spu/spu.h
+++ b/gcc/config/spu/spu.h
@@ -520,18 +520,6 @@ do { \
#define NO_IMPLICIT_EXTERN_C 1
-/* Canonicalize a comparison from one we don't have to one we do have. */
-#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \
- do { \
- if (((CODE) == LE || (CODE) == LT || (CODE) == LEU || (CODE) == LTU)) \
- { \
- rtx tem = (OP0); \
- (OP0) = (OP1); \
- (OP1) = tem; \
- (CODE) = swap_condition (CODE); \
- } \
- } while (0)
-
/* Address spaces. */
#define ADDR_SPACE_EA 1