From 65cbf05437b8a57ff08846beb19407c9e0dd2553 Mon Sep 17 00:00:00 2001 From: bstarynk Date: Mon, 23 Apr 2012 12:53:36 +0000 Subject: 2012-04-23 Basile Starynkevitch MELT branch merged with trunk rev 186692 using svnmerge [gcc/] 2012-04-23 Basile Starynkevitch {{improvements for merging with GCC 4.8 trunk svn rev 186692}} * melt-run.proto.h (MELT_GCC_VERSION): Define, if unknown, in the generated melt-run.h. * melt-runtime.c (melt_val2passflag): TODO_dump_func & TODO_dump_cgraph don't exist in GCC 4.8. * melt-build.tpl: Say flavor, not variant! Build first the quicklybuilt application modules, to catch errors in macro C strings. * melt-build.mk: Regenerate. * melt/warmelt-base.melt (valdesc_strbuf): Check for MELT_GCC_VERSION also. * melt/warmelt-genobj.melt (compilobj_nrep_citeration): Use meltcit prefix in generated citerator names. * melt/warmelt-outobj.melt (syntestgen_citerator): Use meltcitstate prefix. * melt/xtramelt-ana-base.melt (each_cgraph_fun_body) (each_cgraph_fun_entryblock, each_cgraph_fun_call_flow_graph) (each_bb_cfun, with_cfun_decl): Adapt to GCC 4.8, add documentation. 
(each_cgraph_decl): Only for GCC 4.6 & 4.7 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@186705 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/config/i386/sse.md | 1949 +++++++++++------------------------------------- 1 file changed, 436 insertions(+), 1513 deletions(-) (limited to 'gcc/config/i386/sse.md') diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b63d774e43f..d270c634ae0 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -79,8 +79,7 @@ UNSPEC_VCVTPS2PH ;; For AVX2 support - UNSPEC_VPERMSI - UNSPEC_VPERMSF + UNSPEC_VPERMVAR UNSPEC_VPERMTI UNSPEC_GATHER UNSPEC_VSIBADDR @@ -5305,83 +5304,33 @@ (sign_extend:V8SI (vec_select:V8HI (match_operand:V16HI 1 "nonimmediate_operand") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)]))) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)]))) (sign_extend:V8SI (vec_select:V8HI (match_operand:V16HI 2 "nonimmediate_operand") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)])))) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)])))) (mult:V8SI (sign_extend:V8SI (vec_select:V8HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)]))) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))) (sign_extend:V8SI (vec_select:V8HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)]))))))] + (parallel [(const_int 1) (const_int 3) + (const_int 5) 
(const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))))))] "TARGET_AVX2" "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") -(define_expand "sse2_pmaddwd" - [(set (match_operand:V4SI 0 "register_operand") - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)])))) - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) - (sign_extend:V4SI - (vec_select:V4HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))))))] - "TARGET_SSE2" - "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") - (define_insn "*avx2_pmaddwd" [(set (match_operand:V8SI 0 "register_operand" "=x") (plus:V8SI @@ -5389,52 +5338,62 @@ (sign_extend:V8SI (vec_select:V8HI (match_operand:V16HI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)]))) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)]))) (sign_extend:V8SI (vec_select:V8HI (match_operand:V16HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)])))) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)])))) (mult:V8SI (sign_extend:V8SI (vec_select:V8HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 
11) - (const_int 13) - (const_int 15)]))) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))) (sign_extend:V8SI (vec_select:V8HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)]))))))] + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))))))] "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)" "vpmaddwd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) +(define_expand "sse2_pmaddwd" + [(set (match_operand:V4SI 0 "register_operand") + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 1 "nonimmediate_operand") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) + (sign_extend:V4SI + (vec_select:V4HI + (match_operand:V8HI 2 "nonimmediate_operand") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))) + (mult:V4SI + (sign_extend:V4SI + (vec_select:V4HI (match_dup 1) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))) + (sign_extend:V4SI + (vec_select:V4HI (match_dup 2) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))))))] + "TARGET_SSE2" + "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") + (define_insn "*sse2_pmaddwd" [(set (match_operand:V4SI 0 "register_operand" "=x,x") (plus:V4SI @@ -5442,30 +5401,22 @@ (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") - (parallel [(const_int 0) - (const_int 2) - 
(const_int 4) - (const_int 6)])))) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))) (mult:V4SI (sign_extend:V4SI (vec_select:V4HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))) (sign_extend:V4SI (vec_select:V4HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))))))] + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))))))] "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" "@ pmaddwd\t{%2, %0|%0, %2} @@ -5489,8 +5440,9 @@ (define_insn "*_mul3" [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x") - (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x") - (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))] + (mult:VI4_AVX2 + (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x") + (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))] "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, mode, operands)" "@ pmulld\t{%2, %0|%0, %2} @@ -7021,9 +6973,10 @@ rtx t2 = gen_reg_rtx (mode); emit_insn (gen_avx2_interleave_low (t1, operands[1], operands[2])); emit_insn (gen_avx2_interleave_high (t2, operands[1], operands[2])); - emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]), - gen_lowpart (V4DImode, t1), - gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4)))); + emit_insn (gen_avx2_permv2ti + (gen_lowpart (V4DImode, operands[0]), + gen_lowpart (V4DImode, t1), + gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4)))); DONE; }) @@ -7037,9 +6990,10 @@ rtx t2 = gen_reg_rtx (mode); emit_insn (gen_avx2_interleave_low (t1, operands[1], operands[2])); emit_insn (gen_avx2_interleave_high (t2, operands[1], operands[2])); - emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]), - gen_lowpart (V4DImode, t1), - gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4)))); + emit_insn (gen_avx2_permv2ti + 
(gen_lowpart (V4DImode, operands[0]), + gen_lowpart (V4DImode, t1), + gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4)))); DONE; }) @@ -8038,9 +7992,10 @@ ;; surely not generally useful. (define_insn "_psadbw" [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x") - (unspec:VI8_AVX2 [(match_operand: 1 "register_operand" "0,x") - (match_operand: 2 "nonimmediate_operand" "xm,xm")] - UNSPEC_PSADBW))] + (unspec:VI8_AVX2 + [(match_operand: 1 "register_operand" "0,x") + (match_operand: 2 "nonimmediate_operand" "xm,xm")] + UNSPEC_PSADBW))] "TARGET_SSE2" "@ psadbw\t{%2, %0|%0, %2} @@ -8176,123 +8131,125 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "avx2_phaddwv16hi3" +(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus]) + +(define_insn "avx2_phwv16hi3" [(set (match_operand:V16HI 0 "register_operand" "=x") (vec_concat:V16HI (vec_concat:V8HI (vec_concat:V4HI (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_operand:V16HI 1 "register_operand" "x") (parallel [(const_int 0)])) (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) (vec_concat:V4HI (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 
1) (parallel [(const_int 12)])) (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) (vec_concat:V8HI (vec_concat:V4HI (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_operand:V16HI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0)])) (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) (vec_concat:V4HI (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))] "TARGET_AVX2" - "vphaddw\t{%2, %1, %0|%0, %1, %2}" + "vphw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn "ssse3_phaddwv8hi3" +(define_insn "ssse3_phwv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI (vec_concat:V4HI (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI 
(vec_select:HI (match_operand:V8HI 1 "register_operand" "0,x") (parallel [(const_int 0)])) (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) (vec_concat:V4HI (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0)])) (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] "TARGET_SSSE3" "@ - phaddw\t{%2, %0|%0, %2} - vphaddw\t{%2, %1, %0|%0, %1, %2}" + phw\t{%2, %0|%0, %2} + vphw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") @@ -8301,104 +8258,104 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "ssse3_phaddwv4hi3" +(define_insn "ssse3_phwv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") (vec_concat:V4HI (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_operand:V4HI 1 "register_operand" "0") (parallel [(const_int 0)])) (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI 
(match_dup 1) (parallel [(const_int 2)])) (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) (vec_concat:V2HI - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_operand:V4HI 2 "nonimmediate_operand" "ym") (parallel [(const_int 0)])) (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (plus:HI + (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] "TARGET_SSSE3" - "phaddw\t{%2, %0|%0, %2}" + "phw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "avx2_phadddv8si3" +(define_insn "avx2_phdv8si3" [(set (match_operand:V8SI 0 "register_operand" "=x") (vec_concat:V8SI (vec_concat:V4SI (vec_concat:V2SI - (plus:SI + (plusminus:SI (vec_select:SI (match_operand:V8SI 1 "register_operand" "x") (parallel [(const_int 0)])) (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) - (plus:SI + (plusminus:SI (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) (vec_concat:V2SI - (plus:SI + (plusminus:SI (vec_select:SI (match_dup 1) (parallel [(const_int 4)])) (vec_select:SI (match_dup 1) (parallel [(const_int 5)]))) - (plus:SI + (plusminus:SI (vec_select:SI (match_dup 1) (parallel [(const_int 6)])) (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))) (vec_concat:V4SI (vec_concat:V2SI - (plus:SI + (plusminus:SI (vec_select:SI (match_operand:V8SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0)])) (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) - (plus:SI + (plusminus:SI (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))) (vec_concat:V2SI - (plus:SI + (plusminus:SI (vec_select:SI (match_dup 2) (parallel [(const_int 4)])) (vec_select:SI (match_dup 2) (parallel 
[(const_int 5)]))) - (plus:SI + (plusminus:SI (vec_select:SI (match_dup 2) (parallel [(const_int 6)])) (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))] "TARGET_AVX2" - "vphaddd\t{%2, %1, %0|%0, %1, %2}" + "vphd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn "ssse3_phadddv4si3" +(define_insn "ssse3_phdv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x,x") (vec_concat:V4SI (vec_concat:V2SI - (plus:SI + (plusminus:SI (vec_select:SI (match_operand:V4SI 1 "register_operand" "0,x") (parallel [(const_int 0)])) (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) - (plus:SI + (plusminus:SI (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) (vec_concat:V2SI - (plus:SI + (plusminus:SI (vec_select:SI (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm") (parallel [(const_int 0)])) (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) - (plus:SI + (plusminus:SI (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] "TARGET_SSSE3" "@ - phaddd\t{%2, %0|%0, %2} - vphaddd\t{%2, %1, %0|%0, %1, %2}" + phd\t{%2, %0|%0, %2} + vphd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") @@ -8407,793 +8364,176 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "ssse3_phadddv2si3" +(define_insn "ssse3_phdv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") (vec_concat:V2SI - (plus:SI + (plusminus:SI (vec_select:SI (match_operand:V2SI 1 "register_operand" "0") (parallel [(const_int 0)])) (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) - (plus:SI + (plusminus:SI (vec_select:SI (match_operand:V2SI 2 "nonimmediate_operand" "ym") (parallel [(const_int 0)])) (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] "TARGET_SSSE3" - 
"phaddd\t{%2, %0|%0, %2}" + "phd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "avx2_phaddswv16hi3" +(define_insn "avx2_pmaddubsw256" [(set (match_operand:V16HI 0 "register_operand" "=x") - (vec_concat:V16HI - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI - (match_operand:V16HI 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI - (match_operand:V16HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 
3)])))) - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))] + (ss_plus:V16HI + (mult:V16HI + (zero_extend:V16HI + (vec_select:V16QI + (match_operand:V32QI 1 "register_operand" "x") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14) + (const_int 16) (const_int 18) + (const_int 20) (const_int 22) + (const_int 24) (const_int 26) + (const_int 28) (const_int 30)]))) + (sign_extend:V16HI + (vec_select:V16QI + (match_operand:V32QI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14) + (const_int 16) (const_int 18) + (const_int 20) (const_int 22) + (const_int 24) (const_int 26) + (const_int 28) (const_int 30)])))) + (mult:V16HI + (zero_extend:V16HI + (vec_select:V16QI (match_dup 1) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15) + (const_int 17) (const_int 19) + (const_int 21) (const_int 23) + (const_int 25) (const_int 27) + (const_int 29) (const_int 31)]))) + 
(sign_extend:V16HI + (vec_select:V16QI (match_dup 2) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15) + (const_int 17) (const_int 19) + (const_int 21) (const_int 23) + (const_int 25) (const_int 27) + (const_int 29) (const_int 31)]))))))] "TARGET_AVX2" - "vphaddsw\t{%2, %1, %0|%0, %1, %2}" + "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -(define_insn "ssse3_phaddswv8hi3" +(define_insn "ssse3_pmaddubsw128" [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI - (match_operand:V8HI 1 "register_operand" "0,x") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] + (ss_plus:V8HI + (mult:V8HI + (zero_extend:V8HI + 
(vec_select:V8QI + (match_operand:V16QI 1 "register_operand" "0,x") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)]))) + (sign_extend:V8HI + (vec_select:V8QI + (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)])))) + (mult:V8HI + (zero_extend:V8HI + (vec_select:V8QI (match_dup 1) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))) + (sign_extend:V8HI + (vec_select:V8QI (match_dup 2) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)]))))))] "TARGET_SSSE3" "@ - phaddsw\t{%2, %0|%0, %2} - vphaddsw\t{%2, %1, %0|%0, %1, %2}" + pmaddubsw\t{%2, %0|%0, %2} + vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") - (set_attr "atom_unit" "complex") + (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "ssse3_phaddswv4hi3" +(define_insn "ssse3_pmaddubsw" [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_concat:V4HI - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI - (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (ss_plus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_plus:HI - (vec_select:HI - (match_operand:V4HI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (ss_plus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) 
(parallel [(const_int 3)]))))))] + (ss_plus:V4HI + (mult:V4HI + (zero_extend:V4HI + (vec_select:V4QI + (match_operand:V8QI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) + (sign_extend:V4HI + (vec_select:V4QI + (match_operand:V8QI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))) + (mult:V4HI + (zero_extend:V4HI + (vec_select:V4QI (match_dup 1) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))) + (sign_extend:V4HI + (vec_select:V4QI (match_dup 2) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))))))] "TARGET_SSSE3" - "phaddsw\t{%2, %0|%0, %2}" + "pmaddubsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") - (set_attr "atom_unit" "complex") + (set_attr "atom_unit" "simul") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) (set_attr "mode" "DI")]) -(define_insn "avx2_phsubwv16hi3" - [(set (match_operand:V16HI 0 "register_operand" "=x") - (vec_concat:V16HI - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (minus:HI - (vec_select:HI - (match_operand:V16HI 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) - (vec_select:HI (match_dup 1) 
(parallel [(const_int 11)])))) - (vec_concat:V2HI - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (minus:HI - (vec_select:HI - (match_operand:V16HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) - (vec_concat:V2HI - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))] - "TARGET_AVX2" - "vphsubw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "ssse3_phsubwv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (minus:HI - (vec_select:HI - (match_operand:V8HI 1 "register_operand" "0,x") - (parallel [(const_int 0)])) - 
(vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (minus:HI - (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] - "TARGET_SSSE3" - "@ - phsubw\t{%2, %0|%0, %2} - vphsubw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseiadd") - (set_attr "atom_unit" "complex") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) - -(define_insn "ssse3_phsubwv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_concat:V4HI - (vec_concat:V2HI - (minus:HI - (vec_select:HI - (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (minus:HI - (vec_select:HI - (match_operand:V4HI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 
2) (parallel [(const_int 1)]))) - (minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] - "TARGET_SSSE3" - "phsubw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "atom_unit" "complex") - (set_attr "prefix_extra" "1") - (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) - -(define_insn "avx2_phsubdv8si3" - [(set (match_operand:V8SI 0 "register_operand" "=x") - (vec_concat:V8SI - (vec_concat:V4SI - (vec_concat:V2SI - (minus:SI - (vec_select:SI - (match_operand:V8SI 1 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) - (minus:SI - (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2SI - (minus:SI - (vec_select:SI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 5)]))) - (minus:SI - (vec_select:SI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4SI - (vec_concat:V2SI - (minus:SI - (vec_select:SI - (match_operand:V8SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) - (minus:SI - (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))) - (vec_concat:V2SI - (minus:SI - (vec_select:SI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 5)]))) - (minus:SI - (vec_select:SI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))] - "TARGET_AVX2" - "vphsubd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "ssse3_phsubdv4si3" - [(set 
(match_operand:V4SI 0 "register_operand" "=x,x") - (vec_concat:V4SI - (vec_concat:V2SI - (minus:SI - (vec_select:SI - (match_operand:V4SI 1 "register_operand" "0,x") - (parallel [(const_int 0)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) - (minus:SI - (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2SI - (minus:SI - (vec_select:SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm") - (parallel [(const_int 0)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) - (minus:SI - (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] - "TARGET_SSSE3" - "@ - phsubd\t{%2, %0|%0, %2} - vphsubd\t{%2, %1, %0|%0, %1, %2}" - - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseiadd") - (set_attr "atom_unit" "complex") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) - -(define_insn "ssse3_phsubdv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") - (vec_concat:V2SI - (minus:SI - (vec_select:SI - (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) - (minus:SI - (vec_select:SI - (match_operand:V2SI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])) - (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] - "TARGET_SSSE3" - "phsubd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "atom_unit" "complex") - (set_attr "prefix_extra" "1") - (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) - -(define_insn "avx2_phsubswv16hi3" - [(set (match_operand:V16HI 0 "register_operand" "=x") - (vec_concat:V16HI - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI - (match_operand:V16HI 1 "register_operand" "x") - (parallel 
[(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI - (match_operand:V16HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel 
[(const_int 10)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))] - "TARGET_AVX2" - "vphsubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "ssse3_phsubswv8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (vec_concat:V8HI - (vec_concat:V4HI - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI - (match_operand:V8HI 1 "register_operand" "0,x") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) - (vec_concat:V4HI - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] - "TARGET_SSSE3" - "@ - phsubsw\t{%2, %0|%0, %2} - 
vphsubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseiadd") - (set_attr "atom_unit" "complex") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) - -(define_insn "ssse3_phsubswv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (vec_concat:V4HI - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI - (match_operand:V4HI 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) - (ss_minus:HI - (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) - (vec_concat:V2HI - (ss_minus:HI - (vec_select:HI - (match_operand:V4HI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) - (ss_minus:HI - (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) - (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] - "TARGET_SSSE3" - "phsubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "atom_unit" "complex") - (set_attr "prefix_extra" "1") - (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) - -(define_insn "avx2_pmaddubsw256" - [(set (match_operand:V16HI 0 "register_operand" "=x") - (ss_plus:V16HI - (mult:V16HI - (zero_extend:V16HI - (vec_select:V16QI - (match_operand:V32QI 1 "register_operand" "x") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14) - (const_int 16) - (const_int 18) - (const_int 20) - (const_int 22) - (const_int 24) - (const_int 26) - (const_int 28) - (const_int 30)]))) - (sign_extend:V16HI - (vec_select:V16QI - (match_operand:V32QI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - 
(const_int 14) - (const_int 16) - (const_int 18) - (const_int 20) - (const_int 22) - (const_int 24) - (const_int 26) - (const_int 28) - (const_int 30)])))) - (mult:V16HI - (zero_extend:V16HI - (vec_select:V16QI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15) - (const_int 17) - (const_int 19) - (const_int 21) - (const_int 23) - (const_int 25) - (const_int 27) - (const_int 29) - (const_int 31)]))) - (sign_extend:V16HI - (vec_select:V16QI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15) - (const_int 17) - (const_int 19) - (const_int 21) - (const_int 23) - (const_int 25) - (const_int 27) - (const_int 29) - (const_int 31)]))))))] - "TARGET_AVX2" - "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "ssse3_pmaddubsw128" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (ss_plus:V8HI - (mult:V8HI - (zero_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "register_operand" "0,x") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)]))) - (sign_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)])))) - (mult:V8HI - (zero_extend:V8HI - (vec_select:V8QI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)]))) - (sign_extend:V8HI - (vec_select:V8QI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - 
(const_int 13) - (const_int 15)]))))))] - "TARGET_SSSE3" - "@ - pmaddubsw\t{%2, %0|%0, %2} - vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseiadd") - (set_attr "atom_unit" "simul") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "TI")]) - -(define_insn "ssse3_pmaddubsw" - [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_plus:V4HI - (mult:V4HI - (zero_extend:V4HI - (vec_select:V4QI - (match_operand:V8QI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (sign_extend:V4HI - (vec_select:V4QI - (match_operand:V8QI 2 "nonimmediate_operand" "ym") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)])))) - (mult:V4HI - (zero_extend:V4HI - (vec_select:V4QI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) - (sign_extend:V4HI - (vec_select:V4QI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))))))] - "TARGET_SSSE3" - "pmaddubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") - (set_attr "atom_unit" "simul") - (set_attr "prefix_extra" "1") - (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) - -(define_expand "avx2_umulhrswv16hi3" - [(set (match_operand:V16HI 0 "register_operand") - (truncate:V16HI - (lshiftrt:V16SI - (plus:V16SI - (lshiftrt:V16SI - (mult:V16SI - (sign_extend:V16SI - (match_operand:V16HI 1 "nonimmediate_operand")) - (sign_extend:V16SI - (match_operand:V16HI 2 "nonimmediate_operand"))) - (const_int 14)) - (const_vector:V16HI [(const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1) - (const_int 1) (const_int 1)])) - (const_int 1))))] +(define_expand "avx2_umulhrswv16hi3" + 
[(set (match_operand:V16HI 0 "register_operand") + (truncate:V16HI + (lshiftrt:V16SI + (plus:V16SI + (lshiftrt:V16SI + (mult:V16SI + (sign_extend:V16SI + (match_operand:V16HI 1 "nonimmediate_operand")) + (sign_extend:V16SI + (match_operand:V16HI 2 "nonimmediate_operand"))) + (const_int 14)) + (const_vector:V16HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1))))] "TARGET_AVX2" "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") @@ -9315,9 +8655,10 @@ (define_insn "_pshufb3" [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") - (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x") - (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")] - UNSPEC_PSHUFB))] + (unspec:VI1_AVX2 + [(match_operand:VI1_AVX2 1 "register_operand" "0,x") + (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")] + UNSPEC_PSHUFB))] "TARGET_SSSE3" "@ pshufb\t{%2, %0|%0, %2} @@ -9373,10 +8714,11 @@ (define_insn "_palignr" [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x") - (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x") - (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm") - (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] - UNSPEC_PALIGNR))] + (unspec:SSESCALARMODE + [(match_operand:SSESCALARMODE 1 "register_operand" "0,x") + (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] + UNSPEC_PALIGNR))] "TARGET_SSSE3" { operands[3] = GEN_INT (INTVAL (operands[3]) / 8); @@ -9596,10 +8938,11 @@ (define_insn "_mpsadbw" [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") - (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x") - (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm") - 
(match_operand:SI 3 "const_0_to_255_operand" "n,n")] - UNSPEC_MPSADBW))] + (unspec:VI1_AVX2 + [(match_operand:VI1_AVX2 1 "register_operand" "0,x") + (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm") + (match_operand:SI 3 "const_0_to_255_operand" "n,n")] + UNSPEC_MPSADBW))] "TARGET_SSE4_1" "@ mpsadbw\t{%3, %2, %0|%0, %2, %3} @@ -9748,14 +9091,10 @@ (any_extend:V8HI (vec_select:V8QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3) - (const_int 4) - (const_int 5) - (const_int 6) - (const_int 7)]))))] + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))))] "TARGET_SSE4_1" "%vpmovbw\t{%1, %0|%0, %q1}" [(set_attr "type" "ssemov") @@ -9768,14 +9107,10 @@ (any_extend:V8SI (vec_select:V8QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3) - (const_int 4) - (const_int 5) - (const_int 6) - (const_int 7)]))))] + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))))] "TARGET_AVX2" "vpmovbd\t{%1, %0|%0, %q1}" [(set_attr "type" "ssemov") @@ -9788,10 +9123,8 @@ (any_extend:V4SI (vec_select:V4QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3)]))))] + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))))] "TARGET_SSE4_1" "%vpmovbd\t{%1, %0|%0, %k1}" [(set_attr "type" "ssemov") @@ -9815,10 +9148,8 @@ (any_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3)]))))] + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))))] "TARGET_SSE4_1" "%vpmovwd\t{%1, %0|%0, %q1}" [(set_attr "type" "ssemov") @@ -9831,10 +9162,8 @@ (any_extend:V4DI (vec_select:V4QI 
(match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3)]))))] + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))))] "TARGET_AVX2" "vpmovbq\t{%1, %0|%0, %k1}" [(set_attr "type" "ssemov") @@ -9847,8 +9176,7 @@ (any_extend:V2DI (vec_select:V2QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1)]))))] + (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1" "%vpmovbq\t{%1, %0|%0, %w1}" [(set_attr "type" "ssemov") @@ -9861,10 +9189,8 @@ (any_extend:V4DI (vec_select:V4HI (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1) - (const_int 2) - (const_int 3)]))))] + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)]))))] "TARGET_AVX2" "vpmovwq\t{%1, %0|%0, %q1}" [(set_attr "type" "ssemov") @@ -9877,8 +9203,7 @@ (any_extend:V2DI (vec_select:V2HI (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1)]))))] + (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1" "%vpmovwq\t{%1, %0|%0, %k1}" [(set_attr "type" "ssemov") @@ -9901,8 +9226,7 @@ (any_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 1)]))))] + (parallel [(const_int 0) (const_int 1)]))))] "TARGET_SSE4_1" "%vpmovdq\t{%1, %0|%0, %q1}" [(set_attr "type" "ssemov") @@ -10397,112 +9721,61 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_code_iterator xop_plus [plus ss_plus]) + +(define_code_attr macs [(plus "macs") (ss_plus "macss")]) +(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")]) + ;; XOP parallel integer multiply/add instructions. ;; Note the XOP multiply/add instructions ;; a[i] = b[i] * c[i] + d[i]; ;; do not allow the value being added to be a memory operation. 
-(define_insn "xop_pmacsww" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (plus:V8HI - (mult:V8HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")) - (match_operand:V8HI 3 "nonimmediate_operand" "x")))] - "TARGET_XOP" - "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "xop_pmacssww" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (ss_plus:V8HI - (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x") - (match_operand:V8HI 2 "nonimmediate_operand" "xm")) - (match_operand:V8HI 3 "nonimmediate_operand" "x")))] - "TARGET_XOP" - "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "xop_pmacsdd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI - (mult:V4SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")) - (match_operand:V4SI 3 "nonimmediate_operand" "x")))] - "TARGET_XOP" - "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) -(define_insn "xop_pmacssdd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (ss_plus:V4SI - (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")) - (match_operand:V4SI 3 "nonimmediate_operand" "x")))] - "TARGET_XOP" - "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "xop_pmacssdql" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (ss_plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 1) - (const_int 3)]))) - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)]))) - (match_operand:V2DI 3 "nonimmediate_operand" 
"x")))] +(define_insn "xop_p" + [(set (match_operand:VI24_128 0 "register_operand" "=x") + (xop_plus:VI24_128 + (mult:VI24_128 + (match_operand:VI24_128 1 "nonimmediate_operand" "%x") + (match_operand:VI24_128 2 "nonimmediate_operand" "xm")) + (match_operand:VI24_128 3 "nonimmediate_operand" "x")))] "TARGET_XOP" - "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vp\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn "xop_pmacssdqh" +(define_insn "xop_pdql" [(set (match_operand:V2DI 0 "register_operand" "=x") - (ss_plus:V2DI + (xop_plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) + (parallel [(const_int 1) (const_int 3)]))) (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))) + (parallel [(const_int 1) (const_int 3)])))) (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" - "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vpdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn "xop_pmacsdql" +(define_insn "xop_pdqh" [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI + (xop_plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 1) - (const_int 3)]))) + (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)])))) + (parallel [(const_int 0) (const_int 2)])))) (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" - "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vpdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) @@ -10516,13 +9789,11 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "register_operand" 
"%x") - (parallel [(const_int 1) - (const_int 3)]))) + (parallel [(const_int 1) (const_int 3)]))) (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)])))))] + (parallel [(const_int 1) (const_int 3)])))))] "TARGET_XOP" "#" "&& reload_completed" @@ -10534,13 +9805,11 @@ (sign_extend:V2DI (vec_select:V2SI (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) + (parallel [(const_int 1) (const_int 3)]))) (sign_extend:V2DI (vec_select:V2SI (match_dup 2) - (parallel [(const_int 1) - (const_int 3)])))) + (parallel [(const_int 1) (const_int 3)])))) (match_dup 0)))] { operands[3] = CONST0_RTX (V2DImode); @@ -10548,26 +9817,6 @@ [(set_attr "type" "ssemul") (set_attr "mode" "TI")]) -(define_insn "xop_pmacsdqh" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))) - (match_operand:V2DI 3 "nonimmediate_operand" "x")))] - "TARGET_XOP" - "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so ;; fake it with a multiply/add. 
In general, we expect the define_split to ;; occur before register allocation, so we have to handle the corner case where @@ -10578,13 +9827,11 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "register_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) + (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))))] + (parallel [(const_int 0) (const_int 2)])))))] "TARGET_XOP" "#" "&& reload_completed" @@ -10596,13 +9843,11 @@ (sign_extend:V2DI (vec_select:V2SI (match_dup 1) - (parallel [(const_int 0) - (const_int 2)]))) + (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI (match_dup 2) - (parallel [(const_int 0) - (const_int 2)])))) + (parallel [(const_int 0) (const_int 2)])))) (match_dup 0)))] { operands[3] = CONST0_RTX (V2DImode); @@ -10611,131 +9856,55 @@ (set_attr "mode" "TI")]) ;; XOP parallel integer multiply/add instructions for the intrinisics -(define_insn "xop_pmacsswd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (ss_plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))) - (match_operand:V4SI 3 "nonimmediate_operand" "x")))] - "TARGET_XOP" - "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "xop_pmacswd" +(define_insn "xop_pwd" [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI + (xop_plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) + (parallel [(const_int 
1) (const_int 3) + (const_int 5) (const_int 7)]))) (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))) - (match_operand:V4SI 3 "nonimmediate_operand" "x")))] - "TARGET_XOP" - "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "xop_pmadcsswd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (ss_plus:V4SI - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)])))) - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))))) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)])))) (match_operand:V4SI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" - "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vpwd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn "xop_pmadcswd" +(define_insn "xop_pwd" [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI + (xop_plus:V4SI (plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)])))) 
+ (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)])))) (mult:V4SI (sign_extend:V4SI (vec_select:V4HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))) (sign_extend:V4SI (vec_select:V4HI (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))))) + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)]))))) (match_operand:V4SI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" - "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vpwd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) @@ -10751,376 +9920,156 @@ [(set_attr "type" "sse4arg")]) ;; XOP horizontal add/subtract instructions -(define_insn "xop_phaddbw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (plus:V8HI - (sign_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)]))) - (sign_extend:V8HI - (vec_select:V8QI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)])))))] - "TARGET_XOP" - "vphaddbw\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "xop_phaddbd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI - (plus:V4SI - (sign_extend:V4SI - (vec_select:V4QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4) - (const_int 8) - (const_int 12)]))) - (sign_extend:V4SI - (vec_select:V4QI - (match_dup 1) - (parallel [(const_int 1) - (const_int 5) - (const_int 9) - (const_int 13)])))) - (plus:V4SI - (sign_extend:V4SI - (vec_select:V4QI - (match_dup 1) - (parallel [(const_int 2) - (const_int 6) - (const_int 10) - (const_int 14)]))) - 
(sign_extend:V4SI - (vec_select:V4QI - (match_dup 1) - (parallel [(const_int 3) - (const_int 7) - (const_int 11) - (const_int 15)]))))))] - "TARGET_XOP" - "vphaddbd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "xop_phaddbq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI - (plus:V2DI - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4)]))) - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 1) - (const_int 5)])))) - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 2) - (const_int 6)]))) - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 3) - (const_int 7)]))))) - (plus:V2DI - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 8) - (const_int 12)]))) - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 9) - (const_int 13)])))) - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 10) - (const_int 14)]))) - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 11) - (const_int 15)])))))))] - "TARGET_XOP" - "vphaddbq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "xop_phaddwd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))))] - "TARGET_XOP" - "vphaddwd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "xop_phaddwq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2HI - (match_operand:V8HI 1 
"nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4)]))) - (sign_extend:V2DI - (vec_select:V2HI - (match_dup 1) - (parallel [(const_int 1) - (const_int 5)])))) - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2HI - (match_dup 1) - (parallel [(const_int 2) - (const_int 6)]))) - (sign_extend:V2DI - (vec_select:V2HI - (match_dup 1) - (parallel [(const_int 3) - (const_int 7)]))))))] - "TARGET_XOP" - "vphaddwq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "xop_phadddq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3)])))))] - "TARGET_XOP" - "vphadddq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "xop_phaddubw" +(define_insn "xop_phadd<u>bw" [(set (match_operand:V8HI 0 "register_operand" "=x") (plus:V8HI - (zero_extend:V8HI + (any_extend:V8HI (vec_select:V8QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)]))) - (zero_extend:V8HI + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)]))) + (any_extend:V8HI (vec_select:V8QI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)])))))] + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)])))))] "TARGET_XOP" - "vphaddubw\t{%1, %0|%0, %1}" + "vphadd<u>bw\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "xop_phaddubd" +(define_insn "xop_phadd<u>bd" [(set (match_operand:V4SI 0 "register_operand" "=x") 
(plus:V4SI (plus:V4SI - (zero_extend:V4SI + (any_extend:V4SI (vec_select:V4QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4) - (const_int 8) - (const_int 12)]))) - (zero_extend:V4SI + (parallel [(const_int 0) (const_int 4) + (const_int 8) (const_int 12)]))) + (any_extend:V4SI (vec_select:V4QI (match_dup 1) - (parallel [(const_int 1) - (const_int 5) - (const_int 9) - (const_int 13)])))) + (parallel [(const_int 1) (const_int 5) + (const_int 9) (const_int 13)])))) (plus:V4SI - (zero_extend:V4SI + (any_extend:V4SI (vec_select:V4QI (match_dup 1) - (parallel [(const_int 2) - (const_int 6) - (const_int 10) - (const_int 14)]))) - (zero_extend:V4SI + (parallel [(const_int 2) (const_int 6) + (const_int 10) (const_int 14)]))) + (any_extend:V4SI (vec_select:V4QI (match_dup 1) - (parallel [(const_int 3) - (const_int 7) - (const_int 11) - (const_int 15)]))))))] + (parallel [(const_int 3) (const_int 7) + (const_int 11) (const_int 15)]))))))] "TARGET_XOP" - "vphaddubd\t{%1, %0|%0, %1}" + "vphadd<u>bd\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "xop_phaddubq" +(define_insn "xop_phadd<u>bq" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (plus:V2DI (plus:V2DI - (zero_extend:V2DI + (any_extend:V2DI (vec_select:V2QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4)]))) - (sign_extend:V2DI + (parallel [(const_int 0) (const_int 4)]))) + (any_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 1) - (const_int 5)])))) + (parallel [(const_int 1) (const_int 5)])))) (plus:V2DI - (zero_extend:V2DI + (any_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 2) - (const_int 6)]))) - (zero_extend:V2DI + (parallel [(const_int 2) (const_int 6)]))) + (any_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 3) - (const_int 7)]))))) + (parallel [(const_int 3) (const_int 7)]))))) (plus:V2DI (plus:V2DI - (zero_extend:V2DI + 
(any_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 8) - (const_int 12)]))) - (sign_extend:V2DI + (parallel [(const_int 8) (const_int 12)]))) + (any_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 9) - (const_int 13)])))) + (parallel [(const_int 9) (const_int 13)])))) (plus:V2DI - (zero_extend:V2DI + (any_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 10) - (const_int 14)]))) - (zero_extend:V2DI + (parallel [(const_int 10) (const_int 14)]))) + (any_extend:V2DI (vec_select:V2QI (match_dup 1) - (parallel [(const_int 11) - (const_int 15)])))))))] + (parallel [(const_int 11) (const_int 15)])))))))] "TARGET_XOP" - "vphaddubq\t{%1, %0|%0, %1}" + "vphadd<u>bq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "xop_phadduwd" +(define_insn "xop_phadd<u>wd" [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI - (zero_extend:V4SI + (any_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (zero_extend:V4SI + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) + (any_extend:V4SI (vec_select:V4HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))))] + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)])))))] "TARGET_XOP" - "vphadduwd\t{%1, %0|%0, %1}" + "vphadd<u>wd\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "xop_phadduwq" +(define_insn "xop_phadd<u>wq" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (plus:V2DI - (zero_extend:V2DI + (any_extend:V2DI (vec_select:V2HI (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4)]))) - (zero_extend:V2DI + (parallel [(const_int 0) (const_int 4)]))) + (any_extend:V2DI (vec_select:V2HI (match_dup 1) - (parallel [(const_int 1) - (const_int 5)])))) + (parallel [(const_int 1) (const_int 5)])))) (plus:V2DI - 
(zero_extend:V2DI + (any_extend:V2DI (vec_select:V2HI (match_dup 1) - (parallel [(const_int 2) - (const_int 6)]))) - (zero_extend:V2DI + (parallel [(const_int 2) (const_int 6)]))) + (any_extend:V2DI (vec_select:V2HI (match_dup 1) - (parallel [(const_int 3) - (const_int 7)]))))))] + (parallel [(const_int 3) (const_int 7)]))))))] "TARGET_XOP" - "vphadduwq\t{%1, %0|%0, %1}" + "vphadd<u>wq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) -(define_insn "xop_phaddudq" +(define_insn "xop_phadd<u>dq" [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI - (zero_extend:V2DI + (any_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)]))) - (zero_extend:V2DI + (parallel [(const_int 0) (const_int 2)]))) + (any_extend:V2DI (vec_select:V2SI (match_dup 1) - (parallel [(const_int 1) - (const_int 3)])))))] + (parallel [(const_int 1) (const_int 3)])))))] "TARGET_XOP" - "vphaddudq\t{%1, %0|%0, %1}" + "vphadd<u>dq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) (define_insn "xop_phsubbw" @@ -11129,25 +10078,17 @@ (sign_extend:V8HI (vec_select:V8QI (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)]))) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6) + (const_int 8) (const_int 10) + (const_int 12) (const_int 14)]))) (sign_extend:V8HI (vec_select:V8QI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)])))))] + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7) + (const_int 9) (const_int 11) + (const_int 13) (const_int 15)])))))] "TARGET_XOP" "vphsubbw\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) @@ -11158,17 +10099,13 @@ (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel 
[(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) + (parallel [(const_int 0) (const_int 2) + (const_int 4) (const_int 6)]))) (sign_extend:V4SI (vec_select:V4HI (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))))] + (parallel [(const_int 1) (const_int 3) + (const_int 5) (const_int 7)])))))] "TARGET_XOP" "vphsubwd\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) @@ -11179,13 +10116,11 @@ (sign_extend:V2DI (vec_select:V2SI (match_operand:V4SI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)]))) + (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI (match_dup 1) - (parallel [(const_int 1) - (const_int 3)])))))] + (parallel [(const_int 1) (const_int 3)])))))] "TARGET_XOP" "vphsubdq\t{%1, %0|%0, %1}" [(set_attr "type" "sseiadd1")]) @@ -11901,26 +10836,14 @@ (set_attr "prefix" "vex") (set_attr "mode" "")]) -(define_insn "avx2_permvarv8si" - [(set (match_operand:V8SI 0 "register_operand" "=x") - (unspec:V8SI - [(match_operand:V8SI 1 "register_operand" "x") - (match_operand:V8SI 2 "nonimmediate_operand" "xm")] - UNSPEC_VPERMSI))] - "TARGET_AVX2" - "vpermd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "avx2_permvarv8sf" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (unspec:V8SF - [(match_operand:V8SF 1 "register_operand" "x") - (match_operand:V8SF 2 "nonimmediate_operand" "xm")] - UNSPEC_VPERMSF))] +(define_insn "avx2_permvar<mode>" + [(set (match_operand:VI4F_256 0 "register_operand" "=x") + (unspec:VI4F_256 + [(match_operand:VI4F_256 1 "nonimmediate_operand" "xm") + (match_operand:V8SI 2 "register_operand" "x")] + UNSPEC_VPERMVAR))] "TARGET_AVX2" - "vpermps\t{%2, %1, %0|%0, %1, %2}" + "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}" [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) -- cgit v1.2.1