diff options
author | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-01-11 08:28:21 +0000 |
---|---|---|
committer | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-01-11 08:28:21 +0000 |
commit | 84d429b9a24117fcd1ad6b88ae2658cc8fa5f8ac (patch) | |
tree | fb62b90dc09045605d9426eb9febca89032fd1b2 /gcc/config/arm/neon.md | |
parent | f22b491886dbe1718cc7076fcce41f157ac735d9 (diff) | |
download | gcc-84d429b9a24117fcd1ad6b88ae2658cc8fa5f8ac.tar.gz |
2012-01-11 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk rev 183090 using svnmerge
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@183091 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/arm/neon.md')
-rw-r--r-- | gcc/config/arm/neon.md | 144 |
1 files changed, 104 insertions, 40 deletions
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index d7caa379b85..24a15802bc3 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1,5 +1,6 @@ ;; ARM NEON coprocessor Machine Description -;; Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +;; Copyright (C) 2006, 2007, 2008, 2009, 2010, 2012 +;; Free Software Foundation, Inc. ;; Written by CodeSourcery. ;; ;; This file is part of GCC. @@ -35,6 +36,7 @@ UNSPEC_VCGE UNSPEC_VCGT UNSPEC_VCLS + UNSPEC_VCONCAT UNSPEC_VCVT UNSPEC_VCVT_N UNSPEC_VEXT @@ -2860,6 +2862,20 @@ DONE; }) +; Disabled before reload because we don't want combine doing something silly, +; but used by the post-reload expansion of neon_vcombine. +(define_insn "*neon_vswp<mode>" + [(set (match_operand:VDQX 0 "s_register_operand" "+w") + (match_operand:VDQX 1 "s_register_operand" "+w")) + (set (match_dup 1) (match_dup 0))] + "TARGET_NEON && reload_completed" + "vswp\t%<V_reg>1, %<V_reg>2" + [(set (attr "neon_type") + (if_then_else (match_test "<Is_d_reg>") + (const_string "neon_bp_simple") + (const_string "neon_bp_2cycle")))] +) + ;; In this insn, operand 1 should be low, and operand 2 the high part of the ;; dest vector. ;; FIXME: A different implementation of this builtin could make it much @@ -2867,48 +2883,19 @@ ;; it so that the reg allocator puts things in the right places magically ;; instead). Lack of subregs for vectors makes that tricky though, I think. -(define_insn "neon_vcombine<mode>" +(define_insn_and_split "neon_vcombine<mode>" [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w") - (vec_concat:<V_DOUBLE> (match_operand:VDX 1 "s_register_operand" "w") - (match_operand:VDX 2 "s_register_operand" "w")))] + (vec_concat:<V_DOUBLE> + (match_operand:VDX 1 "s_register_operand" "w") + (match_operand:VDX 2 "s_register_operand" "w")))] "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] { - int dest = REGNO (operands[0]); - int src1 = REGNO (operands[1]); - int src2 = REGNO (operands[2]); - rtx destlo; - - if (src1 == dest && src2 == dest + 2) - return ""; - else if (src2 == dest && src1 == dest + 2) - /* Special case of reversed high/low parts. */ - return "vswp\t%P1, %P2"; - - destlo = gen_rtx_REG (<MODE>mode, dest); - - if (!reg_overlap_mentioned_p (operands[2], destlo)) - { - /* Try to avoid unnecessary moves if part of the result is in the right - place already. */ - if (src1 != dest) - output_asm_insn ("vmov\t%e0, %P1", operands); - if (src2 != dest + 2) - output_asm_insn ("vmov\t%f0, %P2", operands); - } - else - { - if (src2 != dest + 2) - output_asm_insn ("vmov\t%f0, %P2", operands); - if (src1 != dest) - output_asm_insn ("vmov\t%e0, %P1", operands); - } - - return ""; -} - ;; We set the neon_type attribute based on the vmov instructions above. - [(set_attr "length" "8") - (set_attr "neon_type" "neon_bp_simple")] -) + neon_split_vcombine (operands); + DONE; +}) (define_expand "neon_vget_high<mode>" [(match_operand:<V_HALF> 0 "s_register_operand") @@ -3920,6 +3907,83 @@ [(set_attr "neon_type" "neon_bp_3cycle")] ) +;; These three are used by the vec_perm infrastructure for V16QImode. +(define_insn_and_split "neon_vtbl1v16qi" + [(set (match_operand:V16QI 0 "s_register_operand" "=&w") + (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w") + (match_operand:V16QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0, op1, op2, part0, part2; + unsigned ofs; + + op0 = operands[0]; + op1 = gen_lowpart (TImode, operands[1]); + op2 = operands[2]; + + ofs = subreg_lowpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + + ofs = subreg_highpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + DONE; +}) + +(define_insn_and_split "neon_vtbl2v16qi" + [(set (match_operand:V16QI 0 "s_register_operand" "=&w") + (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:V16QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0, op1, op2, part0, part2; + unsigned ofs; + + op0 = operands[0]; + op1 = operands[1]; + op2 = operands[2]; + + ofs = subreg_lowpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + + ofs = subreg_highpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + DONE; +}) + +;; ??? Logically we should extend the regular neon_vcombine pattern to +;; handle quad-word input modes, producing octa-word output modes. But +;; that requires us to add support for octa-word vector modes in moves. +;; That seems overkill for this one use in vec_perm. +(define_insn_and_split "neon_vcombinev16qi" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w") + (match_operand:V16QI 2 "s_register_operand" "w")] + UNSPEC_VCONCAT))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + neon_split_vcombine (operands); + DONE; +}) + (define_insn "neon_vtbx1v8qi" [(set (match_operand:V8QI 0 "s_register_operand" "=w") (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") |