diff options
Diffstat (limited to 'gcc/config/i386/sse.md')
-rw-r--r-- | gcc/config/i386/sse.md | 2412 |
1 files changed, 12 insertions, 2400 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 7c60f015e83..2ddbbf551de 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -66,7 +66,7 @@ ;; Modes handled by integer vcond pattern (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI - (V2DI "TARGET_SSE4_2 || TARGET_SSE5")]) + (V2DI "TARGET_SSE4_2")]) ;; Mapping from float mode to required SSE level (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")]) @@ -74,15 +74,11 @@ ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) -;; Mapping of the sse5 suffix +;; Mapping of the avx suffix (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd")]) -(define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") - (V4SF "ss") (V2DF "sd")]) -(define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")]) -;; Mapping of the max integer size for sse5 rotate immediate constraint -(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) +(define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")]) ;; Mapping of vector modes back to the scalar modes (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF") @@ -1453,8 +1449,7 @@ (match_operator:SSEMODEF4 3 "sse_comparison_operator" [(match_operand:SSEMODEF4 1 "register_operand" "0") (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))] - "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode)) - && !TARGET_SSE5" + "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))" "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") @@ -1468,7 +1463,7 @@ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]) (match_dup 1) (const_int 1)))] - "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5" + "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") (set_attr "length_immediate" "1") @@ -1666,563 +1661,6 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; SSE5 floating point multiply/accumulate instructions This includes the -;; scalar version of the instructions as well as the vector -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; In order to match (*a * *b) + *c, particularly when vectorizing, allow -;; combine to generate a multiply/add with two memory references. We then -;; split this insn, into loading up the destination register with one of the -;; memory operations. If we don't manage to split the insn, reload will -;; generate the appropriate moves. The reason this is needed, is that combine -;; has already folded one of the memory references into both the multiply and -;; add insns, and it can't generate a new pseudo. I.e.: -;; (set (reg1) (mem (addr1))) -;; (set (reg2) (mult (reg1) (mem (addr2)))) -;; (set (reg3) (plus (reg2) (mem (addr3)))) - -(define_insn "sse5_fmadd<mode>4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") - (plus:SSEMODEF4 - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" - "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; Split fmadd with two memory operands into a load and the fmadd. -(define_split - [(set (match_operand:SSEMODEF4 0 "register_operand" "") - (plus:SSEMODEF4 - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); - emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - -;; For the scalar operations, use operand1 for the upper words that aren't -;; modified, so restrict the forms that are generated. -;; Scalar version of fmadd -(define_insn "sse5_vmfmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (vec_merge:SSEMODEF2P - (plus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; Floating multiply and subtract -;; Allow two memory operands the same as fmadd -(define_insn "sse5_fmsub<mode>4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") - (minus:SSEMODEF4 - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" - "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; Split fmsub with two memory operands into a load and the fmsub. -(define_split - [(set (match_operand:SSEMODEF4 0 "register_operand" "") - (minus:SSEMODEF4 - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); - emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - -;; For the scalar operations, use operand1 for the upper words that aren't -;; modified, so restrict the forms that are generated. -;; Scalar version of fmsub -(define_insn "sse5_vmfmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; Floating point negative multiply and add -;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b) -;; Note operands are out of order to simplify call to ix86_sse5_valid_p -;; Allow two memory operands to help in optimizing. -(define_insn "sse5_fnmadd<mode>4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") - (minus:SSEMODEF4 - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0") - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" - "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; Split fnmadd with two memory operands into a load and the fnmadd. -(define_split - [(set (match_operand:SSEMODEF4 0 "register_operand" "") - (minus:SSEMODEF4 - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "") - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); - emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - -;; For the scalar operations, use operand1 for the upper words that aren't -;; modified, so restrict the forms that are generated. -;; Scalar version of fnmadd -(define_insn "sse5_vmfnmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; Floating point negative multiply and subtract -;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c -;; Allow 2 memory operands to help with optimization -(define_insn "sse5_fnmsub<mode>4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") - (minus:SSEMODEF4 - (mult:SSEMODEF4 - (neg:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0")) - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)" - "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; Split fnmsub with two memory operands into a load and the fmsub. -(define_split - [(set (match_operand:SSEMODEF4 0 "register_operand" "") - (minus:SSEMODEF4 - (mult:SSEMODEF4 - (neg:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false) - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); - emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; -}) - -;; For the scalar operations, use operand1 for the upper words that aren't -;; modified, so restrict the forms that are generated. -;; Scalar version of fnmsub -(define_insn "sse5_vmfnmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 1) - (const_int 1)))] - "TARGET_SSE5 && TARGET_FUSED_MADD - && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)" - "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; The same instructions using an UNSPEC to allow the intrinsic to be used -;; even if the user used -mno-fused-madd -;; Parallel instructions. During instruction generation, just default -;; to registers, and let combine later build the appropriate instruction. -(define_expand "sse5i_fmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(plus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" ""))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_fmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") - (unspec:SSEMODEF2P - [(plus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_expand "sse5i_fmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" ""))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_fmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b) -;; Note operands are out of order to simplify call to ix86_sse5_valid_p -(define_expand "sse5i_fnmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "register_operand" "") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" "")))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_fnmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c -(define_expand "sse5i_fnmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (mult:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "")) - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" ""))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_fnmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (mult:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;; Scalar instructions -(define_expand "sse5i_vmfmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (plus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" "")) - (match_dup 1) - (const_int 0))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -;; For the scalar operations, use operand1 for the upper words that aren't -;; modified, so restrict the forms that are accepted. -(define_insn "*sse5i_vmfmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (plus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 0) - (const_int 0))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<ssescalarmode>")]) - -(define_expand "sse5i_vmfmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" "")) - (match_dup 0) - (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_vmfmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 1) - (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<ssescalarmode>")]) - -;; Note operands are out of order to simplify call to ix86_sse5_valid_p -(define_expand "sse5i_vmfnmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "register_operand" "") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "") - (match_operand:SSEMODEF2P 2 "register_operand" ""))) - (match_dup 1) - (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_vmfnmadd<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) - (match_dup 1) - (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" - "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<ssescalarmode>")]) - -(define_expand "sse5i_vmfnmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "")) - (match_operand:SSEMODEF2P 2 "register_operand" "")) - (match_operand:SSEMODEF2P 3 "register_operand" "")) - (match_dup 1) - (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5" -{ - /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ - if (TARGET_FUSED_MADD) - { - emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -}) - -(define_insn "*sse5i_vmfnmsub<mode>4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "0,0")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 1) - (const_int 1))] - UNSPEC_SSE5_INTRINSIC))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<ssescalarmode>")]) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ;; Parallel single-precision floating point conversion operations ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -4874,40 +4312,9 @@ "&& 1" [(const_int 0)] { - rtx t[12], op0, op[3]; + rtx t[12]; int i; - if (TARGET_SSE5) - { - /* On SSE5, we can take advantage of the pperm instruction to pack and - unpack the bytes. Unpack data such that we've got a source byte in - each low byte of each word. We don't care what goes into the high - byte, so put 0 there. */ - for (i = 0; i < 6; ++i) - t[i] = gen_reg_rtx (V8HImode); - - for (i = 0; i < 2; i++) - { - op[0] = t[i]; - op[1] = operands[i+1]; - ix86_expand_sse5_unpack (op, true, true); /* high bytes */ - - op[0] = t[i+2]; - ix86_expand_sse5_unpack (op, true, false); /* low bytes */ - } - - /* Multiply words. */ - emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */ - emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */ - - /* Pack the low byte of each word back into a single xmm */ - op[0] = operands[0]; - op[1] = t[5]; - op[2] = t[4]; - ix86_expand_sse5_pack (op); - DONE; - } - for (i = 0; i < 12; ++i) t[i] = gen_reg_rtx (V16QImode); @@ -4938,8 +4345,7 @@ emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */ emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */ - op0 = operands[0]; - emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */ + emit_insn (gen_sse2_punpcklbw (operands[0], t[11], t[10])); /* ABCDEFGHIJKLMNOP */ DONE; }) @@ -5272,7 +4678,7 @@ (match_operand:V4SI 2 "register_operand" "")))] "TARGET_SSE2" { - if (TARGET_SSE4_1 || TARGET_SSE5) + if (TARGET_SSE4_1) ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); }) @@ -5297,36 +4703,11 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) -;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a -;; multiply/add. In general, we expect the define_split to occur before -;; register allocation, so we have to handle the corner case where the target -;; is the same as one of the inputs. -(define_insn_and_split "*sse5_mulv4si3" - [(set (match_operand:V4SI 0 "register_operand" "=&x") - (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x") - (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE5" - "#" - "&& (reload_completed - || (!reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2])))" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V4SI (mult:V4SI (match_dup 1) - (match_dup 2)) - (match_dup 0)))] -{ - operands[3] = CONST0_RTX (V4SImode); -} - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - (define_insn_and_split "*sse2_mulv4si3" [(set (match_operand:V4SI 0 "register_operand" "") (mult:V4SI (match_operand:V4SI 1 "register_operand" "") (match_operand:V4SI 2 "register_operand" "")))] - "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5 + "TARGET_SSE2 && !TARGET_SSE4_1 && can_create_pseudo_p ()" "#" "&& 1" @@ -5388,42 +4769,6 @@ rtx t1, t2, t3, t4, t5, t6, thirtytwo; rtx op0, op1, op2; - if (TARGET_SSE5) - { - /* op1: A,B,C,D, op2: E,F,G,H */ - op0 = operands[0]; - op1 = gen_lowpart (V4SImode, operands[1]); - op2 = gen_lowpart (V4SImode, operands[2]); - t1 = gen_reg_rtx (V4SImode); - t2 = gen_reg_rtx (V4SImode); - t3 = gen_reg_rtx (V4SImode); - t4 = gen_reg_rtx (V2DImode); - t5 = gen_reg_rtx (V2DImode); - - /* t1: B,A,D,C */ - emit_insn (gen_sse2_pshufd_1 (t1, op1, - GEN_INT (1), - GEN_INT (0), - GEN_INT (3), - GEN_INT (2))); - - /* t2: 0 */ - emit_move_insn (t2, CONST0_RTX (V4SImode)); - - /* t3: (B*E),(A*F),(D*G),(C*H) */ - emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2)); - - /* t4: (B*E)+(A*F), (D*G)+(C*H) */ - emit_insn (gen_sse5_phadddq (t4, t3)); - - /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ - emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32))); - - /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ - emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5)); - DONE; - } - op0 = operands[0]; op1 = operands[1]; op2 = operands[2]; @@ -5539,57 +4884,6 @@ DONE; }) -(define_expand "vec_widen_smult_hi_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "register_operand" "")] - "TARGET_SSE5" -{ - rtx t1, t2; - - t1 = gen_reg_rtx (V4SImode); - t2 = gen_reg_rtx (V4SImode); - - emit_insn (gen_sse2_pshufd_1 (t1, operands[1], - GEN_INT (0), - GEN_INT (2), - GEN_INT (1), - GEN_INT (3))); - emit_insn (gen_sse2_pshufd_1 (t2, operands[2], - GEN_INT (0), - GEN_INT (2), - GEN_INT (1), - GEN_INT (3))); - emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2)); - DONE; -}) - -(define_expand "vec_widen_smult_lo_v4si" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "register_operand" "")] - "TARGET_SSE5" -{ - rtx t1, t2; - - t1 = gen_reg_rtx (V4SImode); - t2 = gen_reg_rtx (V4SImode); - - emit_insn (gen_sse2_pshufd_1 (t1, operands[1], - GEN_INT (0), - GEN_INT (2), - GEN_INT (1), - GEN_INT (3))); - emit_insn (gen_sse2_pshufd_1 (t2, operands[2], - GEN_INT (0), - GEN_INT (2), - GEN_INT (1), - GEN_INT (3))); - emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2)); - DONE; - DONE; -}) - (define_expand "vec_widen_umult_hi_v4si" [(match_operand:V2DI 0 "register_operand" "") (match_operand:V4SI 1 "register_operand" "") @@ -5987,7 +5281,7 @@ (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "") (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))] - "TARGET_SSE2 && !TARGET_SSE5" + "TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") (define_insn "*avx_eq<mode>3" @@ -6010,7 +5304,7 @@ (eq:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && !TARGET_SSE5 + "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") @@ -6056,7 +5350,7 @@ (gt:SSEMODE124 (match_operand:SSEMODE124 1 "register_operand" "0") (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 && !TARGET_SSE5" + "TARGET_SSE2" "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}" [(set_attr "type" "ssecmp") (set_attr "prefix_data16" "1") @@ -6275,12 +5569,6 @@ { rtx op1, op2, h1, l1, h2, l2, h3, l3; - if (TARGET_SSE5) - { - ix86_expand_sse5_pack (operands); - DONE; - } - op1 = gen_lowpart (V16QImode, operands[1]); op2 = gen_lowpart (V16QImode, operands[2]); h1 = gen_reg_rtx (V16QImode); @@ -6316,12 +5604,6 @@ { rtx op1, op2, h1, l1, h2, l2; - if (TARGET_SSE5) - { - ix86_expand_sse5_pack (operands); - DONE; - } - op1 = gen_lowpart (V8HImode, operands[1]); op2 = gen_lowpart (V8HImode, operands[2]); h1 = gen_reg_rtx (V8HImode); @@ -6351,12 +5633,6 @@ { rtx op1, op2, h1, l1; - if (TARGET_SSE5) - { - ix86_expand_sse5_pack (operands); - DONE; - } - op1 = gen_lowpart (V4SImode, operands[1]); op2 = gen_lowpart (V4SImode, operands[2]); h1 = gen_reg_rtx (V4SImode); @@ -7568,8 +6844,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, true); else ix86_expand_sse_unpack (operands, true, true); DONE; @@ -7582,8 +6856,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, true); else ix86_expand_sse_unpack (operands, false, true); DONE; @@ -7596,8 +6868,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, false); else ix86_expand_sse_unpack (operands, true, false); DONE; @@ -7610,8 +6880,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, false); else ix86_expand_sse_unpack (operands, false, false); DONE; @@ -7624,8 +6892,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, true); else ix86_expand_sse_unpack (operands, true, true); DONE; @@ -7638,8 +6904,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, true); else ix86_expand_sse_unpack (operands, false, true); DONE; @@ -7652,8 +6916,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, false); else ix86_expand_sse_unpack (operands, true, false); DONE; @@ -7666,8 +6928,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, false); else ix86_expand_sse_unpack (operands, false, false); DONE; @@ -7680,8 +6940,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, true); else ix86_expand_sse_unpack (operands, true, true); DONE; @@ -7694,8 +6952,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, true); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, true); else ix86_expand_sse_unpack (operands, false, true); DONE; @@ -7708,8 +6964,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, true, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, true, false); else ix86_expand_sse_unpack (operands, true, false); DONE; @@ -7722,8 +6976,6 @@ { if (TARGET_SSE4_1) ix86_expand_sse4_unpack (operands, false, false); - else if (TARGET_SSE5) - ix86_expand_sse5_unpack (operands, false, false); else ix86_expand_sse_unpack (operands, false, false); DONE; @@ -10176,1646 +9428,6 @@ (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; SSE5 instructions -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; SSE5 parallel integer multiply/add instructions. -;; Note the instruction does not allow the value being added to be a memory -;; operation. However by pretending via the nonimmediate_operand predicate -;; that it does and splitting it later allows the following to be recognized: -;; a[i] = b[i] * c[i] + d[i]; -(define_insn "sse5_pmacsww" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") - (plus:V8HI - (mult:V8HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm") - (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x")) - (match_operand:V8HI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)" - "@ - pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -;; Split pmacsww with two memory operands into a load and the pmacsww. -(define_split - [(set (match_operand:V8HI 0 "register_operand" "") - (plus:V8HI - (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") - (match_operand:V8HI 2 "nonimmediate_operand" "")) - (match_operand:V8HI 3 "nonimmediate_operand" "")))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true) - && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, V8HImode); - emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_insn "sse5_pmacssww" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") - (ss_plus:V8HI - (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")) - (match_operand:V8HI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -;; Note the instruction does not allow the value being added to be a memory -;; operation. However by pretending via the nonimmediate_operand predicate -;; that it does and splitting it later allows the following to be recognized: -;; a[i] = b[i] * c[i] + d[i]; -(define_insn "sse5_pmacsdd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") - (plus:V4SI - (mult:V4SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)" - "@ - pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -;; Split pmacsdd with two memory operands into a load and the pmacsdd. -(define_split - [(set (match_operand:V4SI 0 "register_operand" "") - (plus:V4SI - (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "") - (match_operand:V4SI 2 "nonimmediate_operand" "")) - (match_operand:V4SI 3 "nonimmediate_operand" "")))] - "TARGET_SSE5 - && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true) - && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] -{ - ix86_expand_sse5_multiple_memory (operands, 4, V4SImode); - emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_insn "sse5_pmacssdd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") - (ss_plus:V4SI - (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pmacssdql" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") - (ss_plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 1) - (const_int 3)]))) - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 1) - (const_int 3)]))) - (match_operand:V2DI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pmacssdqh" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") - (ss_plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 0) - (const_int 2)])))) - (match_operand:V2DI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pmacsdql" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 1) - (const_int 3)])))) - (match_operand:V2DI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn_and_split "*sse5_pmacsdql_mem" - [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 1) - (const_int 3)])))) - (match_operand:V2DI 3 "memory_operand" "m,m,m")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)" - "#" - "&& (reload_completed - || (!reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2])))" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 1) - (const_int 3)])))) - (match_dup 0)))]) - -;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so -;; fake it with a multiply/add. In general, we expect the define_split to -;; occur before register allocation, so we have to handle the corner case where -;; the target is the same as operands 1/2 -(define_insn_and_split "sse5_mulv2div2di3_low" - [(set (match_operand:V2DI 0 "register_operand" "=&x") - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)])))))] - "TARGET_SSE5" - "#" - "&& (reload_completed - || (!reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2])))" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 1) - (const_int 3)])))) - (match_dup 0)))] -{ - operands[3] = CONST0_RTX (V2DImode); -} - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pmacsdqh" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 0) - (const_int 2)])))) - (match_operand:V2DI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn_and_split "*sse5_pmacsdqh_mem" - [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 0) - (const_int 2)])))) - (match_operand:V2DI 3 "memory_operand" "m,m,m")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)" - "#" - "&& (reload_completed - || (!reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2])))" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 0) - (const_int 2)])))) - (match_dup 0)))]) - -;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so -;; fake it with a multiply/add. In general, we expect the define_split to -;; occur before register allocation, so we have to handle the corner case where -;; the target is the same as either operands[1] or operands[2] -(define_insn_and_split "sse5_mulv2div2di3_high" - [(set (match_operand:V2DI 0 "register_operand" "=&x") - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))))] - "TARGET_SSE5" - "#" - "&& (reload_completed - || (!reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2])))" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 0) - (const_int 2)])))) - (match_dup 0)))] -{ - operands[3] = CONST0_RTX (V2DImode); -} - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -;; SSE5 parallel integer multiply/add instructions for the intrinisics -(define_insn "sse5_pmacsswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") - (ss_plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pmacswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pmadcsswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") - (ss_plus:V4SI - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)])))) - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))))) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pmadcswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") - (plus:V4SI - (plus:V4SI - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)])))) - (mult:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_dup 2) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)]))))) - (match_operand:V4SI 3 "register_operand" "0,0,0")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "TI")]) - -;; SSE5 parallel XMM conditional moves -(define_insn "sse5_pcmov_<mode>" - [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x") - (if_then_else:SSEMODE - (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x") - (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0") - (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "@ - pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} - pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} - pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} - pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "sse4arg")]) - -;; SSE5 horizontal add/subtract instructions -(define_insn "sse5_phaddbw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (plus:V8HI - (sign_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)]))) - (sign_extend:V8HI - (vec_select:V8QI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)])))))] - "TARGET_SSE5" - "phaddbw\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phaddbd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI - (plus:V4SI - (sign_extend:V4SI - (vec_select:V4QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4) - (const_int 8) - (const_int 12)]))) - (sign_extend:V4SI - (vec_select:V4QI - (match_dup 1) - (parallel [(const_int 1) - (const_int 5) - (const_int 9) - (const_int 13)])))) - (plus:V4SI - (sign_extend:V4SI - (vec_select:V4QI - (match_dup 1) - (parallel [(const_int 2) - (const_int 6) - (const_int 10) - (const_int 14)]))) - (sign_extend:V4SI - (vec_select:V4QI - (match_dup 1) - (parallel [(const_int 3) - (const_int 7) - (const_int 11) - (const_int 15)]))))))] - "TARGET_SSE5" - "phaddbd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phaddbq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI - (plus:V2DI - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4)]))) - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 1) - (const_int 5)])))) - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 2) - (const_int 6)]))) - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 3) - (const_int 7)]))))) - (plus:V2DI - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 8) - (const_int 12)]))) - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 9) - (const_int 13)])))) - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 10) - (const_int 14)]))) - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 11) - (const_int 15)])))))))] - "TARGET_SSE5" - "phaddbq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phaddwd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))))] - "TARGET_SSE5" - "phaddwd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phaddwq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4)]))) - (sign_extend:V2DI - (vec_select:V2HI - (match_dup 1) - (parallel [(const_int 1) - (const_int 5)])))) - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2HI - (match_dup 1) - (parallel [(const_int 2) - (const_int 6)]))) - (sign_extend:V2DI - (vec_select:V2HI - (match_dup 1) - (parallel [(const_int 3) - (const_int 7)]))))))] - "TARGET_SSE5" - "phaddwq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phadddq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3)])))))] - "TARGET_SSE5" - "phadddq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phaddubw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (plus:V8HI - (zero_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)]))) - (zero_extend:V8HI - (vec_select:V8QI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)])))))] - "TARGET_SSE5" - "phaddubw\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phaddubd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI - (plus:V4SI - (zero_extend:V4SI - (vec_select:V4QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4) - (const_int 8) - (const_int 12)]))) - (zero_extend:V4SI - (vec_select:V4QI - (match_dup 1) - (parallel [(const_int 1) - (const_int 5) - (const_int 9) - (const_int 13)])))) - (plus:V4SI - (zero_extend:V4SI - (vec_select:V4QI - (match_dup 1) - (parallel [(const_int 2) - (const_int 6) - (const_int 10) - (const_int 14)]))) - (zero_extend:V4SI - (vec_select:V4QI - (match_dup 1) - (parallel [(const_int 3) - (const_int 7) - (const_int 11) - (const_int 15)]))))))] - "TARGET_SSE5" - "phaddubd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phaddubq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI - (plus:V2DI - (plus:V2DI - (zero_extend:V2DI - (vec_select:V2QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4)]))) - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 1) - (const_int 5)])))) - (plus:V2DI - (zero_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 2) - (const_int 6)]))) - (zero_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 3) - (const_int 7)]))))) - (plus:V2DI - (plus:V2DI - (zero_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 8) - (const_int 12)]))) - (sign_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 9) - (const_int 13)])))) - (plus:V2DI - (zero_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 10) - (const_int 14)]))) - (zero_extend:V2DI - (vec_select:V2QI - (match_dup 1) - (parallel [(const_int 11) - (const_int 15)])))))))] - "TARGET_SSE5" - "phaddubq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phadduwd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI - (zero_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (zero_extend:V4SI - (vec_select:V4HI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))))] - "TARGET_SSE5" - "phadduwd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phadduwq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI - (plus:V2DI - (zero_extend:V2DI - (vec_select:V2HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 4)]))) - (zero_extend:V2DI - (vec_select:V2HI - (match_dup 1) - (parallel [(const_int 1) - (const_int 5)])))) - (plus:V2DI - (zero_extend:V2DI - (vec_select:V2HI - (match_dup 1) - (parallel [(const_int 2) - (const_int 6)]))) - (zero_extend:V2DI - (vec_select:V2HI - (match_dup 1) - (parallel [(const_int 3) - (const_int 7)]))))))] - "TARGET_SSE5" - "phadduwq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phaddudq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI - (zero_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)]))) - (zero_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3)])))))] - "TARGET_SSE5" - "phaddudq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phsubbw" - [(set (match_operand:V8HI 0 "register_operand" "=x") - (minus:V8HI - (sign_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6) - (const_int 8) - (const_int 10) - (const_int 12) - (const_int 14)]))) - (sign_extend:V8HI - (vec_select:V8QI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7) - (const_int 9) - (const_int 11) - (const_int 13) - (const_int 15)])))))] - "TARGET_SSE5" - "phsubbw\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phsubwd" - [(set (match_operand:V4SI 0 "register_operand" "=x") - (minus:V4SI - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2) - (const_int 4) - (const_int 6)]))) - (sign_extend:V4SI - (vec_select:V4HI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3) - (const_int 5) - (const_int 7)])))))] - "TARGET_SSE5" - "phsubwd\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -(define_insn "sse5_phsubdq" - [(set (match_operand:V2DI 0 "register_operand" "=x") - (minus:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3)])))))] - "TARGET_SSE5" - "phsubdq\t{%1, %0|%0, %1}" - [(set_attr "type" "sseiadd1")]) - -;; SSE5 permute instructions -(define_insn "sse5_pperm" - [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") - (unspec:V16QI - [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm") - (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x") - (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] - UNSPEC_SSE5_PERMUTE))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "sse4arg") - (set_attr "mode" "TI")]) - -;; The following are for the various unpack insns which doesn't need the first -;; source operand, so we can just use the output operand for the first operand. -;; This allows either of the other two operands to be a memory operand. We -;; can't just use the first operand as an argument to the normal pperm because -;; then an output only argument, suddenly becomes an input operand. -(define_insn "sse5_pperm_zero_v16qi_v8hi" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (zero_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V16QImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_sign_v16qi_v8hi" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") - (sign_extend:V8HI - (vec_select:V8QI - (match_operand:V16QI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V16QImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_zero_v8hi_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") - (zero_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V8HImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_sign_v8hi_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") - (sign_extend:V4SI - (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V8HImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_zero_v4si_v2di" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (zero_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V4SImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_sign_v4si_v2di" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "xm,x") - (match_operand 2 "" "")))) ;; parallel with const_int's - (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] - "TARGET_SSE5 - && (register_operand (operands[1], V4SImode) - || register_operand (operands[2], V16QImode))" - "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" - [(set_attr "type" "sseadd") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "mode" "TI")]) - -;; SSE5 pack instructions that combine two vectors into a smaller vector -(define_insn "sse5_pperm_pack_v2di_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x") - (vec_concat:V4SI - (truncate:V2SI - (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm")) - (truncate:V2SI - (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x")))) - (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "sse4arg") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_pack_v4si_v8hi" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x") - (vec_concat:V8HI - (truncate:V4HI - (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm")) - (truncate:V4HI - (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x")))) - (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "sse4arg") - (set_attr "mode" "TI")]) - -(define_insn "sse5_pperm_pack_v8hi_v16qi" - [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") - (vec_concat:V16QI - (truncate:V8QI - (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm")) - (truncate:V8QI - (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x")))) - (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "sse4arg") - (set_attr "mode" "TI")]) - -;; Floating point permutation (permps, permpd) -(define_insn "sse5_perm<mode>" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x") - (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] - UNSPEC_SSE5_PERMUTE))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" - "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "sse4arg") - (set_attr "mode" "<MODE>")]) - -;; SSE5 packed rotate instructions -(define_expand "rotl<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "") - (rotate:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") - (match_operand:SI 2 "general_operand")))] - "TARGET_SSE5" -{ - /* If we were given a scalar, convert it to parallel */ - if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) - { - rtvec vs = rtvec_alloc (<ssescalarnum>); - rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); - rtx reg = gen_reg_rtx (<MODE>mode); - rtx op2 = operands[2]; - int i; - - if (GET_MODE (op2) != <ssescalarmode>mode) - { - op2 = gen_reg_rtx (<ssescalarmode>mode); - convert_move (op2, operands[2], false); - } - - for (i = 0; i < <ssescalarnum>; i++) - RTVEC_ELT (vs, i) = op2; - - emit_insn (gen_vec_init<mode> (reg, par)); - emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg)); - DONE; - } -}) - -(define_expand "rotr<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "") - (rotatert:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") - (match_operand:SI 2 "general_operand")))] - "TARGET_SSE5" -{ - /* If we were given a scalar, convert it to parallel */ - if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) - { - rtvec vs = rtvec_alloc (<ssescalarnum>); - rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); - rtx neg = gen_reg_rtx (<MODE>mode); - rtx reg = gen_reg_rtx (<MODE>mode); - rtx op2 = operands[2]; - int i; - - if (GET_MODE (op2) != <ssescalarmode>mode) - { - op2 = gen_reg_rtx (<ssescalarmode>mode); - convert_move (op2, operands[2], false); - } - - for (i = 0; i < <ssescalarnum>; i++) - RTVEC_ELT (vs, i) = op2; - - emit_insn (gen_vec_init<mode> (reg, par)); - emit_insn (gen_neg<mode>2 (neg, reg)); - emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg)); - DONE; - } -}) - -(define_insn "sse5_rotl<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (rotate:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] - "TARGET_SSE5" - "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseishft") - (set_attr "length_immediate" "1") - (set_attr "mode" "TI")]) - -(define_insn "sse5_rotr<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (rotatert:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") - (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] - "TARGET_SSE5" -{ - operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2])); - return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\"; -} - [(set_attr "type" "sseishft") - (set_attr "length_immediate" "1") - (set_attr "mode" "TI")]) - -(define_expand "vrotr<mode>3" - [(match_operand:SSEMODE1248 0 "register_operand" "") - (match_operand:SSEMODE1248 1 "register_operand" "") - (match_operand:SSEMODE1248 2 "register_operand" "")] - "TARGET_SSE5" -{ - rtx reg = gen_reg_rtx (<MODE>mode); - emit_insn (gen_neg<mode>2 (reg, operands[2])); - emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg)); - DONE; -}) - -(define_expand "vrotl<mode>3" - [(match_operand:SSEMODE1248 0 "register_operand" "") - (match_operand:SSEMODE1248 1 "register_operand" "") - (match_operand:SSEMODE1248 2 "register_operand" "")] - "TARGET_SSE5" -{ - emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_insn "sse5_vrotl<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") - (if_then_else:SSEMODE1248 - (ge:SSEMODE1248 - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") - (const_int 0)) - (rotate:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") - (match_dup 2)) - (rotatert:SSEMODE1248 - (match_dup 1) - (neg:SSEMODE1248 (match_dup 2)))))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" - "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseishft") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "mode" "TI")]) - -;; SSE5 packed shift instructions. -;; FIXME: add V2DI back in -(define_expand "vlshr<mode>3" - [(match_operand:SSEMODE124 0 "register_operand" "") - (match_operand:SSEMODE124 1 "register_operand" "") - (match_operand:SSEMODE124 2 "register_operand" "")] - "TARGET_SSE5" -{ - rtx neg = gen_reg_rtx (<MODE>mode); - emit_insn (gen_neg<mode>2 (neg, operands[2])); - emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg)); - DONE; -}) - -(define_expand "vashr<mode>3" - [(match_operand:SSEMODE124 0 "register_operand" "") - (match_operand:SSEMODE124 1 "register_operand" "") - (match_operand:SSEMODE124 2 "register_operand" "")] - "TARGET_SSE5" -{ - rtx neg = gen_reg_rtx (<MODE>mode); - emit_insn (gen_neg<mode>2 (neg, operands[2])); - emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg)); - DONE; -}) - -(define_expand "vashl<mode>3" - [(match_operand:SSEMODE124 0 "register_operand" "") - (match_operand:SSEMODE124 1 "register_operand" "") - (match_operand:SSEMODE124 2 "register_operand" "")] - "TARGET_SSE5" -{ - emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_insn "sse5_ashl<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") - (if_then_else:SSEMODE1248 - (ge:SSEMODE1248 - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") - (const_int 0)) - (ashift:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") - (match_dup 2)) - (ashiftrt:SSEMODE1248 - (match_dup 1) - (neg:SSEMODE1248 (match_dup 2)))))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" - "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseishft") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "mode" "TI")]) - -(define_insn "sse5_lshl<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") - (if_then_else:SSEMODE1248 - (ge:SSEMODE1248 - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") - (const_int 0)) - (ashift:SSEMODE1248 - (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") - (match_dup 2)) - (lshiftrt:SSEMODE1248 - (match_dup 1) - (neg:SSEMODE1248 (match_dup 2)))))] - "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" - "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseishft") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "mode" "TI")]) - -;; SSE2 doesn't have some shift varients, so define versions for SSE5 -(define_expand "ashlv16qi3" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "") - (match_operand:SI 2 "nonmemory_operand" "")] - "TARGET_SSE5" -{ - rtvec vs = rtvec_alloc (16); - rtx par = gen_rtx_PARALLEL (V16QImode, vs); - rtx reg = gen_reg_rtx (V16QImode); - int i; - for (i = 0; i < 16; i++) - RTVEC_ELT (vs, i) = operands[2]; - - emit_insn (gen_vec_initv16qi (reg, par)); - emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg)); - DONE; -}) - -(define_expand "lshlv16qi3" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "") - (match_operand:SI 2 "nonmemory_operand" "")] - "TARGET_SSE5" -{ - rtvec vs = rtvec_alloc (16); - rtx par = gen_rtx_PARALLEL (V16QImode, vs); - rtx reg = gen_reg_rtx (V16QImode); - int i; - for (i = 0; i < 16; i++) - RTVEC_ELT (vs, i) = operands[2]; - - emit_insn (gen_vec_initv16qi (reg, par)); - emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg)); - DONE; -}) - -(define_expand "ashrv16qi3" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "") - (match_operand:SI 2 "nonmemory_operand" "")] - "TARGET_SSE5" -{ - rtvec vs = rtvec_alloc (16); - rtx par = gen_rtx_PARALLEL (V16QImode, vs); - rtx reg = gen_reg_rtx (V16QImode); - int i; - rtx ele = ((CONST_INT_P (operands[2])) - ? GEN_INT (- INTVAL (operands[2])) - : operands[2]); - - for (i = 0; i < 16; i++) - RTVEC_ELT (vs, i) = ele; - - emit_insn (gen_vec_initv16qi (reg, par)); - - if (!CONST_INT_P (operands[2])) - { - rtx neg = gen_reg_rtx (V16QImode); - emit_insn (gen_negv16qi2 (neg, reg)); - emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg)); - } - else - emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg)); - - DONE; -}) - -(define_expand "ashrv2di3" - [(match_operand:V2DI 0 "register_operand" "") - (match_operand:V2DI 1 "register_operand" "") - (match_operand:DI 2 "nonmemory_operand" "")] - "TARGET_SSE5" -{ - rtvec vs = rtvec_alloc (2); - rtx par = gen_rtx_PARALLEL (V2DImode, vs); - rtx reg = gen_reg_rtx (V2DImode); - rtx ele; - - if (CONST_INT_P (operands[2])) - ele = GEN_INT (- INTVAL (operands[2])); - else if (GET_MODE (operands[2]) != DImode) - { - rtx move = gen_reg_rtx (DImode); - ele = gen_reg_rtx (DImode); - convert_move (move, operands[2], false); - emit_insn (gen_negdi2 (ele, move)); - } - else - { - ele = gen_reg_rtx (DImode); - emit_insn (gen_negdi2 (ele, operands[2])); - } - - RTVEC_ELT (vs, 0) = ele; - RTVEC_ELT (vs, 1) = ele; - emit_insn (gen_vec_initv2di (reg, par)); - emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg)); - DONE; -}) - -;; SSE5 FRCZ support -;; parallel insns -(define_insn "sse5_frcz<mode>2" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] - UNSPEC_FRCZ))] - "TARGET_SSE5" - "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt1") - (set_attr "mode" "<MODE>")]) - -;; scalar insns -(define_insn "sse5_vmfrcz<mode>2" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] - UNSPEC_FRCZ) - (match_operand:SSEMODEF2P 1 "register_operand" "0") - (const_int 1)))] - "TARGET_SSE5" - "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt1") - (set_attr "mode" "<MODE>")]) - -(define_insn "sse5_cvtph2ps" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")] - UNSPEC_CVTPH2PS))] - "TARGET_SSE5" - "cvtph2ps\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -(define_insn "sse5_cvtps2ph" - [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm") - (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")] - UNSPEC_CVTPS2PH))] - "TARGET_SSE5" - "cvtps2ph\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V4SF")]) - -;; Scalar versions of the com instructions that use vector types that are -;; called from the intrinsics. Unlike the the other s{s,d} instructions, the -;; com instructions fill in 0's in the upper bits instead of leaving them -;; unmodified, so we use const_vector of 0 instead of match_dup. -(define_expand "sse5_vmmaskcmp<mode>3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "") - (vec_merge:SSEMODEF2P - (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" - [(match_operand:SSEMODEF2P 2 "register_operand" "") - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")]) - (match_dup 4) - (const_int 1)))] - "TARGET_SSE5" -{ - operands[4] = CONST0_RTX (<MODE>mode); -}) - -(define_insn "*sse5_vmmaskcmp<mode>3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (vec_merge:SSEMODEF2P - (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" - [(match_operand:SSEMODEF2P 2 "register_operand" "x") - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]) - (match_operand:SSEMODEF2P 4 "") - (const_int 1)))] - "TARGET_SSE5" - "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sse4arg") - (set_attr "prefix_data16" "0") - (set_attr "prefix_rep" "0") - (set_attr "prefix_extra" "2") - (set_attr "length_immediate" "1") - (set_attr "mode" "<ssescalarmode>")]) - -;; We don't have a comparison operator that always returns true/false, so -;; handle comfalse and comtrue specially. -(define_insn "sse5_com_tf<mode>3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (unspec:SSEMODEF2P - [(match_operand:SSEMODEF2P 1 "register_operand" "x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_int_operand" "n")] - UNSPEC_SSE5_TRUEFALSE))] - "TARGET_SSE5" -{ - const char *ret = NULL; - - switch (INTVAL (operands[3])) - { - case COM_FALSE_S: - ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; - break; - - case COM_FALSE_P: - ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; - break; - - case COM_TRUE_S: - ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; - break; - - case COM_TRUE_P: - ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; - break; - - default: - gcc_unreachable (); - } - - return ret; -} - [(set_attr "type" "ssecmp") - (set_attr "prefix_data16" "0") - (set_attr "prefix_rep" "0") - (set_attr "prefix_extra" "2") - (set_attr "length_immediate" "1") - (set_attr "mode" "<MODE>")]) - -(define_insn "sse5_maskcmp<mode>3" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") - (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" - [(match_operand:SSEMODEF2P 2 "register_operand" "x") - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE5" - "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssecmp") - (set_attr "prefix_data16" "0") - (set_attr "prefix_rep" "0") - (set_attr "prefix_extra" "2") - (set_attr "length_immediate" "1") - (set_attr "mode" "<MODE>")]) - -(define_insn "sse5_maskcmp<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator" - [(match_operand:SSEMODE1248 2 "register_operand" "x") - (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE5" - "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sse4arg") - (set_attr "prefix_data16" "0") - (set_attr "prefix_rep" "0") - (set_attr "prefix_extra" "2") - (set_attr "length_immediate" "1") - (set_attr "mode" "TI")]) - -(define_insn "sse5_maskcmp_uns<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" - [(match_operand:SSEMODE1248 2 "register_operand" "x") - (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] - "TARGET_SSE5" - "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssecmp") - (set_attr "prefix_data16" "0") - (set_attr "prefix_rep" "0") - (set_attr "prefix_extra" "2") - (set_attr "length_immediate" "1") - (set_attr "mode" "TI")]) - -;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* -;; and pcomneu* not to be converted to the signed ones in case somebody needs -;; the exact instruction generated for the intrinsic. -(define_insn "sse5_maskcmp_uns2<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (unspec:SSEMODE1248 - [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" - [(match_operand:SSEMODE1248 2 "register_operand" "x") - (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])] - UNSPEC_SSE5_UNSIGNED_CMP))] - "TARGET_SSE5" - "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "ssecmp") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "length_immediate" "1") - (set_attr "mode" "TI")]) - -;; Pcomtrue and pcomfalse support. These are useless instructions, but are -;; being added here to be complete. -(define_insn "sse5_pcom_tf<mode>3" - [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") - (unspec:SSEMODE1248 - [(match_operand:SSEMODE1248 1 "register_operand" "x") - (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "const_int_operand" "n")] - UNSPEC_SSE5_TRUEFALSE))] - "TARGET_SSE5" -{ - return ((INTVAL (operands[3]) != 0) - ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" - : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"); -} - [(set_attr "type" "ssecmp") - (set_attr "prefix_data16" "0") - (set_attr "prefix_extra" "2") - (set_attr "length_immediate" "1") - (set_attr "mode" "TI")]) - (define_insn "*avx_aesenc" [(set (match_operand:V2DI 0 "register_operand" "=x") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") |