diff options
-rw-r--r-- | gcc/ChangeLog | 68 | ||||
-rw-r--r-- | gcc/config/mips/loongson.h | 4 | ||||
-rw-r--r-- | gcc/config/mips/loongson.md | 570 | ||||
-rw-r--r-- | gcc/config/mips/mips-modes.def | 12 | ||||
-rw-r--r-- | gcc/config/mips/mips-protos.h | 5 | ||||
-rw-r--r-- | gcc/config/mips/mips-ps-3d.md | 265 | ||||
-rw-r--r-- | gcc/config/mips/mips.c | 721 | ||||
-rw-r--r-- | gcc/config/mips/predicates.md | 11 |
8 files changed, 1461 insertions, 195 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 462a8d38815..0a426765c79 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,71 @@ +2011-12-23 Richard Henderson <rth@redhat.com> + + * config/mips/loongson.md (UNSPEC_LOONGSON_PINSR_0, + UNSPEC_LOONGSON_PINSR_1, UNSPEC_LOONGSON_PINSR_2, + UNSPEC_LOONGSON_PINSR_3): Replace with... + (UNSPEC_LOONGSON_PINSRH): ... this. + (UNSPEC_LOONGSON_VINIT): New. + (UNSPEC_LOONGSON_DSLL, UNSPEC_LOONGSON_DSRL): New. + (VWB): New mode iterator. + (V_inner): New mode attribute. + (loongson_vec_init1_<VHB>): New. + (*vec_concatv2si): New. + (and<VWHB>3, ior<VWHB>3, xor<VWHB>3, one_cmpl<VWHB>2): New. + (*loongson_nor): New. + (loongson_pextrh): Un-macro-ify. + (loongson_pmaddhw): Likewise. + (smaxv4hi3, umaxv8qi3, sminv4hi3, uminv8qi3): Likewise. + (loongson_pinsrh_0): Represent with vec_select+vec_concat. + (loongson_pinsrh_1, loongson_pinsrh_2, loongson_pinsrh_3): Likewise. + (*vec_setv4hi, vec_setv4hi): New. + (sdot_prodv4hi): New. + (smax<VWB>3, smin<VWB>3): New. + (reduc_uplus_v8qi): New. + (loongson_pshufh): Remove destination matching input. + (ashl<VWH>3, ashr<VWH>3, lshr<VWH>3): Fix type attribute. + (vec_interleave_high<VWHB>, vec_interleave_low<VWHB>): Remove. + (loongson_punpckhbh, loongson_punpckhhw, loongson_punpckhhw_qi, + loongson_punpckhwd, loongson_punpckhwd_qi, loongson_punpckhwd_hi, + loongson_punpcklbh, loongson_punpcklhw, loongson_punpcklhw_qi, + loongson_punpcklwd, loongson_punpcklwd_qi, loongson_punpcklwd_hi, + vec_perm_const<VWHB>, vec_unpacks_lo_<VHB>, vec_unpacks_hi_<VHB>, + vec_unpacku_lo_<VHB>, vec_unpacku_hi_<VHB>, vec_shl_<VWHBDI>, + vec_shr_<VWHBDI>, reduc_uplus_<VWH>, reduc_splus_<VWHB>, + reduc_smax_<VWHB>, reduc_smin_<VWHB>, reduc_umax_<VWHB>, + reduc_umin_<VB>): New. + * config/mips/mips-ps-3d.md (vec_perm_const_ps): New. + (mips_pul_ps, mips_puu_ps, mips_pll_ps, mips_plu_ps): Expand in + terms of vec_perm_const_ps. + (vec_perm_constv2sf): New. + (vec_initv2sf): Use mips_expand_vector_init. 
+ (vec_concatv2sf): Rename from vec_initv2sf_internal. + (vec_setv2sf): Use vec_perm_const_ps. + (reduc_splus_v2sf, reduc_smin_v2sf, reduc_smax_v2sf): New. + * config/mips/loongson.h (pshufh_u, pshufh_s): Don't pass dest to + the builtin. + * config/mips/mips-modes.def (V16QI, V8HI, V4SI, V4SF): New modes. + * config/mips/mips-protos.h: Update. + * config/mips/mips.c (mips_get_arg_info): Match V2SFmode, not all + MODE_VECTOR_FLOAT. + (mips_return_mode_in_fpr_p): Likewise. + (mips_cannot_change_mode_class): Allow 8-byte integral mode changes. + (CODE_FOR_loongson_punpckhbh, CODE_FOR_loongson_punpckhhw, + CODE_FOR_loongson_punpckhwd, CODE_FOR_loongson_punpcklbh, + CODE_FOR_loongson_punpcklhw, CODE_FOR_loongson_punpcklwd): Remove. + (mips_builtins): Remove first operand for loongson pshufh builtins. + (MAX_VECT_LEN, struct expand_vec_perm_d): New. + (mips_expand_vselect, mips_expand_vselect_vconcat, + mips_expand_vpc_loongson_even_odd, mips_expand_vpc_loongson_pshufh, + mips_expand_vpc_loongson_bcast, mips_expand_vec_perm_const_1, + mips_expand_vec_perm_const, mips_vectorize_vec_perm_const_ok, + mips_expand_vec_unpack, mips_constant_elt_p, mips_expand_vi_broadcast, + mips_expand_vi_constant, mips_expand_vi_loongson_one_pinsrh, + mips_expand_vi_general, mips_expand_vec_reduc, mips_expand_vec_minmax, + TARGET_VECTORIZE_VEC_PERM_CONST_OK): New. + (mips_expand_vector_init): Rewrite. + * config/mips/predicates.md (const_2_or_3_operand): New. + (const_0_to_3_operand): New. + 2011-12-23 Dmitry Plotnikov <dplotnikov@ispras.ru> * config/arm/neon.md (float<mode><V_cvtto>2): New. 
diff --git a/gcc/config/mips/loongson.h b/gcc/config/mips/loongson.h index 6bfd4d7e502..fcaf55366f1 100644 --- a/gcc/config/mips/loongson.h +++ b/gcc/config/mips/loongson.h @@ -449,13 +449,13 @@ psadbh (uint8x8_t s, uint8x8_t t) __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order) { - return __builtin_loongson_pshufh_u (dest, s, order); + return __builtin_loongson_pshufh_u (s, order); } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order) { - return __builtin_loongson_pshufh_s (dest, s, order); + return __builtin_loongson_pshufh_s (s, order); } /* Shift left logical. */ diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md index 225f4d16da9..4f9cc7396ca 100644 --- a/gcc/config/mips/loongson.md +++ b/gcc/config/mips/loongson.md @@ -24,10 +24,8 @@ UNSPEC_LOONGSON_PCMPEQ UNSPEC_LOONGSON_PCMPGT UNSPEC_LOONGSON_PEXTR - UNSPEC_LOONGSON_PINSR_0 - UNSPEC_LOONGSON_PINSR_1 - UNSPEC_LOONGSON_PINSR_2 - UNSPEC_LOONGSON_PINSR_3 + UNSPEC_LOONGSON_PINSRH + UNSPEC_LOONGSON_VINIT UNSPEC_LOONGSON_PMADD UNSPEC_LOONGSON_PMOVMSK UNSPEC_LOONGSON_PMULHU @@ -41,6 +39,8 @@ UNSPEC_LOONGSON_PUNPCKL UNSPEC_LOONGSON_PADDD UNSPEC_LOONGSON_PSUBD + UNSPEC_LOONGSON_DSLL + UNSPEC_LOONGSON_DSRL ]) ;; Mode iterators and attributes. @@ -60,6 +60,9 @@ ;; 64-bit vectors of words and halfwords. (define_mode_iterator VWH [V2SI V4HI]) +;; 64-bit vectors of words and bytes +(define_mode_iterator VWB [V2SI V8QI]) + ;; 64-bit vectors of words, halfwords and bytes. (define_mode_iterator VWHB [V2SI V4HI V8QI]) @@ -86,6 +89,9 @@ ;; but with twice as many elements. (define_mode_attr V_squash_double [(V2SI "V4HI") (V4HI "V8QI")]) +;; Given a vector type T, the inner mode. +(define_mode_attr V_inner [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) + ;; The Loongson instruction suffixes corresponding to the conversions ;; specified by V_half_width. 
(define_mode_attr V_squash_double_suffix [(V2SI "wh") (V4HI "hb")]) @@ -122,6 +128,28 @@ DONE; }) +;; Helper for vec_init. Initialize element 0 of the output from the input. +;; All other elements are undefined. +(define_insn "loongson_vec_init1_<mode>" + [(set (match_operand:VHB 0 "register_operand" "=f") + (unspec:VHB [(truncate:<V_inner> + (match_operand:DI 1 "reg_or_0_operand" "Jd"))] + UNSPEC_LOONGSON_VINIT))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "dmtc1\t%z1,%0" + [(set_attr "move_type" "mtc") + (set_attr "mode" "DI")]) + +;; Helper for vec_initv2si. +(define_insn "*vec_concatv2si" + [(set (match_operand:V2SI 0 "register_operand" "=f") + (vec_concat:V2SI + (match_operand:SI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpcklwd\t%0,%1,%2" + [(set_attr "type" "fcvt")]) + ;; Instruction patterns for SIMD instructions. ;; Pack with signed saturation. @@ -200,6 +228,51 @@ "pandn\t%0,%1,%2" [(set_attr "type" "fmul")]) +;; Logical AND. +(define_insn "and<mode>3" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (and:VWHB (match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "and\t%0,%1,%2" + [(set_attr "type" "fmul")]) + +;; Logical OR. +(define_insn "ior<mode>3" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (ior:VWHB (match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "or\t%0,%1,%2" + [(set_attr "type" "fcvt")]) + +;; Logical XOR. +(define_insn "xor<mode>3" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (xor:VWHB (match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "xor\t%0,%1,%2" + [(set_attr "type" "fmul")]) + +;; Logical NOR. 
+(define_insn "*loongson_nor" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (and:VWHB + (not:VWHB (match_operand:VWHB 1 "register_operand" "f")) + (not:VWHB (match_operand:VWHB 2 "register_operand" "f"))))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "nor\t%0,%1,%2" + [(set_attr "type" "fmul")]) + +;; Logical NOT. +(define_insn "one_cmpl<mode>2" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (not:VWHB (match_operand:VWHB 1 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "nor\t%0,%1,%1" + [(set_attr "type" "fmul")]) + ;; Average. (define_insn "loongson_pavg<V_suffix>" [(set (match_operand:VHB 0 "register_operand" "=f") @@ -231,96 +304,166 @@ [(set_attr "type" "fadd")]) ;; Extract halfword. -(define_insn "loongson_pextr<V_suffix>" - [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "f") - (match_operand:SI 2 "register_operand" "f")] +(define_insn "loongson_pextrh" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")] UNSPEC_LOONGSON_PEXTR))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pextr<V_suffix>\t%0,%1,%2" - [(set_attr "type" "fmul")]) + "pextrh\t%0,%1,%2" + [(set_attr "type" "fcvt")]) ;; Insert halfword. 
-(define_insn "loongson_pinsr<V_suffix>_0" - [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")] - UNSPEC_LOONGSON_PINSR_0))] +(define_insn "loongson_pinsrh_0" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 4) (const_int 1) + (const_int 2) (const_int 3)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pinsrh_0\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + +(define_insn "loongson_pinsrh_1" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 4) + (const_int 2) (const_int 3)])))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr<V_suffix>_0\t%0,%1,%2" + "pinsrh_1\t%0,%1,%2" [(set_attr "type" "fdiv")]) -(define_insn "loongson_pinsr<V_suffix>_1" - [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")] - UNSPEC_LOONGSON_PINSR_1))] +(define_insn "loongson_pinsrh_2" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 1) + (const_int 4) (const_int 3)])))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr<V_suffix>_1\t%0,%1,%2" + "pinsrh_2\t%0,%1,%2" [(set_attr "type" "fdiv")]) -(define_insn "loongson_pinsr<V_suffix>_2" - [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")] - UNSPEC_LOONGSON_PINSR_2))] +(define_insn "loongson_pinsrh_3" + [(set 
(match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 4)])))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr<V_suffix>_2\t%0,%1,%2" + "pinsrh_3\t%0,%1,%2" [(set_attr "type" "fdiv")]) -(define_insn "loongson_pinsr<V_suffix>_3" - [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")] - UNSPEC_LOONGSON_PINSR_3))] +(define_insn "*vec_setv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f") + (match_operand:SI 3 "const_0_to_3_operand" "")] + UNSPEC_LOONGSON_PINSRH))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pinsr<V_suffix>_3\t%0,%1,%2" + "pinsrh_%3\t%0,%1,%2" [(set_attr "type" "fdiv")]) +(define_expand "vec_setv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") + (match_operand:HI 2 "register_operand" "f") + (match_operand:SI 3 "const_0_to_3_operand" "")] + UNSPEC_LOONGSON_PINSRH))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + rtx ext = gen_reg_rtx (SImode); /* *vec_setv4hi takes operand 2 in SImode. */ + emit_move_insn (ext, gen_lowpart (SImode, operands[2])); + operands[2] = ext; +}) + ;; Multiply and add packed integers. 
-(define_insn "loongson_pmadd<V_stretch_half_suffix>" - [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f") - (unspec:<V_stretch_half> [(match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")] - UNSPEC_LOONGSON_PMADD))] +(define_insn "loongson_pmaddhw" + [(set (match_operand:V2SI 0 "register_operand" "=f") + (unspec:V2SI [(match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")] + UNSPEC_LOONGSON_PMADD))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmadd<V_stretch_half_suffix>\t%0,%1,%2" + "pmaddhw\t%0,%1,%2" [(set_attr "type" "fmul")]) +(define_expand "sdot_prodv4hi" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand:V4HI 1 "register_operand" "") + (match_operand:V4HI 2 "register_operand" "") + (match_operand:V2SI 3 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + rtx t = gen_reg_rtx (V2SImode); + emit_insn (gen_loongson_pmaddhw (t, operands[1], operands[2])); + emit_insn (gen_addv2si3 (operands[0], t, operands[3])); + DONE; +}) + ;; Maximum of signed halfwords. -(define_insn "smax<mode>3" - [(set (match_operand:VH 0 "register_operand" "=f") - (smax:VH (match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")))] +(define_insn "smaxv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (smax:V4HI (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmaxs<V_suffix>\t%0,%1,%2" + "pmaxsh\t%0,%1,%2" [(set_attr "type" "fadd")]) +(define_expand "smax<mode>3" + [(match_operand:VWB 0 "register_operand" "") + (match_operand:VWB 1 "register_operand" "") + (match_operand:VWB 2 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_minmax (operands[0], operands[1], operands[2], + gen_loongson_pcmpgt<V_suffix>, false); + DONE; +}) + ;; Maximum of unsigned bytes. 
-(define_insn "umax<mode>3" - [(set (match_operand:VB 0 "register_operand" "=f") - (umax:VB (match_operand:VB 1 "register_operand" "f") - (match_operand:VB 2 "register_operand" "f")))] +(define_insn "umaxv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (umax:V8QI (match_operand:V8QI 1 "register_operand" "f") + (match_operand:V8QI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmaxu<V_suffix>\t%0,%1,%2" + "pmaxub\t%0,%1,%2" [(set_attr "type" "fadd")]) ;; Minimum of signed halfwords. -(define_insn "smin<mode>3" - [(set (match_operand:VH 0 "register_operand" "=f") - (smin:VH (match_operand:VH 1 "register_operand" "f") - (match_operand:VH 2 "register_operand" "f")))] +(define_insn "sminv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (smin:V4HI (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pmins<V_suffix>\t%0,%1,%2" + "pminsh\t%0,%1,%2" [(set_attr "type" "fadd")]) +(define_expand "smin<mode>3" + [(match_operand:VWB 0 "register_operand" "") + (match_operand:VWB 1 "register_operand" "") + (match_operand:VWB 2 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_minmax (operands[0], operands[1], operands[2], + gen_loongson_pcmpgt<V_suffix>, true); + DONE; +}) + ;; Minimum of unsigned bytes. -(define_insn "umin<mode>3" - [(set (match_operand:VB 0 "register_operand" "=f") - (umin:VB (match_operand:VB 1 "register_operand" "f") - (match_operand:VB 2 "register_operand" "f")))] +(define_insn "uminv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (umin:V8QI (match_operand:V8QI 1 "register_operand" "f") + (match_operand:V8QI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pminu<V_suffix>\t%0,%1,%2" + "pminub\t%0,%1,%2" [(set_attr "type" "fadd")]) ;; Move byte mask. 
@@ -390,6 +533,14 @@ "biadd\t%0,%1" [(set_attr "type" "fabs")]) +(define_insn "reduc_uplus_v8qi" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "f")] + UNSPEC_LOONGSON_BIADD))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "biadd\t%0,%1" + [(set_attr "type" "fabs")]) + ;; Sum of absolute differences. (define_insn "loongson_psadbh" [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f") @@ -403,12 +554,11 @@ ;; Shuffle halfwords. (define_insn "loongson_pshufh" [(set (match_operand:VH 0 "register_operand" "=f") - (unspec:VH [(match_operand:VH 1 "register_operand" "0") - (match_operand:VH 2 "register_operand" "f") - (match_operand:SI 3 "register_operand" "f")] + (unspec:VH [(match_operand:VH 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")] UNSPEC_LOONGSON_PSHUFH))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "pshufh\t%0,%2,%3" + "pshufh\t%0,%1,%2" [(set_attr "type" "fmul")]) ;; Shift left logical. @@ -418,7 +568,7 @@ (match_operand:SI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" "psll<V_suffix>\t%0,%1,%2" - [(set_attr "type" "fmul")]) + [(set_attr "type" "fcvt")]) ;; Shift right arithmetic. (define_insn "ashr<mode>3" @@ -427,7 +577,7 @@ (match_operand:SI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" "psra<V_suffix>\t%0,%1,%2" - [(set_attr "type" "fdiv")]) + [(set_attr "type" "fcvt")]) ;; Shift right logical. (define_insn "lshr<mode>3" @@ -436,7 +586,7 @@ (match_operand:SI 2 "register_operand" "f")))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" "psrl<V_suffix>\t%0,%1,%2" - [(set_attr "type" "fdiv")]) + [(set_attr "type" "fcvt")]) ;; Subtraction, treating overflow by wraparound. (define_insn "sub<mode>3" @@ -478,26 +628,286 @@ "psubus<V_suffix>\t%0,%1,%2" [(set_attr "type" "fadd")]) -;; Unpack high data. 
-(define_insn "vec_interleave_high<mode>" - [(set (match_operand:VWHB 0 "register_operand" "=f") - (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") - (match_operand:VWHB 2 "register_operand" "f")] - UNSPEC_LOONGSON_PUNPCKH))] +;; Unpack high data. Recall that Loongson only runs in little-endian. +(define_insn "loongson_punpckhbh" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "f") + (match_operand:V8QI 2 "register_operand" "f")) + (parallel [(const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpckhbh\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + +(define_insn "loongson_punpckhhw" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "punpckh<V_stretch_half_suffix>\t%0,%1,%2" + "punpckhhw\t%0,%1,%2" [(set_attr "type" "fdiv")]) +(define_insn "loongson_punpckhhw_qi" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "f") + (match_operand:V8QI 2 "register_operand" "f")) + (parallel [(const_int 4) (const_int 5) + (const_int 12) (const_int 13) + (const_int 6) (const_int 7) + (const_int 14) (const_int 15)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpckhhw\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + +(define_insn "loongson_punpckhwd" + [(set (match_operand:V2SI 0 "register_operand" "=f") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "register_operand" "f") + (match_operand:V2SI 2 "register_operand" "f")) + (parallel [(const_int 1) (const_int 3)])))] + "TARGET_HARD_FLOAT && 
TARGET_LOONGSON_VECTORS" + "punpckhwd\t%0,%1,%2" + [(set_attr "type" "fcvt")]) + +(define_insn "loongson_punpckhwd_qi" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "f") + (match_operand:V8QI 2 "register_operand" "f")) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpckhwd\t%0,%1,%2" + [(set_attr "type" "fcvt")]) + +(define_insn "loongson_punpckhwd_hi" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 2) (const_int 3) + (const_int 6) (const_int 7)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpckhwd\t%0,%1,%2" + [(set_attr "type" "fcvt")]) + ;; Unpack low data. -(define_insn "vec_interleave_low<mode>" - [(set (match_operand:VWHB 0 "register_operand" "=f") - (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") - (match_operand:VWHB 2 "register_operand" "f")] - UNSPEC_LOONGSON_PUNPCKL))] +(define_insn "loongson_punpcklbh" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "f") + (match_operand:V8QI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpcklbh\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + +(define_insn "loongson_punpcklhw" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] 
"TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" - "punpckl<V_stretch_half_suffix>\t%0,%1,%2" + "punpcklhw\t%0,%1,%2" + [(set_attr "type" "fdiv")]) + +(define_insn "*loongson_punpcklhw_qi" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "f") + (match_operand:V8QI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 1) + (const_int 8) (const_int 9) + (const_int 2) (const_int 3) + (const_int 10) (const_int 11)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpcklhw\t%0,%1,%2" [(set_attr "type" "fdiv")]) +(define_insn "loongson_punpcklwd" + [(set (match_operand:V2SI 0 "register_operand" "=f") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "register_operand" "f") + (match_operand:V2SI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 2)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpcklwd\t%0,%1,%2" + [(set_attr "type" "fcvt")]) + +(define_insn "*loongson_punpcklwd_qi" + [(set (match_operand:V8QI 0 "register_operand" "=f") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "f") + (match_operand:V8QI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 8) (const_int 9) + (const_int 10) (const_int 11)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpcklwd\t%0,%1,%2" + [(set_attr "type" "fcvt")]) + +(define_insn "*loongson_punpcklwd_hi" + [(set (match_operand:V4HI 0 "register_operand" "=f") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "register_operand" "f") + (match_operand:V4HI 2 "register_operand" "f")) + (parallel [(const_int 0) (const_int 1) + (const_int 4) (const_int 5)])))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpcklwd\t%0,%1,%2" + [(set_attr "type" "fcvt")]) + +(define_expand "vec_perm_const<mode>" + [(match_operand:VWHB 0 "register_operand" "") + (match_operand:VWHB 1 
"register_operand" "") + (match_operand:VWHB 2 "register_operand" "") + (match_operand:VWHB 3 "" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + if (mips_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + +(define_expand "vec_unpacks_lo_<mode>" + [(match_operand:<V_stretch_half> 0 "register_operand" "") + (match_operand:VHB 1 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_unpack (operands, false, false); + DONE; +}) + +(define_expand "vec_unpacks_hi_<mode>" + [(match_operand:<V_stretch_half> 0 "register_operand" "") + (match_operand:VHB 1 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_unpack (operands, false, true); + DONE; +}) + +(define_expand "vec_unpacku_lo_<mode>" + [(match_operand:<V_stretch_half> 0 "register_operand" "") + (match_operand:VHB 1 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_unpack (operands, true, false); + DONE; +}) + +(define_expand "vec_unpacku_hi_<mode>" + [(match_operand:<V_stretch_half> 0 "register_operand" "") + (match_operand:VHB 1 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_unpack (operands, true, true); + DONE; +}) + +;; Whole vector shifts, used for reduction epilogues. 
+(define_insn "vec_shl_<mode>" + [(set (match_operand:VWHBDI 0 "register_operand" "=f") + (unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")] + UNSPEC_LOONGSON_DSLL))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "dsll\t%0,%1,%2" + [(set_attr "type" "fcvt")]) + +(define_insn "vec_shr_<mode>" + [(set (match_operand:VWHBDI 0 "register_operand" "=f") + (unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")] + UNSPEC_LOONGSON_DSRL))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "dsrl\t%0,%1,%2" + [(set_attr "type" "fcvt")]) + +(define_expand "reduc_uplus_<mode>" + [(match_operand:VWH 0 "register_operand" "") + (match_operand:VWH 1 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_reduc (operands[0], operands[1], gen_add<mode>3); + DONE; +}) + +; ??? Given that we're not describing a widening reduction, we should +; not have separate optabs for signed and unsigned. 
+(define_expand "reduc_splus_<mode>" + [(match_operand:VWHB 0 "register_operand" "") + (match_operand:VWHB 1 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + emit_insn (gen_reduc_uplus_<mode>(operands[0], operands[1])); + DONE; +}) + +(define_expand "reduc_smax_<mode>" + [(match_operand:VWHB 0 "register_operand" "") + (match_operand:VWHB 1 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_reduc (operands[0], operands[1], gen_smax<mode>3); + DONE; +}) + +(define_expand "reduc_smin_<mode>" + [(match_operand:VWHB 0 "register_operand" "") + (match_operand:VWHB 1 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_reduc (operands[0], operands[1], gen_smin<mode>3); + DONE; +}) + +(define_expand "reduc_umax_<mode>" + [(match_operand:VB 0 "register_operand" "") + (match_operand:VB 1 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_reduc (operands[0], operands[1], gen_umax<mode>3); + DONE; +}) + +(define_expand "reduc_umin_<mode>" + [(match_operand:VB 0 "register_operand" "") + (match_operand:VB 1 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vec_reduc (operands[0], operands[1], gen_umin<mode>3); + DONE; +}) + ;; Integer division and modulus. For integer multiplication, see mips.md. (define_insn "<u>div<mode>3" diff --git a/gcc/config/mips/mips-modes.def b/gcc/config/mips/mips-modes.def index b9c508b5c5b..187c651bbbc 100644 --- a/gcc/config/mips/mips-modes.def +++ b/gcc/config/mips/mips-modes.def @@ -26,9 +26,15 @@ RESET_FLOAT_FORMAT (DF, mips_double_format); FLOAT_MODE (TF, 16, mips_quad_format); /* Vector modes. 
*/ -VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ -VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ -VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ + +/* Double-sized vector modes for vec_concat. */ +VECTOR_MODE (INT, QI, 16); /* V16QI */ +VECTOR_MODE (INT, HI, 8); /* V8HI */ +VECTOR_MODE (INT, SI, 4); /* V4SI */ +VECTOR_MODE (FLOAT, SF, 4); /* V4SF */ VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */ VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */ diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index dbabdffaef0..1791ce7c143 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -328,6 +328,11 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs, rtx, rtx, rtx, rtx); extern void mips_expand_vector_init (rtx, rtx); +extern bool mips_expand_vec_perm_const (rtx op[4]); +extern void mips_expand_vec_unpack (rtx op[2], bool, bool); +extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx)); +extern void mips_expand_vec_minmax (rtx, rtx, rtx, + rtx (*) (rtx, rtx, rtx), bool); extern bool mips_eh_uses (unsigned int); extern bool mips_epilogue_uses (unsigned int); diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md index 504f43ce46c..cc8a4c044ac 100644 --- a/gcc/config/mips/mips-ps-3d.md +++ b/gcc/config/mips/mips-ps-3d.md @@ -89,75 +89,181 @@ DONE; }) -; pul.ps - Pair Upper Lower -(define_insn "mips_pul_ps" +(define_insn "vec_perm_const_ps" [(set (match_operand:V2SF 0 "register_operand" "=f") - (vec_merge:V2SF - (match_operand:V2SF 1 "register_operand" "f") - (match_operand:V2SF 2 "register_operand" "f") - (const_int 2)))] + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" "f") + (match_operand:V2SF 2 "register_operand" "f")) + (parallel [(match_operand:SI 3 "const_0_or_1_operand" "") + (match_operand:SI 4 "const_2_or_3_operand" "")])))] 
"TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" - "pul.ps\t%0,%1,%2" - [(set_attr "type" "fmove") - (set_attr "mode" "SF")]) +{ + /* Let <op>L be the lower part of operand <op> and <op>U be the upper part. + The P[UL][UL].PS instruction always specifies the upper part of the + result first, so the instruction is: -; puu.ps - Pair upper upper -(define_insn "mips_puu_ps" - [(set (match_operand:V2SF 0 "register_operand" "=f") - (vec_merge:V2SF - (match_operand:V2SF 1 "register_operand" "f") - (vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 2)))] - "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" - "puu.ps\t%0,%1,%2" - [(set_attr "type" "fmove") - (set_attr "mode" "SF")]) + P<aUL><bUL>.PS %0,<aop>,<bop> -; pll.ps - Pair Lower Lower -(define_insn "mips_pll_ps" - [(set (match_operand:V2SF 0 "register_operand" "=f") - (vec_merge:V2SF - (vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f") - (parallel [(const_int 1) - (const_int 0)])) - (match_operand:V2SF 2 "register_operand" "f") - (const_int 2)))] - "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" - "pll.ps\t%0,%1,%2" - [(set_attr "type" "fmove") - (set_attr "mode" "SF")]) + where 0U == <aop><aUL> and 0L == <bop><bUL>. -; plu.ps - Pair Lower Upper -(define_insn "mips_plu_ps" - [(set (match_operand:V2SF 0 "register_operand" "=f") - (vec_merge:V2SF - (vec_select:V2SF (match_operand:V2SF 1 "register_operand" "f") - (parallel [(const_int 1) - (const_int 0)])) - (vec_select:V2SF (match_operand:V2SF 2 "register_operand" "f") - (parallel [(const_int 1) - (const_int 0)])) - (const_int 2)))] - "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" - "plu.ps\t%0,%1,%2" + GCC's vector indices are specified in memory order, which means + that vector element 0 is the lower part (L) on little-endian targets + and the upper part (U) on big-endian targets. 
vec_concat likewise + concatenates in memory order, which means that operand 3 (being + 0 or 1) selects part of operand 1 and operand 4 (being 2 or 3) + selects part of operand 2. + + Let: + + I3 = INTVAL (operands[3]) + I4 = INTVAL (operands[4]) - 2 + + Taking the two endiannesses in turn: + + Little-endian: + + The semantics of the RTL pattern are: + + { 0L, 0U } = { X[I3], X[I4 + 2] }, where X = { 1L, 1U, 2L, 2U } + + so: 0L = { 1L, 1U }[I3] (= <bop><bUL>) + 0U = { 2L, 2U }[I4] (= <aop><aUL>) + + <aop> = 2, <aUL> = I4 ? U : L + <bop> = 1, <bUL> = I3 ? U : L + + [LL] !I4 && !I3 [UL] I4 && !I3 + [LU] !I4 && I3 [UU] I4 && I3 + + Big-endian: + + The semantics of the RTL pattern are: + + { 0U, 0L } = { X[I3], X[I4 + 2] }, where X = { 1U, 1L, 2U, 2L } + + so: 0U = { 1U, 1L }[I3] (= <aop><aUL>) + 0L = { 2U, 2L }[I4] (= <bop><bUL>) + + <aop> = 1, <aUL> = I3 ? L : U + <bop> = 2, <bUL> = I4 ? L : U + + [UU] !I3 && !I4 [UL] !I3 && I4 + [LU] I3 && !I4 [LL] I3 && I4. */ + + static const char * const mnemonics[2][4] = { + /* LE */ { "pll.ps\t%0,%2,%1", "pul.ps\t%0,%2,%1", + "plu.ps\t%0,%2,%1", "puu.ps\t%0,%2,%1" }, + /* BE */ { "puu.ps\t%0,%1,%2", "pul.ps\t%0,%1,%2", + "plu.ps\t%0,%1,%2", "pll.ps\t%0,%1,%2" }, + }; + + unsigned mask = INTVAL (operands[3]) * 2 + (INTVAL (operands[4]) - 2); + return mnemonics[BYTES_BIG_ENDIAN][mask]; +} [(set_attr "type" "fmove") (set_attr "mode" "SF")]) +(define_expand "vec_perm_constv2sf" + [(match_operand:V2SF 0 "register_operand" "") + (match_operand:V2SF 1 "register_operand" "") + (match_operand:V2SF 2 "register_operand" "") + (match_operand:V2SI 3 "" "")] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + if (mips_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + +;; Expanders for builtins. The instruction: +;; +;; P[UL][UL].PS <result>, <a>, <b> +;; +;; says that the upper part of <result> is taken from half of <a> and +;; the lower part of <result> is taken from half of <b>. 
This means +;; that the P[UL][UL].PS operand order matches memory order on big-endian +;; targets; <a> is element 0 of the V2SF result while <b> is element 1. +;; However, the P[UL][UL].PS operand order is the reverse of memory order +;; on little-endian targets; <a> is element 1 of the V2SF result while +;; <b> is element 0. The arguments to vec_perm_const_ps are always in +;; memory order. +;; +;; Similarly, "U" corresponds to element 0 on big-endian targets but +;; to element 1 on little-endian targets. + +(define_expand "mips_puu_ps" + [(match_operand:V2SF 0 "register_operand" "") + (match_operand:V2SF 1 "register_operand" "") + (match_operand:V2SF 2 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2], + const0_rtx, const2_rtx)); + else + emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1], + const1_rtx, GEN_INT (3))); + DONE; +}) + +(define_expand "mips_pul_ps" + [(match_operand:V2SF 0 "register_operand" "") + (match_operand:V2SF 1 "register_operand" "") + (match_operand:V2SF 2 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2], + const0_rtx, GEN_INT (3))); + else + emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1], + const0_rtx, GEN_INT (3))); + DONE; +}) + +(define_expand "mips_plu_ps" + [(match_operand:V2SF 0 "register_operand" "") + (match_operand:V2SF 1 "register_operand" "") + (match_operand:V2SF 2 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2], + const1_rtx, const2_rtx)); + else + emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1], + const1_rtx, const2_rtx)); + DONE; +}) + +(define_expand "mips_pll_ps" + 
[(match_operand:V2SF 0 "register_operand" "") + (match_operand:V2SF 1 "register_operand" "") + (match_operand:V2SF 2 "register_operand" "")] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + if (BYTES_BIG_ENDIAN) + emit_insn (gen_vec_perm_const_ps (operands[0], operands[1], operands[2], + const1_rtx, GEN_INT (3))); + else + emit_insn (gen_vec_perm_const_ps (operands[0], operands[2], operands[1], + const0_rtx, const2_rtx)); + DONE; +}) + ; vec_init (define_expand "vec_initv2sf" [(match_operand:V2SF 0 "register_operand") (match_operand:V2SF 1 "")] "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" { - rtx op0 = force_reg (SFmode, XVECEXP (operands[1], 0, 0)); - rtx op1 = force_reg (SFmode, XVECEXP (operands[1], 0, 1)); - emit_insn (gen_vec_initv2sf_internal (operands[0], op0, op1)); + mips_expand_vector_init (operands[0], operands[1]); DONE; }) -(define_insn "vec_initv2sf_internal" +(define_insn "vec_concatv2sf" [(set (match_operand:V2SF 0 "register_operand" "=f") (vec_concat:V2SF (match_operand:SF 1 "register_operand" "f") @@ -195,22 +301,21 @@ ;; no other way to get a vector mode bitfield store currently. (define_expand "vec_setv2sf" - [(match_operand:V2SF 0 "register_operand") - (match_operand:SF 1 "register_operand") - (match_operand 2 "const_0_or_1_operand")] + [(set (match_operand:V2SF 0 "register_operand" "") + (vec_select:V2SF + (vec_concat:V4SF + (match_operand:SF 1 "register_operand" "") + (match_dup 0)) + (parallel [(match_operand 2 "const_0_or_1_operand" "") + (match_dup 3)])))] "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" { - rtx temp; - /* We don't have an insert instruction, so we duplicate the float, and then use a PUL instruction. 
*/ - temp = gen_reg_rtx (V2SFmode); - emit_insn (gen_mips_cvt_ps_s (temp, operands[1], operands[1])); - if (INTVAL (operands[2]) == !BYTES_BIG_ENDIAN) - emit_insn (gen_mips_pul_ps (operands[0], temp, operands[0])); - else - emit_insn (gen_mips_pul_ps (operands[0], operands[0], temp)); - DONE; + rtx temp = gen_reg_rtx (V2SFmode); + emit_insn (gen_vec_concatv2sf (temp, operands[1], operands[1])); + operands[1] = temp; + operands[3] = GEN_INT (1 - INTVAL (operands[2]) + 2); }) ; cvt.ps.s - Floating Point Convert Pair to Paired Single @@ -221,11 +326,9 @@ "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" { if (BYTES_BIG_ENDIAN) - emit_insn (gen_vec_initv2sf_internal (operands[0], operands[1], - operands[2])); + emit_insn (gen_vec_concatv2sf (operands[0], operands[1], operands[2])); else - emit_insn (gen_vec_initv2sf_internal (operands[0], operands[2], - operands[1])); + emit_insn (gen_vec_concatv2sf (operands[0], operands[2], operands[1])); DONE; }) @@ -268,6 +371,14 @@ [(set_attr "type" "fadd") (set_attr "mode" "SF")]) +(define_insn "reduc_splus_v2sf" + [(set (match_operand:V2SF 0 "register_operand" "=f") + (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "f") + (match_dup 1)] + UNSPEC_ADDR_PS))] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" + "") + ; cvt.pw.ps - Floating Point Convert Paired Single to Paired Word (define_insn "mips_cvt_pw_ps" [(set (match_operand:V2SF 0 "register_operand" "=f") @@ -633,3 +744,21 @@ LE, operands[2], operands[1]); DONE; }) + +(define_expand "reduc_smin_v2sf" + [(match_operand:V2SF 0 "register_operand") + (match_operand:V2SF 1 "register_operand")] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + mips_expand_vec_reduc (operands[0], operands[1], gen_sminv2sf3); + DONE; +}) + +(define_expand "reduc_smax_v2sf" + [(match_operand:V2SF 0 "register_operand") + (match_operand:V2SF 1 "register_operand")] + "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" +{ + mips_expand_vec_reduc (operands[0], operands[1], gen_smaxv2sf3); + 
DONE; +}) diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index cf908f6abbe..bdbf94a48b0 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -4638,7 +4638,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum, /* The EABI conventions have traditionally been defined in terms of TYPE_MODE, regardless of the actual type. */ info->fpr_p = ((GET_MODE_CLASS (mode) == MODE_FLOAT - || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + || mode == V2SFmode) && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE); break; @@ -4653,7 +4653,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum, || SCALAR_FLOAT_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)) && (GET_MODE_CLASS (mode) == MODE_FLOAT - || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + || mode == V2SFmode) && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE); break; @@ -4666,7 +4666,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum, && (type == 0 || FLOAT_TYPE_P (type)) && (GET_MODE_CLASS (mode) == MODE_FLOAT || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT - || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + || mode == V2SFmode) && GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FPVALUE); /* ??? According to the ABI documentation, the real and imaginary @@ -5103,7 +5103,7 @@ static bool mips_return_mode_in_fpr_p (enum machine_mode mode) { return ((GET_MODE_CLASS (mode) == MODE_FLOAT - || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + || mode == V2SFmode || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) && GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_HWFPVALUE); } @@ -10782,12 +10782,18 @@ mips_class_max_nregs (enum reg_class rclass, enum machine_mode mode) /* Implement CANNOT_CHANGE_MODE_CLASS. 
*/ bool -mips_cannot_change_mode_class (enum machine_mode from ATTRIBUTE_UNUSED, - enum machine_mode to ATTRIBUTE_UNUSED, +mips_cannot_change_mode_class (enum machine_mode from, + enum machine_mode to, enum reg_class rclass) { - /* There are several problems with changing the modes of values in - floating-point registers: + /* Allow conversions between different Loongson integer vectors, + and between those vectors and DImode. */ + if (GET_MODE_SIZE (from) == 8 && GET_MODE_SIZE (to) == 8 + && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to)) + return false; + + /* Otherwise, there are several problems with changing the modes of + values in floating-point registers: - When a multi-word value is stored in paired floating-point registers, the first register always holds the low word. We @@ -10808,6 +10814,7 @@ mips_cannot_change_mode_class (enum machine_mode from ATTRIBUTE_UNUSED, format. We therefore disallow all mode changes involving FPRs. */ + return reg_classes_intersect_p (FP_REGS, rclass); } @@ -12785,12 +12792,6 @@ AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN) #define CODE_FOR_loongson_psubsb CODE_FOR_sssubv8qi3 #define CODE_FOR_loongson_psubush CODE_FOR_ussubv4hi3 #define CODE_FOR_loongson_psubusb CODE_FOR_ussubv8qi3 -#define CODE_FOR_loongson_punpckhbh CODE_FOR_vec_interleave_highv8qi -#define CODE_FOR_loongson_punpckhhw CODE_FOR_vec_interleave_highv4hi -#define CODE_FOR_loongson_punpckhwd CODE_FOR_vec_interleave_highv2si -#define CODE_FOR_loongson_punpcklbh CODE_FOR_vec_interleave_lowv8qi -#define CODE_FOR_loongson_punpcklhw CODE_FOR_vec_interleave_lowv4hi -#define CODE_FOR_loongson_punpcklwd CODE_FOR_vec_interleave_lowv2si static const struct mips_builtin_description mips_builtins[] = { DIRECT_BUILTIN (pll_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single), @@ -13032,8 +13033,8 @@ static const struct mips_builtin_description mips_builtins[] = { LOONGSON_BUILTIN (pasubub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), LOONGSON_BUILTIN (biadd, MIPS_UV4HI_FTYPE_UV8QI), 
LOONGSON_BUILTIN (psadbh, MIPS_UV4HI_FTYPE_UV8QI_UV8QI), - LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI_UQI), - LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_V4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_UQI), LOONGSON_BUILTIN_SUFFIX (psllh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI), LOONGSON_BUILTIN_SUFFIX (psllh, s, MIPS_V4HI_FTYPE_V4HI_UQI), LOONGSON_BUILTIN_SUFFIX (psllw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI), @@ -15923,30 +15924,6 @@ mips_conditional_register_usage (void) } } -/* Initialize vector TARGET to VALS. */ - -void -mips_expand_vector_init (rtx target, rtx vals) -{ - enum machine_mode mode; - enum machine_mode inner; - unsigned int i, n_elts; - rtx mem; - - mode = GET_MODE (target); - inner = GET_MODE_INNER (mode); - n_elts = GET_MODE_NUNITS (mode); - - gcc_assert (VECTOR_MODE_P (mode)); - - mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); - for (i = 0; i < n_elts; i++) - emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)), - XVECEXP (vals, 0, i)); - - emit_move_insn (target, mem); -} - /* When generating MIPS16 code, we want to allocate $24 (T_REG) before other registers for instructions for which it is possible. This encourages the compiler to use CMP in cases where an XOR would @@ -16357,6 +16334,667 @@ mips_prepare_pch_save (void) mips16_globals = 0; } +/* Generate or test for an insn that supports a constant permutation. */ + +#define MAX_VECT_LEN 8 + +struct expand_vec_perm_d +{ + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + enum machine_mode vmode; + unsigned char nelt; + bool one_vector_p; + bool testing_p; +}; + +/* Construct (set target (vec_select op0 (parallel perm))) and + return true if that's a valid instruction in the active ISA. 
*/ + +static bool +mips_expand_vselect (rtx target, rtx op0, + const unsigned char *perm, unsigned nelt) +{ + rtx rperm[MAX_VECT_LEN], x; + unsigned i; + + for (i = 0; i < nelt; ++i) + rperm[i] = GEN_INT (perm[i]); + + x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); + x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); + x = gen_rtx_SET (VOIDmode, target, x); + + x = emit_insn (x); + if (recog_memoized (x) < 0) + { + remove_insn (x); + return false; + } + return true; +} + +/* Similar, but generate a vec_concat from op0 and op1 as well. */ + +static bool +mips_expand_vselect_vconcat (rtx target, rtx op0, rtx op1, + const unsigned char *perm, unsigned nelt) +{ + enum machine_mode v2mode; + rtx x; + + v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0)); + x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); + return mips_expand_vselect (target, x, perm, nelt); +} + +/* Recognize patterns for even-odd extraction. */ + +static bool +mips_expand_vpc_loongson_even_odd (struct expand_vec_perm_d *d) +{ + unsigned i, odd, nelt = d->nelt; + rtx t0, t1, t2, t3; + + if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS)) + return false; + /* Even-odd for V2SI/V2SFmode is matched by interleave directly. */ + if (nelt < 4) + return false; + + odd = d->perm[0]; + if (odd > 1) + return false; + for (i = 1; i < nelt; ++i) + if (d->perm[i] != i * 2 + odd) + return false; + + if (d->testing_p) + return true; + + /* We need 2*log2(N)-1 operations to achieve odd/even with interleave. 
*/ + t0 = gen_reg_rtx (d->vmode); + t1 = gen_reg_rtx (d->vmode); + switch (d->vmode) + { + case V4HImode: + emit_insn (gen_loongson_punpckhhw (t0, d->op0, d->op1)); + emit_insn (gen_loongson_punpcklhw (t1, d->op0, d->op1)); + if (odd) + emit_insn (gen_loongson_punpckhhw (d->target, t1, t0)); + else + emit_insn (gen_loongson_punpcklhw (d->target, t1, t0)); + break; + + case V8QImode: + t2 = gen_reg_rtx (d->vmode); + t3 = gen_reg_rtx (d->vmode); + emit_insn (gen_loongson_punpckhbh (t0, d->op0, d->op1)); + emit_insn (gen_loongson_punpcklbh (t1, d->op0, d->op1)); + emit_insn (gen_loongson_punpckhbh (t2, t1, t0)); + emit_insn (gen_loongson_punpcklbh (t3, t1, t0)); + if (odd) + emit_insn (gen_loongson_punpckhbh (d->target, t3, t2)); + else + emit_insn (gen_loongson_punpcklbh (d->target, t3, t2)); + break; + + default: + gcc_unreachable (); + } + return true; +} + +/* Recognize patterns for the Loongson PSHUFH instruction. */ + +static bool +mips_expand_vpc_loongson_pshufh (struct expand_vec_perm_d *d) +{ + unsigned i, mask; + rtx rmask; + + if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS)) + return false; + if (d->vmode != V4HImode) + return false; + if (d->testing_p) + return true; + + /* Convert the selector into the packed 8-bit form for pshufh. */ + /* Recall that loongson is little-endian only. No big-endian + adjustment required. */ + for (i = mask = 0; i < 4; i++) + mask |= (d->perm[i] & 3) << (i * 2); + rmask = force_reg (SImode, GEN_INT (mask)); + + if (d->one_vector_p) + emit_insn (gen_loongson_pshufh (d->target, d->op0, rmask)); + else + { + rtx t0, t1, x, merge, rmerge[4]; + + t0 = gen_reg_rtx (V4HImode); + t1 = gen_reg_rtx (V4HImode); + emit_insn (gen_loongson_pshufh (t1, d->op1, rmask)); + emit_insn (gen_loongson_pshufh (t0, d->op0, rmask)); + + for (i = 0; i < 4; ++i) + rmerge[i] = (d->perm[i] & 4 ? 
constm1_rtx : const0_rtx); + merge = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmerge)); + merge = force_reg (V4HImode, merge); + + x = gen_rtx_AND (V4HImode, merge, t1); + emit_insn (gen_rtx_SET (VOIDmode, t1, x)); + + x = gen_rtx_NOT (V4HImode, merge); + x = gen_rtx_AND (V4HImode, x, t0); + emit_insn (gen_rtx_SET (VOIDmode, t0, x)); + + x = gen_rtx_IOR (V4HImode, t0, t1); + emit_insn (gen_rtx_SET (VOIDmode, d->target, x)); + } + + return true; +} + +/* Recognize broadcast patterns for the Loongson. */ + +static bool +mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d) +{ + unsigned i, elt; + rtx t0, t1; + + if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS)) + return false; + /* Note that we've already matched V2SI via punpck and V4HI via pshufh. */ + if (d->vmode != V8QImode) + return false; + if (!d->one_vector_p) + return false; + + elt = d->perm[0]; + for (i = 1; i < 8; ++i) + if (d->perm[i] != elt) + return false; + + if (d->testing_p) + return true; + + /* With one interleave we put two of the desired element adjacent. */ + t0 = gen_reg_rtx (V8QImode); + if (elt < 4) + emit_insn (gen_loongson_punpcklbh (t0, d->op0, d->op0)); + else + emit_insn (gen_loongson_punpckhbh (t0, d->op0, d->op0)); + + /* Shuffle that one HImode element into all locations. */ + elt &= 3; + elt *= 0x55; + t1 = gen_reg_rtx (V4HImode); + emit_insn (gen_loongson_pshufh (t1, gen_lowpart (V4HImode, t0), + force_reg (SImode, GEN_INT (elt)))); + + emit_move_insn (d->target, gen_lowpart (V8QImode, t1)); + return true; +} + +static bool +mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +{ + unsigned int i, nelt = d->nelt; + unsigned char perm2[MAX_VECT_LEN]; + + if (d->one_vector_p) + { + /* Try interleave with alternating operands. 
*/ + memcpy (perm2, d->perm, sizeof(perm2)); + for (i = 1; i < nelt; i += 2) + perm2[i] += nelt; + if (mips_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2, nelt)) + return true; + } + else + { + if (mips_expand_vselect_vconcat (d->target, d->op0, d->op1, + d->perm, nelt)) + return true; + + /* Try again with swapped operands. */ + for (i = 0; i < nelt; ++i) + perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1); + if (mips_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) + return true; + } + + if (mips_expand_vpc_loongson_even_odd (d)) + return true; + if (mips_expand_vpc_loongson_pshufh (d)) + return true; + if (mips_expand_vpc_loongson_bcast (d)) + return true; + return false; +} + +/* Expand a vec_perm_const pattern. */ + +bool +mips_expand_vec_perm_const (rtx operands[4]) +{ + struct expand_vec_perm_d d; + int i, nelt, which; + unsigned char orig_perm[MAX_VECT_LEN]; + rtx sel; + bool ok; + + d.target = operands[0]; + d.op0 = operands[1]; + d.op1 = operands[2]; + sel = operands[3]; + + d.vmode = GET_MODE (d.target); + gcc_assert (VECTOR_MODE_P (d.vmode)); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = false; + + for (i = which = 0; i < nelt; ++i) + { + rtx e = XVECEXP (sel, 0, i); + int ei = INTVAL (e) & (2 * nelt - 1); + which |= (ei < nelt ? 1 : 2); + orig_perm[i] = ei; + } + memcpy (d.perm, orig_perm, MAX_VECT_LEN); + + switch (which) + { + default: + gcc_unreachable(); + + case 3: + d.one_vector_p = false; + if (!rtx_equal_p (d.op0, d.op1)) + break; + /* FALLTHRU */ + + case 2: + for (i = 0; i < nelt; ++i) + d.perm[i] &= nelt - 1; + d.op0 = d.op1; + d.one_vector_p = true; + break; + + case 1: + d.op1 = d.op0; + d.one_vector_p = true; + break; + } + + ok = mips_expand_vec_perm_const_1 (&d); + + /* If we were given a two-vector permutation which just happened to + have both input vectors equal, we folded this into a one-vector + permutation. 
There are several loongson patterns that are matched + via direct vec_select+vec_concat expansion, but we do not have + support in mips_expand_vec_perm_const_1 to guess the adjustment + that should be made for a single operand. Just try again with + the original permutation. */ + if (!ok && which == 3) + { + d.op0 = operands[1]; + d.op1 = operands[2]; + d.one_vector_p = false; + memcpy (d.perm, orig_perm, MAX_VECT_LEN); + ok = mips_expand_vec_perm_const_1 (&d); + } + + return ok; +} + +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ + +static bool +mips_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel) +{ + struct expand_vec_perm_d d; + unsigned int i, nelt, which; + bool ret; + + d.vmode = vmode; + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = true; + memcpy (d.perm, sel, nelt); + + /* Categorize the set of elements in the selector. */ + for (i = which = 0; i < nelt; ++i) + { + unsigned char e = d.perm[i]; + gcc_assert (e < 2 * nelt); + which |= (e < nelt ? 1 : 2); + } + + /* For all elements from second vector, fold the elements to first. */ + if (which == 2) + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; + + /* Check whether the mask can be applied to the vector type. */ + d.one_vector_p = (which != 3); + + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + ret = mips_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; +} + +/* Expand an integral vector unpack operation. 
*/ + +void +mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p) +{ + enum machine_mode imode = GET_MODE (operands[1]); + rtx (*unpack) (rtx, rtx, rtx); + rtx (*cmpgt) (rtx, rtx, rtx); + rtx tmp, dest, zero; + + switch (imode) + { + case V8QImode: + if (high_p) + unpack = gen_loongson_punpckhbh; + else + unpack = gen_loongson_punpcklbh; + cmpgt = gen_loongson_pcmpgtb; + break; + case V4HImode: + if (high_p) + unpack = gen_loongson_punpckhhw; + else + unpack = gen_loongson_punpcklhw; + cmpgt = gen_loongson_pcmpgth; + break; + default: + gcc_unreachable (); + } + + zero = force_reg (imode, CONST0_RTX (imode)); + if (unsigned_p) + tmp = zero; + else + { + tmp = gen_reg_rtx (imode); + emit_insn (cmpgt (tmp, zero, operands[1])); + } + + dest = gen_reg_rtx (imode); + emit_insn (unpack (dest, operands[1], tmp)); + + emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest)); +} + +/* A subroutine of mips_expand_vec_init, match constant vector elements. */ + +static inline bool +mips_constant_elt_p (rtx x) +{ + return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE; +} + +/* A subroutine of mips_expand_vec_init, expand via broadcast. 
*/ + +static void +mips_expand_vi_broadcast (enum machine_mode vmode, rtx target, rtx elt) +{ + struct expand_vec_perm_d d; + rtx t1; + bool ok; + + if (elt != const0_rtx) + elt = force_reg (GET_MODE_INNER (vmode), elt); + if (REG_P (elt)) + elt = gen_lowpart (DImode, elt); + + t1 = gen_reg_rtx (vmode); + switch (vmode) + { + case V8QImode: + emit_insn (gen_loongson_vec_init1_v8qi (t1, elt)); + break; + case V4HImode: + emit_insn (gen_loongson_vec_init1_v4hi (t1, elt)); + break; + default: + gcc_unreachable (); + } + + memset (&d, 0, sizeof (d)); + d.target = target; + d.op0 = t1; + d.op1 = t1; + d.vmode = vmode; + d.nelt = GET_MODE_NUNITS (vmode); + d.one_vector_p = true; + + ok = mips_expand_vec_perm_const_1 (&d); + gcc_assert (ok); +} + +/* A subroutine of mips_expand_vec_init, replacing all of the non-constant + elements of VALS with zeros, copy the constant vector to TARGET. */ + +static void +mips_expand_vi_constant (enum machine_mode vmode, unsigned nelt, + rtx target, rtx vals) +{ + rtvec vec = shallow_copy_rtvec (XVEC (vals, 0)); + unsigned i; + + for (i = 0; i < nelt; ++i) + { + if (!mips_constant_elt_p (RTVEC_ELT (vec, i))) + RTVEC_ELT (vec, i) = const0_rtx; + } + + emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec)); +} + + +/* A subroutine of mips_expand_vec_init, expand via pinsrh. */ + +static void +mips_expand_vi_loongson_one_pinsrh (rtx target, rtx vals, unsigned one_var) +{ + mips_expand_vi_constant (V4HImode, 4, target, vals); + + emit_insn (gen_vec_setv4hi (target, target, XVECEXP (vals, 0, one_var), + GEN_INT (one_var))); +} + +/* A subroutine of mips_expand_vec_init, expand anything via memory. 
*/ + +static void +mips_expand_vi_general (enum machine_mode vmode, enum machine_mode imode, + unsigned nelt, unsigned nvar, rtx target, rtx vals) +{ + rtx mem = assign_stack_temp (vmode, GET_MODE_SIZE (vmode), 0); + unsigned int i, isize = GET_MODE_SIZE (imode); + + if (nvar < nelt) + mips_expand_vi_constant (vmode, nelt, mem, vals); + + for (i = 0; i < nelt; ++i) + { + rtx x = XVECEXP (vals, 0, i); + if (!mips_constant_elt_p (x)) + emit_move_insn (adjust_address (mem, imode, i * isize), x); + } + + emit_move_insn (target, mem); +} + +/* Expand a vector initialization. */ + +void +mips_expand_vector_init (rtx target, rtx vals) +{ + enum machine_mode vmode = GET_MODE (target); + enum machine_mode imode = GET_MODE_INNER (vmode); + unsigned i, nelt = GET_MODE_NUNITS (vmode); + unsigned nvar = 0, one_var = -1u; + bool all_same = true; + rtx x; + + for (i = 0; i < nelt; ++i) + { + x = XVECEXP (vals, 0, i); + if (!mips_constant_elt_p (x)) + nvar++, one_var = i; + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + /* Load constants from the pool, or whatever's handy. */ + if (nvar == 0) + { + emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0))); + return; + } + + /* For two-part initialization, always use CONCAT. */ + if (nelt == 2) + { + rtx op0 = force_reg (imode, XVECEXP (vals, 0, 0)); + rtx op1 = force_reg (imode, XVECEXP (vals, 0, 1)); + x = gen_rtx_VEC_CONCAT (vmode, op0, op1); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); + return; + } + + /* Loongson is the only cpu with vectors with more elements. */ + gcc_assert (TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS); + + /* If all values are identical, broadcast the value. */ + if (all_same) + { + mips_expand_vi_broadcast (vmode, target, XVECEXP (vals, 0, 0)); + return; + } + + /* If we've only got one non-variable V4HImode, use PINSRH. 
*/ + if (nvar == 1 && vmode == V4HImode) + { + mips_expand_vi_loongson_one_pinsrh (target, vals, one_var); + return; + } + + mips_expand_vi_general (vmode, imode, nelt, nvar, target, vals); +} + +/* Expand a vector reduction. */ + +void +mips_expand_vec_reduc (rtx target, rtx in, rtx (*gen)(rtx, rtx, rtx)) +{ + enum machine_mode vmode = GET_MODE (in); + unsigned char perm2[2]; + rtx last, next, fold, x; + bool ok; + + last = in; + fold = gen_reg_rtx (vmode); + switch (vmode) + { + case V2SFmode: + /* Use PUL/PLU to produce { L, H } op { H, L }. + By reversing the pair order, rather than a pure interleave high, + we avoid erroneous exceptional conditions that we might otherwise + produce from the computation of H op H. */ + perm2[0] = 1; + perm2[1] = 2; + ok = mips_expand_vselect_vconcat (fold, last, last, perm2, 2); + gcc_assert (ok); + break; + + case V2SImode: + /* Use interleave to produce { H, L } op { H, H }. */ + emit_insn (gen_loongson_punpckhwd (fold, last, last)); + break; + + case V4HImode: + /* Perform the first reduction with interleave, + and subsequent reductions with shifts. 
*/ + emit_insn (gen_loongson_punpckhwd_hi (fold, last, last)); + + next = gen_reg_rtx (vmode); + emit_insn (gen (next, last, fold)); + last = next; + + fold = gen_reg_rtx (vmode); + x = force_reg (SImode, GEN_INT (16)); + emit_insn (gen_vec_shr_v4hi (fold, last, x)); + break; + + case V8QImode: + emit_insn (gen_loongson_punpckhwd_qi (fold, last, last)); + + next = gen_reg_rtx (vmode); + emit_insn (gen (next, last, fold)); + last = next; + + fold = gen_reg_rtx (vmode); + x = force_reg (SImode, GEN_INT (16)); + emit_insn (gen_vec_shr_v8qi (fold, last, x)); + + next = gen_reg_rtx (vmode); + emit_insn (gen (next, last, fold)); + last = next; + + fold = gen_reg_rtx (vmode); + x = force_reg (SImode, GEN_INT (8)); + emit_insn (gen_vec_shr_v8qi (fold, last, x)); + break; + + default: + gcc_unreachable (); + } + + emit_insn (gen (target, last, fold)); +} + +/* Expand a vector minimum/maximum. */ + +void +mips_expand_vec_minmax (rtx target, rtx op0, rtx op1, + rtx (*cmp) (rtx, rtx, rtx), bool min_p) +{ + enum machine_mode vmode = GET_MODE (target); + rtx tc, t0, t1, x; + + tc = gen_reg_rtx (vmode); + t0 = gen_reg_rtx (vmode); + t1 = gen_reg_rtx (vmode); + + /* op0 > op1 */ + emit_insn (cmp (tc, op0, op1)); + + x = gen_rtx_AND (vmode, tc, (min_p ? op1 : op0)); + emit_insn (gen_rtx_SET (VOIDmode, t0, x)); + + x = gen_rtx_NOT (vmode, tc); + x = gen_rtx_AND (vmode, x, (min_p ? op0 : op1)); + emit_insn (gen_rtx_SET (VOIDmode, t1, x)); + + x = gen_rtx_IOR (vmode, t0, t1); + emit_insn (gen_rtx_SET (VOIDmode, target, x)); +} + /* Initialize the GCC target structure. 
*/ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -16578,6 +17216,9 @@ mips_prepare_pch_save (void) #undef TARGET_PREPARE_PCH_SAVE #define TARGET_PREPARE_PCH_SAVE mips_prepare_pch_save +#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK +#define TARGET_VECTORIZE_VEC_PERM_CONST_OK mips_vectorize_vec_perm_const_ok + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-mips.h" diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md index 5e9398e69f3..b6113739786 100644 --- a/gcc/config/mips/predicates.md +++ b/gcc/config/mips/predicates.md @@ -73,8 +73,15 @@ ;; This is used for indexing into vectors, and hence only accepts const_int. (define_predicate "const_0_or_1_operand" (and (match_code "const_int") - (ior (match_test "op == CONST0_RTX (GET_MODE (op))") - (match_test "op == CONST1_RTX (GET_MODE (op))")))) + (match_test "IN_RANGE (INTVAL (op), 0, 1)"))) + +(define_predicate "const_2_or_3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 3)"))) + +(define_predicate "const_0_to_3_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 3)"))) (define_predicate "qi_mask_operand" (and (match_code "const_int") |