diff options
author | mkuvyrkov <mkuvyrkov@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-06-15 06:29:06 +0000 |
---|---|---|
committer | mkuvyrkov <mkuvyrkov@138bc75d-0d04-0410-961f-82ee72b054a4> | 2008-06-15 06:29:06 +0000 |
commit | 9636921b6f9c9162ae3aeb278b588b6ee37a3842 (patch) | |
tree | 79e85ed4cf20018db2f54c614c571cf27e619564 | |
parent | 9fed75026cca1c2bf8cbf67920c15fe52489d1c9 (diff) | |
download | gcc-9636921b6f9c9162ae3aeb278b588b6ee37a3842.tar.gz |
2008-06-15 Mark Shinwell <shinwell@codesourcery.com>
Nathan Sidwell <nathan@codesourcery.com>
Maxim Kuvyrkov <maxim@codesourcery.com>
Richard Sandiford <rdsandiford@googlemail.com>
* config/mips/mips-modes.def: Add V8QI, V4HI and V2SI modes.
* config/mips/mips-protos.h (mips_expand_vector_init): New.
* config/mips/mips-ftypes.def: Add function types for Loongson-2E/2F
builtins.
* config/mips/mips.c (mips_split_doubleword_move): Handle new modes.
(mips_hard_regno_mode_ok_p): Allow 64-bit vector modes for Loongson.
(mips_vector_mode_supported_p): Add V2SImode, V4HImode and
V8QImode cases.
(LOONGSON_BUILTIN, LOONGSON_BUILTIN_ALIAS): New.
(CODE_FOR_loongson_packsswh, CODE_FOR_loongson_packsshb,
(CODE_FOR_loongson_packushb, CODE_FOR_loongson_paddw,
(CODE_FOR_loongson_paddh, CODE_FOR_loongson_paddb,
(CODE_FOR_loongson_paddsh, CODE_FOR_loongson_paddsb)
(CODE_FOR_loongson_paddush, CODE_FOR_loongson_paddusb)
(CODE_FOR_loongson_pmaxsh, CODE_FOR_loongson_pmaxub)
(CODE_FOR_loongson_pminsh, CODE_FOR_loongson_pminub)
(CODE_FOR_loongson_pmulhuh, CODE_FOR_loongson_pmulhh)
(CODE_FOR_loongson_biadd, CODE_FOR_loongson_psubw)
(CODE_FOR_loongson_psubh, CODE_FOR_loongson_psubb)
(CODE_FOR_loongson_psubsh, CODE_FOR_loongson_psubsb)
(CODE_FOR_loongson_psubush, CODE_FOR_loongson_psubusb)
(CODE_FOR_loongson_punpckhbh, CODE_FOR_loongson_punpckhhw)
(CODE_FOR_loongson_punpckhwd, CODE_FOR_loongson_punpcklbh)
(CODE_FOR_loongson_punpcklhw, CODE_FOR_loongson_punpcklwd): New.
(mips_builtins): Add Loongson builtins.
(mips_loongson_2ef_bdesc): New.
(mips_bdesc_arrays): Add mips_loongson_2ef_bdesc.
(mips_builtin_vector_type): Handle unsigned versions of vector modes.
(MIPS_ATYPE_UQI, MIPS_ATYPE_UDI, MIPS_ATYPE_V2SI, MIPS_ATYPE_UV2SI)
(MIPS_ATYPE_V4HI, MIPS_ATYPE_UV4HI, MIPS_ATYPE_V8QI, MIPS_ATYPE_UV8QI):
New.
(mips_expand_vector_init): New.
* config/mips/mips.h (HAVE_LOONGSON_VECTOR_MODES): New.
(TARGET_CPU_CPP_BUILTINS): Define __mips_loongson_vector_rev
if appropriate.
* config/mips/mips.md: Add unspec numbers for Loongson
builtins. Include loongson.md.
(MOVE64): Include Loongson vector modes.
(SPLITF): Include Loongson vector modes.
(HALFMODE): Handle Loongson vector modes.
* config/mips/loongson.md: New.
* config/mips/loongson.h: New.
* config.gcc: Add loongson.h header for mips*-*-* targets.
* doc/extend.texi (MIPS Loongson Built-in Functions): New.
2008-06-15 Mark Shinwell <shinwell@codesourcery.com>
* lib/target-supports.exp (check_effective_target_mips_loongson): New.
* gcc.target/mips/loongson-simd.c: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@136800 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 50 | ||||
-rw-r--r-- | gcc/config.gcc | 1 | ||||
-rw-r--r-- | gcc/config/mips/loongson.h | 693 | ||||
-rw-r--r-- | gcc/config/mips/loongson.md | 437 | ||||
-rw-r--r-- | gcc/config/mips/mips-ftypes.def | 33 | ||||
-rw-r--r-- | gcc/config/mips/mips-modes.def | 1 | ||||
-rw-r--r-- | gcc/config/mips/mips-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/mips/mips.c | 222 | ||||
-rw-r--r-- | gcc/config/mips/mips.h | 10 | ||||
-rw-r--r-- | gcc/config/mips/mips.md | 40 | ||||
-rw-r--r-- | gcc/doc/extend.texi | 127 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/mips/loongson-simd.c | 1963 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 11 |
14 files changed, 3588 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 132c5ac4713..8dd6b1cbc0b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,53 @@ +2008-06-15 Mark Shinwell <shinwell@codesourcery.com> + Nathan Sidwell <nathan@codesourcery.com> + Maxim Kuvyrkov <maxim@codesourcery.com> + Richard Sandiford <rdsandiford@googlemail.com> + + * config/mips/mips-modes.def: Add V8QI, V4HI and V2SI modes. + * config/mips/mips-protos.h (mips_expand_vector_init): New. + * config/mips/mips-ftypes.def: Add function types for Loongson-2E/2F + builtins. + * config/mips/mips.c (mips_split_doubleword_move): Handle new modes. + (mips_hard_regno_mode_ok_p): Allow 64-bit vector modes for Loongson. + (mips_vector_mode_supported_p): Add V2SImode, V4HImode and + V8QImode cases. + (LOONGSON_BUILTIN, LOONGSON_BUILTIN_ALIAS): New. + (CODE_FOR_loongson_packsswh, CODE_FOR_loongson_packsshb, + (CODE_FOR_loongson_packushb, CODE_FOR_loongson_paddw, + (CODE_FOR_loongson_paddh, CODE_FOR_loongson_paddb, + (CODE_FOR_loongson_paddsh, CODE_FOR_loongson_paddsb) + (CODE_FOR_loongson_paddush, CODE_FOR_loongson_paddusb) + (CODE_FOR_loongson_pmaxsh, CODE_FOR_loongson_pmaxub) + (CODE_FOR_loongson_pminsh, CODE_FOR_loongson_pminub) + (CODE_FOR_loongson_pmulhuh, CODE_FOR_loongson_pmulhh) + (CODE_FOR_loongson_biadd, CODE_FOR_loongson_psubw) + (CODE_FOR_loongson_psubh, CODE_FOR_loongson_psubb) + (CODE_FOR_loongson_psubsh, CODE_FOR_loongson_psubsb) + (CODE_FOR_loongson_psubush, CODE_FOR_loongson_psubusb) + (CODE_FOR_loongson_punpckhbh, CODE_FOR_loongson_punpckhhw) + (CODE_FOR_loongson_punpckhwd, CODE_FOR_loongson_punpcklbh) + (CODE_FOR_loongson_punpcklhw, CODE_FOR_loongson_punpcklwd): New. + (mips_builtins): Add Loongson builtins. + (mips_loongson_2ef_bdesc): New. + (mips_bdesc_arrays): Add mips_loongson_2ef_bdesc. + (mips_builtin_vector_type): Handle unsigned versions of vector modes. + (MIPS_ATYPE_UQI, MIPS_ATYPE_UDI, MIPS_ATYPE_V2SI, MIPS_ATYPE_UV2SI) + (MIPS_ATYPE_V4HI, MIPS_ATYPE_UV4HI, MIPS_ATYPE_V8QI, MIPS_ATYPE_UV8QI): + New. + (mips_expand_vector_init): New. + * config/mips/mips.h (HAVE_LOONGSON_VECTOR_MODES): New. + (TARGET_CPU_CPP_BUILTINS): Define __mips_loongson_vector_rev + if appropriate. + * config/mips/mips.md: Add unspec numbers for Loongson + builtins. Include loongson.md. + (MOVE64): Include Loongson vector modes. + (SPLITF): Include Loongson vector modes. + (HALFMODE): Handle Loongson vector modes. + * config/mips/loongson.md: New. + * config/mips/loongson.h: New. + * config.gcc: Add loongson.h header for mips*-*-* targets. + * doc/extend.texi (MIPS Loongson Built-in Functions): New. + 2008-06-14 Joseph Myers <joseph@codesourcery.com> * config.gcc (arc-*-elf*, avr-*-*, fr30-*-elf, frv-*-elf, diff --git a/gcc/config.gcc b/gcc/config.gcc index 5e2c326f926..8c8a66a6772 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -307,6 +307,7 @@ m68k-*-*) mips*-*-*) cpu_type=mips need_64bit_hwint=yes + extra_headers="loongson.h" ;; powerpc*-*-*) cpu_type=rs6000 diff --git a/gcc/config/mips/loongson.h b/gcc/config/mips/loongson.h new file mode 100644 index 00000000000..c742edeeb3f --- /dev/null +++ b/gcc/config/mips/loongson.h @@ -0,0 +1,693 @@ +/* Intrinsics for ST Microelectronics Loongson-2E/2F SIMD operations. + + Copyright (C) 2008 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 2, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the + Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +#ifndef _GCC_LOONGSON_H +#define _GCC_LOONGSON_H + +#if !defined(__mips_loongson_vector_rev) +# error "You must select -march=loongson2e or -march=loongson2f to use loongson.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +/* Vectors of unsigned bytes, halfwords and words. */ +typedef uint8_t uint8x8_t __attribute__((vector_size (8))); +typedef uint16_t uint16x4_t __attribute__((vector_size (8))); +typedef uint32_t uint32x2_t __attribute__((vector_size (8))); + +/* Vectors of signed bytes, halfwords and words. */ +typedef int8_t int8x8_t __attribute__((vector_size (8))); +typedef int16_t int16x4_t __attribute__((vector_size (8))); +typedef int32_t int32x2_t __attribute__((vector_size (8))); + +/* SIMD intrinsics. + Unless otherwise noted, calls to the functions below will expand into + precisely one machine instruction, modulo any moves required to + satisfy register allocation constraints. */ + +/* Pack with signed saturation. */ +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +packsswh (int32x2_t s, int32x2_t t) +{ + return __builtin_loongson_packsswh (s, t); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +packsshb (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_packsshb (s, t); +} + +/* Pack with unsigned saturation. */ +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +packushb (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_packushb (s, t); +} + +/* Vector addition, treating overflow by wraparound. */ +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +paddw_u (uint32x2_t s, uint32x2_t t) +{ + return __builtin_loongson_paddw_u (s, t); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +paddh_u (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_paddh_u (s, t); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +paddb_u (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_paddb_u (s, t); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +paddw_s (int32x2_t s, int32x2_t t) +{ + return __builtin_loongson_paddw_s (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +paddh_s (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_paddh_s (s, t); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +paddb_s (int8x8_t s, int8x8_t t) +{ + return __builtin_loongson_paddb_s (s, t); +} + +/* Addition of doubleword integers, treating overflow by wraparound. */ +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +paddd_u (uint64_t s, uint64_t t) +{ + return __builtin_loongson_paddd_u (s, t); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +paddd_s (int64_t s, int64_t t) +{ + return __builtin_loongson_paddd_s (s, t); +} + +/* Vector addition, treating overflow by signed saturation. */ +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +paddsh (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_paddsh (s, t); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +paddsb (int8x8_t s, int8x8_t t) +{ + return __builtin_loongson_paddsb (s, t); +} + +/* Vector addition, treating overflow by unsigned saturation. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +paddush (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_paddush (s, t); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +paddusb (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_paddusb (s, t); +} + +/* Logical AND NOT. */ +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +pandn_ud (uint64_t s, uint64_t t) +{ + return __builtin_loongson_pandn_ud (s, t); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +pandn_uw (uint32x2_t s, uint32x2_t t) +{ + return __builtin_loongson_pandn_uw (s, t); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pandn_uh (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_pandn_uh (s, t); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +pandn_ub (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_pandn_ub (s, t); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +pandn_sd (int64_t s, int64_t t) +{ + return __builtin_loongson_pandn_sd (s, t); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +pandn_sw (int32x2_t s, int32x2_t t) +{ + return __builtin_loongson_pandn_sw (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pandn_sh (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pandn_sh (s, t); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +pandn_sb (int8x8_t s, int8x8_t t) +{ + return __builtin_loongson_pandn_sb (s, t); +} + +/* Average. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pavgh (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_pavgh (s, t); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +pavgb (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_pavgb (s, t); +} + +/* Equality test. */ +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +pcmpeqw_u (uint32x2_t s, uint32x2_t t) +{ + return __builtin_loongson_pcmpeqw_u (s, t); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pcmpeqh_u (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_pcmpeqh_u (s, t); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +pcmpeqb_u (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_pcmpeqb_u (s, t); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +pcmpeqw_s (int32x2_t s, int32x2_t t) +{ + return __builtin_loongson_pcmpeqw_s (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pcmpeqh_s (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pcmpeqh_s (s, t); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +pcmpeqb_s (int8x8_t s, int8x8_t t) +{ + return __builtin_loongson_pcmpeqb_s (s, t); +} + +/* Greater-than test. */ +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +pcmpgtw_u (uint32x2_t s, uint32x2_t t) +{ + return __builtin_loongson_pcmpgtw_u (s, t); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pcmpgth_u (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_pcmpgth_u (s, t); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +pcmpgtb_u (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_pcmpgtb_u (s, t); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +pcmpgtw_s (int32x2_t s, int32x2_t t) +{ + return __builtin_loongson_pcmpgtw_s (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pcmpgth_s (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pcmpgth_s (s, t); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +pcmpgtb_s (int8x8_t s, int8x8_t t) +{ + return __builtin_loongson_pcmpgtb_s (s, t); +} + +/* Extract halfword. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pextrh_u (uint16x4_t s, int field /* 0--3 */) +{ + return __builtin_loongson_pextrh_u (s, field); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pextrh_s (int16x4_t s, int field /* 0--3 */) +{ + return __builtin_loongson_pextrh_s (s, field); +} + +/* Insert halfword. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pinsrh_0_u (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_pinsrh_0_u (s, t); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pinsrh_1_u (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_pinsrh_1_u (s, t); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pinsrh_2_u (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_pinsrh_2_u (s, t); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pinsrh_3_u (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_pinsrh_3_u (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pinsrh_0_s (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pinsrh_0_s (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pinsrh_1_s (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pinsrh_1_s (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pinsrh_2_s (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pinsrh_2_s (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pinsrh_3_s (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pinsrh_3_s (s, t); +} + +/* Multiply and add. */ +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +pmaddhw (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pmaddhw (s, t); +} + +/* Maximum of signed halfwords. */ +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pmaxsh (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pmaxsh (s, t); +} + +/* Maximum of unsigned bytes. */ +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +pmaxub (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_pmaxub (s, t); +} + +/* Minimum of signed halfwords. */ +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pminsh (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pminsh (s, t); +} + +/* Minimum of unsigned bytes. */ +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +pminub (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_pminub (s, t); +} + +/* Move byte mask. */ +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +pmovmskb_u (uint8x8_t s) +{ + return __builtin_loongson_pmovmskb_u (s); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +pmovmskb_s (int8x8_t s) +{ + return __builtin_loongson_pmovmskb_s (s); +} + +/* Multiply unsigned integers and store high result. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pmulhuh (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_pmulhuh (s, t); +} + +/* Multiply signed integers and store high result. */ +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pmulhh (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pmulhh (s, t); +} + +/* Multiply signed integers and store low result. */ +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pmullh (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_pmullh (s, t); +} + +/* Multiply unsigned word integers. */ +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +pmuluw (uint32x2_t s, uint32x2_t t) +{ + return __builtin_loongson_pmuluw (s, t); +} + +/* Absolute difference. */ +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +pasubub (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_pasubub (s, t); +} + +/* Sum of unsigned byte integers. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +biadd (uint8x8_t s) +{ + return __builtin_loongson_biadd (s); +} + +/* Sum of absolute differences. + Note that this intrinsic expands into two machine instructions: + PASUBUB followed by BIADD. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +psadbh (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_psadbh (s, t); +} + +/* Shuffle halfwords. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order) +{ + return __builtin_loongson_pshufh_u (dest, s, order); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order) +{ + return __builtin_loongson_pshufh_s (dest, s, order); +} + +/* Shift left logical. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +psllh_u (uint16x4_t s, uint8_t amount) +{ + return __builtin_loongson_psllh_u (s, amount); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +psllh_s (int16x4_t s, uint8_t amount) +{ + return __builtin_loongson_psllh_s (s, amount); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +psllw_u (uint32x2_t s, uint8_t amount) +{ + return __builtin_loongson_psllw_u (s, amount); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +psllw_s (int32x2_t s, uint8_t amount) +{ + return __builtin_loongson_psllw_s (s, amount); +} + +/* Shift right logical. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +psrlh_u (uint16x4_t s, uint8_t amount) +{ + return __builtin_loongson_psrlh_u (s, amount); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +psrlh_s (int16x4_t s, uint8_t amount) +{ + return __builtin_loongson_psrlh_s (s, amount); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +psrlw_u (uint32x2_t s, uint8_t amount) +{ + return __builtin_loongson_psrlw_u (s, amount); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +psrlw_s (int32x2_t s, uint8_t amount) +{ + return __builtin_loongson_psrlw_s (s, amount); +} + +/* Shift right arithmetic. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +psrah_u (uint16x4_t s, uint8_t amount) +{ + return __builtin_loongson_psrah_u (s, amount); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +psrah_s (int16x4_t s, uint8_t amount) +{ + return __builtin_loongson_psrah_s (s, amount); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +psraw_u (uint32x2_t s, uint8_t amount) +{ + return __builtin_loongson_psraw_u (s, amount); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +psraw_s (int32x2_t s, uint8_t amount) +{ + return __builtin_loongson_psraw_s (s, amount); +} + +/* Vector subtraction, treating overflow by wraparound. */ +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +psubw_u (uint32x2_t s, uint32x2_t t) +{ + return __builtin_loongson_psubw_u (s, t); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +psubh_u (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_psubh_u (s, t); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +psubb_u (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_psubb_u (s, t); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +psubw_s (int32x2_t s, int32x2_t t) +{ + return __builtin_loongson_psubw_s (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +psubh_s (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_psubh_s (s, t); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +psubb_s (int8x8_t s, int8x8_t t) +{ + return __builtin_loongson_psubb_s (s, t); +} + +/* Subtraction of doubleword integers, treating overflow by wraparound. */ +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +psubd_u (uint64_t s, uint64_t t) +{ + return __builtin_loongson_psubd_u (s, t); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +psubd_s (int64_t s, int64_t t) +{ + return __builtin_loongson_psubd_s (s, t); +} + +/* Vector subtraction, treating overflow by signed saturation. */ +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +psubsh (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_psubsh (s, t); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +psubsb (int8x8_t s, int8x8_t t) +{ + return __builtin_loongson_psubsb (s, t); +} + +/* Vector subtraction, treating overflow by unsigned saturation. */ +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +psubush (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_psubush (s, t); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +psubusb (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_psubusb (s, t); +} + +/* Unpack high data. */ +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +punpckhwd_u (uint32x2_t s, uint32x2_t t) +{ + return __builtin_loongson_punpckhwd_u (s, t); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +punpckhhw_u (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_punpckhhw_u (s, t); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +punpckhbh_u (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_punpckhbh_u (s, t); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +punpckhwd_s (int32x2_t s, int32x2_t t) +{ + return __builtin_loongson_punpckhwd_s (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +punpckhhw_s (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_punpckhhw_s (s, t); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +punpckhbh_s (int8x8_t s, int8x8_t t) +{ + return __builtin_loongson_punpckhbh_s (s, t); +} + +/* Unpack low data. */ +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +punpcklwd_u (uint32x2_t s, uint32x2_t t) +{ + return __builtin_loongson_punpcklwd_u (s, t); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +punpcklhw_u (uint16x4_t s, uint16x4_t t) +{ + return __builtin_loongson_punpcklhw_u (s, t); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +punpcklbh_u (uint8x8_t s, uint8x8_t t) +{ + return __builtin_loongson_punpcklbh_u (s, t); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +punpcklwd_s (int32x2_t s, int32x2_t t) +{ + return __builtin_loongson_punpcklwd_s (s, t); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +punpcklhw_s (int16x4_t s, int16x4_t t) +{ + return __builtin_loongson_punpcklhw_s (s, t); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +punpcklbh_s (int8x8_t s, int8x8_t t) +{ + return __builtin_loongson_punpcklbh_s (s, t); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md new file mode 100644 index 00000000000..5177ae477dc --- /dev/null +++ b/gcc/config/mips/loongson.md @@ -0,0 +1,437 @@ +;; Machine description for ST Microelectronics Loongson-2E/2F. +;; Copyright (C) 2008 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; Mode iterators and attributes. + +;; 64-bit vectors of bytes. +(define_mode_iterator VB [V8QI]) + +;; 64-bit vectors of halfwords. +(define_mode_iterator VH [V4HI]) + +;; 64-bit vectors of words. +(define_mode_iterator VW [V2SI]) + +;; 64-bit vectors of halfwords and bytes. +(define_mode_iterator VHB [V4HI V8QI]) + +;; 64-bit vectors of words and halfwords. +(define_mode_iterator VWH [V2SI V4HI]) + +;; 64-bit vectors of words, halfwords and bytes. +(define_mode_iterator VWHB [V2SI V4HI V8QI]) + +;; 64-bit vectors of words, halfwords and bytes; and DImode. +(define_mode_iterator VWHBDI [V2SI V4HI V8QI DI]) + +;; The Loongson instruction suffixes corresponding to the modes in the +;; VWHBDI iterator. +(define_mode_attr V_suffix [(V2SI "w") (V4HI "h") (V8QI "b") (DI "d")]) + +;; Given a vector type T, the mode of a vector half the size of T +;; and with the same number of elements. +(define_mode_attr V_squash [(V2SI "V2HI") (V4HI "V4QI")]) + +;; Given a vector type T, the mode of a vector the same size as T +;; but with half as many elements. +(define_mode_attr V_stretch_half [(V2SI "DI") (V4HI "V2SI") (V8QI "V4HI")]) + +;; The Loongson instruction suffixes corresponding to the transformation +;; expressed by V_stretch_half. +(define_mode_attr V_stretch_half_suffix [(V2SI "wd") (V4HI "hw") (V8QI "bh")]) + +;; Given a vector type T, the mode of a vector the same size as T +;; but with twice as many elements. +(define_mode_attr V_squash_double [(V2SI "V4HI") (V4HI "V8QI")]) + +;; The Loongson instruction suffixes corresponding to the conversions +;; specified by V_half_width. +(define_mode_attr V_squash_double_suffix [(V2SI "wh") (V4HI "hb")]) + +;; Move patterns. + +;; Expander to legitimize moves involving values of vector modes. +(define_expand "mov<mode>" + [(set (match_operand:VWHB 0) + (match_operand:VWHB 1))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + if (mips_legitimize_move (<MODE>mode, operands[0], operands[1])) + DONE; +}) + +;; Handle legitimized moves between values of vector modes. +(define_insn "mov<mode>_internal" + [(set (match_operand:VWHB 0 "nonimmediate_operand" "=m,f,d,f, d, m, d") + (match_operand:VWHB 1 "move_operand" "f,m,f,dYG,dYG,dYG,m"))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + { return mips_output_move (operands[0], operands[1]); } + [(set_attr "type" "fpstore,fpload,mfc,mtc,move,store,load") + (set_attr "mode" "DI")]) + +;; Initialization of a vector. + +(define_expand "vec_init<mode>" + [(set (match_operand:VWHB 0 "register_operand") + (match_operand 1 ""))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" +{ + mips_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +;; Instruction patterns for SIMD instructions. + +;; Pack with signed saturation. +(define_insn "vec_pack_ssat_<mode>" + [(set (match_operand:<V_squash_double> 0 "register_operand" "=f") + (vec_concat:<V_squash_double> + (ss_truncate:<V_squash> + (match_operand:VWH 1 "register_operand" "f")) + (ss_truncate:<V_squash> + (match_operand:VWH 2 "register_operand" "f"))))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "packss<V_squash_double_suffix>\t%0,%1,%2") + +;; Pack with unsigned saturation. +(define_insn "vec_pack_usat_<mode>" + [(set (match_operand:<V_squash_double> 0 "register_operand" "=f") + (vec_concat:<V_squash_double> + (us_truncate:<V_squash> + (match_operand:VH 1 "register_operand" "f")) + (us_truncate:<V_squash> + (match_operand:VH 2 "register_operand" "f"))))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "packus<V_squash_double_suffix>\t%0,%1,%2") + +;; Addition, treating overflow by wraparound. +(define_insn "add<mode>3" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (plus:VWHB (match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "padd<V_suffix>\t%0,%1,%2") + +;; Addition of doubleword integers stored in FP registers. +;; Overflow is treated by wraparound. +;; We use 'unspec' instead of 'plus' here to avoid clash with +;; mips.md::add<mode>3. If 'plus' was used, then such instruction +;; would be recognized as adddi3 and reload would make it use +;; GPRs instead of FPRs. +(define_insn "loongson_paddd" + [(set (match_operand:DI 0 "register_operand" "=f") + (unspec:DI [(match_operand:DI 1 "register_operand" "f") + (match_operand:DI 2 "register_operand" "f")] + UNSPEC_LOONGSON_PADDD))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "paddd\t%0,%1,%2") + +;; Addition, treating overflow by signed saturation. +(define_insn "ssadd<mode>3" + [(set (match_operand:VHB 0 "register_operand" "=f") + (ss_plus:VHB (match_operand:VHB 1 "register_operand" "f") + (match_operand:VHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "padds<V_suffix>\t%0,%1,%2") + +;; Addition, treating overflow by unsigned saturation. +(define_insn "usadd<mode>3" + [(set (match_operand:VHB 0 "register_operand" "=f") + (us_plus:VHB (match_operand:VHB 1 "register_operand" "f") + (match_operand:VHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "paddus<V_suffix>\t%0,%1,%2") + +;; Logical AND NOT. +(define_insn "loongson_pandn_<V_suffix>" + [(set (match_operand:VWHBDI 0 "register_operand" "=f") + (and:VWHBDI + (not:VWHBDI (match_operand:VWHBDI 1 "register_operand" "f")) + (match_operand:VWHBDI 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pandn\t%0,%1,%2") + +;; Average. +(define_insn "loongson_pavg<V_suffix>" + [(set (match_operand:VHB 0 "register_operand" "=f") + (unspec:VHB [(match_operand:VHB 1 "register_operand" "f") + (match_operand:VHB 2 "register_operand" "f")] + UNSPEC_LOONGSON_PAVG))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pavg<V_suffix>\t%0,%1,%2") + +;; Equality test. +(define_insn "loongson_pcmpeq<V_suffix>" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")] + UNSPEC_LOONGSON_PCMPEQ))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pcmpeq<V_suffix>\t%0,%1,%2") + +;; Greater-than test. +(define_insn "loongson_pcmpgt<V_suffix>" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")] + UNSPEC_LOONGSON_PCMPGT))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pcmpgt<V_suffix>\t%0,%1,%2") + +;; Extract halfword. +(define_insn "loongson_pextr<V_suffix>" + [(set (match_operand:VH 0 "register_operand" "=f") + (unspec:VH [(match_operand:VH 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")] + UNSPEC_LOONGSON_PEXTR))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pextr<V_suffix>\t%0,%1,%2") + +;; Insert halfword. +(define_insn "loongson_pinsr<V_suffix>_0" + [(set (match_operand:VH 0 "register_operand" "=f") + (unspec:VH [(match_operand:VH 1 "register_operand" "f") + (match_operand:VH 2 "register_operand" "f")] + UNSPEC_LOONGSON_PINSR_0))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pinsr<V_suffix>_0\t%0,%1,%2") + +(define_insn "loongson_pinsr<V_suffix>_1" + [(set (match_operand:VH 0 "register_operand" "=f") + (unspec:VH [(match_operand:VH 1 "register_operand" "f") + (match_operand:VH 2 "register_operand" "f")] + UNSPEC_LOONGSON_PINSR_1))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pinsr<V_suffix>_1\t%0,%1,%2") + +(define_insn "loongson_pinsr<V_suffix>_2" + [(set (match_operand:VH 0 "register_operand" "=f") + (unspec:VH [(match_operand:VH 1 "register_operand" "f") + (match_operand:VH 2 "register_operand" "f")] + UNSPEC_LOONGSON_PINSR_2))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pinsr<V_suffix>_2\t%0,%1,%2") + +(define_insn "loongson_pinsr<V_suffix>_3" + [(set (match_operand:VH 0 "register_operand" "=f") + (unspec:VH [(match_operand:VH 1 "register_operand" "f") + (match_operand:VH 2 "register_operand" "f")] + UNSPEC_LOONGSON_PINSR_3))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pinsr<V_suffix>_3\t%0,%1,%2") + +;; Multiply and add packed integers. +(define_insn "loongson_pmadd<V_stretch_half_suffix>" + [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f") + (unspec:<V_stretch_half> [(match_operand:VH 1 "register_operand" "f") + (match_operand:VH 2 "register_operand" "f")] + UNSPEC_LOONGSON_PMADD))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pmadd<V_stretch_half_suffix>\t%0,%1,%2") + +;; Maximum of signed halfwords. +(define_insn "smax<mode>3" + [(set (match_operand:VH 0 "register_operand" "=f") + (smax:VH (match_operand:VH 1 "register_operand" "f") + (match_operand:VH 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pmaxs<V_suffix>\t%0,%1,%2") + +;; Maximum of unsigned bytes. +(define_insn "umax<mode>3" + [(set (match_operand:VB 0 "register_operand" "=f") + (umax:VB (match_operand:VB 1 "register_operand" "f") + (match_operand:VB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pmaxu<V_suffix>\t%0,%1,%2") + +;; Minimum of signed halfwords. +(define_insn "smin<mode>3" + [(set (match_operand:VH 0 "register_operand" "=f") + (smin:VH (match_operand:VH 1 "register_operand" "f") + (match_operand:VH 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pmins<V_suffix>\t%0,%1,%2") + +;; Minimum of unsigned bytes. +(define_insn "umin<mode>3" + [(set (match_operand:VB 0 "register_operand" "=f") + (umin:VB (match_operand:VB 1 "register_operand" "f") + (match_operand:VB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pminu<V_suffix>\t%0,%1,%2") + +;; Move byte mask. +(define_insn "loongson_pmovmsk<V_suffix>" + [(set (match_operand:VB 0 "register_operand" "=f") + (unspec:VB [(match_operand:VB 1 "register_operand" "f")] + UNSPEC_LOONGSON_PMOVMSK))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pmovmsk<V_suffix>\t%0,%1") + +;; Multiply unsigned integers and store high result. +(define_insn "umul<mode>3_highpart" + [(set (match_operand:VH 0 "register_operand" "=f") + (unspec:VH [(match_operand:VH 1 "register_operand" "f") + (match_operand:VH 2 "register_operand" "f")] + UNSPEC_LOONGSON_PMULHU))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pmulhu<V_suffix>\t%0,%1,%2") + +;; Multiply signed integers and store high result. +(define_insn "smul<mode>3_highpart" + [(set (match_operand:VH 0 "register_operand" "=f") + (unspec:VH [(match_operand:VH 1 "register_operand" "f") + (match_operand:VH 2 "register_operand" "f")] + UNSPEC_LOONGSON_PMULH))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pmulh<V_suffix>\t%0,%1,%2") + +;; Multiply signed integers and store low result. +(define_insn "loongson_pmull<V_suffix>" + [(set (match_operand:VH 0 "register_operand" "=f") + (unspec:VH [(match_operand:VH 1 "register_operand" "f") + (match_operand:VH 2 "register_operand" "f")] + UNSPEC_LOONGSON_PMULL))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pmull<V_suffix>\t%0,%1,%2") + +;; Multiply unsigned word integers. +(define_insn "loongson_pmulu<V_suffix>" + [(set (match_operand:DI 0 "register_operand" "=f") + (unspec:DI [(match_operand:VW 1 "register_operand" "f") + (match_operand:VW 2 "register_operand" "f")] + UNSPEC_LOONGSON_PMULU))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pmulu<V_suffix>\t%0,%1,%2") + +;; Absolute difference. +(define_insn "loongson_pasubub" + [(set (match_operand:VB 0 "register_operand" "=f") + (unspec:VB [(match_operand:VB 1 "register_operand" "f") + (match_operand:VB 2 "register_operand" "f")] + UNSPEC_LOONGSON_PASUBUB))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pasubub\t%0,%1,%2") + +;; Sum of unsigned byte integers. +(define_insn "reduc_uplus_<mode>" + [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f") + (unspec:<V_stretch_half> [(match_operand:VB 1 "register_operand" "f")] + UNSPEC_LOONGSON_BIADD))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "biadd\t%0,%1") + +;; Sum of absolute differences. +(define_insn "loongson_psadbh" + [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f") + (unspec:<V_stretch_half> [(match_operand:VB 1 "register_operand" "f") + (match_operand:VB 2 "register_operand" "f")] + UNSPEC_LOONGSON_PSADBH))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pasubub\t%0,%1,%2;biadd\t%0,%0") + +;; Shuffle halfwords. +(define_insn "loongson_pshufh" + [(set (match_operand:VH 0 "register_operand" "=f") + (unspec:VH [(match_operand:VH 1 "register_operand" "0") + (match_operand:VH 2 "register_operand" "f") + (match_operand:SI 3 "register_operand" "f")] + UNSPEC_LOONGSON_PSHUFH))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "pshufh\t%0,%2,%3") + +;; Shift left logical. +(define_insn "loongson_psll<V_suffix>" + [(set (match_operand:VWH 0 "register_operand" "=f") + (ashift:VWH (match_operand:VWH 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "psll<V_suffix>\t%0,%1,%2") + +;; Shift right arithmetic. +(define_insn "loongson_psra<V_suffix>" + [(set (match_operand:VWH 0 "register_operand" "=f") + (ashiftrt:VWH (match_operand:VWH 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "psra<V_suffix>\t%0,%1,%2") + +;; Shift right logical. +(define_insn "loongson_psrl<V_suffix>" + [(set (match_operand:VWH 0 "register_operand" "=f") + (lshiftrt:VWH (match_operand:VWH 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "psrl<V_suffix>\t%0,%1,%2") + +;; Subtraction, treating overflow by wraparound. +(define_insn "sub<mode>3" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (minus:VWHB (match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "psub<V_suffix>\t%0,%1,%2") + +;; Subtraction of doubleword integers stored in FP registers. +;; Overflow is treated by wraparound. +;; See loongson_paddd for the reason we use 'unspec' rather than +;; 'minus' here. +(define_insn "loongson_psubd" + [(set (match_operand:DI 0 "register_operand" "=f") + (unspec:DI [(match_operand:DI 1 "register_operand" "f") + (match_operand:DI 2 "register_operand" "f")] + UNSPEC_LOONGSON_PSUBD))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "psubd\t%0,%1,%2") + +;; Subtraction, treating overflow by signed saturation. +(define_insn "sssub<mode>3" + [(set (match_operand:VHB 0 "register_operand" "=f") + (ss_minus:VHB (match_operand:VHB 1 "register_operand" "f") + (match_operand:VHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "psubs<V_suffix>\t%0,%1,%2") + +;; Subtraction, treating overflow by unsigned saturation. +(define_insn "ussub<mode>3" + [(set (match_operand:VHB 0 "register_operand" "=f") + (us_minus:VHB (match_operand:VHB 1 "register_operand" "f") + (match_operand:VHB 2 "register_operand" "f")))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "psubus<V_suffix>\t%0,%1,%2") + +;; Unpack high data. +(define_insn "vec_interleave_high<mode>" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")] + UNSPEC_LOONGSON_PUNPCKH))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpckh<V_stretch_half_suffix>\t%0,%1,%2") + +;; Unpack low data. +(define_insn "vec_interleave_low<mode>" + [(set (match_operand:VWHB 0 "register_operand" "=f") + (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") + (match_operand:VWHB 2 "register_operand" "f")] + UNSPEC_LOONGSON_PUNPCKL))] + "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" + "punpckl<V_stretch_half_suffix>\t%0,%1,%2") diff --git a/gcc/config/mips/mips-ftypes.def b/gcc/config/mips/mips-ftypes.def index 9d46f8c242e..8ae54ebe263 100644 --- a/gcc/config/mips/mips-ftypes.def +++ b/gcc/config/mips/mips-ftypes.def @@ -66,6 +66,24 @@ DEF_MIPS_FTYPE (1, (SF, SF)) DEF_MIPS_FTYPE (2, (SF, SF, SF)) DEF_MIPS_FTYPE (1, (SF, V2SF)) +DEF_MIPS_FTYPE (2, (UDI, UDI, UDI)) +DEF_MIPS_FTYPE (2, (UDI, UV2SI, UV2SI)) + +DEF_MIPS_FTYPE (2, (UV2SI, UV2SI, UQI)) +DEF_MIPS_FTYPE (2, (UV2SI, UV2SI, UV2SI)) + +DEF_MIPS_FTYPE (2, (UV4HI, UV4HI, UQI)) +DEF_MIPS_FTYPE (2, (UV4HI, UV4HI, USI)) +DEF_MIPS_FTYPE (3, (UV4HI, UV4HI, UV4HI, UQI)) +DEF_MIPS_FTYPE (3, (UV4HI, UV4HI, UV4HI, USI)) +DEF_MIPS_FTYPE (2, (UV4HI, UV4HI, UV4HI)) +DEF_MIPS_FTYPE (1, (UV4HI, UV8QI)) +DEF_MIPS_FTYPE (2, (UV4HI, UV8QI, UV8QI)) + +DEF_MIPS_FTYPE (2, (UV8QI, UV4HI, UV4HI)) +DEF_MIPS_FTYPE (1, (UV8QI, UV8QI)) +DEF_MIPS_FTYPE (2, (UV8QI, UV8QI, UV8QI)) + DEF_MIPS_FTYPE (1, (V2HI, SI)) DEF_MIPS_FTYPE (2, (V2HI, SI, SI)) DEF_MIPS_FTYPE (3, (V2HI, SI, SI, SI)) @@ -81,12 +99,27 @@ DEF_MIPS_FTYPE (2, (V2SF, V2SF, V2SF)) DEF_MIPS_FTYPE (3, (V2SF, V2SF, V2SF, INT)) DEF_MIPS_FTYPE (4, (V2SF, V2SF, V2SF, V2SF, V2SF)) +DEF_MIPS_FTYPE (2, (V2SI, V2SI, UQI)) +DEF_MIPS_FTYPE (2, (V2SI, V2SI, V2SI)) +DEF_MIPS_FTYPE (2, (V2SI, V4HI, V4HI)) + +DEF_MIPS_FTYPE (2, (V4HI, V2SI, V2SI)) +DEF_MIPS_FTYPE (2, (V4HI, V4HI, UQI)) +DEF_MIPS_FTYPE (2, (V4HI, V4HI, USI)) +DEF_MIPS_FTYPE (2, (V4HI, V4HI, V4HI)) +DEF_MIPS_FTYPE (3, (V4HI, V4HI, V4HI, UQI)) +DEF_MIPS_FTYPE (3, (V4HI, V4HI, V4HI, USI)) + DEF_MIPS_FTYPE (1, (V4QI, SI)) DEF_MIPS_FTYPE (2, (V4QI, V2HI, V2HI)) DEF_MIPS_FTYPE (1, (V4QI, V4QI)) DEF_MIPS_FTYPE (2, (V4QI, V4QI, SI)) DEF_MIPS_FTYPE (2, (V4QI, V4QI, V4QI)) +DEF_MIPS_FTYPE (2, (V8QI, V4HI, V4HI)) +DEF_MIPS_FTYPE (1, (V8QI, V8QI)) +DEF_MIPS_FTYPE (2, (V8QI, V8QI, V8QI)) + DEF_MIPS_FTYPE (2, (VOID, SI, SI)) DEF_MIPS_FTYPE (2, (VOID, V2HI, V2HI)) DEF_MIPS_FTYPE (2, (VOID, V4QI, V4QI)) diff --git a/gcc/config/mips/mips-modes.def b/gcc/config/mips/mips-modes.def index 207f6da060b..d1617aed83f 100644 --- a/gcc/config/mips/mips-modes.def +++ b/gcc/config/mips/mips-modes.def @@ -26,6 +26,7 @@ RESET_FLOAT_FORMAT (DF, mips_double_format); FLOAT_MODE (TF, 16, mips_quad_format); /* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (INT, 4); /* V4QI V2HI */ diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index 1bf0ee9865d..b79b83a0bcb 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -303,4 +303,6 @@ union mips_gen_fn_ptrs extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs, rtx, rtx, rtx, rtx); +extern void mips_expand_vector_init (rtx, rtx); + #endif /* ! GCC_MIPS_PROTOS_H */ diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 69f8258e8f0..cd94ac15e5a 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -3532,6 +3532,12 @@ mips_split_doubleword_move (rtx dest, rtx src) emit_insn (gen_move_doubleword_fprdf (dest, src)); else if (!TARGET_64BIT && GET_MODE (dest) == V2SFmode) emit_insn (gen_move_doubleword_fprv2sf (dest, src)); + else if (!TARGET_64BIT && GET_MODE (dest) == V2SImode) + emit_insn (gen_move_doubleword_fprv2si (dest, src)); + else if (!TARGET_64BIT && GET_MODE (dest) == V4HImode) + emit_insn (gen_move_doubleword_fprv4hi (dest, src)); + else if (!TARGET_64BIT && GET_MODE (dest) == V8QImode) + emit_insn (gen_move_doubleword_fprv8qi (dest, src)); else if (TARGET_64BIT && GET_MODE (dest) == TFmode) emit_insn (gen_move_doubleword_fprtf (dest, src)); else @@ -8960,6 +8966,14 @@ mips_hard_regno_mode_ok_p (unsigned int regno, enum machine_mode mode) if (mode == TFmode && ISA_HAS_8CC) return true; + /* Allow 64-bit vector modes for Loongson-2E/2F. */ + if (TARGET_LOONGSON_VECTORS + && (mode == V2SImode + || mode == V4HImode + || mode == V8QImode + || mode == DImode)) + return true; + if (class == MODE_FLOAT || class == MODE_COMPLEX_FLOAT || class == MODE_VECTOR_FLOAT) @@ -9323,6 +9337,11 @@ mips_vector_mode_supported_p (enum machine_mode mode) case V4UQQmode: return TARGET_DSP; + case V2SImode: + case V4HImode: + case V8QImode: + return TARGET_LOONGSON_VECTORS; + default: return false; } @@ -10192,6 +10211,7 @@ AVAIL_NON_MIPS16 (dsp, TARGET_DSP) AVAIL_NON_MIPS16 (dspr2, TARGET_DSPR2) AVAIL_NON_MIPS16 (dsp_32, !TARGET_64BIT && TARGET_DSP) AVAIL_NON_MIPS16 (dspr2_32, !TARGET_64BIT && TARGET_DSPR2) +AVAIL_NON_MIPS16 (loongson, TARGET_LOONGSON_VECTORS) /* Construct a mips_builtin_description from the given arguments. @@ -10288,6 +10308,25 @@ AVAIL_NON_MIPS16 (dspr2_32, !TARGET_64BIT && TARGET_DSPR2) MIPS_BUILTIN (bposge, f, "bposge" #VALUE, \ MIPS_BUILTIN_BPOSGE ## VALUE, MIPS_SI_FTYPE_VOID, AVAIL) +/* Define a Loongson MIPS_BUILTIN_DIRECT function __builtin_loongson_<FN_NAME> + for instruction CODE_FOR_loongson_<INSN>. FUNCTION_TYPE is a + builtin_description field. */ +#define LOONGSON_BUILTIN_ALIAS(INSN, FN_NAME, FUNCTION_TYPE) \ + { CODE_FOR_loongson_ ## INSN, 0, "__builtin_loongson_" #FN_NAME, \ + MIPS_BUILTIN_DIRECT, FUNCTION_TYPE, mips_builtin_avail_loongson } + +/* Define a Loongson MIPS_BUILTIN_DIRECT function __builtin_loongson_<INSN> + for instruction CODE_FOR_loongson_<INSN>. FUNCTION_TYPE is a + builtin_description field. */ +#define LOONGSON_BUILTIN(INSN, FUNCTION_TYPE) \ + LOONGSON_BUILTIN_ALIAS (INSN, INSN, FUNCTION_TYPE) + +/* Like LOONGSON_BUILTIN, but add _<SUFFIX> to the end of the function name. + We use functions of this form when the same insn can be usefully applied + to more than one datatype. */ +#define LOONGSON_BUILTIN_SUFFIX(INSN, SUFFIX, FUNCTION_TYPE) \ + LOONGSON_BUILTIN_ALIAS (INSN, INSN ## _ ## SUFFIX, FUNCTION_TYPE) + #define CODE_FOR_mips_sqrt_ps CODE_FOR_sqrtv2sf2 #define CODE_FOR_mips_addq_ph CODE_FOR_addv2hi3 #define CODE_FOR_mips_addu_qb CODE_FOR_addv4qi3 @@ -10295,6 +10334,37 @@ AVAIL_NON_MIPS16 (dspr2_32, !TARGET_64BIT && TARGET_DSPR2) #define CODE_FOR_mips_subu_qb CODE_FOR_subv4qi3 #define CODE_FOR_mips_mul_ph CODE_FOR_mulv2hi3 +#define CODE_FOR_loongson_packsswh CODE_FOR_vec_pack_ssat_v2si +#define CODE_FOR_loongson_packsshb CODE_FOR_vec_pack_ssat_v4hi +#define CODE_FOR_loongson_packushb CODE_FOR_vec_pack_usat_v4hi +#define CODE_FOR_loongson_paddw CODE_FOR_addv2si3 +#define CODE_FOR_loongson_paddh CODE_FOR_addv4hi3 +#define CODE_FOR_loongson_paddb CODE_FOR_addv8qi3 +#define CODE_FOR_loongson_paddsh CODE_FOR_ssaddv4hi3 +#define CODE_FOR_loongson_paddsb CODE_FOR_ssaddv8qi3 +#define CODE_FOR_loongson_paddush CODE_FOR_usaddv4hi3 +#define CODE_FOR_loongson_paddusb CODE_FOR_usaddv8qi3 +#define CODE_FOR_loongson_pmaxsh CODE_FOR_smaxv4hi3 +#define CODE_FOR_loongson_pmaxub CODE_FOR_umaxv8qi3 +#define CODE_FOR_loongson_pminsh CODE_FOR_sminv4hi3 +#define CODE_FOR_loongson_pminub CODE_FOR_uminv8qi3 +#define CODE_FOR_loongson_pmulhuh CODE_FOR_umulv4hi3_highpart +#define CODE_FOR_loongson_pmulhh CODE_FOR_smulv4hi3_highpart +#define CODE_FOR_loongson_biadd CODE_FOR_reduc_uplus_v8qi +#define CODE_FOR_loongson_psubw CODE_FOR_subv2si3 +#define CODE_FOR_loongson_psubh CODE_FOR_subv4hi3 +#define CODE_FOR_loongson_psubb CODE_FOR_subv8qi3 +#define CODE_FOR_loongson_psubsh CODE_FOR_sssubv4hi3 +#define CODE_FOR_loongson_psubsb CODE_FOR_sssubv8qi3 +#define CODE_FOR_loongson_psubush CODE_FOR_ussubv4hi3 +#define CODE_FOR_loongson_psubusb CODE_FOR_ussubv8qi3 +#define CODE_FOR_loongson_punpckhbh CODE_FOR_vec_interleave_highv8qi +#define CODE_FOR_loongson_punpckhhw CODE_FOR_vec_interleave_highv4hi +#define CODE_FOR_loongson_punpckhwd CODE_FOR_vec_interleave_highv2si +#define CODE_FOR_loongson_punpcklbh CODE_FOR_vec_interleave_lowv8qi +#define CODE_FOR_loongson_punpcklhw CODE_FOR_vec_interleave_lowv4hi +#define CODE_FOR_loongson_punpcklwd CODE_FOR_vec_interleave_lowv2si + static const struct mips_builtin_description mips_builtins[] = { DIRECT_BUILTIN (pll_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single), DIRECT_BUILTIN (pul_ps, MIPS_V2SF_FTYPE_V2SF_V2SF, paired_single), @@ -10471,7 +10541,108 @@ static const struct mips_builtin_description mips_builtins[] = { DIRECT_BUILTIN (dpaqx_s_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32), DIRECT_BUILTIN (dpaqx_sa_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32), DIRECT_BUILTIN (dpsqx_s_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32), - DIRECT_BUILTIN (dpsqx_sa_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32) + DIRECT_BUILTIN (dpsqx_sa_w_ph, MIPS_DI_FTYPE_DI_V2HI_V2HI, dspr2_32), + + /* Builtin functions for ST Microelectronics Loongson-2E/2F cores. */ + LOONGSON_BUILTIN (packsswh, MIPS_V4HI_FTYPE_V2SI_V2SI), + LOONGSON_BUILTIN (packsshb, MIPS_V8QI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN (packushb, MIPS_UV8QI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (paddw, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI), + LOONGSON_BUILTIN_SUFFIX (paddh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (paddb, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_SUFFIX (paddw, s, MIPS_V2SI_FTYPE_V2SI_V2SI), + LOONGSON_BUILTIN_SUFFIX (paddh, s, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN_SUFFIX (paddb, s, MIPS_V8QI_FTYPE_V8QI_V8QI), + LOONGSON_BUILTIN_SUFFIX (paddd, u, MIPS_UDI_FTYPE_UDI_UDI), + LOONGSON_BUILTIN_SUFFIX (paddd, s, MIPS_DI_FTYPE_DI_DI), + LOONGSON_BUILTIN (paddsh, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN (paddsb, MIPS_V8QI_FTYPE_V8QI_V8QI), + LOONGSON_BUILTIN (paddush, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN (paddusb, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_ALIAS (pandn_d, pandn_ud, MIPS_UDI_FTYPE_UDI_UDI), + LOONGSON_BUILTIN_ALIAS (pandn_w, pandn_uw, MIPS_UV2SI_FTYPE_UV2SI_UV2SI), + LOONGSON_BUILTIN_ALIAS (pandn_h, pandn_uh, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_ALIAS (pandn_b, pandn_ub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_ALIAS (pandn_d, pandn_sd, MIPS_DI_FTYPE_DI_DI), + LOONGSON_BUILTIN_ALIAS (pandn_w, pandn_sw, MIPS_V2SI_FTYPE_V2SI_V2SI), + LOONGSON_BUILTIN_ALIAS (pandn_h, pandn_sh, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN_ALIAS (pandn_b, pandn_sb, MIPS_V8QI_FTYPE_V8QI_V8QI), + LOONGSON_BUILTIN (pavgh, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN (pavgb, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_SUFFIX (pcmpeqw, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI), + LOONGSON_BUILTIN_SUFFIX (pcmpeqh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (pcmpeqb, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_SUFFIX (pcmpeqw, s, MIPS_V2SI_FTYPE_V2SI_V2SI), + LOONGSON_BUILTIN_SUFFIX (pcmpeqh, s, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN_SUFFIX (pcmpeqb, s, MIPS_V8QI_FTYPE_V8QI_V8QI), + LOONGSON_BUILTIN_SUFFIX (pcmpgtw, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI), + LOONGSON_BUILTIN_SUFFIX (pcmpgth, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (pcmpgtb, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_SUFFIX (pcmpgtw, s, MIPS_V2SI_FTYPE_V2SI_V2SI), + LOONGSON_BUILTIN_SUFFIX (pcmpgth, s, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN_SUFFIX (pcmpgtb, s, MIPS_V8QI_FTYPE_V8QI_V8QI), + LOONGSON_BUILTIN_SUFFIX (pextrh, u, MIPS_UV4HI_FTYPE_UV4HI_USI), + LOONGSON_BUILTIN_SUFFIX (pextrh, s, MIPS_V4HI_FTYPE_V4HI_USI), + LOONGSON_BUILTIN_SUFFIX (pinsrh_0, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (pinsrh_1, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (pinsrh_2, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (pinsrh_3, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (pinsrh_0, s, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN_SUFFIX (pinsrh_1, s, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN_SUFFIX (pinsrh_2, s, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN_SUFFIX (pinsrh_3, s, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN (pmaddhw, MIPS_V2SI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN (pmaxsh, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN (pmaxub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN (pminsh, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN (pminub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_SUFFIX (pmovmskb, u, MIPS_UV8QI_FTYPE_UV8QI), + LOONGSON_BUILTIN_SUFFIX (pmovmskb, s, MIPS_V8QI_FTYPE_V8QI), + LOONGSON_BUILTIN (pmulhuh, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN (pmulhh, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN (pmullh, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN (pmuluw, MIPS_UDI_FTYPE_UV2SI_UV2SI), + LOONGSON_BUILTIN (pasubub, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN (biadd, MIPS_UV4HI_FTYPE_UV8QI), + LOONGSON_BUILTIN (psadbh, MIPS_UV4HI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_SUFFIX (pshufh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (pshufh, s, MIPS_V4HI_FTYPE_V4HI_V4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (psllh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (psllh, s, MIPS_V4HI_FTYPE_V4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (psllw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI), + LOONGSON_BUILTIN_SUFFIX (psllw, s, MIPS_V2SI_FTYPE_V2SI_UQI), + LOONGSON_BUILTIN_SUFFIX (psrah, u, MIPS_UV4HI_FTYPE_UV4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (psrah, s, MIPS_V4HI_FTYPE_V4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (psraw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI), + LOONGSON_BUILTIN_SUFFIX (psraw, s, MIPS_V2SI_FTYPE_V2SI_UQI), + LOONGSON_BUILTIN_SUFFIX (psrlh, u, MIPS_UV4HI_FTYPE_UV4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (psrlh, s, MIPS_V4HI_FTYPE_V4HI_UQI), + LOONGSON_BUILTIN_SUFFIX (psrlw, u, MIPS_UV2SI_FTYPE_UV2SI_UQI), + LOONGSON_BUILTIN_SUFFIX (psrlw, s, MIPS_V2SI_FTYPE_V2SI_UQI), + LOONGSON_BUILTIN_SUFFIX (psubw, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI), + LOONGSON_BUILTIN_SUFFIX (psubh, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (psubb, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_SUFFIX (psubw, s, MIPS_V2SI_FTYPE_V2SI_V2SI), + LOONGSON_BUILTIN_SUFFIX (psubh, s, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN_SUFFIX (psubb, s, MIPS_V8QI_FTYPE_V8QI_V8QI), + LOONGSON_BUILTIN_SUFFIX (psubd, u, MIPS_UDI_FTYPE_UDI_UDI), + LOONGSON_BUILTIN_SUFFIX (psubd, s, MIPS_DI_FTYPE_DI_DI), + LOONGSON_BUILTIN (psubsh, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN (psubsb, MIPS_V8QI_FTYPE_V8QI_V8QI), + LOONGSON_BUILTIN (psubush, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN (psubusb, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_SUFFIX (punpckhbh, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_SUFFIX (punpckhhw, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (punpckhwd, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI), + LOONGSON_BUILTIN_SUFFIX (punpckhbh, s, MIPS_V8QI_FTYPE_V8QI_V8QI), + LOONGSON_BUILTIN_SUFFIX (punpckhhw, s, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN_SUFFIX (punpckhwd, s, MIPS_V2SI_FTYPE_V2SI_V2SI), + LOONGSON_BUILTIN_SUFFIX (punpcklbh, u, MIPS_UV8QI_FTYPE_UV8QI_UV8QI), + LOONGSON_BUILTIN_SUFFIX (punpcklhw, u, MIPS_UV4HI_FTYPE_UV4HI_UV4HI), + LOONGSON_BUILTIN_SUFFIX (punpcklwd, u, MIPS_UV2SI_FTYPE_UV2SI_UV2SI), + LOONGSON_BUILTIN_SUFFIX (punpcklbh, s, MIPS_V8QI_FTYPE_V8QI_V8QI), + LOONGSON_BUILTIN_SUFFIX (punpcklhw, s, MIPS_V4HI_FTYPE_V4HI_V4HI), + LOONGSON_BUILTIN_SUFFIX (punpcklwd, s, MIPS_V2SI_FTYPE_V2SI_V2SI) }; /* MODE is a vector mode whose elements have type TYPE. Return the type @@ -10480,11 +10651,17 @@ static const struct mips_builtin_description mips_builtins[] = { static tree mips_builtin_vector_type (tree type, enum machine_mode mode) { - static tree types[(int) MAX_MACHINE_MODE]; + static tree types[2 * (int) MAX_MACHINE_MODE]; + int mode_index; - if (types[(int) mode] == NULL_TREE) - types[(int) mode] = build_vector_type_for_mode (type, mode); - return types[(int) mode]; + mode_index = (int) mode; + + if (TREE_CODE (type) == INTEGER_TYPE && TYPE_UNSIGNED (type)) + mode_index += MAX_MACHINE_MODE; + + if (types[mode_index] == NULL_TREE) + types[mode_index] = build_vector_type_for_mode (type, mode); + return types[mode_index]; } /* Source-level argument types. */ @@ -10493,16 +10670,27 @@ mips_builtin_vector_type (tree type, enum machine_mode mode) #define MIPS_ATYPE_POINTER ptr_type_node /* Standard mode-based argument types. */ +#define MIPS_ATYPE_UQI unsigned_intQI_type_node #define MIPS_ATYPE_SI intSI_type_node #define MIPS_ATYPE_USI unsigned_intSI_type_node #define MIPS_ATYPE_DI intDI_type_node +#define MIPS_ATYPE_UDI unsigned_intDI_type_node #define MIPS_ATYPE_SF float_type_node #define MIPS_ATYPE_DF double_type_node /* Vector argument types. */ #define MIPS_ATYPE_V2SF mips_builtin_vector_type (float_type_node, V2SFmode) #define MIPS_ATYPE_V2HI mips_builtin_vector_type (intHI_type_node, V2HImode) +#define MIPS_ATYPE_V2SI mips_builtin_vector_type (intSI_type_node, V2SImode) #define MIPS_ATYPE_V4QI mips_builtin_vector_type (intQI_type_node, V4QImode) +#define MIPS_ATYPE_V4HI mips_builtin_vector_type (intHI_type_node, V4HImode) +#define MIPS_ATYPE_V8QI mips_builtin_vector_type (intQI_type_node, V8QImode) +#define MIPS_ATYPE_UV2SI \ + mips_builtin_vector_type (unsigned_intSI_type_node, V2SImode) +#define MIPS_ATYPE_UV4HI \ + mips_builtin_vector_type (unsigned_intHI_type_node, V4HImode) +#define MIPS_ATYPE_UV8QI \ + mips_builtin_vector_type (unsigned_intQI_type_node, V8QImode) /* MIPS_FTYPE_ATYPESN takes N MIPS_FTYPES-like type codes and lists their associated MIPS_ATYPEs. */ @@ -12618,6 +12806,30 @@ mips_conditional_register_usage (void) } } +/* Initialize vector TARGET to VALS. */ + +void +mips_expand_vector_init (rtx target, rtx vals) +{ + enum machine_mode mode; + enum machine_mode inner; + unsigned int i, n_elts; + rtx mem; + + mode = GET_MODE (target); + inner = GET_MODE_INNER (mode); + n_elts = GET_MODE_NUNITS (mode); + + gcc_assert (VECTOR_MODE_P (mode)); + + mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); + for (i = 0; i < n_elts; i++) + emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)), + XVECEXP (vals, 0, i)); + + emit_move_insn (target, mem); +} + /* When generating MIPS16 code, we want to allocate $24 (T_REG) before other registers for instructions for which it is possible. This encourages the compiler to use CMP in cases where an XOR would diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index e2129a0fcd5..74c079a7d4a 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -267,6 +267,12 @@ enum mips_code_readable_setting { || mips_tune == PROCESSOR_74KF3_2) #define TUNE_20KC (mips_tune == PROCESSOR_20KC) +/* Whether vector modes and intrinsics for ST Microelectronics + Loongson-2E/2F processors should be enabled. In o32 pairs of + floating-point registers provide 64-bit values. */ +#define TARGET_LOONGSON_VECTORS (TARGET_HARD_FLOAT_ABI \ + && TARGET_LOONGSON_2EF) + /* True if the pre-reload scheduler should try to create chains of multiply-add or multiply-subtract instructions. For example, suppose we have: @@ -497,6 +503,10 @@ enum mips_code_readable_setting { builtin_define_std ("MIPSEL"); \ builtin_define ("_MIPSEL"); \ } \ + \ + /* Whether Loongson vector modes are enabled. */ \ + if (TARGET_LOONGSON_VECTORS) \ + builtin_define ("__mips_loongson_vector_rev"); \ \ /* Macros dependent on the C dialect. */ \ if (preprocessing_asm_p ()) \ diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 2e860f6e1f1..ac1dc60dc81 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -215,6 +215,30 @@ (UNSPEC_DPAQX_SA_W_PH 446) (UNSPEC_DPSQX_S_W_PH 447) (UNSPEC_DPSQX_SA_W_PH 448) + + ;; ST Microelectronics Loongson-2E/2F. + (UNSPEC_LOONGSON_PAVG 500) + (UNSPEC_LOONGSON_PCMPEQ 501) + (UNSPEC_LOONGSON_PCMPGT 502) + (UNSPEC_LOONGSON_PEXTR 503) + (UNSPEC_LOONGSON_PINSR_0 504) + (UNSPEC_LOONGSON_PINSR_1 505) + (UNSPEC_LOONGSON_PINSR_2 506) + (UNSPEC_LOONGSON_PINSR_3 507) + (UNSPEC_LOONGSON_PMADD 508) + (UNSPEC_LOONGSON_PMOVMSK 509) + (UNSPEC_LOONGSON_PMULHU 510) + (UNSPEC_LOONGSON_PMULH 511) + (UNSPEC_LOONGSON_PMULL 512) + (UNSPEC_LOONGSON_PMULU 513) + (UNSPEC_LOONGSON_PASUBUB 514) + (UNSPEC_LOONGSON_BIADD 515) + (UNSPEC_LOONGSON_PSADBH 516) + (UNSPEC_LOONGSON_PSHUFH 517) + (UNSPEC_LOONGSON_PUNPCKH 518) + (UNSPEC_LOONGSON_PUNPCKL 519) + (UNSPEC_LOONGSON_PADDD 520) + (UNSPEC_LOONGSON_PSUBD 521) ] ) @@ -500,7 +524,11 @@ ;; 64-bit modes for which we provide move patterns. (define_mode_iterator MOVE64 - [DI DF (V2SF "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT")]) + [DI DF + (V2SF "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT") + (V2SI "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS") + (V4HI "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS") + (V8QI "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS")]) ;; 128-bit modes for which we provide move patterns on 64-bit targets. (define_mode_iterator MOVE128 [TI TF]) @@ -527,6 +555,9 @@ [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") (V2SF "!TARGET_64BIT && TARGET_PAIRED_SINGLE_FLOAT") + (V2SI "!TARGET_64BIT && TARGET_LOONGSON_VECTORS") + (V4HI "!TARGET_64BIT && TARGET_LOONGSON_VECTORS") + (V8QI "!TARGET_64BIT && TARGET_LOONGSON_VECTORS") (TF "TARGET_64BIT && TARGET_FLOAT64")]) ;; In GPR templates, a string like "<d>subu" will expand to "subu" in the @@ -579,7 +610,9 @@ ;; This attribute gives the integer mode that has half the size of ;; the controlling mode. -(define_mode_attr HALFMODE [(DF "SI") (DI "SI") (V2SF "SI") (TF "DI")]) +(define_mode_attr HALFMODE [(DF "SI") (DI "SI") (V2SF "SI") + (V2SI "SI") (V4HI "SI") (V8QI "SI") + (TF "DI")]) ;; This attribute works around the early SB-1 rev2 core "F2" erratum: ;; @@ -6435,3 +6468,6 @@ ; MIPS fixed-point instructions. (include "mips-fixed.md") + +; ST-Microelectronics Loongson-2E/2F-specific patterns. +(include "loongson.md") diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 516ecc2af4e..3fb51ccf4c5 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -6788,6 +6788,7 @@ instructions, but allow the compiler to schedule those calls. * X86 Built-in Functions:: * MIPS DSP Built-in Functions:: * MIPS Paired-Single Support:: +* MIPS Loongson Built-in Functions:: * PowerPC AltiVec Built-in Functions:: * SPARC VIS Built-in Functions:: * SPU Built-in Functions:: @@ -8667,6 +8668,132 @@ value is the upper one. The opposite order applies to big-endian targets. For example, the code above will set the lower half of @code{a} to @code{1.5} on little-endian targets and @code{9.1} on big-endian targets. +@node MIPS Loongson Built-in Functions +@subsection MIPS Loongson Built-in Functions + +GCC provides intrinsics to access the SIMD instructions provided by the +ST Microelectronics Loongson-2E and -2F processors. These intrinsics, +available after inclusion of the @code{loongson.h} header file, +operate on the following 64-bit vector types: + +@itemize +@item @code{uint8x8_t}, a vector of eight unsigned 8-bit integers; +@item @code{uint16x4_t}, a vector of four unsigned 16-bit integers; +@item @code{uint32x2_t}, a vector of two unsigned 32-bit integers; +@item @code{int8x8_t}, a vector of eight signed 8-bit integers; +@item @code{int16x4_t}, a vector of four signed 16-bit integers; +@item @code{int32x2_t}, a vector of two signed 32-bit integers. +@end itemize + +The intrinsics provided are listed below; each is named after the +machine instruction to which it corresponds, with suffixes added as +appropriate to distinguish intrinsics that expand to the same machine +instruction yet have different argument types. Refer to the architecture +documentation for a description of the functionality of each +instruction. + +@smallexample +int16x4_t packsswh (int32x2_t s, int32x2_t t); +int8x8_t packsshb (int16x4_t s, int16x4_t t); +uint8x8_t packushb (uint16x4_t s, uint16x4_t t); +uint32x2_t paddw_u (uint32x2_t s, uint32x2_t t); +uint16x4_t paddh_u (uint16x4_t s, uint16x4_t t); +uint8x8_t paddb_u (uint8x8_t s, uint8x8_t t); +int32x2_t paddw_s (int32x2_t s, int32x2_t t); +int16x4_t paddh_s (int16x4_t s, int16x4_t t); +int8x8_t paddb_s (int8x8_t s, int8x8_t t); +uint64_t paddd_u (uint64_t s, uint64_t t); +int64_t paddd_s (int64_t s, int64_t t); +int16x4_t paddsh (int16x4_t s, int16x4_t t); +int8x8_t paddsb (int8x8_t s, int8x8_t t); +uint16x4_t paddush (uint16x4_t s, uint16x4_t t); +uint8x8_t paddusb (uint8x8_t s, uint8x8_t t); +uint64_t pandn_ud (uint64_t s, uint64_t t); +uint32x2_t pandn_uw (uint32x2_t s, uint32x2_t t); +uint16x4_t pandn_uh (uint16x4_t s, uint16x4_t t); +uint8x8_t pandn_ub (uint8x8_t s, uint8x8_t t); +int64_t pandn_sd (int64_t s, int64_t t); +int32x2_t pandn_sw (int32x2_t s, int32x2_t t); +int16x4_t pandn_sh (int16x4_t s, int16x4_t t); +int8x8_t pandn_sb (int8x8_t s, int8x8_t t); +uint16x4_t pavgh (uint16x4_t s, uint16x4_t t); +uint8x8_t pavgb (uint8x8_t s, uint8x8_t t); +uint32x2_t pcmpeqw_u (uint32x2_t s, uint32x2_t t); +uint16x4_t pcmpeqh_u (uint16x4_t s, uint16x4_t t); +uint8x8_t pcmpeqb_u (uint8x8_t s, uint8x8_t t); +int32x2_t pcmpeqw_s (int32x2_t s, int32x2_t t); +int16x4_t pcmpeqh_s (int16x4_t s, int16x4_t t); +int8x8_t pcmpeqb_s (int8x8_t s, int8x8_t t); +uint32x2_t pcmpgtw_u (uint32x2_t s, uint32x2_t t); +uint16x4_t pcmpgth_u (uint16x4_t s, uint16x4_t t); +uint8x8_t pcmpgtb_u (uint8x8_t s, uint8x8_t t); +int32x2_t pcmpgtw_s (int32x2_t s, int32x2_t t); +int16x4_t pcmpgth_s (int16x4_t s, int16x4_t t); +int8x8_t pcmpgtb_s (int8x8_t s, int8x8_t t); +uint16x4_t pextrh_u (uint16x4_t s, int field); +int16x4_t pextrh_s (int16x4_t s, int field); +uint16x4_t pinsrh_0_u (uint16x4_t s, uint16x4_t t); +uint16x4_t pinsrh_1_u (uint16x4_t s, uint16x4_t t); +uint16x4_t pinsrh_2_u (uint16x4_t s, uint16x4_t t); +uint16x4_t pinsrh_3_u (uint16x4_t s, uint16x4_t t); +int16x4_t pinsrh_0_s (int16x4_t s, int16x4_t t); +int16x4_t pinsrh_1_s (int16x4_t s, int16x4_t t); +int16x4_t pinsrh_2_s (int16x4_t s, int16x4_t t); +int16x4_t pinsrh_3_s (int16x4_t s, int16x4_t t); +int32x2_t pmaddhw (int16x4_t s, int16x4_t t); +int16x4_t pmaxsh (int16x4_t s, int16x4_t t); +uint8x8_t pmaxub (uint8x8_t s, uint8x8_t t); +int16x4_t pminsh (int16x4_t s, int16x4_t t); +uint8x8_t pminub (uint8x8_t s, uint8x8_t t); +uint8x8_t pmovmskb_u (uint8x8_t s); +int8x8_t pmovmskb_s (int8x8_t s); +uint16x4_t pmulhuh (uint16x4_t s, uint16x4_t t); +int16x4_t pmulhh (int16x4_t s, int16x4_t t); +int16x4_t pmullh (int16x4_t s, int16x4_t t); +int64_t pmuluw (uint32x2_t s, uint32x2_t t); +uint8x8_t pasubub (uint8x8_t s, uint8x8_t t); +uint16x4_t biadd (uint8x8_t s); +uint16x4_t psadbh (uint8x8_t s, uint8x8_t t); +uint16x4_t pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order); +int16x4_t pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order); +uint16x4_t psllh_u (uint16x4_t s, uint8_t amount); +int16x4_t psllh_s (int16x4_t s, uint8_t amount); +uint32x2_t psllw_u (uint32x2_t s, uint8_t amount); +int32x2_t psllw_s (int32x2_t s, uint8_t amount); +uint16x4_t psrlh_u (uint16x4_t s, uint8_t amount); +int16x4_t psrlh_s (int16x4_t s, uint8_t amount); +uint32x2_t psrlw_u (uint32x2_t s, uint8_t amount); +int32x2_t psrlw_s (int32x2_t s, uint8_t amount); +uint16x4_t psrah_u (uint16x4_t s, uint8_t amount); +int16x4_t psrah_s (int16x4_t s, uint8_t amount); +uint32x2_t psraw_u (uint32x2_t s, uint8_t amount); +int32x2_t psraw_s (int32x2_t s, uint8_t amount); +uint32x2_t psubw_u (uint32x2_t s, uint32x2_t t); +uint16x4_t psubh_u (uint16x4_t s, uint16x4_t t); +uint8x8_t psubb_u (uint8x8_t s, uint8x8_t t); +int32x2_t psubw_s (int32x2_t s, int32x2_t t); +int16x4_t psubh_s (int16x4_t s, int16x4_t t); +int8x8_t psubb_s (int8x8_t s, int8x8_t t); +uint64_t psubd_u (uint64_t s, uint64_t t); +int64_t psubd_s (int64_t s, int64_t t); +int16x4_t psubsh (int16x4_t s, int16x4_t t); +int8x8_t psubsb (int8x8_t s, int8x8_t t); +uint16x4_t psubush (uint16x4_t s, uint16x4_t t); +uint8x8_t psubusb (uint8x8_t s, uint8x8_t t); +uint32x2_t punpckhwd_u (uint32x2_t s, uint32x2_t t); +uint16x4_t punpckhhw_u (uint16x4_t s, uint16x4_t t); +uint8x8_t punpckhbh_u (uint8x8_t s, uint8x8_t t); +int32x2_t punpckhwd_s (int32x2_t s, int32x2_t t); +int16x4_t punpckhhw_s (int16x4_t s, int16x4_t t); +int8x8_t punpckhbh_s (int8x8_t s, int8x8_t t); +uint32x2_t punpcklwd_u (uint32x2_t s, uint32x2_t t); +uint16x4_t punpcklhw_u (uint16x4_t s, uint16x4_t t); +uint8x8_t punpcklbh_u (uint8x8_t s, uint8x8_t t); +int32x2_t punpcklwd_s (int32x2_t s, int32x2_t t); +int16x4_t punpcklhw_s (int16x4_t s, int16x4_t t); +int8x8_t punpcklbh_s (int8x8_t s, int8x8_t t); +@end smallexample + @menu * Paired-Single Arithmetic:: * Paired-Single Built-in Functions:: diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index eed09251e96..95a49dc246d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2008-06-15 Mark Shinwell <shinwell@codesourcery.com> + + * lib/target-supports.exp (check_effective_target_mips_loongson): New. + * gcc.target/mips/loongson-simd.c: New. + 2008-06-14 Simon Martin <simartin@users.sourceforge.net> PR c++/35320 diff --git a/gcc/testsuite/gcc.target/mips/loongson-simd.c b/gcc/testsuite/gcc.target/mips/loongson-simd.c new file mode 100644 index 00000000000..e4cae5ea61d --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/loongson-simd.c @@ -0,0 +1,1963 @@ +/* Test cases for ST Microelectronics Loongson-2E/2F SIMD intrinsics. + Copyright (C) 2008 Free Software Foundation, Inc. + Contributed by CodeSourcery. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* { dg-do run } */ +/* { dg-require-effective-target mips_loongson } */ + +#include "loongson.h" +#include <stdio.h> +#include <stdint.h> +#include <assert.h> +#include <limits.h> + +typedef union { int32x2_t v; int32_t a[2]; } int32x2_encap_t; +typedef union { int16x4_t v; int16_t a[4]; } int16x4_encap_t; +typedef union { int8x8_t v; int8_t a[8]; } int8x8_encap_t; +typedef union { uint32x2_t v; uint32_t a[2]; } uint32x2_encap_t; +typedef union { uint16x4_t v; uint16_t a[4]; } uint16x4_encap_t; +typedef union { uint8x8_t v; uint8_t a[8]; } uint8x8_encap_t; + +#define UINT16x4_MAX USHRT_MAX +#define UINT8x8_MAX UCHAR_MAX +#define INT8x8_MAX SCHAR_MAX +#define INT16x4_MAX SHRT_MAX +#define INT32x2_MAX INT_MAX + +static void test_packsswh (void) +{ + int32x2_encap_t s, t; + int16x4_encap_t r; + s.a[0] = INT16x4_MAX - 2; + s.a[1] = INT16x4_MAX - 1; + t.a[0] = INT16x4_MAX; + t.a[1] = INT16x4_MAX + 1; + r.v = packsswh (s.v, t.v); + assert (r.a[0] == INT16x4_MAX - 2); + assert (r.a[1] == INT16x4_MAX - 1); + assert (r.a[2] == INT16x4_MAX); + assert (r.a[3] == INT16x4_MAX); +} + +static void test_packsshb (void) +{ + int16x4_encap_t s, t; + int8x8_encap_t r; + s.a[0] = INT8x8_MAX - 6; + s.a[1] = INT8x8_MAX - 5; + s.a[2] = INT8x8_MAX - 4; + s.a[3] = INT8x8_MAX - 3; + t.a[0] = INT8x8_MAX - 2; + t.a[1] = INT8x8_MAX - 1; + t.a[2] = INT8x8_MAX; + t.a[3] = INT8x8_MAX + 1; + r.v = packsshb (s.v, t.v); + assert (r.a[0] == INT8x8_MAX - 6); + assert (r.a[1] == INT8x8_MAX - 5); + assert (r.a[2] == INT8x8_MAX - 4); + assert (r.a[3] == INT8x8_MAX - 3); + assert (r.a[4] == INT8x8_MAX - 2); + assert (r.a[5] == INT8x8_MAX - 1); + assert (r.a[6] == INT8x8_MAX); + assert (r.a[7] == INT8x8_MAX); +} + +static void test_packushb (void) +{ + uint16x4_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = UINT8x8_MAX - 6; + s.a[1] = UINT8x8_MAX - 5; + s.a[2] = UINT8x8_MAX - 4; + s.a[3] = UINT8x8_MAX - 3; + t.a[0] = UINT8x8_MAX - 2; + t.a[1] = UINT8x8_MAX - 1; + t.a[2] = UINT8x8_MAX; + t.a[3] = UINT8x8_MAX + 1; + r.v = packushb (s.v, t.v); + assert (r.a[0] == UINT8x8_MAX - 6); + assert (r.a[1] == UINT8x8_MAX - 5); + assert (r.a[2] == UINT8x8_MAX - 4); + assert (r.a[3] == UINT8x8_MAX - 3); + assert (r.a[4] == UINT8x8_MAX - 2); + assert (r.a[5] == UINT8x8_MAX - 1); + assert (r.a[6] == UINT8x8_MAX); + assert (r.a[7] == UINT8x8_MAX); +} + +static void test_paddw_u (void) +{ + uint32x2_encap_t s, t; + uint32x2_encap_t r; + s.a[0] = 1; + s.a[1] = 2; + t.a[0] = 3; + t.a[1] = 4; + r.v = paddw_u (s.v, t.v); + assert (r.a[0] == 4); + assert (r.a[1] == 6); +} + +static void test_paddw_s (void) +{ + int32x2_encap_t s, t; + int32x2_encap_t r; + s.a[0] = -2; + s.a[1] = -1; + t.a[0] = 3; + t.a[1] = 4; + r.v = paddw_s (s.v, t.v); + assert (r.a[0] == 1); + assert (r.a[1] == 3); +} + +static void test_paddh_u (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 1; + s.a[1] = 2; + s.a[2] = 3; + s.a[3] = 4; + t.a[0] = 5; + t.a[1] = 6; + t.a[2] = 7; + t.a[3] = 8; + r.v = paddh_u (s.v, t.v); + assert (r.a[0] == 6); + assert (r.a[1] == 8); + assert (r.a[2] == 10); + assert (r.a[3] == 12); +} + +static void test_paddh_s (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -10; + s.a[1] = -20; + s.a[2] = -30; + s.a[3] = -40; + t.a[0] = 1; + t.a[1] = 2; + t.a[2] = 3; + t.a[3] = 4; + r.v = paddh_s (s.v, t.v); + assert (r.a[0] == -9); + assert (r.a[1] == -18); + assert (r.a[2] == -27); + assert (r.a[3] == -36); +} + +static void test_paddb_u (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 1; + s.a[1] = 2; + s.a[2] = 3; + s.a[3] = 4; + s.a[4] = 5; + s.a[5] = 6; + s.a[6] = 7; + s.a[7] = 8; + t.a[0] = 9; + t.a[1] = 10; + t.a[2] = 11; + t.a[3] = 12; + t.a[4] = 13; + t.a[5] = 14; + t.a[6] = 15; + t.a[7] = 16; + r.v = paddb_u (s.v, t.v); + assert (r.a[0] == 10); + assert (r.a[1] == 12); + assert (r.a[2] == 14); + assert (r.a[3] == 16); + assert (r.a[4] == 18); + assert (r.a[5] == 20); + assert (r.a[6] == 22); + assert (r.a[7] == 24); +} + +static void test_paddb_s (void) +{ + int8x8_encap_t s, t; + int8x8_encap_t r; + s.a[0] = -10; + s.a[1] = -20; + s.a[2] = -30; + s.a[3] = -40; + s.a[4] = -50; + s.a[5] = -60; + s.a[6] = -70; + s.a[7] = -80; + t.a[0] = 1; + t.a[1] = 2; + t.a[2] = 3; + t.a[3] = 4; + t.a[4] = 5; + t.a[5] = 6; + t.a[6] = 7; + t.a[7] = 8; + r.v = paddb_s (s.v, t.v); + assert (r.a[0] == -9); + assert (r.a[1] == -18); + assert (r.a[2] == -27); + assert (r.a[3] == -36); + assert (r.a[4] == -45); + assert (r.a[5] == -54); + assert (r.a[6] == -63); + assert (r.a[7] == -72); +} + +static void test_paddd_u (void) +{ + uint64_t d = 123456; + uint64_t e = 789012; + uint64_t r; + r = paddd_u (d, e); + assert (r == 912468); +} + +static void test_paddd_s (void) +{ + int64_t d = 123456; + int64_t e = -789012; + int64_t r; + r = paddd_s (d, e); + assert (r == -665556); +} + +static void test_paddsh (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -1; + s.a[1] = 0; + s.a[2] = 1; + s.a[3] = 2; + t.a[0] = INT16x4_MAX; + t.a[1] = INT16x4_MAX; + t.a[2] = INT16x4_MAX; + t.a[3] = INT16x4_MAX; + r.v = paddsh (s.v, t.v); + assert (r.a[0] == INT16x4_MAX - 1); + assert (r.a[1] == INT16x4_MAX); + assert (r.a[2] == INT16x4_MAX); + assert (r.a[3] == INT16x4_MAX); +} + +static void test_paddsb (void) +{ + int8x8_encap_t s, t; + int8x8_encap_t r; + s.a[0] = -6; + s.a[1] = -5; + s.a[2] = -4; + s.a[3] = -3; + s.a[4] = -2; + s.a[5] = -1; + s.a[6] = 0; + s.a[7] = 1; + t.a[0] = INT8x8_MAX; + t.a[1] = INT8x8_MAX; + t.a[2] = INT8x8_MAX; + t.a[3] = INT8x8_MAX; + t.a[4] = INT8x8_MAX; + t.a[5] = INT8x8_MAX; + t.a[6] = INT8x8_MAX; + t.a[7] = INT8x8_MAX; + r.v = paddsb (s.v, t.v); + assert (r.a[0] == INT8x8_MAX - 6); + assert (r.a[1] == INT8x8_MAX - 5); + assert (r.a[2] == INT8x8_MAX - 4); + assert (r.a[3] == INT8x8_MAX - 3); + assert (r.a[4] == INT8x8_MAX - 2); + assert (r.a[5] == INT8x8_MAX - 1); + assert (r.a[6] == INT8x8_MAX); + assert (r.a[7] == INT8x8_MAX); +} + +static void test_paddush (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 0; + s.a[1] = 1; + s.a[2] = 0; + s.a[3] = 1; + t.a[0] = UINT16x4_MAX; + t.a[1] = UINT16x4_MAX; + t.a[2] = UINT16x4_MAX; + t.a[3] = UINT16x4_MAX; + r.v = paddush (s.v, t.v); + assert (r.a[0] == UINT16x4_MAX); + assert (r.a[1] == UINT16x4_MAX); + assert (r.a[2] == UINT16x4_MAX); + assert (r.a[3] == UINT16x4_MAX); +} + +static void test_paddusb (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 0; + s.a[1] = 1; + s.a[2] = 0; + s.a[3] = 1; + s.a[4] = 0; + s.a[5] = 1; + s.a[6] = 0; + s.a[7] = 1; + t.a[0] = UINT8x8_MAX; + t.a[1] = UINT8x8_MAX; + t.a[2] = UINT8x8_MAX; + t.a[3] = UINT8x8_MAX; + t.a[4] = UINT8x8_MAX; + t.a[5] = UINT8x8_MAX; + t.a[6] = UINT8x8_MAX; + t.a[7] = UINT8x8_MAX; + r.v = paddusb (s.v, t.v); + assert (r.a[0] == UINT8x8_MAX); + assert (r.a[1] == UINT8x8_MAX); + assert (r.a[2] == UINT8x8_MAX); + assert (r.a[3] == UINT8x8_MAX); + assert (r.a[4] == UINT8x8_MAX); + assert (r.a[5] == UINT8x8_MAX); + assert (r.a[6] == UINT8x8_MAX); + assert (r.a[7] == UINT8x8_MAX); +} + +static void test_pandn_ud (void) +{ + uint64_t d1 = 0x0000ffff0000ffffull; + uint64_t d2 = 0x0000ffff0000ffffull; + uint64_t r; + r = pandn_ud (d1, d2); + assert (r == 0); +} + +static void test_pandn_sd (void) +{ + int64_t d1 = (int64_t) 0x0000000000000000ull; + int64_t d2 = (int64_t) 0xfffffffffffffffeull; + int64_t r; + r = pandn_sd (d1, d2); + assert (r == -2); +} + +static void test_pandn_uw (void) +{ + uint32x2_encap_t s, t; + uint32x2_encap_t r; + s.a[0] = 0xffffffff; + s.a[1] = 0x00000000; + t.a[0] = 0x00000000; + t.a[1] = 0xffffffff; + r.v = pandn_uw (s.v, t.v); + assert (r.a[0] == 0x00000000); + assert (r.a[1] == 0xffffffff); +} + +static void test_pandn_sw (void) +{ + int32x2_encap_t s, t; + int32x2_encap_t r; + s.a[0] = 0xffffffff; + s.a[1] = 0x00000000; + t.a[0] = 0xffffffff; + t.a[1] = 0xfffffffe; + r.v = pandn_sw (s.v, t.v); + assert (r.a[0] == 0); + assert (r.a[1] == -2); +} + +static void test_pandn_uh (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 0xffff; + s.a[1] = 0x0000; + s.a[2] = 0xffff; + s.a[3] = 0x0000; + t.a[0] = 0x0000; + t.a[1] = 0xffff; + t.a[2] = 0x0000; + t.a[3] = 0xffff; + r.v = pandn_uh (s.v, t.v); + assert (r.a[0] == 0x0000); + assert (r.a[1] == 0xffff); + assert (r.a[2] == 0x0000); + assert (r.a[3] == 0xffff); +} + +static void test_pandn_sh (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = 0xffff; + s.a[1] = 0x0000; + s.a[2] = 0xffff; + s.a[3] = 0x0000; + t.a[0] = 0xffff; + t.a[1] = 0xfffe; + t.a[2] = 0xffff; + t.a[3] = 0xfffe; + r.v = pandn_sh (s.v, t.v); + assert (r.a[0] == 0); + assert (r.a[1] == -2); + assert (r.a[2] == 0); + assert (r.a[3] == -2); +} + +static void test_pandn_ub (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 0xff; + s.a[1] = 0x00; + s.a[2] = 0xff; + s.a[3] = 0x00; + s.a[4] = 0xff; + s.a[5] = 0x00; + s.a[6] = 0xff; + s.a[7] = 0x00; + t.a[0] = 0x00; + t.a[1] = 0xff; + t.a[2] = 0x00; + t.a[3] = 0xff; + t.a[4] = 0x00; + t.a[5] = 0xff; + t.a[6] = 0x00; + t.a[7] = 0xff; + r.v = pandn_ub (s.v, t.v); + assert (r.a[0] == 0x00); + assert (r.a[1] == 0xff); + assert (r.a[2] == 0x00); + assert (r.a[3] == 0xff); + assert (r.a[4] == 0x00); + assert (r.a[5] == 0xff); + assert (r.a[6] == 0x00); + assert (r.a[7] == 0xff); +} + +static void test_pandn_sb (void) +{ + int8x8_encap_t s, t; + int8x8_encap_t r; + s.a[0] = 0xff; + s.a[1] = 0x00; + s.a[2] = 0xff; + s.a[3] = 0x00; + s.a[4] = 0xff; + s.a[5] = 0x00; + s.a[6] = 0xff; + s.a[7] = 0x00; + t.a[0] = 0xff; + t.a[1] = 0xfe; + t.a[2] = 0xff; + t.a[3] = 0xfe; + t.a[4] = 0xff; + t.a[5] = 0xfe; + t.a[6] = 0xff; + t.a[7] = 0xfe; + r.v = pandn_sb (s.v, t.v); + assert (r.a[0] == 0); + assert (r.a[1] == -2); + assert (r.a[2] == 0); + assert (r.a[3] == -2); + assert (r.a[4] == 0); + assert (r.a[5] == -2); + assert (r.a[6] == 0); + assert (r.a[7] == -2); +} + +static void test_pavgh (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 1; + s.a[1] = 2; + s.a[2] = 3; + s.a[3] = 4; + t.a[0] = 5; + t.a[1] = 6; + t.a[2] = 7; + t.a[3] = 8; + r.v = pavgh (s.v, t.v); + assert (r.a[0] == 3); + assert (r.a[1] == 4); + assert (r.a[2] == 5); + assert (r.a[3] == 6); +} + +static void test_pavgb (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 1; + s.a[1] = 2; + s.a[2] = 3; + s.a[3] = 4; + s.a[4] = 1; + s.a[5] = 2; + s.a[6] = 3; + s.a[7] = 4; + t.a[0] = 5; + t.a[1] = 6; + t.a[2] = 7; + t.a[3] = 8; + t.a[4] = 5; + t.a[5] = 6; + t.a[6] = 7; + t.a[7] = 8; + r.v = pavgb (s.v, t.v); + assert (r.a[0] == 3); + assert (r.a[1] == 4); + assert (r.a[2] == 5); + assert (r.a[3] == 6); + assert (r.a[4] == 3); + assert (r.a[5] == 4); + assert (r.a[6] == 5); + assert (r.a[7] == 6); +} + +static void test_pcmpeqw_u (void) +{ + uint32x2_encap_t s, t; + uint32x2_encap_t r; + s.a[0] = 42; + s.a[1] = 43; + t.a[0] = 43; + t.a[1] = 43; + r.v = pcmpeqw_u (s.v, t.v); + assert (r.a[0] == 0x00000000); + assert (r.a[1] == 0xffffffff); +} + +static void test_pcmpeqh_u (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 42; + s.a[1] = 43; + s.a[2] = 42; + s.a[3] = 43; + t.a[0] = 43; + t.a[1] = 43; + t.a[2] = 43; + t.a[3] = 43; + r.v = pcmpeqh_u (s.v, t.v); + assert (r.a[0] == 0x0000); + assert (r.a[1] == 0xffff); + assert (r.a[2] == 0x0000); + assert (r.a[3] == 0xffff); +} + +static void test_pcmpeqb_u (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 42; + s.a[1] = 43; + s.a[2] = 42; + s.a[3] = 43; + s.a[4] = 42; + s.a[5] = 43; + s.a[6] = 42; + s.a[7] = 43; + t.a[0] = 43; + t.a[1] = 43; + t.a[2] = 43; + t.a[3] = 43; + t.a[4] = 43; + t.a[5] = 43; + t.a[6] = 43; + t.a[7] = 43; + r.v = pcmpeqb_u (s.v, t.v); + assert (r.a[0] == 0x00); + assert (r.a[1] == 0xff); + assert (r.a[2] == 0x00); + assert (r.a[3] == 0xff); + assert (r.a[4] == 0x00); + assert (r.a[5] == 0xff); + assert (r.a[6] == 0x00); + assert (r.a[7] == 0xff); +} + +static void test_pcmpeqw_s (void) +{ + int32x2_encap_t s, t; + int32x2_encap_t r; + s.a[0] = -42; + s.a[1] = -42; + t.a[0] = 42; + t.a[1] = -42; + r.v = pcmpeqw_s (s.v, t.v); + assert (r.a[0] == 0); + assert (r.a[1] == -1); +} + +static void test_pcmpeqh_s (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -42; + s.a[1] = -42; + s.a[2] = -42; + s.a[3] = -42; + t.a[0] = 42; + t.a[1] = -42; + t.a[2] = 42; + t.a[3] = -42; + r.v = pcmpeqh_s (s.v, t.v); + assert (r.a[0] == 0); + assert (r.a[1] == -1); + assert (r.a[2] == 0); + assert (r.a[3] == -1); +} + +static void test_pcmpeqb_s (void) +{ + int8x8_encap_t s, t; + int8x8_encap_t r; + s.a[0] = -42; + s.a[1] = -42; + s.a[2] = -42; + s.a[3] = -42; + s.a[4] = -42; + s.a[5] = -42; + s.a[6] = -42; + s.a[7] = -42; + t.a[0] = 42; + t.a[1] = -42; + t.a[2] = 42; + t.a[3] = -42; + t.a[4] = 42; + t.a[5] = -42; + t.a[6] = 42; + t.a[7] = -42; + r.v = pcmpeqb_s (s.v, t.v); + assert (r.a[0] == 0); + assert (r.a[1] == -1); + assert (r.a[2] == 0); + assert (r.a[3] == -1); + assert (r.a[4] == 0); + assert (r.a[5] == -1); + assert (r.a[6] == 0); + assert (r.a[7] == -1); +} + +static void test_pcmpgtw_u (void) +{ + uint32x2_encap_t s, t; + uint32x2_encap_t r; + s.a[0] = 42; + s.a[1] = 43; + t.a[0] = 43; + t.a[1] = 42; + r.v = pcmpgtw_u (s.v, t.v); + assert (r.a[0] == 0x00000000); + assert (r.a[1] == 0xffffffff); +} + +static void test_pcmpgth_u (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 40; + s.a[1] = 41; + s.a[2] = 42; + s.a[3] = 43; + t.a[0] = 40; + t.a[1] = 41; + t.a[2] = 43; + t.a[3] = 42; + r.v = pcmpgth_u (s.v, t.v); + assert (r.a[0] == 0x0000); + assert (r.a[1] == 0x0000); + assert (r.a[2] == 0x0000); + assert (r.a[3] == 0xffff); +} + +static void test_pcmpgtb_u (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 40; + s.a[1] = 41; + s.a[2] = 42; + s.a[3] = 43; + s.a[4] = 44; + s.a[5] = 45; + s.a[6] = 46; + s.a[7] = 47; + t.a[0] = 48; + t.a[1] = 47; + t.a[2] = 46; + t.a[3] = 45; + t.a[4] = 44; + t.a[5] = 43; + t.a[6] = 42; + t.a[7] = 41; + r.v = pcmpgtb_u (s.v, t.v); + assert (r.a[0] == 0x00); + assert (r.a[1] == 0x00); + assert (r.a[2] == 0x00); + assert (r.a[3] == 0x00); + assert (r.a[4] == 0x00); + assert (r.a[5] == 0xff); + assert (r.a[6] == 0xff); + assert (r.a[7] == 0xff); +} + +static void test_pcmpgtw_s (void) +{ + int32x2_encap_t s, t; + int32x2_encap_t r; + s.a[0] = 42; + s.a[1] = -42; + t.a[0] = -42; + t.a[1] = -42; + r.v = pcmpgtw_s (s.v, t.v); + assert (r.a[0] == -1); + assert (r.a[1] == 0); +} + +static void test_pcmpgth_s (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -42; + s.a[1] = -42; + s.a[2] = -42; + s.a[3] = -42; + t.a[0] = 42; + t.a[1] = 43; + t.a[2] = 44; + t.a[3] = -43; + r.v = pcmpgth_s (s.v, t.v); + assert (r.a[0] == 0); + assert (r.a[1] == 0); + assert (r.a[2] == 0); + assert (r.a[3] == -1); +} + +static void test_pcmpgtb_s (void) +{ + int8x8_encap_t s, t; + int8x8_encap_t r; + s.a[0] = -42; + s.a[1] = -42; + s.a[2] = -42; + s.a[3] = -42; + s.a[4] = 42; + s.a[5] = 42; + s.a[6] = 42; + s.a[7] = 42; + t.a[0] = -45; + t.a[1] = -44; + t.a[2] = -43; + t.a[3] = -42; + t.a[4] = 42; + t.a[5] = 43; + t.a[6] = 41; + t.a[7] = 40; + r.v = pcmpgtb_s (s.v, t.v); + assert (r.a[0] == -1); + assert (r.a[1] == -1); + assert (r.a[2] == -1); + assert (r.a[3] == 0); + assert (r.a[4] == 0); + assert (r.a[5] == 0); + assert (r.a[6] == -1); + assert (r.a[7] == -1); +} + +static void test_pextrh_u (void) +{ + uint16x4_encap_t s; + uint16x4_encap_t r; + s.a[0] = 40; + s.a[1] = 41; + s.a[2] = 42; + s.a[3] = 43; + r.v = pextrh_u (s.v, 1); + assert (r.a[0] == 41); + assert (r.a[1] == 0); + assert (r.a[2] == 0); + assert (r.a[3] == 0); +} + +static void test_pextrh_s (void) +{ + int16x4_encap_t s; + int16x4_encap_t r; + s.a[0] = -40; + s.a[1] = -41; + s.a[2] = -42; + s.a[3] = -43; + r.v = pextrh_s (s.v, 2); + assert (r.a[0] == -42); + assert (r.a[1] == 0); + assert (r.a[2] == 0); + assert (r.a[3] == 0); +} + +static void test_pinsrh_0123_u (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 42; + s.a[1] = 0; + s.a[2] = 0; + s.a[3] = 0; + t.a[0] = 0; + t.a[1] = 0; + t.a[2] = 0; + t.a[3] = 0; + r.v = pinsrh_0_u (t.v, s.v); + r.v = pinsrh_1_u (r.v, s.v); + r.v = pinsrh_2_u (r.v, s.v); + r.v = pinsrh_3_u (r.v, s.v); + assert (r.a[0] == 42); + assert (r.a[1] == 42); + assert (r.a[2] == 42); + assert (r.a[3] == 42); +} + +static void test_pinsrh_0123_s (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -42; + s.a[1] = 0; + s.a[2] = 0; + s.a[3] = 0; + t.a[0] = 0; + t.a[1] = 0; + t.a[2] = 0; + t.a[3] = 0; + r.v = pinsrh_0_s (t.v, s.v); + r.v = pinsrh_1_s (r.v, s.v); + r.v = pinsrh_2_s (r.v, s.v); + r.v = pinsrh_3_s (r.v, s.v); + assert (r.a[0] == -42); + assert (r.a[1] == -42); + assert (r.a[2] == -42); + assert (r.a[3] == -42); +} + +static void test_pmaddhw (void) +{ + int16x4_encap_t s, t; + int32x2_encap_t r; + s.a[0] = -5; + s.a[1] = -4; + s.a[2] = -3; + s.a[3] = -2; + t.a[0] = 10; + t.a[1] = 11; + t.a[2] = 12; + t.a[3] = 13; + r.v = pmaddhw (s.v, t.v); + assert (r.a[0] == (-5*10 + -4*11)); + assert (r.a[1] == (-3*12 + -2*13)); +} + +static void test_pmaxsh (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -20; + s.a[1] = 40; + s.a[2] = -10; + s.a[3] = 50; + t.a[0] = 20; + t.a[1] = -40; + t.a[2] = 10; + t.a[3] = -50; + r.v = pmaxsh (s.v, t.v); + assert (r.a[0] == 20); + assert (r.a[1] == 40); + assert (r.a[2] == 10); + assert (r.a[3] == 50); +} + +static void test_pmaxub (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 10; + s.a[1] = 20; + s.a[2] = 30; + s.a[3] = 40; + s.a[4] = 50; + s.a[5] = 60; + s.a[6] = 70; + s.a[7] = 80; + t.a[0] = 80; + t.a[1] = 70; + t.a[2] = 60; + t.a[3] = 50; + t.a[4] = 40; + t.a[5] = 30; + t.a[6] = 20; + t.a[7] = 10; + r.v = pmaxub (s.v, t.v); + assert (r.a[0] == 80); + assert (r.a[1] == 70); + assert (r.a[2] == 60); + assert (r.a[3] == 50); + assert (r.a[4] == 50); + assert (r.a[5] == 60); + assert (r.a[6] == 70); + assert (r.a[7] == 80); +} + +static void test_pminsh (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -20; + s.a[1] = 40; + s.a[2] = -10; + s.a[3] = 50; + t.a[0] = 20; + t.a[1] = -40; + t.a[2] = 10; + t.a[3] = -50; + r.v = pminsh (s.v, t.v); + assert (r.a[0] == -20); + assert (r.a[1] == -40); + assert (r.a[2] == -10); + assert (r.a[3] == -50); +} + +static void test_pminub (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 10; + s.a[1] = 20; + s.a[2] = 30; + s.a[3] = 40; + s.a[4] = 50; + s.a[5] = 60; + s.a[6] = 70; + s.a[7] = 80; + t.a[0] = 80; + t.a[1] = 70; + t.a[2] = 60; + t.a[3] = 50; + t.a[4] = 40; + t.a[5] = 30; + t.a[6] = 20; + t.a[7] = 10; + r.v = pminub (s.v, t.v); + assert (r.a[0] == 10); + assert (r.a[1] == 20); + assert (r.a[2] == 30); + assert (r.a[3] == 40); + assert (r.a[4] == 40); + assert (r.a[5] == 30); + assert (r.a[6] == 20); + assert (r.a[7] == 10); +} + +static void test_pmovmskb_u (void) +{ + uint8x8_encap_t s; + uint8x8_encap_t r; + s.a[0] = 0xf0; + s.a[1] = 0x40; + s.a[2] = 0xf0; + s.a[3] = 0x40; + s.a[4] = 0xf0; + s.a[5] = 0x40; + s.a[6] = 0xf0; + s.a[7] = 0x40; + r.v = pmovmskb_u (s.v); + assert (r.a[0] == 0x55); + assert (r.a[1] == 0); + assert (r.a[2] == 0); + assert (r.a[3] == 0); + assert (r.a[4] == 0); + assert (r.a[5] == 0); + assert (r.a[6] == 0); + assert (r.a[7] == 0); +} + +static void test_pmovmskb_s (void) +{ + int8x8_encap_t s; + int8x8_encap_t r; + s.a[0] = -1; + s.a[1] = 1; + s.a[2] = -1; + s.a[3] = 1; + s.a[4] = -1; + s.a[5] = 1; + s.a[6] = -1; + s.a[7] = 1; + r.v = pmovmskb_s (s.v); + assert (r.a[0] == 0x55); + assert (r.a[1] == 0); + assert (r.a[2] == 0); + assert (r.a[3] == 0); + assert (r.a[4] == 0); + assert (r.a[5] == 0); + assert (r.a[6] == 0); + assert (r.a[7] == 0); +} + +static void test_pmulhuh (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 0xff00; + s.a[1] = 0xff00; + s.a[2] = 0xff00; + s.a[3] = 0xff00; + t.a[0] = 16; + t.a[1] = 16; + t.a[2] = 16; + t.a[3] = 16; + r.v = pmulhuh (s.v, t.v); + assert (r.a[0] == 0x000f); + assert (r.a[1] == 0x000f); + assert (r.a[2] == 0x000f); + assert (r.a[3] == 0x000f); +} + +static void test_pmulhh (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = 0x0ff0; + s.a[1] = 0x0ff0; + s.a[2] = 0x0ff0; + s.a[3] = 0x0ff0; + t.a[0] = -16*16; + t.a[1] = -16*16; + t.a[2] = -16*16; + t.a[3] = -16*16; + r.v = pmulhh (s.v, t.v); + assert (r.a[0] == -16); + assert (r.a[1] == -16); + assert (r.a[2] == -16); + assert (r.a[3] == -16); +} + +static void test_pmullh (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = 0x0ff0; + s.a[1] = 0x0ff0; + s.a[2] = 0x0ff0; + s.a[3] = 0x0ff0; + t.a[0] = -16*16; + t.a[1] = -16*16; + t.a[2] = -16*16; + t.a[3] = -16*16; + r.v = pmullh (s.v, t.v); + assert (r.a[0] == 4096); + assert (r.a[1] == 4096); + assert (r.a[2] == 4096); + assert (r.a[3] == 4096); +} + +static void test_pmuluw (void) +{ + uint32x2_encap_t s, t; + uint64_t r; + s.a[0] = 0xdeadbeef; + s.a[1] = 0; + t.a[0] = 0x0f00baaa; + t.a[1] = 0; + r = pmuluw (s.v, t.v); + assert (r == 0xd0cd08e1d1a70b6ull); +} + +static void test_pasubub (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 10; + s.a[1] = 20; + s.a[2] = 30; + s.a[3] = 40; + s.a[4] = 50; + s.a[5] = 60; + s.a[6] = 70; + s.a[7] = 80; + t.a[0] = 80; + t.a[1] = 70; + t.a[2] = 60; + t.a[3] = 50; + t.a[4] = 40; + t.a[5] = 30; + t.a[6] = 20; + t.a[7] = 10; + r.v = pasubub (s.v, t.v); + assert (r.a[0] == 70); + assert (r.a[1] == 50); + assert (r.a[2] == 30); + assert (r.a[3] == 10); + assert (r.a[4] == 10); + assert (r.a[5] == 30); + assert (r.a[6] == 50); + assert (r.a[7] == 70); +} + +static void test_biadd (void) +{ + uint8x8_encap_t s; + uint16x4_encap_t r; + s.a[0] = 10; + s.a[1] = 20; + s.a[2] = 30; + s.a[3] = 40; + s.a[4] = 50; + s.a[5] = 60; + s.a[6] = 70; + s.a[7] = 80; + r.v = biadd (s.v); + assert (r.a[0] == 360); + assert (r.a[1] == 0); + assert (r.a[2] == 0); + assert (r.a[3] == 0); +} + +static void test_psadbh (void) +{ + uint8x8_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 10; + s.a[1] = 20; + s.a[2] = 30; + s.a[3] = 40; + s.a[4] = 50; + s.a[5] = 60; + s.a[6] = 70; + s.a[7] = 80; + t.a[0] = 80; + t.a[1] = 70; + t.a[2] = 60; + t.a[3] = 50; + t.a[4] = 40; + t.a[5] = 30; + t.a[6] = 20; + t.a[7] = 10; + r.v = psadbh (s.v, t.v); + assert (r.a[0] == 0x0140); + assert (r.a[1] == 0); + assert (r.a[2] == 0); + assert (r.a[3] == 0); +} + +static void test_pshufh_u (void) +{ + uint16x4_encap_t s; + uint16x4_encap_t r; + s.a[0] = 1; + s.a[1] = 2; + s.a[2] = 3; + s.a[3] = 4; + r.a[0] = 0; + r.a[1] = 0; + r.a[2] = 0; + r.a[3] = 0; + r.v = pshufh_u (r.v, s.v, 0xe5); + assert (r.a[0] == 2); + assert (r.a[1] == 2); + assert (r.a[2] == 3); + assert (r.a[3] == 4); +} + +static void test_pshufh_s (void) +{ + int16x4_encap_t s; + int16x4_encap_t r; + s.a[0] = -1; + s.a[1] = 2; + s.a[2] = -3; + s.a[3] = 4; + r.a[0] = 0; + r.a[1] = 0; + r.a[2] = 0; + r.a[3] = 0; + r.v = pshufh_s (r.v, s.v, 0xe5); + assert (r.a[0] == 2); + assert (r.a[1] == 2); + assert (r.a[2] == -3); + assert (r.a[3] == 4); +} + +static void test_psllh_u (void) +{ + uint16x4_encap_t s; + uint16x4_encap_t r; + s.a[0] = 0xffff; + s.a[1] = 0xffff; + s.a[2] = 0xffff; + s.a[3] = 0xffff; + r.v = psllh_u (s.v, 1); + assert (r.a[0] == 0xfffe); + assert (r.a[1] == 0xfffe); + assert (r.a[2] == 0xfffe); + assert (r.a[3] == 0xfffe); +} + +static void test_psllw_u (void) +{ + uint32x2_encap_t s; + uint32x2_encap_t r; + s.a[0] = 0xffffffff; + s.a[1] = 0xffffffff; + r.v = psllw_u (s.v, 2); + assert (r.a[0] == 0xfffffffc); + assert (r.a[1] == 0xfffffffc); +} + +static void test_psllh_s (void) +{ + int16x4_encap_t s; + int16x4_encap_t r; + s.a[0] = -1; + s.a[1] = -1; + s.a[2] = -1; + s.a[3] = -1; + r.v = psllh_s (s.v, 1); + assert (r.a[0] == -2); + assert (r.a[1] == -2); + assert (r.a[2] == -2); + assert (r.a[3] == -2); +} + +static void test_psllw_s (void) +{ + int32x2_encap_t s; + int32x2_encap_t r; + s.a[0] = -1; + s.a[1] = -1; + r.v = psllw_s (s.v, 2); + assert (r.a[0] == -4); + assert (r.a[1] == -4); +} + +static void test_psrah_u (void) +{ + uint16x4_encap_t s; + uint16x4_encap_t r; + s.a[0] = 0xffef; + s.a[1] = 0xffef; + s.a[2] = 0xffef; + s.a[3] = 0xffef; + r.v = psrah_u (s.v, 1); + assert (r.a[0] == 0xfff7); + assert (r.a[1] == 0xfff7); + assert (r.a[2] == 0xfff7); + assert (r.a[3] == 0xfff7); +} + +static void test_psraw_u (void) +{ + uint32x2_encap_t s; + uint32x2_encap_t r; + s.a[0] = 0xffffffef; + s.a[1] = 0xffffffef; + r.v = psraw_u (s.v, 1); + assert (r.a[0] == 0xfffffff7); + assert (r.a[1] == 0xfffffff7); +} + +static void test_psrah_s (void) +{ + int16x4_encap_t s; + int16x4_encap_t r; + s.a[0] = -2; + s.a[1] = -2; + s.a[2] = -2; + s.a[3] = -2; + r.v = psrah_s (s.v, 1); + assert (r.a[0] == -1); + assert (r.a[1] == -1); + assert (r.a[2] == -1); + assert (r.a[3] == -1); +} + +static void test_psraw_s (void) +{ + int32x2_encap_t s; + int32x2_encap_t r; + s.a[0] = -2; + s.a[1] = -2; + r.v = psraw_s (s.v, 1); + assert (r.a[0] == -1); + assert (r.a[1] == -1); +} + +static void test_psrlh_u (void) +{ + uint16x4_encap_t s; + uint16x4_encap_t r; + s.a[0] = 0xffef; + s.a[1] = 0xffef; + s.a[2] = 0xffef; + s.a[3] = 0xffef; + r.v = psrlh_u (s.v, 1); + assert (r.a[0] == 0x7ff7); + assert (r.a[1] == 0x7ff7); + assert (r.a[2] == 0x7ff7); + assert (r.a[3] == 0x7ff7); +} + +static void test_psrlw_u (void) +{ + uint32x2_encap_t s; + uint32x2_encap_t r; + s.a[0] = 0xffffffef; + s.a[1] = 0xffffffef; + r.v = psrlw_u (s.v, 1); + assert (r.a[0] == 0x7ffffff7); + assert (r.a[1] == 0x7ffffff7); +} + +static void test_psrlh_s (void) +{ + int16x4_encap_t s; + int16x4_encap_t r; + s.a[0] = -1; + s.a[1] = -1; + s.a[2] = -1; + s.a[3] = -1; + r.v = psrlh_s (s.v, 1); + assert (r.a[0] == INT16x4_MAX); + assert (r.a[1] == INT16x4_MAX); + assert (r.a[2] == INT16x4_MAX); + assert (r.a[3] == INT16x4_MAX); +} + +static void test_psrlw_s (void) +{ + int32x2_encap_t s; + int32x2_encap_t r; + s.a[0] = -1; + s.a[1] = -1; + r.v = psrlw_s (s.v, 1); + assert (r.a[0] == INT32x2_MAX); + assert (r.a[1] == INT32x2_MAX); +} + +static void test_psubw_u (void) +{ + uint32x2_encap_t s, t; + uint32x2_encap_t r; + s.a[0] = 3; + s.a[1] = 4; + t.a[0] = 2; + t.a[1] = 1; + r.v = psubw_u (s.v, t.v); + assert (r.a[0] == 1); + assert (r.a[1] == 3); +} + +static void test_psubw_s (void) +{ + int32x2_encap_t s, t; + int32x2_encap_t r; + s.a[0] = -2; + s.a[1] = -1; + t.a[0] = 3; + t.a[1] = -4; + r.v = psubw_s (s.v, t.v); + assert (r.a[0] == -5); + assert (r.a[1] == 3); +} + +static void test_psubh_u (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 5; + s.a[1] = 6; + s.a[2] = 7; + s.a[3] = 8; + t.a[0] = 1; + t.a[1] = 2; + t.a[2] = 3; + t.a[3] = 4; + r.v = psubh_u (s.v, t.v); + assert (r.a[0] == 4); + assert (r.a[1] == 4); + assert (r.a[2] == 4); + assert (r.a[3] == 4); +} + +static void test_psubh_s (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -10; + s.a[1] = -20; + s.a[2] = -30; + s.a[3] = -40; + t.a[0] = 1; + t.a[1] = 2; + t.a[2] = 3; + t.a[3] = 4; + r.v = psubh_s (s.v, t.v); + assert (r.a[0] == -11); + assert (r.a[1] == -22); + assert (r.a[2] == -33); + assert (r.a[3] == -44); +} + +static void test_psubb_u (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 10; + s.a[1] = 11; + s.a[2] = 12; + s.a[3] = 13; + s.a[4] = 14; + s.a[5] = 15; + s.a[6] = 16; + s.a[7] = 17; + t.a[0] = 1; + t.a[1] = 2; + t.a[2] = 3; + t.a[3] = 4; + t.a[4] = 5; + t.a[5] = 6; + t.a[6] = 7; + t.a[7] = 8; + r.v = psubb_u (s.v, t.v); + assert (r.a[0] == 9); + assert (r.a[1] == 9); + assert (r.a[2] == 9); + assert (r.a[3] == 9); + assert (r.a[4] == 9); + assert (r.a[5] == 9); + assert (r.a[6] == 9); + assert (r.a[7] == 9); +} + +static void test_psubb_s (void) +{ + int8x8_encap_t s, t; + int8x8_encap_t r; + s.a[0] = -10; + s.a[1] = -20; + s.a[2] = -30; + s.a[3] = -40; + s.a[4] = -50; + s.a[5] = -60; + s.a[6] = -70; + s.a[7] = -80; + t.a[0] = 1; + t.a[1] = 2; + t.a[2] = 3; + t.a[3] = 4; + t.a[4] = 5; + t.a[5] = 6; + t.a[6] = 7; + t.a[7] = 8; + r.v = psubb_s (s.v, t.v); + assert (r.a[0] == -11); + assert (r.a[1] == -22); + assert (r.a[2] == -33); + assert (r.a[3] == -44); + assert (r.a[4] == -55); + assert (r.a[5] == -66); + assert (r.a[6] == -77); + assert (r.a[7] == -88); +} + +static void test_psubd_u (void) +{ + uint64_t d = 789012; + uint64_t e = 123456; + uint64_t r; + r = psubd_u (d, e); + assert (r == 665556); +} + +static void test_psubd_s (void) +{ + int64_t d = 123456; + int64_t e = -789012; + int64_t r; + r = psubd_s (d, e); + assert (r == 912468); +} + +static void test_psubsh (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -1; + s.a[1] = 0; + s.a[2] = 1; + s.a[3] = 2; + t.a[0] = -INT16x4_MAX; + t.a[1] = -INT16x4_MAX; + t.a[2] = -INT16x4_MAX; + t.a[3] = -INT16x4_MAX; + r.v = psubsh (s.v, t.v); + assert (r.a[0] == INT16x4_MAX - 1); + assert (r.a[1] == INT16x4_MAX); + assert (r.a[2] == INT16x4_MAX); + assert (r.a[3] == INT16x4_MAX); +} + +static void test_psubsb (void) +{ + int8x8_encap_t s, t; + int8x8_encap_t r; + s.a[0] = -6; + s.a[1] = -5; + s.a[2] = -4; + s.a[3] = -3; + s.a[4] = -2; + s.a[5] = -1; + s.a[6] = 0; + s.a[7] = 1; + t.a[0] = -INT8x8_MAX; + t.a[1] = -INT8x8_MAX; + t.a[2] = -INT8x8_MAX; + t.a[3] = -INT8x8_MAX; + t.a[4] = -INT8x8_MAX; + t.a[5] = -INT8x8_MAX; + t.a[6] = -INT8x8_MAX; + t.a[7] = -INT8x8_MAX; + r.v = psubsb (s.v, t.v); + assert (r.a[0] == INT8x8_MAX - 6); + assert (r.a[1] == INT8x8_MAX - 5); + assert (r.a[2] == INT8x8_MAX - 4); + assert (r.a[3] == INT8x8_MAX - 3); + assert (r.a[4] == INT8x8_MAX - 2); + assert (r.a[5] == INT8x8_MAX - 1); + assert (r.a[6] == INT8x8_MAX); + assert (r.a[7] == INT8x8_MAX); +} + +static void test_psubush (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 0; + s.a[1] = 1; + s.a[2] = 2; + s.a[3] = 3; + t.a[0] = 1; + t.a[1] = 1; + t.a[2] = 3; + t.a[3] = 3; + r.v = psubush (s.v, t.v); + assert (r.a[0] == 0); + assert (r.a[1] == 0); + assert (r.a[2] == 0); + assert (r.a[3] == 0); +} + +static void test_psubusb (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 0; + s.a[1] = 1; + s.a[2] = 2; + s.a[3] = 3; + s.a[4] = 4; + s.a[5] = 5; + s.a[6] = 6; + s.a[7] = 7; + t.a[0] = 1; + t.a[1] = 1; + t.a[2] = 3; + t.a[3] = 3; + t.a[4] = 5; + t.a[5] = 5; + t.a[6] = 7; + t.a[7] = 7; + r.v = psubusb (s.v, t.v); + assert (r.a[0] == 0); + assert (r.a[1] == 0); + assert (r.a[2] == 0); + assert (r.a[3] == 0); + assert (r.a[4] == 0); + assert (r.a[5] == 0); + assert (r.a[6] == 0); + assert (r.a[7] == 0); +} + +static void test_punpckhbh_s (void) +{ + int8x8_encap_t s, t; + int8x8_encap_t r; + s.a[0] = -1; + s.a[1] = -3; + s.a[2] = -5; + s.a[3] = -7; + s.a[4] = -9; + s.a[5] = -11; + s.a[6] = -13; + s.a[7] = -15; + t.a[0] = 2; + t.a[1] = 4; + t.a[2] = 6; + t.a[3] = 8; + t.a[4] = 10; + t.a[5] = 12; + t.a[6] = 14; + t.a[7] = 16; + r.v = punpckhbh_s (s.v, t.v); + assert (r.a[0] == -9); + assert (r.a[1] == 10); + assert (r.a[2] == -11); + assert (r.a[3] == 12); + assert (r.a[4] == -13); + assert (r.a[5] == 14); + assert (r.a[6] == -15); + assert (r.a[7] == 16); +} + +static void test_punpckhbh_u (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 1; + s.a[1] = 3; + s.a[2] = 5; + s.a[3] = 7; + s.a[4] = 9; + s.a[5] = 11; + s.a[6] = 13; + s.a[7] = 15; + t.a[0] = 2; + t.a[1] = 4; + t.a[2] = 6; + t.a[3] = 8; + t.a[4] = 10; + t.a[5] = 12; + t.a[6] = 14; + t.a[7] = 16; + r.v = punpckhbh_u (s.v, t.v); + assert (r.a[0] == 9); + assert (r.a[1] == 10); + assert (r.a[2] == 11); + assert (r.a[3] == 12); + assert (r.a[4] == 13); + assert (r.a[5] == 14); + assert (r.a[6] == 15); + assert (r.a[7] == 16); +} + +static void test_punpckhhw_s (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -1; + s.a[1] = 3; + s.a[2] = -5; + s.a[3] = 7; + t.a[0] = -2; + t.a[1] = 4; + t.a[2] = -6; + t.a[3] = 8; + r.v = punpckhhw_s (s.v, t.v); + assert (r.a[0] == -5); + assert (r.a[1] == -6); + assert (r.a[2] == 7); + assert (r.a[3] == 8); +} + +static void test_punpckhhw_u (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 1; + s.a[1] = 3; + s.a[2] = 5; + s.a[3] = 7; + t.a[0] = 2; + t.a[1] = 4; + t.a[2] = 6; + t.a[3] = 8; + r.v = punpckhhw_u (s.v, t.v); + assert (r.a[0] == 5); + assert (r.a[1] == 6); + assert (r.a[2] == 7); + assert (r.a[3] == 8); +} + +static void test_punpckhwd_s (void) +{ + int32x2_encap_t s, t; + int32x2_encap_t r; + s.a[0] = 1; + s.a[1] = 3; + t.a[0] = 2; + t.a[1] = -4; + r.v = punpckhwd_s (s.v, t.v); + assert (r.a[0] == 3); + assert (r.a[1] == -4); +} + +static void test_punpckhwd_u (void) +{ + uint32x2_encap_t s, t; + uint32x2_encap_t r; + s.a[0] = 1; + s.a[1] = 3; + t.a[0] = 2; + t.a[1] = 4; + r.v = punpckhwd_u (s.v, t.v); + assert (r.a[0] == 3); + assert (r.a[1] == 4); +} + +static void test_punpcklbh_s (void) +{ + int8x8_encap_t s, t; + int8x8_encap_t r; + s.a[0] = -1; + s.a[1] = -3; + s.a[2] = -5; + s.a[3] = -7; + s.a[4] = -9; + s.a[5] = -11; + s.a[6] = -13; + s.a[7] = -15; + t.a[0] = 2; + t.a[1] = 4; + t.a[2] = 6; + t.a[3] = 8; + t.a[4] = 10; + t.a[5] = 12; + t.a[6] = 14; + t.a[7] = 16; + r.v = punpcklbh_s (s.v, t.v); + assert (r.a[0] == -1); + assert (r.a[1] == 2); + assert (r.a[2] == -3); + assert (r.a[3] == 4); + assert (r.a[4] == -5); + assert (r.a[5] == 6); + assert (r.a[6] == -7); + assert (r.a[7] == 8); +} + +static void test_punpcklbh_u (void) +{ + uint8x8_encap_t s, t; + uint8x8_encap_t r; + s.a[0] = 1; + s.a[1] = 3; + s.a[2] = 5; + s.a[3] = 7; + s.a[4] = 9; + s.a[5] = 11; + s.a[6] = 13; + s.a[7] = 15; + t.a[0] = 2; + t.a[1] = 4; + t.a[2] = 6; + t.a[3] = 8; + t.a[4] = 10; + t.a[5] = 12; + t.a[6] = 14; + t.a[7] = 16; + r.v = punpcklbh_u (s.v, t.v); + assert (r.a[0] == 1); + assert (r.a[1] == 2); + assert (r.a[2] == 3); + assert (r.a[3] == 4); + assert (r.a[4] == 5); + assert (r.a[5] == 6); + assert (r.a[6] == 7); + assert (r.a[7] == 8); +} + +static void test_punpcklhw_s (void) +{ + int16x4_encap_t s, t; + int16x4_encap_t r; + s.a[0] = -1; + s.a[1] = 3; + s.a[2] = -5; + s.a[3] = 7; + t.a[0] = -2; + t.a[1] = 4; + t.a[2] = -6; + t.a[3] = 8; + r.v = punpcklhw_s (s.v, t.v); + assert (r.a[0] == -1); + assert (r.a[1] == -2); + assert (r.a[2] == 3); + assert (r.a[3] == 4); +} + +static void test_punpcklhw_u (void) +{ + uint16x4_encap_t s, t; + uint16x4_encap_t r; + s.a[0] = 1; + s.a[1] = 3; + s.a[2] = 5; + s.a[3] = 7; + t.a[0] = 2; + t.a[1] = 4; + t.a[2] = 6; + t.a[3] = 8; + r.v = punpcklhw_u (s.v, t.v); + assert (r.a[0] == 1); + assert (r.a[1] == 2); + assert (r.a[2] == 3); + assert (r.a[3] == 4); +} + +static void test_punpcklwd_s (void) +{ + int32x2_encap_t s, t; + int32x2_encap_t r; + s.a[0] = 1; + s.a[1] = 3; + t.a[0] = -2; + t.a[1] = 4; + r.v = punpcklwd_s (s.v, t.v); + assert (r.a[0] == 1); + assert (r.a[1] == -2); +} + +static void test_punpcklwd_u (void) +{ + uint32x2_encap_t s, t; + uint32x2_encap_t r; + s.a[0] = 1; + s.a[1] = 3; + t.a[0] = 2; + t.a[1] = 4; + r.v = punpcklwd_u (s.v, t.v); + assert (r.a[0] == 1); + assert (r.a[1] == 2); +} + +int main (void) +{ + test_packsswh (); + test_packsshb (); + test_packushb (); + test_paddw_u (); + test_paddw_s (); + test_paddh_u (); + test_paddh_s (); + test_paddb_u (); + test_paddb_s (); + test_paddd_u (); + test_paddd_s (); + test_paddsh (); + test_paddsb (); + test_paddush (); + test_paddusb (); + test_pandn_ud (); + test_pandn_sd (); + test_pandn_uw (); + test_pandn_sw (); + test_pandn_uh (); + test_pandn_sh (); + test_pandn_ub (); + test_pandn_sb (); + test_pavgh (); + test_pavgb (); + test_pcmpeqw_u (); + test_pcmpeqh_u (); + test_pcmpeqb_u (); + test_pcmpeqw_s (); + test_pcmpeqh_s (); + test_pcmpeqb_s (); + test_pcmpgtw_u (); + test_pcmpgth_u (); + test_pcmpgtb_u (); + test_pcmpgtw_s (); + test_pcmpgth_s (); + test_pcmpgtb_s (); + test_pextrh_u (); + test_pextrh_s (); + test_pinsrh_0123_u (); + test_pinsrh_0123_s (); + test_pmaddhw (); + test_pmaxsh (); + test_pmaxub (); + test_pminsh (); + test_pminub (); + test_pmovmskb_u (); + test_pmovmskb_s (); + test_pmulhuh (); + test_pmulhh (); + test_pmullh (); + test_pmuluw (); + test_pasubub (); + test_biadd (); + test_psadbh (); + test_pshufh_u (); + test_pshufh_s (); + test_psllh_u (); + test_psllw_u (); + test_psllh_s (); + test_psllw_s (); + test_psrah_u (); + test_psraw_u (); + test_psrah_s (); + test_psraw_s (); + test_psrlh_u (); + test_psrlw_u (); + test_psrlh_s (); + test_psrlw_s (); + test_psubw_u (); + test_psubw_s (); + test_psubh_u (); + test_psubh_s (); + test_psubb_u (); + test_psubb_s (); + test_psubd_u (); + test_psubd_s (); + test_psubsh (); + test_psubsb (); + test_psubush (); + test_psubusb (); + test_punpckhbh_s (); + test_punpckhbh_u (); + test_punpckhhw_s (); + test_punpckhhw_u (); + test_punpckhwd_s (); + test_punpckhwd_u (); + test_punpcklbh_s (); + test_punpcklbh_u (); + test_punpcklhw_s (); + test_punpcklhw_u (); + test_punpcklwd_s (); + test_punpcklwd_u (); + return 0; +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 04717c6f099..30804858a42 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1249,6 +1249,17 @@ proc check_effective_target_arm_neon_hw { } { } "-mfpu=neon -mfloat-abi=softfp"] } +# Return 1 if this a Loongson-2E or -2F target using an ABI that supports +# the Loongson vector modes. + +proc check_effective_target_mips_loongson { } { + return [check_no_compiler_messages loongson assembly { + #if !defined(__mips_loongson_vector_rev) + #error FOO + #endif + }] +} + # Return 1 if this is a PowerPC target with floating-point registers. proc check_effective_target_powerpc_fprs { } { |