diff options
author | meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-07-28 21:02:06 +0000 |
---|---|---|
committer | meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> | 2016-07-28 21:02:06 +0000 |
commit | a52bb7a0277682cb25b582dc4e3740d3031d1d43 (patch) | |
tree | 9f3ec03c90e2cdf5132f804278fcdbcf1dc11be9 /gcc/config/rs6000/rs6000.c | |
parent | 4c64f327ae15d582dbf0752c4976526c8e646d7a (diff) | |
download | gcc-a52bb7a0277682cb25b582dc4e3740d3031d1d43.tar.gz |
[gcc]
2016-07-28 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/rs6000-protos.h (rs6000_split_vec_extract_var):
New declaration.
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Add support for vec_extract of vector double or vector long having
a variable element number on 64-bit ISA 2.07 systems or newer.
* config/rs6000/rs6000.c (rs6000_expand_vector_extract):
Likewise.
(rs6000_split_vec_extract_var): New function to split a
vec_extract built-in function with variable element number.
(rtx_is_swappable_p): Variable vec_extracts and shifts are not
swappable.
* config/rs6000/vsx.md (UNSPEC_VSX_VSLO): New unspec.
(UNSPEC_VSX_EXTRACT): Likewise.
(vsx_extract_<mode>, VSX_D iterator): Fix constraints to allow
direct move instructions to be generated on 64-bit ISA 2.07
systems and newer, and to take advantage of the ISA 3.0 MFVSRLD
instruction.
(vsx_vslo_<mode>): New insn to do VSLO on V2DFmode and V2DImode
arguments for vec_extract variable element.
(vsx_extract_<mode>_var, VSX_D iterator): New insn to support
vec_extract with variable element on V2DFmode and V2DImode
vectors.
* config/rs6000/rs6000.h (TARGET_VEXTRACTUB): Remove
-mupper-regs-df requirement, since it isn't needed.
(TARGET_DIRECT_MOVE_64BIT): New macro to say whether we can
do direct moves on 64-bit systems, which allows optimization of
vec_extract on 64-bit ISA 2.07 systems and newer.
[gcc/testsuite]
2016-07-28 Michael Meissner <meissner@linux.vnet.ibm.com>
* gcc.target/powerpc/vec-extract-1.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@238838 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/rs6000/rs6000.c')
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 121 |
1 files changed, 119 insertions, 2 deletions
/* Reviewer annotation -- not part of the recorded patch.  The hunks below are
   the commit's changes to gcc/config/rs6000/rs6000.c, reproduced unchanged
   apart from restoring one diff line per source line (the original tab/space
   indentation was not recoverable, so GNU-style spacing is used).

   Hunk 1, rs6000_expand_vector_extract (only partly visible here): replaces
   an "else break;" with an unconditional "break;" and adds an arm taken when
   VECTOR_MEM_VSX_P (mode) holds, ELT is not a CONST_INT and
   TARGET_DIRECT_MOVE_64BIT is set.  That arm converts ELT into a new DImode
   register obtained from gen_reg_rtx (via convert_move) when its mode is not
   already DImode, then emits gen_vsx_extract_v2df_var or
   gen_vsx_extract_v2di_var according to MODE; any other mode reaches
   gcc_unreachable.

   Hunk 2, new function rs6000_split_vec_extract_var (DEST, SRC, ELEMENT,
   TMP_GPR, TMP_ALTIVEC), returning void:
     - MODE is taken from SRC and the scalar mode from DEST; the scalar size
       must satisfy exact_log2 (size) >= 0 (asserted).
     - SRC must be a REG or SUBREG, and TMP_GPR and TMP_ALTIVEC must be REGs;
       anything else trips gcc_unreachable or gcc_assert.
     - TMP_GPR receives the shift count.  For 8-byte scalars it is
       (ELEMENT << 6) & 64, with ELEMENT first XORed with 1 when
       VECTOR_ELT_ORDER_BIG is false.  For other sizes it is
       ELEMENT << (log2 (size) + 3), with ELEMENT first replaced by
       (nunits - 1) - (ELEMENT & (nunits - 1)) when VECTOR_ELT_ORDER_BIG is
       false, where nunits is GET_MODE_NUNITS (MODE).
     - TMP_GPR is copied into TMP_ALTIVEC with gen_vsx_splat_v2di when
       TARGET_P9_VECTOR, otherwise with gen_vsx_concat_v2di; when
       can_create_pseudo_p () is false the value is first moved into a DImode
       REG built from REGNO (TMP_ALTIVEC) and the concat reads that register.
     - Finally gen_vsx_vslo_v2df or gen_vsx_vslo_v2di is emitted on
       (DEST, SRC, TMP_ALTIVEC); every other MODE reaches gcc_unreachable.
   NOTE(review): because the final switch accepts only V2DFmode and V2DImode,
   the non-8-byte shift computation presumably cannot run to completion in
   this revision -- confirm against vsx.md.  The "return;" after the switch
   follows arms that all return or call gcc_unreachable.

   Hunk 3, rtx_is_swappable_p (only partly visible here): adds
   UNSPEC_VSX_EXTRACT to a group of case labels whose shared body is
   "return 0;".  */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index e5d8ad07198..e9447f7a8b9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6959,8 +6959,31 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
               emit_insn (gen_vsx_extract_v4si (target, vec, elt));
               return;
             }
-          else
-            break;
+          break;
+        }
+    }
+  else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
+           && TARGET_DIRECT_MOVE_64BIT)
+    {
+      if (GET_MODE (elt) != DImode)
+        {
+          rtx tmp = gen_reg_rtx (DImode);
+          convert_move (tmp, elt, 0);
+          elt = tmp;
+        }
+
+      switch (mode)
+        {
+        case V2DFmode:
+          emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
+          return;
+
+        case V2DImode:
+          emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
+          return;
+
+        default:
+          gcc_unreachable ();
         }
     }

@@ -6978,6 +7001,99 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
   emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
 }

+/* Split a variable vec_extract operation into the component instructions. */
+
+void
+rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
+                              rtx tmp_altivec)
+{
+  machine_mode mode = GET_MODE (src);
+  machine_mode scalar_mode = GET_MODE (dest);
+  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
+  int byte_shift = exact_log2 (scalar_size);
+
+  gcc_assert (byte_shift >= 0);
+
+  if (REG_P (src) || SUBREG_P (src))
+    {
+      int bit_shift = byte_shift + 3;
+      rtx element2;
+
+      gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
+
+      /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
+         an XOR, otherwise we need to subtract. The shift amount is so VSLO
+         will shift the element into the upper position (adding 3 to convert a
+         byte shift into a bit shift). */
+      if (scalar_size == 8)
+        {
+          if (!VECTOR_ELT_ORDER_BIG)
+            {
+              emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
+              element2 = tmp_gpr;
+            }
+          else
+            element2 = element;
+
+          /* Generate RLDIC directly to shift left 6 bits and retrieve 1
+             bit. */
+          emit_insn (gen_rtx_SET (tmp_gpr,
+                                  gen_rtx_AND (DImode,
+                                               gen_rtx_ASHIFT (DImode,
+                                                               element2,
+                                                               GEN_INT (6)),
+                                               GEN_INT (64))));
+        }
+      else
+        {
+          if (!VECTOR_ELT_ORDER_BIG)
+            {
+              rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
+
+              emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
+              emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
+              element2 = tmp_gpr;
+            }
+          else
+            element2 = element;
+
+          emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
+        }
+
+      /* Get the value into the lower byte of the Altivec register where VSLO
+         expects it. */
+      if (TARGET_P9_VECTOR)
+        emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
+      else if (can_create_pseudo_p ())
+        emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
+      else
+        {
+          rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
+          emit_move_insn (tmp_di, tmp_gpr);
+          emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
+        }
+
+      /* Do the VSLO to get the value into the final location. */
+      switch (mode)
+        {
+        case V2DFmode:
+          emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
+          return;
+
+        case V2DImode:
+          emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
+          return;
+
+        default:
+          gcc_unreachable ();
+        }
+
+      return;
+    }
+  else
+    gcc_unreachable ();
+}
+
 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */

 bool
@@ -38640,6 +38756,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
           case UNSPEC_VSX_CVDPSPN:
           case UNSPEC_VSX_CVSPDP:
           case UNSPEC_VSX_CVSPDPN:
+          case UNSPEC_VSX_EXTRACT:
             return 0;
           case UNSPEC_VSPLT_DIRECT:
             *special = SH_SPLAT; |