summary refs log tree commit diff
path: root/gcc/config/rs6000/rs6000.c
diff options
context:
space:
mode:
author meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> 2016-07-28 21:02:06 +0000
committer meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> 2016-07-28 21:02:06 +0000
commit a52bb7a0277682cb25b582dc4e3740d3031d1d43 (patch)
tree 9f3ec03c90e2cdf5132f804278fcdbcf1dc11be9 /gcc/config/rs6000/rs6000.c
parent 4c64f327ae15d582dbf0752c4976526c8e646d7a (diff)
download gcc-a52bb7a0277682cb25b582dc4e3740d3031d1d43.tar.gz
[gcc]
2016-07-28 Michael Meissner <meissner@linux.vnet.ibm.com> * config/rs6000/rs6000-protos.h (rs6000_split_vec_extract_var): New declaration. * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): Add support for vec_extract of vector double or vector long having a variable element number on 64-bit ISA 2.07 systems or newer. * config/rs6000/rs6000.c (rs6000_expand_vector_extract): Likewise. (rs6000_split_vec_extract_var): New function to split a vec_extract built-in function with variable element number. (rtx_is_swappable_p): Variable vec_extracts and shifts are not swappable. * config/rs6000/vsx.md (UNSPEC_VSX_VSLO): New unspec. (UNSPEC_VSX_EXTRACT): Likewise. (vsx_extract_<mode>, VSX_D iterator): Fix constraints to allow direct move instructions to be generated on 64-bit ISA 2.07 systems and newer, and to take advantage of the ISA 3.0 MFVSRLD instruction. (vsx_vslo_<mode>): New insn to do VSLO on V2DFmode and V2DImode arguments for vec_extract variable element. (vsx_extract_<mode>_var, VSX_D iterator): New insn to support vec_extract with variable element on V2DFmode and V2DImode vectors. * config/rs6000/rs6000.h (TARGET_VEXTRACTUB): Remove -mupper-regs-df requirement, since it isn't needed. (TARGET_DIRECT_MOVE_64BIT): New macro to say whether we can do direct moves on 64-bit systems, which allows optimization of vec_extract on 64-bit ISA 2.07 systems and newer. [gcc/testsuite] 2016-07-28 Michael Meissner <meissner@linux.vnet.ibm.com> * gcc.target/powerpc/vec-extract-1.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@238838 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/rs6000/rs6000.c')
-rw-r--r-- gcc/config/rs6000/rs6000.c 121
1 files changed, 119 insertions, 2 deletions
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index e5d8ad07198..e9447f7a8b9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6959,8 +6959,31 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
emit_insn (gen_vsx_extract_v4si (target, vec, elt));
return;
}
- else
- break;
+ break;
+ }
+ }
+ else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
+ && TARGET_DIRECT_MOVE_64BIT)
+ {
+ if (GET_MODE (elt) != DImode)
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ convert_move (tmp, elt, 0);
+ elt = tmp;
+ }
+
+ switch (mode)
+ {
+ case V2DFmode:
+ emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
+ return;
+
+ case V2DImode:
+ emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
+ return;
+
+ default:
+ gcc_unreachable ();
}
}
@@ -6978,6 +7001,99 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
}
+/* Split a variable vec_extract operation into the component instructions.
+
+   DEST is the scalar result operand (its mode gives the element size),
+   SRC is the vector being indexed and ELEMENT holds the run-time element
+   number.  TMP_GPR and TMP_ALTIVEC are scratch registers: TMP_GPR is used
+   to compute the shift count and TMP_ALTIVEC carries that count into the
+   VSLO.  Only a REG or SUBREG SRC whose mode is V2DFmode or V2DImode is
+   handled; every other case ends in gcc_unreachable. */
+
+void
+rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
+ rtx tmp_altivec)
+{
+ machine_mode mode = GET_MODE (src); /* Mode of the vector. */
+ machine_mode scalar_mode = GET_MODE (dest); /* Mode of one element. */
+ unsigned scalar_size = GET_MODE_SIZE (scalar_mode); /* Element bytes. */
+ int byte_shift = exact_log2 (scalar_size);
+
+ gcc_assert (byte_shift >= 0);
+
+ if (REG_P (src) || SUBREG_P (src))
+ {
+ int bit_shift = byte_shift + 3; /* Byte shift + 3 gives a bit shift. */
+ rtx element2; /* Element number after any little endian adjustment. */
+
+ gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
+
+ /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
+ an XOR, otherwise we need to subtract. The shift amount is so VSLO
+ will shift the element into the upper position (adding 3 to convert a
+ byte shift into a bit shift).
+
+ In the subtract case the element number is first masked with the
+ number of elements minus 1, and that masked value is then subtracted
+ from the number of elements minus 1. In both cases the big endian
+ path uses ELEMENT unchanged. */
+ if (scalar_size == 8)
+ {
+ if (!VECTOR_ELT_ORDER_BIG)
+ {
+ emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
+ element2 = tmp_gpr;
+ }
+ else
+ element2 = element;
+
+ /* Generate RLDIC directly to shift left 6 bits and retrieve 1
+ bit. The RTL built here is (element2 << 6) & 64, so TMP_GPR ends
+ up holding either 0 or 64, i.e. a shift of zero or one 64-bit
+ element, selected by the low bit of element2. */
+ emit_insn (gen_rtx_SET (tmp_gpr,
+ gen_rtx_AND (DImode,
+ gen_rtx_ASHIFT (DImode,
+ element2,
+ GEN_INT (6)),
+ GEN_INT (64))));
+ }
+ else
+ {
+ if (!VECTOR_ELT_ORDER_BIG)
+ {
+ rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
+
+ emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
+ emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
+ element2 = tmp_gpr;
+ }
+ else
+ element2 = element;
+
+ emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
+ }
+
+ /* Get the value into the lower byte of the Altivec register where VSLO
+ expects it. With TARGET_P9_VECTOR a V2DI splat of TMP_GPR is used.
+ Otherwise a V2DI concat of the value with itself is used: directly
+ from TMP_GPR when pseudos can still be created, else after first
+ moving TMP_GPR into a DImode reference to TMP_ALTIVEC's own register
+ number and concatenating from there. */
+ if (TARGET_P9_VECTOR)
+ emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
+ else if (can_create_pseudo_p ())
+ emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
+ else
+ {
+ rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
+ emit_move_insn (tmp_di, tmp_gpr);
+ emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
+ }
+
+ /* Do the VSLO to get the value into the final location. Only the two
+ 64-bit element vector modes have a case here. */
+ switch (mode)
+ {
+ case V2DFmode:
+ emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
+ return;
+
+ case V2DImode:
+ emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
+ return;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return; /* Not reached: each case above returns or is unreachable. */
+ }
+ else
+ gcc_unreachable (); /* SRC that is not a REG or SUBREG is not handled. */
+ }
+
/* Return TRUE if OP is an invalid SUBREG operation on the e500. */
bool
@@ -38640,6 +38756,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VSX_CVDPSPN:
case UNSPEC_VSX_CVSPDP:
case UNSPEC_VSX_CVSPDPN:
+ case UNSPEC_VSX_EXTRACT:
return 0;
case UNSPEC_VSPLT_DIRECT:
*special = SH_SPLAT;