summaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--gcc/config/i386/i386.c25
-rw-r--r--gcc/config/i386/i386.h6
-rw-r--r--gcc/config/i386/mmx.md8
-rw-r--r--gcc/config/i386/sse.md140
4 files changed, 136 insertions, 43 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 46505cc4804..aed74be112e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3045,9 +3045,9 @@ override_options (bool main_args_p)
ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
/* Validate -mincoming-stack-boundary= value or default it to
- ABI_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
+ MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
if (ix86_force_align_arg_pointer)
- ix86_default_incoming_stack_boundary = ABI_STACK_BOUNDARY;
+ ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
else
ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
@@ -7287,7 +7287,8 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
frame->hard_frame_pointer_offset = offset;
- /* Set offset to aligned because the realigned frame tarts from here. */
+ /* Set offset to aligned because the realigned frame starts from
+ here. */
if (stack_realign_fp)
offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
@@ -7520,10 +7521,10 @@ ix86_update_stack_boundary (void)
/* Incoming stack alignment can be changed on individual functions
via force_align_arg_pointer attribute. We use the smallest
incoming stack boundary. */
- if (ix86_incoming_stack_boundary > ABI_STACK_BOUNDARY
+ if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
&& lookup_attribute (ix86_force_align_arg_pointer_string,
TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
- ix86_incoming_stack_boundary = ABI_STACK_BOUNDARY;
+ ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
/* Stack at entrance of main is aligned by runtime. We use the
smallest incoming stack boundary. */
@@ -7710,7 +7711,7 @@ ix86_expand_prologue (void)
if (stack_realign_fp)
{
int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
- gcc_assert (align_bytes > STACK_BOUNDARY / BITS_PER_UNIT);
+ gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
/* Align the stack. */
insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
@@ -25176,7 +25177,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
else
tmp = new_target;
- emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
+ emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
GEN_INT (1),
GEN_INT (one_var == 1 ? 0 : 1),
GEN_INT (one_var == 2 ? 0+4 : 1+4),
@@ -25740,7 +25741,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
/* target = X A B B */
ix86_expand_vector_set (false, target, val, 0);
/* target = A X C D */
- emit_insn (gen_sse_shufps_1 (target, target, tmp,
+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
GEN_INT (1), GEN_INT (0),
GEN_INT (2+4), GEN_INT (3+4)));
return;
@@ -25751,7 +25752,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
/* tmp = X B C D */
ix86_expand_vector_set (false, tmp, val, 0);
/* target = A B X D */
- emit_insn (gen_sse_shufps_1 (target, target, tmp,
+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (0+4), GEN_INT (3+4)));
return;
@@ -25762,7 +25763,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
/* tmp = X B C D */
ix86_expand_vector_set (false, tmp, val, 0);
/* target = A B X D */
- emit_insn (gen_sse_shufps_1 (target, target, tmp,
+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
GEN_INT (0), GEN_INT (1),
GEN_INT (2+4), GEN_INT (0+4)));
return;
@@ -25883,7 +25884,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
case 1:
case 3:
tmp = gen_reg_rtx (mode);
- emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
+ emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
GEN_INT (elt), GEN_INT (elt),
GEN_INT (elt+4), GEN_INT (elt+4)));
break;
@@ -26000,7 +26001,7 @@ ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
emit_insn (gen_sse_movhlps (tmp1, in, in));
emit_insn (fn (tmp2, tmp1, in));
- emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
+ emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
GEN_INT (1), GEN_INT (1),
GEN_INT (1+4), GEN_INT (1+4)));
emit_insn (fn (dest, tmp2, tmp3));
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index a98e278e9ad..3247c10d430 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -651,14 +651,14 @@ enum target_cpu_default
/* Stack boundary of the main function guaranteed by OS. */
#define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
-/* Stack boundary guaranteed by ABI. */
-#define ABI_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
+/* Minimum stack boundary. */
+#define MIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
/* Boundary (in *bits*) on which the stack pointer prefers to be
aligned; the compiler cannot rely on having this alignment. */
#define PREFERRED_STACK_BOUNDARY ix86_preferred_stack_boundary
-/* It should be ABI_STACK_BOUNDARY. But we set it to 128 bits for
+/* It should be MIN_STACK_BOUNDARY. But we set it to 128 bits for
both 32bit and 64bit, to support codes that need 128 bit stack
alignment for SSE instructions, but can't realign the stack. */
#define PREFERRED_STACK_BOUNDARY_DEFAULT 128
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 0a507e07a2f..8e77a30d353 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -65,9 +65,9 @@
(define_insn "*mov<mode>_internal_rex64"
[(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
- "=rm,r,!?y,!?y ,m ,!y,Y2,x,x ,m,r,x")
+ "=rm,r,!?y,!?y ,m ,!y,*Y2,x,x ,m,r,Yi")
(match_operand:MMXMODEI8 1 "vector_move_operand"
- "Cr ,m,C ,!?ym,!?y,Y2,!y,C,xm,x,x,r"))]
+ "Cr ,m,C ,!?ym,!?y,*Y2,!y,C,xm,x,Yi,r"))]
"TARGET_64BIT && TARGET_MMX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
@@ -124,9 +124,9 @@
(define_insn "*movv2sf_internal_rex64"
[(set (match_operand:V2SF 0 "nonimmediate_operand"
- "=rm,r ,!?y,!?y ,m ,!y,Y2,x,x,x,m,r,x")
+ "=rm,r ,!?y,!?y ,m ,!y,*Y2,x,x,x,m,r,Yi")
(match_operand:V2SF 1 "vector_move_operand"
- "Cr ,m ,C ,!?ym,!y,Y2,!y,C,x,m,x,x,r"))]
+ "Cr ,m ,C ,!?ym,!y,*Y2,!y,C,x,m,x,Yi,r"))]
"TARGET_64BIT && TARGET_MMX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c1d306054ad..baa9976d400 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -36,6 +36,10 @@
(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
(define_mode_iterator SSEMODEF2P [V4SF V2DF])
+;; Int-float size matches
+(define_mode_iterator SSEMODE4S [V4SF V4SI])
+(define_mode_iterator SSEMODE2D [V2DF V2DI])
+
;; Mapping from float mode to required SSE level
(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
@@ -57,6 +61,10 @@
(V16QI "QI") (V8HI "HI")
(V4SI "SI") (V2DI "DI")])
+;; Mapping of vector modes to a vector mode of double size
+(define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
+ (V4SF "V8SF") (V4SI "V8SI")])
+
;; Number of scalar elements in each vector type
(define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
(V16QI "16") (V8HI "8")
@@ -2129,7 +2137,7 @@
"TARGET_SSE"
{
int mask = INTVAL (operands[3]);
- emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
+ emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
GEN_INT ((mask >> 0) & 3),
GEN_INT ((mask >> 2) & 3),
GEN_INT (((mask >> 4) & 3) + 4),
@@ -2137,12 +2145,12 @@
DONE;
})
-(define_insn "sse_shufps_1"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+(define_insn "sse_shufps_<mode>"
+ [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
+ (vec_select:SSEMODE4S
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE4S 1 "register_operand" "0")
+ (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
(parallel [(match_operand 3 "const_0_to_3_operand" "")
(match_operand 4 "const_0_to_3_operand" "")
(match_operand 5 "const_4_to_7_operand" "")
@@ -2540,18 +2548,62 @@
"TARGET_SSE2"
{
int mask = INTVAL (operands[3]);
- emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
+ emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
GEN_INT (mask & 1),
GEN_INT (mask & 2 ? 3 : 2)));
DONE;
})
-(define_insn "sse2_shufpd_1"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_select:V2DF
- (vec_concat:V4DF
- (match_operand:V2DF 1 "register_operand" "0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+(define_expand "vec_extract_even<mode>"
+ [(set (match_operand:SSEMODE4S 0 "register_operand" "")
+ (vec_select:SSEMODE4S
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE4S 1 "register_operand" "")
+ (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))]
+ "TARGET_SSE")
+
+(define_expand "vec_extract_odd<mode>"
+ [(set (match_operand:SSEMODE4S 0 "register_operand" "")
+ (vec_select:SSEMODE4S
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE4S 1 "register_operand" "")
+ (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))]
+ "TARGET_SSE")
+
+(define_expand "vec_extract_even<mode>"
+ [(set (match_operand:SSEMODE2D 0 "register_operand" "")
+ (vec_select:SSEMODE2D
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE2D 1 "register_operand" "")
+ (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2")
+
+(define_expand "vec_extract_odd<mode>"
+ [(set (match_operand:SSEMODE2D 0 "register_operand" "")
+ (vec_select:SSEMODE2D
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE2D 1 "register_operand" "")
+ (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2")
+
+(define_insn "sse2_shufpd_<mode>"
+ [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
+ (vec_select:SSEMODE2D
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE2D 1 "register_operand" "0")
+ (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
(parallel [(match_operand 3 "const_0_to_1_operand" "")
(match_operand 4 "const_2_to_3_operand" "")])))]
"TARGET_SSE2"
@@ -4195,6 +4247,46 @@
DONE;
})
+(define_expand "vec_interleave_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE")
+
+(define_expand "vec_interleave_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE")
+
+(define_expand "vec_interleave_highv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2")
+
+(define_expand "vec_interleave_lowv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2")
+
(define_insn "sse2_packsswb"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_concat:V16QI
@@ -4685,7 +4777,7 @@
"")
(define_insn "*sse2_storeq_rex64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r,r")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
(vec_select:DI
(match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
(parallel [(const_int 0)])))]
@@ -4848,10 +4940,10 @@
(set_attr "mode" "TI,V4SF,V2SF")])
(define_insn "vec_concatv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x,x")
(vec_concat:V2DI
- (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
- (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
+ (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0,m")
+ (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m,0")))]
"!TARGET_64BIT && TARGET_SSE"
"@
movq\t{%1, %0|%0, %1}
@@ -4864,10 +4956,10 @@
(set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
(define_insn "*vec_concatv2di_rex64_sse4_1"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x,x,x")
+ [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x,x")
(vec_concat:V2DI
- (match_operand:DI 1 "nonimmediate_operand" " 0,m,r ,*y,0,0,0,m")
- (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,x,m,0")))]
+ (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0,m")
+ (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m,0")))]
"TARGET_64BIT && TARGET_SSE4_1"
"@
pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
@@ -4883,10 +4975,10 @@
(set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
(define_insn "*vec_concatv2di_rex64_sse"
- [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x,x")
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x,x")
(vec_concat:V2DI
- (match_operand:DI 1 "nonimmediate_operand" " m,r ,*y ,0 ,0,0,m")
- (match_operand:DI 2 "vector_move_operand" " C,C ,C ,Y2,x,m,0")))]
+ (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0,m")
+ (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m,0")))]
"TARGET_64BIT && TARGET_SSE"
"@
movq\t{%1, %0|%0, %1}