summary refs log tree commit diff
diff options
context:
space:
mode:
author rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4> 2005-06-29 17:27:23 +0000
committer rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4> 2005-06-29 17:27:23 +0000
commit e313c83f86a56b386390dedacaa0264ce662c8b3 (patch)
tree 5d2dfa3860bda0dd44b181d005bd603aada4f4fd
parent c27f5119edbb56875532bdc9dd826ab5bccb686e (diff)
download gcc-e313c83f86a56b386390dedacaa0264ce662c8b3.tar.gz
* config/i386/i386.c (ix86_expand_int_vcond): Remove unsignedp
argument. Simplify canonicalization of condition. Use unsigned
saturating subtraction for QI and HImode unsigned compares. Use
bit arithmetic tricks for SImode unsigned compares.
* config/i386/i386-protos.h (ix86_expand_int_vcond): Update decl.
* config/i386/sse.md (SSEMODE14): New.
(umaxv8hi3): Use us_minus+plus to avoid vcond.
(umaxv4si3): New.
(smax<SSEMODE14>3): Rename from smaxv16qi3 and macroize.
(smin<SSEMODE14>3): Similarly with sminv16qi3.
(umin<SSEMODE24>3): Similarly with uminv8hi3.

* lib/target-supports.exp (check_effective_target_vect_no_max):
Remove i386 and x86_64.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@101429 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 14
-rw-r--r-- gcc/config/i386/i386-protos.h 2
-rw-r--r-- gcc/config/i386/i386.c 142
-rw-r--r-- gcc/config/i386/sse.md 118
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/lib/target-supports.exp 4
6 files changed, 158 insertions, 127 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e8365ec02c2..c13d7f8d77a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2005-06-29 Richard Henderson <rth@redhat.com>
+
+ * config/i386/i386.c (ix86_expand_int_vcond): Remove unsignedp
+ argument. Simplify canonicalization of condition. Use unsigned
+ saturating subtraction for QI and HImode unsigned compares. Use
+ bit arithmetic tricks for SImode unsigned compares.
+ * config/i386/i386-protos.h (ix86_expand_int_vcond): Update decl.
+ * config/i386/sse.md (SSEMODE14): New.
+ (umaxv8hi3): Use us_minus+plus to avoid vcond.
+ (umaxv4si3): New.
+ (smax<SSEMODE14>3): Rename from smaxv16qi3 and macroize.
+ (smin<SSEMODE14>3): Similarly with sminv16qi3.
+ (umin<SSEMODE24>3): Similarly with uminv8hi3.
+
2005-06-29 Ian Lance Taylor <ian@airs.com>
* dwarf2out.c (expand_builtin_init_dwarf_reg_sizes): Change
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index c7b74777359..15c52b090c2 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -149,7 +149,7 @@ extern int ix86_expand_setcc (enum rtx_code, rtx);
extern int ix86_expand_int_movcc (rtx[]);
extern int ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
-extern bool ix86_expand_int_vcond (rtx[], bool);
+extern bool ix86_expand_int_vcond (rtx[]);
extern int ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 620d862f3ff..77437046621 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10501,94 +10501,102 @@ ix86_expand_fp_vcond (rtx operands[])
/* Expand a signed integral vector conditional move. */
bool
-ix86_expand_int_vcond (rtx operands[], bool unsignedp)
+ix86_expand_int_vcond (rtx operands[])
{
enum machine_mode mode = GET_MODE (operands[0]);
enum rtx_code code = GET_CODE (operands[3]);
- rtx cmp, x;
+ bool negate = false;
+ rtx x, cop0, cop1;
- if (unsignedp)
- code = signed_condition (code);
- if (code == NE || code == LE || code == GE)
+ cop0 = operands[4];
+ cop1 = operands[5];
+
+ /* Canonicalize the comparison to EQ, GT, GTU. */
+ switch (code)
{
- /* Inverse of a supported code. */
- x = operands[1];
- operands[1] = operands[2];
- operands[2] = x;
+ case EQ:
+ case GT:
+ case GTU:
+ break;
+
+ case NE:
+ case LE:
+ case LEU:
code = reverse_condition (code);
- }
- if (code == LT)
- {
- /* Swap of a supported code. */
- x = operands[4];
- operands[4] = operands[5];
- operands[5] = x;
+ negate = true;
+ break;
+
+ case GE:
+ case GEU:
+ code = reverse_condition (code);
+ negate = true;
+ /* FALLTHRU */
+
+ case LT:
+ case LTU:
code = swap_condition (code);
- }
- gcc_assert (code == EQ || code == GT);
+ x = cop0, cop0 = cop1, cop1 = x;
+ break;
- /* Unlike floating-point, we can rely on the optimizers to have already
- converted to MIN/MAX expressions, so we don't have to handle that. */
+ default:
+ gcc_unreachable ();
+ }
- /* Unsigned GT is not directly supported. We can zero-extend QI and
- HImode elements to the next wider element size, use a signed compare,
- then repack. For three extra instructions, this is definitely a win. */
- if (code == GT && unsignedp)
+ /* Unsigned parallel compare is not supported by the hardware. Play some
+ tricks to turn this into a signed comparison against 0. */
+ if (code == GTU)
{
- rtx o0l, o0h, o1l, o1h, cl, ch, zero;
- enum machine_mode wider;
- rtx (*unpackl) (rtx, rtx, rtx);
- rtx (*unpackh) (rtx, rtx, rtx);
- rtx (*pack) (rtx, rtx, rtx);
-
switch (mode)
{
- case V16QImode:
- wider = V8HImode;
- unpackl = gen_sse2_punpcklbw;
- unpackh = gen_sse2_punpckhbw;
- pack = gen_sse2_packsswb;
+ case V4SImode:
+ {
+ rtx t1, t2, mask;
+
+ /* Perform a parallel modulo subtraction. */
+ t1 = gen_reg_rtx (mode);
+ emit_insn (gen_subv4si3 (t1, cop0, cop1));
+
+ /* Extract the original sign bit of op0. */
+ mask = GEN_INT (-0x80000000);
+ mask = gen_rtx_CONST_VECTOR (mode,
+ gen_rtvec (4, mask, mask, mask, mask));
+ mask = force_reg (mode, mask);
+ t2 = gen_reg_rtx (mode);
+ emit_insn (gen_andv4si3 (t2, cop0, mask));
+
+ /* XOR it back into the result of the subtraction. This results
+ in the sign bit set iff we saw unsigned underflow. */
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_xorv4si3 (x, t1, t2));
+
+ code = GT;
+ }
break;
+
+ case V16QImode:
case V8HImode:
- wider = V4SImode;
- unpackl = gen_sse2_punpcklwd;
- unpackh = gen_sse2_punpckhwd;
- pack = gen_sse2_packssdw;
+ /* Perform a parallel unsigned saturating subtraction. */
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, x,
+ gen_rtx_US_MINUS (mode, cop0, cop1)));
+
+ code = EQ;
+ negate = !negate;
break;
+
default:
gcc_unreachable ();
}
- operands[4] = force_reg (mode, operands[4]);
- operands[5] = force_reg (mode, operands[5]);
-
- o0l = gen_reg_rtx (wider);
- o0h = gen_reg_rtx (wider);
- o1l = gen_reg_rtx (wider);
- o1h = gen_reg_rtx (wider);
- cl = gen_reg_rtx (wider);
- ch = gen_reg_rtx (wider);
- cmp = gen_reg_rtx (mode);
- zero = force_reg (mode, CONST0_RTX (mode));
-
- emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero));
- emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero));
- emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero));
- emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero));
-
- x = gen_rtx_GT (wider, o0l, o1l);
- emit_insn (gen_rtx_SET (VOIDmode, cl, x));
-
- x = gen_rtx_GT (wider, o0h, o1h);
- emit_insn (gen_rtx_SET (VOIDmode, ch, x));
-
- emit_insn (pack (cmp, cl, ch));
+ cop0 = x;
+ cop1 = CONST0_RTX (mode);
}
- else
- cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
- operands[1], operands[2]);
- ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
+ operands[1+negate], operands[2-negate]);
+
+ ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
+ operands[2-negate]);
return true;
}
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 37454f5af30..bb9f98e9eba 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -30,6 +30,7 @@
;; Mix-n-match
(define_mode_macro SSEMODE12 [V16QI V8HI])
(define_mode_macro SSEMODE24 [V8HI V4SI])
+(define_mode_macro SSEMODE14 [V16QI V4SI])
(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
@@ -2741,26 +2742,6 @@
operands[1] = gen_lowpart (TImode, operands[1]);
})
-(define_expand "smaxv16qi3"
- [(set (match_operand:V16QI 0 "register_operand" "")
- (smax:V16QI (match_operand:V16QI 1 "register_operand" "")
- (match_operand:V16QI 2 "register_operand" "")))]
- "TARGET_SSE2"
-{
- rtx xops[6];
- bool ok;
-
- xops[0] = operands[0];
- xops[1] = operands[1];
- xops[2] = operands[2];
- xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
- xops[4] = operands[1];
- xops[5] = operands[2];
- ok = ix86_expand_int_vcond (xops, false);
- gcc_assert (ok);
- DONE;
-})
-
(define_expand "umaxv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "")
(umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
@@ -2794,33 +2775,42 @@
(set_attr "mode" "TI")])
(define_expand "umaxv8hi3"
- [(set (match_operand:V8HI 0 "register_operand" "")
- (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
- (match_operand:V8HI 2 "register_operand" "")))]
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (set (match_dup 3)
+ (plus:V8HI (match_dup 0) (match_dup 2)))]
"TARGET_SSE2"
{
- rtx xops[6], t1, t2;
- bool ok;
+ operands[3] = operands[0];
+ if (rtx_equal_p (operands[0], operands[2]))
+ operands[0] = gen_reg_rtx (V8HImode);
+})
- t1 = gen_reg_rtx (V8HImode);
- emit_insn (gen_sse2_ussubv8hi3 (t1, operands[2], operands[1]));
- t2 = force_reg (V8HImode, CONST0_RTX (V8HImode));
+(define_expand "smax<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "")
+ (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
+ (match_operand:SSEMODE14 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ rtx xops[6];
+ bool ok;
xops[0] = operands[0];
xops[1] = operands[1];
xops[2] = operands[2];
- xops[3] = gen_rtx_EQ (VOIDmode, t1, t2);
- xops[4] = t1;
- xops[5] = t2;
- ok = ix86_expand_int_vcond (xops, false);
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
gcc_assert (ok);
DONE;
})
-(define_expand "sminv16qi3"
- [(set (match_operand:V16QI 0 "register_operand" "")
- (smin:V16QI (match_operand:V16QI 1 "register_operand" "")
- (match_operand:V16QI 2 "register_operand" "")))]
+(define_expand "umaxv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2"
{
rtx xops[6];
@@ -2829,10 +2819,10 @@
xops[0] = operands[0];
xops[1] = operands[1];
xops[2] = operands[2];
- xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
- xops[4] = operands[2];
- xops[5] = operands[1];
- ok = ix86_expand_int_vcond (xops, false);
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
gcc_assert (ok);
DONE;
})
@@ -2869,26 +2859,42 @@
[(set_attr "type" "sseiadd")
(set_attr "mode" "TI")])
-(define_expand "uminv8hi3"
- [(set (match_operand:V8HI 0 "register_operand" "")
- (umin:V8HI (match_operand:V8HI 1 "register_operand" "")
- (match_operand:V8HI 2 "register_operand" "")))]
+(define_expand "smin<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "")
+ (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
+ (match_operand:SSEMODE14 2 "register_operand" "")))]
"TARGET_SSE2"
{
- rtx xops[6], t1, t2;
+ rtx xops[6];
bool ok;
- t1 = gen_reg_rtx (V8HImode);
- emit_insn (gen_sse2_ussubv8hi3 (t1, operands[1], operands[2]));
- t2 = force_reg (V8HImode, CONST0_RTX (V8HImode));
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "umin<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "")
+ (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
+ (match_operand:SSEMODE24 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ rtx xops[6];
+ bool ok;
xops[0] = operands[0];
- xops[1] = operands[1];
- xops[2] = operands[2];
- xops[3] = gen_rtx_EQ (VOIDmode, t1, t2);
- xops[4] = t1;
- xops[5] = t2;
- ok = ix86_expand_int_vcond (xops, false);
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
gcc_assert (ok);
DONE;
})
@@ -2929,7 +2935,7 @@
(match_operand:SSEMODE124 2 "general_operand" "")))]
"TARGET_SSE2"
{
- if (ix86_expand_int_vcond (operands, false))
+ if (ix86_expand_int_vcond (operands))
DONE;
else
FAIL;
@@ -2945,7 +2951,7 @@
(match_operand:SSEMODE12 2 "general_operand" "")))]
"TARGET_SSE2"
{
- if (ix86_expand_int_vcond (operands, true))
+ if (ix86_expand_int_vcond (operands))
DONE;
else
FAIL;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 788ca8f66c4..10b2817eeb7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2005-05-29 Richard Henderson <rth@redhat.com>
+
+ * lib/target-supports.exp (check_effective_target_vect_no_max):
+ Remove i386 and x86_64.
+
2005-06-29 Steve Ellcey <sje@cup.hp.com>
PR testsuite/21969
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 059ab38e4b7..d00850ad3f0 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -973,9 +973,7 @@ proc check_effective_target_vect_no_max { } {
verbose "check_effective_target_vect_no_max: using cached result" 2
} else {
set et_vect_no_max_saved 0
- if { [istarget i?86-*-*]
- || [istarget x86_64-*-*]
- || [istarget sparc*-*-*]
+ if { [istarget sparc*-*-*]
|| [istarget alpha*-*-*] } {
set et_vect_no_max_saved 1
}