summary | refs | log | tree | commit | diff
path: root/gcc/config/rs6000/rs6000.md
diff options
context:
space:
mode:
author: meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> 2017-03-27 19:19:00 +0000
committer: meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> 2017-03-27 19:19:00 +0000
commit: a8f33854e5fa85c4849da170dcedb9542e5c3e64 (patch)
tree: be558930e8119fcb8d4cd77c87409ffe198eaa01 /gcc/config/rs6000/rs6000.md
parent: 0cdc8a46270f6907fa10c9f591e2408f6cd162b9 (diff)
download: gcc-a8f33854e5fa85c4849da170dcedb9542e5c3e64.tar.gz
[gcc]
2017-03-27  Michael Meissner  <meissner@linux.vnet.ibm.com>

    PR target/78543
    * config/rs6000/rs6000.md (bswaphi2_extenddi): Combine bswap
    HImode and SImode with zero extend to DImode to one insn.
    (bswap<mode>2_extenddi): Likewise.
    (bswapsi2_extenddi): Likewise.
    (bswaphi2_extendsi): Likewise.
    (bswaphi2): Combine bswap HImode and SImode into one insn.
    Separate memory insns from swapping register.
    (bswapsi2): Likewise.
    (bswap<mode>2): Likewise.
    (bswaphi2_internal): Delete, no longer used.
    (bswapsi2_internal): Likewise.
    (bswap<mode>2_load): Split bswap HImode/SImode into separate
    load, store, and gpr<-gpr swap insns.
    (bswap<mode>2_store): Likewise.
    (bswaphi2_reg): Register only splitter, combine with the splitter.
    (bswaphi2 splitter): Likewise.
    (bswapsi2_reg): Likewise.
    (bswapsi2 splitter): Likewise.
    (bswapdi2): If we have the LDBRX and STDBRX instructions, split
    the insns into load, store, and register/register insns.
    (bswapdi2_ldbrx): Likewise.
    (bswapdi2_load): Likewise.
    (bswapdi2_store): Likewise.
    (bswapdi2_reg): Likewise.

[gcc/testsuite]
2017-03-27  Michael Meissner  <meissner@linux.vnet.ibm.com>

    PR target/78543
    * gcc.target/powerpc/pr78543.c: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@246508 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/rs6000/rs6000.md')
-rw-r--r-- gcc/config/rs6000/rs6000.md 176
1 files changed, 96 insertions, 80 deletions
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 802e25752de..d1da8042220 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2350,12 +2350,12 @@
;; Since the hardware zeros the upper part of the register, save generating the
;; AND immediate if we are converting to unsigned
-(define_insn "*bswaphi2_extenddi"
+(define_insn "*bswap<mode>2_extenddi"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(zero_extend:DI
- (bswap:HI (match_operand:HI 1 "memory_operand" "Z"))))]
+ (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z"))))]
"TARGET_POWERPC64"
- "lhbrx %0,%y1"
+ "l<wd>brx %0,%y1"
[(set_attr "length" "4")
(set_attr "type" "load")])
@@ -2368,34 +2368,52 @@
[(set_attr "length" "4")
(set_attr "type" "load")])
-(define_expand "bswaphi2"
- [(parallel [(set (match_operand:HI 0 "reg_or_mem_operand" "")
- (bswap:HI
- (match_operand:HI 1 "reg_or_mem_operand" "")))
- (clobber (match_scratch:SI 2 ""))])]
+;; Separate the bswap patterns into load, store, and gpr<-gpr. This prevents
+;; the register allocator from converting a gpr<-gpr swap into a store and then
+;; load with byte swap, which can be slower than doing it in the registers. It
+;; also prevents certain failures with the RELOAD register allocator.
+
+(define_expand "bswap<mode>2"
+ [(use (match_operand:HSI 0 "reg_or_mem_operand"))
+ (use (match_operand:HSI 1 "reg_or_mem_operand"))]
""
{
- if (!REG_P (operands[0]) && !REG_P (operands[1]))
- operands[1] = force_reg (HImode, operands[1]);
+ rtx dest = operands[0];
+ rtx src = operands[1];
+
+ if (!REG_P (dest) && !REG_P (src))
+ src = force_reg (<MODE>mode, src);
+
+ if (MEM_P (src))
+ emit_insn (gen_bswap<mode>2_load (dest, src));
+ else if (MEM_P (dest))
+ emit_insn (gen_bswap<mode>2_store (dest, src));
+ else
+ emit_insn (gen_bswap<mode>2_reg (dest, src));
+ DONE;
})
-(define_insn "bswaphi2_internal"
- [(set (match_operand:HI 0 "reg_or_mem_operand" "=r,Z,&r")
- (bswap:HI
- (match_operand:HI 1 "reg_or_mem_operand" "Z,r,r")))
- (clobber (match_scratch:SI 2 "=X,X,&r"))]
+(define_insn "bswap<mode>2_load"
+ [(set (match_operand:HSI 0 "gpc_reg_operand" "=r")
+ (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z")))]
""
- "@
- lhbrx %0,%y1
- sthbrx %1,%y0
- #"
- [(set_attr "length" "4,4,12")
- (set_attr "type" "load,store,*")])
+ "l<wd>brx %0,%y1"
+ [(set_attr "type" "load")])
-(define_split
- [(set (match_operand:HI 0 "gpc_reg_operand" "")
- (bswap:HI (match_operand:HI 1 "gpc_reg_operand" "")))
- (clobber (match_operand:SI 2 "gpc_reg_operand" ""))]
+(define_insn "bswap<mode>2_store"
+ [(set (match_operand:HSI 0 "memory_operand" "=Z")
+ (bswap:HSI (match_operand:HSI 1 "gpc_reg_operand" "r")))]
+ ""
+ "st<wd>brx %1,%y0"
+ [(set_attr "type" "store")])
+
+(define_insn_and_split "bswaphi2_reg"
+ [(set (match_operand:HI 0 "gpc_reg_operand" "=&r")
+ (bswap:HI
+ (match_operand:HI 1 "gpc_reg_operand" "r")))
+ (clobber (match_scratch:SI 2 "=&r"))]
+ ""
+ "#"
"reload_completed"
[(set (match_dup 3)
(and:SI (lshiftrt:SI (match_dup 4)
@@ -2408,48 +2426,21 @@
(set (match_dup 3)
(ior:SI (match_dup 3)
(match_dup 2)))]
- "
{
operands[3] = simplify_gen_subreg (SImode, operands[0], HImode, 0);
operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0);
-}")
-
-(define_insn "*bswapsi2_extenddi"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
- (zero_extend:DI
- (bswap:SI (match_operand:SI 1 "memory_operand" "Z"))))]
- "TARGET_POWERPC64"
- "lwbrx %0,%y1"
- [(set_attr "length" "4")
- (set_attr "type" "load")])
-
-(define_expand "bswapsi2"
- [(set (match_operand:SI 0 "reg_or_mem_operand" "")
- (bswap:SI
- (match_operand:SI 1 "reg_or_mem_operand" "")))]
- ""
-{
- if (!REG_P (operands[0]) && !REG_P (operands[1]))
- operands[1] = force_reg (SImode, operands[1]);
-})
-
-(define_insn "*bswapsi2_internal"
- [(set (match_operand:SI 0 "reg_or_mem_operand" "=r,Z,&r")
- (bswap:SI
- (match_operand:SI 1 "reg_or_mem_operand" "Z,r,r")))]
- ""
- "@
- lwbrx %0,%y1
- stwbrx %1,%y0
- #"
- [(set_attr "length" "4,4,12")
- (set_attr "type" "load,store,*")])
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "*")])
;; We are always BITS_BIG_ENDIAN, so the bit positions below in
;; zero_extract insns do not change for -mlittle.
-(define_split
- [(set (match_operand:SI 0 "gpc_reg_operand" "")
- (bswap:SI (match_operand:SI 1 "gpc_reg_operand" "")))]
+(define_insn_and_split "bswapsi2_reg"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r")
+ (bswap:SI
+ (match_operand:SI 1 "gpc_reg_operand" "r")))]
+ ""
+ "#"
"reload_completed"
[(set (match_dup 0) ; DABC
(rotate:SI (match_dup 1)
@@ -2465,11 +2456,13 @@
(const_int 24))
(const_int 255))
(and:SI (match_dup 0)
- (const_int -256))))
-
- ]
+ (const_int -256))))]
"")
+;; On systems with LDBRX/STDBRX generate the loads/stores directly, just like
+;; we do for L{H,W}BRX and ST{H,W}BRX above. If not, we have to generate more
+;; complex code.
+
(define_expand "bswapdi2"
[(parallel [(set (match_operand:DI 0 "reg_or_mem_operand" "")
(bswap:DI
@@ -2478,33 +2471,56 @@
(clobber (match_scratch:DI 3 ""))])]
""
{
- if (!REG_P (operands[0]) && !REG_P (operands[1]))
- operands[1] = force_reg (DImode, operands[1]);
+ rtx dest = operands[0];
+ rtx src = operands[1];
+
+ if (!REG_P (dest) && !REG_P (src))
+ operands[1] = src = force_reg (DImode, src);
+
+ if (TARGET_POWERPC64 && TARGET_LDBRX)
+ {
+ if (MEM_P (src))
+ emit_insn (gen_bswapdi2_load (dest, src));
+ else if (MEM_P (dest))
+ emit_insn (gen_bswapdi2_store (dest, src));
+ else
+ emit_insn (gen_bswapdi2_reg (dest, src));
+ DONE;
+ }
if (!TARGET_POWERPC64)
{
/* 32-bit mode needs fewer scratch registers, but 32-bit addressing mode
that uses 64-bit registers needs the same scratch registers as 64-bit
mode. */
- emit_insn (gen_bswapdi2_32bit (operands[0], operands[1]));
+ emit_insn (gen_bswapdi2_32bit (dest, src));
DONE;
}
})
;; Power7/cell has ldbrx/stdbrx, so use it directly
-(define_insn "*bswapdi2_ldbrx"
- [(set (match_operand:DI 0 "reg_or_mem_operand" "=r,Z,&r")
- (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
- (clobber (match_scratch:DI 2 "=X,X,&r"))
- (clobber (match_scratch:DI 3 "=X,X,&r"))]
- "TARGET_POWERPC64 && TARGET_LDBRX
- && (REG_P (operands[0]) || REG_P (operands[1]))"
- "@
- ldbrx %0,%y1
- stdbrx %1,%y0
- #"
- [(set_attr "length" "4,4,36")
- (set_attr "type" "load,store,*")])
+(define_insn "bswapdi2_load"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (bswap:DI (match_operand:DI 1 "memory_operand" "Z")))]
+ "TARGET_POWERPC64 && TARGET_LDBRX"
+ "ldbrx %0,%y1"
+ [(set_attr "type" "load")])
+
+(define_insn "bswapdi2_store"
+ [(set (match_operand:DI 0 "memory_operand" "=Z")
+ (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r")))]
+ "TARGET_POWERPC64 && TARGET_LDBRX"
+ "stdbrx %1,%y0"
+ [(set_attr "type" "store")])
+
+(define_insn "bswapdi2_reg"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r")
+ (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r")))
+ (clobber (match_scratch:DI 2 "=&r"))
+ (clobber (match_scratch:DI 3 "=&r"))]
+ "TARGET_POWERPC64 && TARGET_LDBRX"
+ "#"
+ [(set_attr "length" "36")])
;; Non-power7/cell, fall back to use lwbrx/stwbrx
(define_insn "*bswapdi2_64bit"