diff options
author | meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> | 2017-03-27 19:19:00 +0000 |
---|---|---|
committer | meissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4> | 2017-03-27 19:19:00 +0000 |
commit | a8f33854e5fa85c4849da170dcedb9542e5c3e64 (patch) | |
tree | be558930e8119fcb8d4cd77c87409ffe198eaa01 /gcc/config/rs6000/rs6000.md | |
parent | 0cdc8a46270f6907fa10c9f591e2408f6cd162b9 (diff) | |
download | gcc-a8f33854e5fa85c4849da170dcedb9542e5c3e64.tar.gz |
[gcc]
2017-03-27 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/78543
* config/rs6000/rs6000.md (bswaphi2_extenddi): Combine bswap
HImode and SImode with zero extend to DImode to one insn.
(bswap<mode>2_extenddi): Likewise.
(bswapsi2_extenddi): Likewise.
(bswaphi2_extendsi): Likewise.
(bswaphi2): Combine bswap HImode and SImode into one insn.
Separate memory insns from swapping register.
(bswapsi2): Likewise.
(bswap<mode>2): Likewise.
(bswaphi2_internal): Delete, no longer used.
(bswapsi2_internal): Likewise.
(bswap<mode>2_load): Split bswap HImode/SImode into separate load,
store, and gpr<-gpr swap insns.
(bswap<mode>2_store): Likewise.
(bswaphi2_reg): Register only splitter, combine with the splitter.
(bswaphi2 splitter): Likewise.
(bswapsi2_reg): Likewise.
(bswapsi2 splitter): Likewise.
(bswapdi2): If we have the LDBRX and STDBRX instructions, split
the insns into load, store, and register/register insns.
(bswapdi2_ldbrx): Likewise.
(bswapdi2_load): Likewise.
(bswapdi2_store): Likewise.
(bswapdi2_reg): Likewise.
[gcc/testsuite]
2017-03-27 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/78543
* gcc.target/powerpc/pr78543.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@246508 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/rs6000/rs6000.md')
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 176 |
1 files changed, 96 insertions, 80 deletions
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 802e25752de..d1da8042220 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -2350,12 +2350,12 @@ ;; Since the hardware zeros the upper part of the register, save generating the ;; AND immediate if we are converting to unsigned -(define_insn "*bswaphi2_extenddi" +(define_insn "*bswap<mode>2_extenddi" [(set (match_operand:DI 0 "gpc_reg_operand" "=r") (zero_extend:DI - (bswap:HI (match_operand:HI 1 "memory_operand" "Z"))))] + (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z"))))] "TARGET_POWERPC64" - "lhbrx %0,%y1" + "l<wd>brx %0,%y1" [(set_attr "length" "4") (set_attr "type" "load")]) @@ -2368,34 +2368,52 @@ [(set_attr "length" "4") (set_attr "type" "load")]) -(define_expand "bswaphi2" - [(parallel [(set (match_operand:HI 0 "reg_or_mem_operand" "") - (bswap:HI - (match_operand:HI 1 "reg_or_mem_operand" ""))) - (clobber (match_scratch:SI 2 ""))])] +;; Separate the bswap patterns into load, store, and gpr<-gpr. This prevents +;; the register allocator from converting a gpr<-gpr swap into a store and then +;; load with byte swap, which can be slower than doing it in the registers. It +;; also prevents certain failures with the RELOAD register allocator. + +(define_expand "bswap<mode>2" + [(use (match_operand:HSI 0 "reg_or_mem_operand")) + (use (match_operand:HSI 1 "reg_or_mem_operand"))] "" { - if (!REG_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (HImode, operands[1]); + rtx dest = operands[0]; + rtx src = operands[1]; + + if (!REG_P (dest) && !REG_P (src)) + src = force_reg (<MODE>mode, src); + + if (MEM_P (src)) + emit_insn (gen_bswap<mode>2_load (dest, src)); + else if (MEM_P (dest)) + emit_insn (gen_bswap<mode>2_store (dest, src)); + else + emit_insn (gen_bswap<mode>2_reg (dest, src)); + DONE; }) -(define_insn "bswaphi2_internal" - [(set (match_operand:HI 0 "reg_or_mem_operand" "=r,Z,&r") - (bswap:HI - (match_operand:HI 1 "reg_or_mem_operand" "Z,r,r"))) - (clobber (match_scratch:SI 2 "=X,X,&r"))] +(define_insn "bswap<mode>2_load" + [(set (match_operand:HSI 0 "gpc_reg_operand" "=r") + (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z")))] "" - "@ - lhbrx %0,%y1 - sthbrx %1,%y0 - #" - [(set_attr "length" "4,4,12") - (set_attr "type" "load,store,*")]) + "l<wd>brx %0,%y1" + [(set_attr "type" "load")]) -(define_split - [(set (match_operand:HI 0 "gpc_reg_operand" "") - (bswap:HI (match_operand:HI 1 "gpc_reg_operand" ""))) - (clobber (match_operand:SI 2 "gpc_reg_operand" ""))] +(define_insn "bswap<mode>2_store" + [(set (match_operand:HSI 0 "memory_operand" "=Z") + (bswap:HSI (match_operand:HSI 1 "gpc_reg_operand" "r")))] + "" + "st<wd>brx %1,%y0" + [(set_attr "type" "store")]) + +(define_insn_and_split "bswaphi2_reg" + [(set (match_operand:HI 0 "gpc_reg_operand" "=&r") + (bswap:HI + (match_operand:HI 1 "gpc_reg_operand" "r"))) + (clobber (match_scratch:SI 2 "=&r"))] + "" + "#" "reload_completed" [(set (match_dup 3) (and:SI (lshiftrt:SI (match_dup 4) @@ -2408,48 +2426,21 @@ (set (match_dup 3) (ior:SI (match_dup 3) (match_dup 2)))] - " { operands[3] = simplify_gen_subreg (SImode, operands[0], HImode, 0); operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0); -}") - -(define_insn "*bswapsi2_extenddi" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") - (zero_extend:DI - (bswap:SI (match_operand:SI 1 "memory_operand" "Z"))))] - "TARGET_POWERPC64" - "lwbrx %0,%y1" - [(set_attr "length" "4") - (set_attr "type" "load")]) - -(define_expand "bswapsi2" - [(set (match_operand:SI 0 "reg_or_mem_operand" "") - (bswap:SI - (match_operand:SI 1 "reg_or_mem_operand" "")))] - "" -{ - if (!REG_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (SImode, operands[1]); -}) - -(define_insn "*bswapsi2_internal" - [(set (match_operand:SI 0 "reg_or_mem_operand" "=r,Z,&r") - (bswap:SI - (match_operand:SI 1 "reg_or_mem_operand" "Z,r,r")))] - "" - "@ - lwbrx %0,%y1 - stwbrx %1,%y0 - #" - [(set_attr "length" "4,4,12") - (set_attr "type" "load,store,*")]) +} + [(set_attr "length" "12") + (set_attr "type" "*")]) ;; We are always BITS_BIG_ENDIAN, so the bit positions below in ;; zero_extract insns do not change for -mlittle. -(define_split - [(set (match_operand:SI 0 "gpc_reg_operand" "") - (bswap:SI (match_operand:SI 1 "gpc_reg_operand" "")))] +(define_insn_and_split "bswapsi2_reg" + [(set (match_operand:SI 0 "gpc_reg_operand" "=&r") + (bswap:SI + (match_operand:SI 1 "gpc_reg_operand" "r")))] + "" + "#" "reload_completed" [(set (match_dup 0) ; DABC (rotate:SI (match_dup 1) @@ -2465,11 +2456,13 @@ (const_int 24)) (const_int 255)) (and:SI (match_dup 0) - (const_int -256)))) - - ] + (const_int -256))))] "") +;; On systems with LDBRX/STDBRX generate the loads/stores directly, just like +;; we do for L{H,W}BRX and ST{H,W}BRX above. If not, we have to generate more +;; complex code. + (define_expand "bswapdi2" [(parallel [(set (match_operand:DI 0 "reg_or_mem_operand" "") (bswap:DI @@ -2478,33 +2471,56 @@ (clobber (match_scratch:DI 3 ""))])] "" { - if (!REG_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (DImode, operands[1]); + rtx dest = operands[0]; + rtx src = operands[1]; + + if (!REG_P (dest) && !REG_P (src)) + operands[1] = src = force_reg (DImode, src); + + if (TARGET_POWERPC64 && TARGET_LDBRX) + { + if (MEM_P (src)) + emit_insn (gen_bswapdi2_load (dest, src)); + else if (MEM_P (dest)) + emit_insn (gen_bswapdi2_store (dest, src)); + else + emit_insn (gen_bswapdi2_reg (dest, src)); + DONE; + } if (!TARGET_POWERPC64) { /* 32-bit mode needs fewer scratch registers, but 32-bit addressing mode that uses 64-bit registers needs the same scratch registers as 64-bit mode. */ - emit_insn (gen_bswapdi2_32bit (operands[0], operands[1])); + emit_insn (gen_bswapdi2_32bit (dest, src)); DONE; } }) ;; Power7/cell has ldbrx/stdbrx, so use it directly -(define_insn "*bswapdi2_ldbrx" - [(set (match_operand:DI 0 "reg_or_mem_operand" "=r,Z,&r") - (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r"))) - (clobber (match_scratch:DI 2 "=X,X,&r")) - (clobber (match_scratch:DI 3 "=X,X,&r"))] - "TARGET_POWERPC64 && TARGET_LDBRX - && (REG_P (operands[0]) || REG_P (operands[1]))" - "@ - ldbrx %0,%y1 - stdbrx %1,%y0 - #" - [(set_attr "length" "4,4,36") - (set_attr "type" "load,store,*")]) +(define_insn "bswapdi2_load" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (bswap:DI (match_operand:DI 1 "memory_operand" "Z")))] + "TARGET_POWERPC64 && TARGET_LDBRX" + "ldbrx %0,%y1" + [(set_attr "type" "load")]) + +(define_insn "bswapdi2_store" + [(set (match_operand:DI 0 "memory_operand" "=Z") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r")))] + "TARGET_POWERPC64 && TARGET_LDBRX" + "stdbrx %1,%y0" + [(set_attr "type" "store")]) + +(define_insn "bswapdi2_reg" + [(set (match_operand:DI 0 "gpc_reg_operand" "=&r") + (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r"))) + (clobber (match_scratch:DI 2 "=&r")) + (clobber (match_scratch:DI 3 "=&r"))] + "TARGET_POWERPC64 && TARGET_LDBRX" + "#" + [(set_attr "length" "36")]) ;; Non-power7/cell, fall back to use lwbrx/stwbrx (define_insn "*bswapdi2_64bit" |