diff options
author | Michael Munday <mike.munday@ibm.com> | 2019-04-30 17:46:23 +0100 |
---|---|---|
committer | Michael Munday <mike.munday@ibm.com> | 2019-05-03 10:41:15 +0000 |
commit | 2c1b5130aa03b850e21a8c0e82943228305a60d9 (patch) | |
tree | 6aa2d5bb15659b5bde4f855be6f8f7579dbb5e1e /test/codegen/mathbits.go | |
parent | 004fb5cb8dd74b9ac46a9990e9a028673b88e838 (diff) | |
download | go-git-2c1b5130aa03b850e21a8c0e82943228305a60d9.tar.gz |
cmd/compile: add math/bits.{Add,Sub}64 intrinsics on s390x
This CL adds intrinsics for the 64-bit addition and subtraction
functions in math/bits. These intrinsics use the condition code
to propagate the carry or borrow bit.
To make the carry chains more efficient, I've removed the
'clobberFlags' property from most of the load and store
operations. Originally these ops did clobber flags when using
offsets that didn't fit in a signed 20-bit integer; however,
that is no longer true.
As with other platforms, the intrinsics are faster when executed
in a chain rather than a loop because currently we need to spill
and restore the carry bit between loop iterations. We may
be able to reduce the need to do this on s390x (e.g., by using
compare-and-branch instructions that do not clobber flags) in the
future.
name           old time/op  new time/op  delta
Add64          1.21ns ± 2%  2.03ns ± 2%  +67.18%  (p=0.000 n=7+10)
Add64multiple  2.98ns ± 3%  1.03ns ± 0%  -65.39%  (p=0.000 n=10+9)
Sub64          1.23ns ± 4%  2.03ns ± 1%  +64.85%  (p=0.000 n=10+10)
Sub64multiple  3.73ns ± 4%  1.04ns ± 1%  -72.28%  (p=0.000 n=10+8)
Change-Id: I913bbd5e19e6b95bef52f5bc4f14d6fe40119083
Reviewed-on: https://go-review.googlesource.com/c/go/+/174303
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'test/codegen/mathbits.go')
-rw-r--r-- | test/codegen/mathbits.go | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index b60e0ff519..0d94bd1bc8 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -377,32 +377,38 @@ func IterateBits8(n uint8) int { func Add(x, y, ci uint) (r, co uint) { // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" // amd64:"NEGL","ADCQ","SBBQ","NEGQ" + // s390x:"ADDE","ADDC\t[$]-1," return bits.Add(x, y, ci) } func AddC(x, ci uint) (r, co uint) { // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" // amd64:"NEGL","ADCQ","SBBQ","NEGQ" + // s390x:"ADDE","ADDC\t[$]-1," return bits.Add(x, 7, ci) } func AddZ(x, y uint) (r, co uint) { // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP" // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ" + // s390x:"ADDC",-"ADDC\t[$]-1," return bits.Add(x, y, 0) } func AddR(x, y, ci uint) uint { // arm64:"ADDS","ADCS",-"ADD\t",-"CMP" // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ" + // s390x:"ADDE","ADDC\t[$]-1," r, _ := bits.Add(x, y, ci) return r } + func AddM(p, q, r *[3]uint) { var c uint r[0], c = bits.Add(p[0], q[0], c) // arm64:"ADCS",-"ADD\t",-"CMP" // amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ" + // s390x:"ADDE",-"ADDC\t[$]-1," r[1], c = bits.Add(p[1], q[1], c) r[2], c = bits.Add(p[2], q[2], c) } @@ -412,6 +418,7 @@ func Add64(x, y, ci uint64) (r, co uint64) { // amd64:"NEGL","ADCQ","SBBQ","NEGQ" // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" + // s390x:"ADDE","ADDC\t[$]-1," return bits.Add64(x, y, ci) } @@ -420,6 +427,7 @@ func Add64C(x, ci uint64) (r, co uint64) { // amd64:"NEGL","ADCQ","SBBQ","NEGQ" // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" + // s390x:"ADDE","ADDC\t[$]-1," return bits.Add64(x, 7, ci) } @@ -428,6 +436,7 @@ func Add64Z(x, y uint64) (r, co uint64) { // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ" // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" + // s390x:"ADDC",-"ADDC\t[$]-1," return bits.Add64(x, y, 0) } @@ -436,6 +445,7 @@ func Add64R(x, y, ci uint64) uint64 { // 
amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ" // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" + // s390x:"ADDE","ADDC\t[$]-1," r, _ := bits.Add64(x, y, ci) return r } @@ -446,6 +456,7 @@ func Add64M(p, q, r *[3]uint64) { // amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ" // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" + // s390x:"ADDE",-"ADDC\t[$]-1," r[1], c = bits.Add64(p[1], q[1], c) r[2], c = bits.Add64(p[2], q[2], c) } @@ -457,24 +468,28 @@ func Add64M(p, q, r *[3]uint64) { func Sub(x, y, ci uint) (r, co uint) { // amd64:"NEGL","SBBQ","NEGQ" // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" + // s390x:"SUBE" return bits.Sub(x, y, ci) } func SubC(x, ci uint) (r, co uint) { // amd64:"NEGL","SBBQ","NEGQ" // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" + // s390x:"SUBE" return bits.Sub(x, 7, ci) } func SubZ(x, y uint) (r, co uint) { // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL" // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP" + // s390x:"SUBC" return bits.Sub(x, y, 0) } func SubR(x, y, ci uint) uint { // amd64:"NEGL","SBBQ",-"NEGQ" // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP" + // s390x:"SUBE" r, _ := bits.Sub(x, y, ci) return r } @@ -483,6 +498,7 @@ func SubM(p, q, r *[3]uint) { r[0], c = bits.Sub(p[0], q[0], c) // amd64:"SBBQ",-"NEGL",-"NEGQ" // arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP" + // s390x:"SUBE" r[1], c = bits.Sub(p[1], q[1], c) r[2], c = bits.Sub(p[2], q[2], c) } @@ -490,24 +506,28 @@ func SubM(p, q, r *[3]uint) { func Sub64(x, y, ci uint64) (r, co uint64) { // amd64:"NEGL","SBBQ","NEGQ" // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" + // s390x:"SUBE" return bits.Sub64(x, y, ci) } func Sub64C(x, ci uint64) (r, co uint64) { // amd64:"NEGL","SBBQ","NEGQ" // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP" + // s390x:"SUBE" return bits.Sub64(x, 7, ci) } func Sub64Z(x, y uint64) (r, co uint64) { // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL" // 
arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP" + // s390x:"SUBC" return bits.Sub64(x, y, 0) } func Sub64R(x, y, ci uint64) uint64 { // amd64:"NEGL","SBBQ",-"NEGQ" // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP" + // s390x:"SUBE" r, _ := bits.Sub64(x, y, ci) return r } @@ -516,6 +536,7 @@ func Sub64M(p, q, r *[3]uint64) { r[0], c = bits.Sub64(p[0], q[0], c) // amd64:"SBBQ",-"NEGL",-"NEGQ" // arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP" + // s390x:"SUBE" r[1], c = bits.Sub64(p[1], q[1], c) r[2], c = bits.Sub64(p[2], q[2], c) } |