diff options
author | Keith Randall <khr@golang.org> | 2023-05-09 13:25:40 -0700 |
---|---|---|
committer | Keith Randall <khr@golang.org> | 2023-05-16 19:13:26 +0000 |
commit | 6042a062dc2556a0a1c06d3b85b6c080644da04e (patch) | |
tree | 8e7e0c857c322ef3411cd34e486c17e87778a1ab | |
parent | 6fc5e7d4b52986f82ec25d5993ff7f8bde8b61f5 (diff) | |
download | go-git-6042a062dc2556a0a1c06d3b85b6c080644da04e.tar.gz |
cmd/compile: make memcombine pass a bit more robust to reassociation of exprs
Be more liberal about expanding the OR tree. Handle any tree shape
instead of a fully left or right associative tree.
Also remove tail feature, it isn't ever needed.
Change-Id: If16bebef94b952a604d6069e9be3d9129994cb6f
Reviewed-on: https://go-review.googlesource.com/c/go/+/494056
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Ryan Berger <ryanbberger@gmail.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
-rw-r--r-- | src/cmd/compile/internal/ssa/memcombine.go | 41 | ||||
-rw-r--r-- | test/codegen/memcombine.go | 26 |
2 files changed, 43 insertions, 24 deletions
diff --git a/src/cmd/compile/internal/ssa/memcombine.go b/src/cmd/compile/internal/ssa/memcombine.go index fc0b665b34..b2c5fe3abf 100644 --- a/src/cmd/compile/internal/ssa/memcombine.go +++ b/src/cmd/compile/internal/ssa/memcombine.go @@ -141,27 +141,26 @@ func combineLoads(root *Value, n int64) bool { // Find n values that are ORed together with the above op. a := make([]*Value, 0, 8) - v := root - for int64(len(a)) < n { - if v.Args[0].Op == orOp { - a = append(a, v.Args[1]) - v = v.Args[0] - } else if v.Args[1].Op == orOp { - a = append(a, v.Args[0]) - v = v.Args[1] - } else if int64(len(a)) == n-2 { - a = append(a, v.Args[0]) - a = append(a, v.Args[1]) - v = nil - } else { + a = append(a, root) + for i := 0; i < len(a) && int64(len(a)) < n; i++ { + v := a[i] + if v.Uses != 1 && v != root { + // Something in this subtree is used somewhere else. return false } + if v.Op == orOp { + a[i] = v.Args[0] + a = append(a, v.Args[1]) + i-- + } + } + if int64(len(a)) != n { + return false } - tail := v // Value to OR in beyond the ones we're working with (or nil if none). // Check that the first entry to see what ops we're looking for. // All the entries should be of the form shift(extend(load)), maybe with no shift. - v = a[0] + v := a[0] if v.Op == shiftOp { v = v.Args[0] } @@ -317,15 +316,9 @@ func combineLoads(root *Value, n int64) bool { v = leftShift(loadBlock, pos, v, shift0-(n-1)*8) } - // Install. If there's a tail, make the root (OR v tail). - // If not, do (Copy v). - if tail != nil { - root.SetArg(0, v) - root.SetArg(1, tail) - } else { - root.reset(OpCopy) - root.AddArg(v) - } + // Install with (Copy v). + root.reset(OpCopy) + root.AddArg(v) // Clobber the loads, just to prevent additional work being done on // subtrees (which are now unreachable). diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index c7a2c7e5ac..0d1c390dfc 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -338,6 +338,32 @@ func load_be_byte8_uint64_idx8(s []byte, idx int) uint64 { return uint64(s[idx<<3])<<56 | uint64(s[(idx<<3)+1])<<48 | uint64(s[(idx<<3)+2])<<40 | uint64(s[(idx<<3)+3])<<32 | uint64(s[(idx<<3)+4])<<24 | uint64(s[(idx<<3)+5])<<16 | uint64(s[(idx<<3)+6])<<8 | uint64(s[(idx<<3)+7]) } +// Some tougher cases for the memcombine pass. + +func reassoc_load_uint32(b []byte) uint32 { + // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR` + return (uint32(b[0]) | uint32(b[1])<<8) | (uint32(b[2])<<16 | uint32(b[3])<<24) +} + +func extrashift_load_uint32(b []byte) uint32 { + // amd64:`MOVL\s\([A-Z]+\)`,`SHLL\s[$]2`,-`MOV[BW]`,-`OR` + return uint32(b[0])<<2 | uint32(b[1])<<10 | uint32(b[2])<<18 | uint32(b[3])<<26 + +} + +func outoforder_load_uint32(b []byte) uint32 { + // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR` + return uint32(b[0]) | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[3])<<24 +} + +func extraOr_load_uint32(b []byte, x, y uint32) uint32 { + // amd64:`ORL\s\([A-Z]+\)`,-`MOV[BW]` + return x | binary.LittleEndian.Uint32(b) | y + // TODO: Note that + // x | uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | y + // doesn't work because it associates in a way that memcombine can't detect it. +} + // Check load combining across function calls. func fcall_byte(a [2]byte) [2]byte { |