From d5e5b143057921721061c3f39f14061370e4a732 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Wed, 5 Jun 2024 16:12:49 -0500 Subject: [PATCH] cmd/compile/ssa: fix (MOVWZreg (RLWINM)) folding on PPC64 RLIWNM does not clear the upper 32 bits of the target register if the mask wraps around (e.g 0xF000000F). Don't elide MOVWZreg for such masks. All other usage clears the upper 32 bits. Fixes #67844. Change-Id: I11b89f1da9ae077624369bfe2bf25e9b7c9b79bc Reviewed-on: https://go-review.googlesource.com/c/go/+/590896 Reviewed-by: Keith Randall Reviewed-by: Cherry Mui Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/ssa/_gen/PPC64.rules | 2 +- src/cmd/compile/internal/ssa/rewrite.go | 10 ++++++++++ src/cmd/compile/internal/ssa/rewritePPC64.go | 8 ++++---- test/codegen/bits.go | 5 ++++- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules index b515b537f5..d89cc59714 100644 --- a/src/cmd/compile/internal/ssa/_gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/_gen/PPC64.rules @@ -646,7 +646,7 @@ (MOVBZreg (SRWconst x [s])) && mergePPC64AndSrwi(0xFF,s) != 0 => (RLWINM [mergePPC64AndSrwi(0xFF,s)] x) (MOVBZreg (RLWINM [r] y)) && mergePPC64AndRlwinm(0xFF,r) != 0 => (RLWINM [mergePPC64AndRlwinm(0xFF,r)] y) (MOVHZreg (RLWINM [r] y)) && mergePPC64AndRlwinm(0xFFFF,r) != 0 => (RLWINM [mergePPC64AndRlwinm(0xFFFF,r)] y) -(MOVWZreg (RLWINM [r] y)) && mergePPC64AndRlwinm(0xFFFFFFFF,r) != 0 => (RLWINM [mergePPC64AndRlwinm(0xFFFFFFFF,r)] y) +(MOVWZreg (RLWINM [r] y)) && mergePPC64MovwzregRlwinm(r) != 0 => (RLWINM [mergePPC64MovwzregRlwinm(r)] y) (ANDconst [m] (RLWINM [r] y)) && mergePPC64AndRlwinm(uint32(m),r) != 0 => (RLWINM [mergePPC64AndRlwinm(uint32(m),r)] y) (SLDconst [s] (RLWINM [r] y)) && mergePPC64SldiRlwinm(s,r) != 0 => (RLWINM [mergePPC64SldiRlwinm(s,r)] y) (RLWINM [r] (MOVHZreg u)) && mergePPC64RlwinmAnd(r,0xFFFF) != 0 => (RLWINM [mergePPC64RlwinmAnd(r,0xFFFF)] u) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index f90e65f492..aeec2b3768 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1666,6 +1666,16 @@ func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 { return encodePPC64RotateMask(r, int64(mask_out), 32) } +// Test if RLWINM opcode rlw clears the upper 32 bits of the +// result. Return rlw if it does, 0 otherwise. +func mergePPC64MovwzregRlwinm(rlw int64) int64 { + _, mb, me, _ := DecodePPC64RotateMask(rlw) + if mb > me { + return 0 + } + return rlw +} + // Test if AND feeding into an ANDconst can be merged. Return the encoded RLWINM constant, // or 0 if they cannot be merged. func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 { diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 209f029e33..b45770995e 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -10102,19 +10102,19 @@ func rewriteValuePPC64_OpPPC64MOVWZreg(v *Value) bool { return true } // match: (MOVWZreg (RLWINM [r] y)) - // cond: mergePPC64AndRlwinm(0xFFFFFFFF,r) != 0 - // result: (RLWINM [mergePPC64AndRlwinm(0xFFFFFFFF,r)] y) + // cond: mergePPC64MovwzregRlwinm(r) != 0 + // result: (RLWINM [mergePPC64MovwzregRlwinm(r)] y) for { if v_0.Op != OpPPC64RLWINM { break } r := auxIntToInt64(v_0.AuxInt) y := v_0.Args[0] - if !(mergePPC64AndRlwinm(0xFFFFFFFF, r) != 0) { + if !(mergePPC64MovwzregRlwinm(r) != 0) { break } v.reset(OpPPC64RLWINM) - v.AuxInt = int64ToAuxInt(mergePPC64AndRlwinm(0xFFFFFFFF, r)) + v.AuxInt = int64ToAuxInt(mergePPC64MovwzregRlwinm(r)) v.AddArg(y) return true } diff --git a/test/codegen/bits.go b/test/codegen/bits.go index 4b6c8b94b8..554e363ef5 100644 --- a/test/codegen/bits.go +++ b/test/codegen/bits.go @@ -394,7 +394,7 @@ func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) { } // Verify rotate and mask instructions, and further simplified instructions for small types -func bitRotateAndMask(io64 [4]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) { +func bitRotateAndMask(io64 [8]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) { // ppc64x: "RLDICR\t[$]0, R[0-9]*, [$]47, R" io64[0] = io64[0] & 0xFFFFFFFFFFFF0000 // ppc64x: "RLDICL\t[$]0, R[0-9]*, [$]16, R" @@ -404,6 +404,9 @@ func bitRotateAndMask(io64 [4]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uin // ppc64x: -"SRD", -"AND", "RLDICL\t[$]36, R[0-9]*, [$]28, R" io64[3] = (io64[3] >> 28) & 0x0000FFFFFFFFFFFF + // ppc64x: "MOVWZ", "RLWNM\t[$]1, R[0-9]*, [$]28, [$]3, R" + io64[4] = uint64(bits.RotateLeft32(io32[0], 1) & 0xF000000F) + // ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]4, [$]19, R" io32[0] = io32[0] & 0x0FFFF000 // ppc64x: "RLWNM\t[$]0, R[0-9]*, [$]20, [$]3, R" -- 2.48.1