From: Lynn Boger Date: Thu, 26 Mar 2020 20:01:40 +0000 (-0400) Subject: cmd/compile: add rules to eliminate unnecessary signed shifts X-Git-Tag: go1.15beta1~737 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=e4a1cf8a5698d7351af0e33d61e4f7078f3ab1ce;p=gostls13.git cmd/compile: add rules to eliminate unnecessary signed shifts This change to the rules removes some unnecessary signed shifts that appear in the math/rand functions. Existing rules did not cover some of the signed cases. A little improvement seen in math/rand due to removing 1 of 2 instructions generated for Int31n, which is inlined quite a bit. Intn1000 46.9ns ± 0% 45.5ns ± 0% -2.99% (p=1.000 n=1+1) Int63n1000 33.5ns ± 0% 32.8ns ± 0% -2.09% (p=1.000 n=1+1) Int31n1000 32.7ns ± 0% 32.6ns ± 0% -0.31% (p=1.000 n=1+1) Float32 32.7ns ± 0% 30.3ns ± 0% -7.34% (p=1.000 n=1+1) Float64 21.7ns ± 0% 20.9ns ± 0% -3.69% (p=1.000 n=1+1) Perm3 205ns ± 0% 202ns ± 0% -1.46% (p=1.000 n=1+1) Perm30 1.71µs ± 0% 1.68µs ± 0% -1.35% (p=1.000 n=1+1) Perm30ViaShuffle 1.65µs ± 0% 1.65µs ± 0% -0.30% (p=1.000 n=1+1) ShuffleOverhead 2.83µs ± 0% 2.83µs ± 0% -0.07% (p=1.000 n=1+1) Read3 18.7ns ± 0% 16.1ns ± 0% -13.90% (p=1.000 n=1+1) Read64 126ns ± 0% 124ns ± 0% -1.59% (p=1.000 n=1+1) Read1000 1.75µs ± 0% 1.63µs ± 0% -7.08% (p=1.000 n=1+1) Change-Id: I11502dfca7d65aafc76749a8d713e9e50c24a858 Reviewed-on: https://go-review.googlesource.com/c/go/+/225917 Run-TryBot: Lynn Boger Reviewed-by: Cherry Zhang --- diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 740f9fbaa7..be7a9858ef 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -704,19 +704,24 @@ (MOVBZreg (SRDconst [c] x)) && c>=56 -> (SRDconst [c] x) (MOVBreg (SRDconst [c] x)) && c>56 -> (SRDconst [c] x) (MOVBreg (SRDconst [c] x)) && c==56 -> (SRADconst [c] x) +(MOVBreg (SRADconst [c] x)) && c>=56 -> (SRADconst [c] x) (MOVBZreg (SRWconst [c] x)) && c>=24 -> (SRWconst [c] x) (MOVBreg (SRWconst [c] x)) && c>24 -> (SRWconst [c] x) (MOVBreg (SRWconst [c] x)) && c==24 -> (SRAWconst [c] x) +(MOVBreg (SRAWconst [c] x)) && c>=24 -> (SRAWconst [c] x) (MOVHZreg (SRDconst [c] x)) && c>=48 -> (SRDconst [c] x) (MOVHreg (SRDconst [c] x)) && c>48 -> (SRDconst [c] x) (MOVHreg (SRDconst [c] x)) && c==48 -> (SRADconst [c] x) +(MOVHreg (SRADconst [c] x)) && c>=48 -> (SRADconst [c] x) (MOVHZreg (SRWconst [c] x)) && c>=16 -> (SRWconst [c] x) (MOVHreg (SRWconst [c] x)) && c>16 -> (SRWconst [c] x) +(MOVHreg (SRAWconst [c] x)) && c>=16 -> (SRAWconst [c] x) (MOVHreg (SRWconst [c] x)) && c==16 -> (SRAWconst [c] x) (MOVWZreg (SRDconst [c] x)) && c>=32 -> (SRDconst [c] x) (MOVWreg (SRDconst [c] x)) && c>32 -> (SRDconst [c] x) +(MOVWreg (SRADconst [c] x)) && c>=32 -> (SRADconst [c] x) (MOVWreg (SRDconst [c] x)) && c==32 -> (SRADconst [c] x) // Various redundant zero/sign extension combinations. diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 695445a8bd..d5568b696f 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -6427,6 +6427,23 @@ func rewriteValuePPC64_OpPPC64MOVBreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVBreg (SRADconst [c] x)) + // cond: c>=56 + // result: (SRADconst [c] x) + for { + if v_0.Op != OpPPC64SRADconst { + break + } + c := v_0.AuxInt + x := v_0.Args[0] + if !(c >= 56) { + break + } + v.reset(OpPPC64SRADconst) + v.AuxInt = c + v.AddArg(x) + return true + } // match: (MOVBreg (SRWconst [c] x)) // cond: c>24 // result: (SRWconst [c] x) @@ -6461,6 +6478,23 @@ func rewriteValuePPC64_OpPPC64MOVBreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVBreg (SRAWconst [c] x)) + // cond: c>=24 + // result: (SRAWconst [c] x) + for { + if v_0.Op != OpPPC64SRAWconst { + break + } + c := v_0.AuxInt + x := v_0.Args[0] + if !(c >= 24) { + break + } + v.reset(OpPPC64SRAWconst) + v.AuxInt = c + v.AddArg(x) + return true + } // match: (MOVBreg y:(MOVBreg _)) // result: y for { @@ -8487,6 +8521,23 @@ func rewriteValuePPC64_OpPPC64MOVHreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVHreg (SRADconst [c] x)) + // cond: c>=48 + // result: (SRADconst [c] x) + for { + if v_0.Op != OpPPC64SRADconst { + break + } + c := v_0.AuxInt + x := v_0.Args[0] + if !(c >= 48) { + break + } + v.reset(OpPPC64SRADconst) + v.AuxInt = c + v.AddArg(x) + return true + } // match: (MOVHreg (SRWconst [c] x)) // cond: c>16 // result: (SRWconst [c] x) @@ -8504,6 +8555,23 @@ func rewriteValuePPC64_OpPPC64MOVHreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVHreg (SRAWconst [c] x)) + // cond: c>=16 + // result: (SRAWconst [c] x) + for { + if v_0.Op != OpPPC64SRAWconst { + break + } + c := v_0.AuxInt + x := v_0.Args[0] + if !(c >= 16) { + break + } + v.reset(OpPPC64SRAWconst) + v.AuxInt = c + v.AddArg(x) + return true + } // match: (MOVHreg (SRWconst [c] x)) // cond: c==16 // result: (SRAWconst [c] x) @@ -9648,6 +9716,23 @@ func rewriteValuePPC64_OpPPC64MOVWreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVWreg (SRADconst [c] x)) + // cond: c>=32 + // result: (SRADconst [c] x) + for { + if v_0.Op != OpPPC64SRADconst { + break + } + c := v_0.AuxInt + x := v_0.Args[0] + if !(c >= 32) { + break + } + v.reset(OpPPC64SRADconst) + v.AuxInt = c + v.AddArg(x) + return true + } // match: (MOVWreg (SRDconst [c] x)) // cond: c==32 // result: (SRADconst [c] x) diff --git a/test/codegen/shift.go b/test/codegen/shift.go index f287ca68b7..305c39a1d8 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -125,3 +125,26 @@ func lshGuarded64(v int64, s uint) int64 { } panic("shift too large") } + +func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { + + // ppc64le:-".*MOVW" + f := int32(v>>32) + // ppc64le:".*MOVW" + f += int32(v>>31) + // ppc64le:-".*MOVH" + g := int16(v>>48) + // ppc64le:".*MOVH" + g += int16(v>>30) + // ppc64le:-".*MOVH" + g += int16(f>>16) + // ppc64le:-".*MOVB" + h := int8(v>>56) + // ppc64le:".*MOVB" + h += int8(v>>28) + // ppc64le:-".*MOVB" + h += int8(f>>24) + // ppc64le:".*MOVB" + h += int8(f>>16) + return int64(h),uint64(g) +}