From: Ben Shi Date: Wed, 7 Aug 2019 05:20:32 +0000 (+0000) Subject: cmd/compile: optimize 386's math.bits.TrailingZeros16 X-Git-Tag: go1.14beta1~1328 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=22355d6cd2421f13e560f90e77ca04ef816456da;p=gostls13.git cmd/compile: optimize 386's math.bits.TrailingZeros16 This CL optimizes math.bits.TrailingZeros16 on 386 with a pair of BSFL and ORL instrcutions. The case TrailingZeros16-4 of the benchmark test in math/bits shows big improvement. name old time/op new time/op delta TrailingZeros16-4 1.55ns ± 1% 0.87ns ± 1% -43.87% (p=0.000 n=50+49) Change-Id: Ia899975b0e46f45dcd20223b713ed632bc32740b Reviewed-on: https://go-review.googlesource.com/c/go/+/189277 Run-TryBot: Ben Shi TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 9871e11a09..069e2f6c9e 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -3370,7 +3370,7 @@ func init() { func(s *state, n *Node, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpCtz16, types.Types[TINT], args[0]) }, - sys.AMD64, sys.ARM, sys.ARM64, sys.Wasm) + sys.AMD64, sys.ARM, sys.I386, sys.ARM64, sys.Wasm) addF("math/bits", "TrailingZeros16", func(s *state, n *Node, args []*ssa.Value) *ssa.Value { x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0]) diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules index d7cbba1718..395ebb085d 100644 --- a/src/cmd/compile/internal/ssa/gen/386.rules +++ b/src/cmd/compile/internal/ssa/gen/386.rules @@ -66,6 +66,8 @@ (Sqrt x) -> (SQRTSD x) +(Ctz16 x) -> (BSFL (ORLconst [0x10000] x)) + // Lowering extension (SignExt8to16 x) -> (MOVBLSX x) (SignExt8to32 x) -> (MOVBLSX x) diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go index 812aa9420d..2336561560 100644 --- a/src/cmd/compile/internal/ssa/rewrite386.go +++ b/src/cmd/compile/internal/ssa/rewrite386.go @@ -360,6 +360,8 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_OpConstBool_0(v) case OpConstNil: return rewriteValue386_OpConstNil_0(v) + case OpCtz16: + return rewriteValue386_OpCtz16_0(v) case OpCvt32Fto32: return rewriteValue386_OpCvt32Fto32_0(v) case OpCvt32Fto64F: @@ -20783,6 +20785,22 @@ func rewriteValue386_OpConstNil_0(v *Value) bool { return true } } +func rewriteValue386_OpCtz16_0(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Ctz16 x) + // cond: + // result: (BSFL (ORLconst [0x10000] x)) + for { + x := v.Args[0] + v.reset(Op386BSFL) + v0 := b.NewValue0(v.Pos, Op386ORLconst, typ.UInt32) + v0.AuxInt = 0x10000 + v0.AddArg(x) + v.AddArg(v0) + return true + } +} func rewriteValue386_OpCvt32Fto32_0(v *Value) bool { // match: (Cvt32Fto32 x) // cond: diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index 0d94bd1bc8..779ea6e322 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -296,6 +296,7 @@ func TrailingZeros32(n uint32) int { func TrailingZeros16(n uint16) int { // amd64:"BSFL","BTSL\\t\\$16" + // 386:"BSFL\t" // arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR" // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t" // s390x:"FLOGR","OR\t\\$65536"