Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: eliminate unnecessary type conversions in TrailingZeros(16|8) for arm
author Tobias Klauser <tklauser@distanz.ch>
Fri, 15 Mar 2019 07:49:38 +0000 (08:49 +0100)
committer Tobias Klauser <tobias.klauser@gmail.com>
Fri, 15 Mar 2019 18:37:22 +0000 (18:37 +0000)
This follows CL 156999 which did the same for arm64.

name               old time/op  new time/op  delta
TrailingZeros-4    7.30ns ± 1%  7.30ns ± 0%     ~     (p=0.413 n=9+9)
TrailingZeros8-4   8.32ns ± 0%  7.17ns ± 0%  -13.77%  (p=0.000 n=10+9)
TrailingZeros16-4  8.30ns ± 0%  7.18ns ± 0%  -13.50%  (p=0.000 n=9+10)
TrailingZeros32-4  6.46ns ± 1%  6.47ns ± 1%     ~     (p=0.325 n=10+10)
TrailingZeros64-4  16.3ns ± 0%  16.2ns ± 0%   -0.61%  (p=0.000 n=7+10)

Change-Id: I7e9e1abf7e30d811aa474d272b2824ec7cbbaa98
Reviewed-on: https://go-review.googlesource.com/c/go/+/167797
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/gen/ARM.rules
src/cmd/compile/internal/ssa/rewriteARM.go
test/codegen/mathbits.go

diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 62301642f5fc515a3e26feba067504e4e84af482..3ccb59e105d67425aee9975236d6f6c7caf49129 100644 (file)
@@ -3288,12 +3288,12 @@ func init() {
                        y := s.newValue2(ssa.OpOr32, types.Types[TUINT32], x, c)
                        return s.newValue1(ssa.OpCtz32, types.Types[TINT], y)
                },
-               sys.ARM, sys.MIPS)
+               sys.MIPS)
        addF("math/bits", "TrailingZeros16",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpCtz16, types.Types[TINT], args[0])
                },
-               sys.AMD64, sys.ARM64, sys.Wasm)
+               sys.AMD64, sys.ARM, sys.ARM64, sys.Wasm)
        addF("math/bits", "TrailingZeros16",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
@@ -3309,12 +3309,12 @@ func init() {
                        y := s.newValue2(ssa.OpOr32, types.Types[TUINT32], x, c)
                        return s.newValue1(ssa.OpCtz32, types.Types[TINT], y)
                },
-               sys.ARM, sys.MIPS)
+               sys.MIPS)
        addF("math/bits", "TrailingZeros8",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpCtz8, types.Types[TINT], args[0])
                },
-               sys.AMD64, sys.ARM64, sys.Wasm)
+               sys.AMD64, sys.ARM, sys.ARM64, sys.Wasm)
        addF("math/bits", "TrailingZeros8",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        x := s.newValue1(ssa.OpZeroExt8to64, types.Types[TUINT64], args[0])
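diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules
index db418b76a664f8519b2eeb97edc0c2d85b972a95..a3f36d3009ac41b4fe59bb640d484cbf81ff04c3 100644 (file)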
 
 // TODO: optimize this for ARMv5 and ARMv6
 (Ctz32NonZero x) -> (Ctz32 x)
+(Ctz16NonZero x) -> (Ctz32 x)
+(Ctz8NonZero x) -> (Ctz32 x)
 
 // count trailing zero for ARMv5 and ARMv6
 // 32 - CLZ(x&-x - 1)
-(Ctz32 <t> x) && objabi.GOARM<=6 -> (RSBconst [32] (CLZ <t> (SUBconst <t> (AND <t> x (RSBconst <t> [0] x)) [1])))
+(Ctz32 <t> x) && objabi.GOARM<=6 ->
+       (RSBconst [32] (CLZ <t> (SUBconst <t> (AND <t> x (RSBconst <t> [0] x)) [1])))
+(Ctz16 <t> x) && objabi.GOARM<=6 ->
+       (RSBconst [32] (CLZ <t> (SUBconst <typ.UInt32> (AND <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x) (RSBconst <typ.UInt32> [0] (ORconst <typ.UInt32> [0x10000] x))) [1])))
+(Ctz8 <t> x) && objabi.GOARM<=6 ->
+       (RSBconst [32] (CLZ <t> (SUBconst <typ.UInt32> (AND <typ.UInt32> (ORconst <typ.UInt32> [0x100] x) (RSBconst <typ.UInt32> [0] (ORconst <typ.UInt32> [0x100] x))) [1])))
 
 // count trailing zero for ARMv7
 (Ctz32 <t> x) && objabi.GOARM==7 -> (CLZ <t> (RBIT <t> x))
+(Ctz16 <t> x) && objabi.GOARM==7 -> (CLZ <t> (RBIT <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
+(Ctz8 <t> x) && objabi.GOARM==7 -> (CLZ <t> (RBIT <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))
 
 // bit length
 (BitLen32 <t> x) -> (RSBconst [32] (CLZ <t> x))
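diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go
index e6635ad6b5a29cced8f558b783442218d1ac9fdf..37a34a9977295906140a64809b15dce5020affe7 100644 (file)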
@@ -483,10 +483,18 @@ func rewriteValueARM(v *Value) bool {
                return rewriteValueARM_OpConstBool_0(v)
        case OpConstNil:
                return rewriteValueARM_OpConstNil_0(v)
+       case OpCtz16:
+               return rewriteValueARM_OpCtz16_0(v)
+       case OpCtz16NonZero:
+               return rewriteValueARM_OpCtz16NonZero_0(v)
        case OpCtz32:
                return rewriteValueARM_OpCtz32_0(v)
        case OpCtz32NonZero:
                return rewriteValueARM_OpCtz32NonZero_0(v)
+       case OpCtz8:
+               return rewriteValueARM_OpCtz8_0(v)
+       case OpCtz8NonZero:
+               return rewriteValueARM_OpCtz8NonZero_0(v)
        case OpCvt32Fto32:
                return rewriteValueARM_OpCvt32Fto32_0(v)
        case OpCvt32Fto32U:
@@ -17550,6 +17558,72 @@ func rewriteValueARM_OpConstNil_0(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM_OpCtz16_0(v *Value) bool {
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Ctz16 <t> x)
+       // cond: objabi.GOARM<=6
+       // result: (RSBconst [32] (CLZ <t> (SUBconst <typ.UInt32> (AND <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x) (RSBconst <typ.UInt32> [0] (ORconst <typ.UInt32> [0x10000] x))) [1])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if !(objabi.GOARM <= 6) {
+                       break
+               }
+               v.reset(OpARMRSBconst)
+               v.AuxInt = 32
+               v0 := b.NewValue0(v.Pos, OpARMCLZ, t)
+               v1 := b.NewValue0(v.Pos, OpARMSUBconst, typ.UInt32)
+               v1.AuxInt = 1
+               v2 := b.NewValue0(v.Pos, OpARMAND, typ.UInt32)
+               v3 := b.NewValue0(v.Pos, OpARMORconst, typ.UInt32)
+               v3.AuxInt = 0x10000
+               v3.AddArg(x)
+               v2.AddArg(v3)
+               v4 := b.NewValue0(v.Pos, OpARMRSBconst, typ.UInt32)
+               v4.AuxInt = 0
+               v5 := b.NewValue0(v.Pos, OpARMORconst, typ.UInt32)
+               v5.AuxInt = 0x10000
+               v5.AddArg(x)
+               v4.AddArg(v5)
+               v2.AddArg(v4)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Ctz16 <t> x)
+       // cond: objabi.GOARM==7
+       // result: (CLZ <t> (RBIT <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if !(objabi.GOARM == 7) {
+                       break
+               }
+               v.reset(OpARMCLZ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpARMRBIT, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpARMORconst, typ.UInt32)
+               v1.AuxInt = 0x10000
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM_OpCtz16NonZero_0(v *Value) bool {
+       // match: (Ctz16NonZero x)
+       // cond:
+       // result: (Ctz32 x)
+       for {
+               x := v.Args[0]
+               v.reset(OpCtz32)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueARM_OpCtz32_0(v *Value) bool {
        b := v.Block
        // match: (Ctz32 <t> x)
@@ -17606,6 +17680,72 @@ func rewriteValueARM_OpCtz32NonZero_0(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM_OpCtz8_0(v *Value) bool {
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Ctz8 <t> x)
+       // cond: objabi.GOARM<=6
+       // result: (RSBconst [32] (CLZ <t> (SUBconst <typ.UInt32> (AND <typ.UInt32> (ORconst <typ.UInt32> [0x100] x) (RSBconst <typ.UInt32> [0] (ORconst <typ.UInt32> [0x100] x))) [1])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if !(objabi.GOARM <= 6) {
+                       break
+               }
+               v.reset(OpARMRSBconst)
+               v.AuxInt = 32
+               v0 := b.NewValue0(v.Pos, OpARMCLZ, t)
+               v1 := b.NewValue0(v.Pos, OpARMSUBconst, typ.UInt32)
+               v1.AuxInt = 1
+               v2 := b.NewValue0(v.Pos, OpARMAND, typ.UInt32)
+               v3 := b.NewValue0(v.Pos, OpARMORconst, typ.UInt32)
+               v3.AuxInt = 0x100
+               v3.AddArg(x)
+               v2.AddArg(v3)
+               v4 := b.NewValue0(v.Pos, OpARMRSBconst, typ.UInt32)
+               v4.AuxInt = 0
+               v5 := b.NewValue0(v.Pos, OpARMORconst, typ.UInt32)
+               v5.AuxInt = 0x100
+               v5.AddArg(x)
+               v4.AddArg(v5)
+               v2.AddArg(v4)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Ctz8 <t> x)
+       // cond: objabi.GOARM==7
+       // result: (CLZ <t> (RBIT <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               if !(objabi.GOARM == 7) {
+                       break
+               }
+               v.reset(OpARMCLZ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpARMRBIT, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpARMORconst, typ.UInt32)
+               v1.AuxInt = 0x100
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueARM_OpCtz8NonZero_0(v *Value) bool {
+       // match: (Ctz8NonZero x)
+       // cond:
+       // result: (Ctz32 x)
+       for {
+               x := v.Args[0]
+               v.reset(OpCtz32)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueARM_OpCvt32Fto32_0(v *Value) bool {
        // match: (Cvt32Fto32 x)
        // cond:
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 9a4051a0cecafdb7f793921bebc52f756c41ff5f..3d5f1f64c8cc3cc17f38d069536c1848cc10b73b 100644 (file)
@@ -258,6 +258,7 @@ func RotateLeftVariable32(n uint32, m int) uint32 {
 
 func TrailingZeros(n uint) int {
        // amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
+       // arm:"CLZ"
        // arm64:"RBIT","CLZ"
        // s390x:"FLOGR"
        // ppc64:"ANDN","POPCNTD"
@@ -278,6 +279,7 @@ func TrailingZeros64(n uint64) int {
 
 func TrailingZeros32(n uint32) int {
        // amd64:"BTSQ\\t\\$32","BSFQ"
+       // arm:"CLZ"
        // arm64:"RBITW","CLZW"
        // s390x:"FLOGR","MOVWZ"
        // ppc64:"ANDN","POPCNTW"
@@ -288,6 +290,7 @@ func TrailingZeros32(n uint32) int {
 
 func TrailingZeros16(n uint16) int {
        // amd64:"BSFL","BTSL\\t\\$16"
+       // arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR"
        // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
        // s390x:"FLOGR","OR\t\\$65536"
        // ppc64:"POPCNTD","OR\\t\\$65536"
@@ -298,6 +301,7 @@ func TrailingZeros16(n uint16) int {
 
 func TrailingZeros8(n uint8) int {
        // amd64:"BSFL","BTSL\\t\\$8"
+       // arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
        // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
        // s390x:"FLOGR","OR\t\\$256"
        // wasm:"I64Ctz"