]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify TrailingZeros{8,32,64} for 386
author Wayne Zuo <wdvxdr@golangcn.org>
Sun, 12 Mar 2023 07:34:20 +0000 (15:34 +0800)
committer Wayne Zuo <wdvxdr@golangcn.org>
Tue, 14 Mar 2023 08:10:32 +0000 (08:10 +0000)
This CL adds support for intrinsifying the TrailingZeros{8,32,64}
functions on the 386 architecture. We need to handle the case where the
input is 0, which could lead to undefined output from the BSFL instruction.

The next CL will remove the assembly code in the runtime/internal/sys package.

Change-Id: Ic168edf68e81bf69a536102100fdd3f56f0f4a1b
Reviewed-on: https://go-review.googlesource.com/c/go/+/475735
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org>
TryBot-Result: Gopher Robot <gobot@golang.org>

src/cmd/compile/internal/ssa/_gen/386.rules
src/cmd/compile/internal/ssa/_gen/386Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite386.go
src/cmd/compile/internal/ssagen/ssa.go
src/cmd/compile/internal/x86/ssa.go
test/codegen/mathbits.go

index db16ab0961face035adda4f07fca004b932fbb7b..03413b289edc475b4cb89049789a194b913c12b2 100644 (file)
 (Sqrt ...) => (SQRTSD ...)
 (Sqrt32 ...) => (SQRTSS ...)
 
+(Ctz8 x) => (BSFL (ORLconst <typ.UInt32> [0x100] x))
+(Ctz8NonZero ...) => (BSFL ...)
 (Ctz16 x) => (BSFL (ORLconst <typ.UInt32> [0x10000] x))
 (Ctz16NonZero ...) => (BSFL ...)
+(Ctz32 ...) => (LoweredCtz32 ...)
+(Ctz32NonZero ...) => (BSFL ...)
 
 // Lowering extension
 (SignExt8to16  ...) => (MOVBLSX ...)
index 6f19ea64274961159b8caf1b36edb605075eb533..7401ac871c3ffed2cc72fa520eb01ed01b4f150e 100644 (file)
@@ -302,6 +302,7 @@ func init() {
 
                {name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
                {name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
+               {name: "LoweredCtz32", argLength: 1, reg: gp11, clobberFlags: true},      // arg0 # of low-order zeroes
 
                {name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
                {name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
index 4a24012b1da9c72f5feb5ee66bc951a3ce38f59d..b5ca35953cfea117f4ffc900cc4b3f04063c52d6 100644 (file)
@@ -456,6 +456,7 @@ const (
        Op386NOTL
        Op386BSFL
        Op386BSFW
+       Op386LoweredCtz32
        Op386BSRL
        Op386BSRW
        Op386BSWAPL
@@ -5034,6 +5035,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "LoweredCtz32",
+               argLen:       1,
+               clobberFlags: true,
+               asm:          x86.ABSFL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                       },
+                       outputs: []outputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                       },
+               },
+       },
        {
                name:         "BSRL",
                argLen:       1,
index f658d9380a29d703c7874d2429f6d930839e0dc5..fe5bbe56a3603b66261dab2d957b7d7838e29a29 100644 (file)
@@ -315,6 +315,17 @@ func rewriteValue386(v *Value) bool {
        case OpCtz16NonZero:
                v.Op = Op386BSFL
                return true
+       case OpCtz32:
+               v.Op = Op386LoweredCtz32
+               return true
+       case OpCtz32NonZero:
+               v.Op = Op386BSFL
+               return true
+       case OpCtz8:
+               return rewriteValue386_OpCtz8(v)
+       case OpCtz8NonZero:
+               v.Op = Op386BSFL
+               return true
        case OpCvt32Fto32:
                v.Op = Op386CVTTSS2SL
                return true
@@ -8527,6 +8538,22 @@ func rewriteValue386_OpCtz16(v *Value) bool {
                return true
        }
 }
+func rewriteValue386_OpCtz8(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Ctz8 x)
+       // result: (BSFL (ORLconst <typ.UInt32> [0x100] x))
+       for {
+               x := v_0
+               v.reset(Op386BSFL)
+               v0 := b.NewValue0(v.Pos, Op386ORLconst, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(0x100)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValue386_OpDiv8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index b4a55c00af0ca2528eca62d1f6ade05d2ef0b8ef..e49ba5ee71645d555735a07efe02b4fb3311907a 100644 (file)
@@ -4492,12 +4492,12 @@ func InitTables() {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0])
                },
-               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+               sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
        addF("math/bits", "TrailingZeros32",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0])
                },
-               sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+               sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
        addF("math/bits", "TrailingZeros16",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0])
@@ -4531,7 +4531,7 @@ func InitTables() {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0])
                },
-               sys.AMD64, sys.ARM, sys.ARM64, sys.Wasm)
+               sys.AMD64, sys.I386, sys.ARM, sys.ARM64, sys.Wasm)
        addF("math/bits", "TrailingZeros8",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0])
index 6c92ca1f56553341cc10d15a21811b125f16856a..811a34cc0bf5931894b70c53fba44771bbdb7d47 100644 (file)
@@ -831,6 +831,29 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
                        base.WarnfAt(v.Pos, "generated nil check")
                }
+       case ssa.Op386LoweredCtz32:
+               // BSFL in, out
+               p := s.Prog(x86.ABSFL)
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = v.Args[0].Reg()
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg()
+
+               // JNZ 2(PC)
+               p1 := s.Prog(x86.AJNE)
+               p1.To.Type = obj.TYPE_BRANCH
+
+               // MOVL $32, out
+               p2 := s.Prog(x86.AMOVL)
+               p2.From.Type = obj.TYPE_CONST
+               p2.From.Offset = 32
+               p2.To.Type = obj.TYPE_REG
+               p2.To.Reg = v.Reg()
+
+               // NOP (so the JNZ has somewhere to land)
+               nop := s.Prog(obj.ANOP)
+               p1.To.SetTarget(nop)
+
        case ssa.OpClobber:
                p := s.Prog(x86.AMOVL)
                p.From.Type = obj.TYPE_CONST
index 86a44d7c9359a9c135daa5c25708ce277447bbbe..8c971cf760c60cb0a783ef3272b07723bba3e6ad 100644 (file)
@@ -293,6 +293,7 @@ func RotateLeftVariable32(n uint32, m int) uint32 {
 func TrailingZeros(n uint) int {
        // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
        // amd64/v3:"TZCNTQ"
+       // 386:"BSFL"
        // arm:"CLZ"
        // arm64:"RBIT","CLZ"
        // s390x:"FLOGR"
@@ -305,6 +306,7 @@ func TrailingZeros(n uint) int {
 func TrailingZeros64(n uint64) int {
        // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ"
        // amd64/v3:"TZCNTQ"
+       // 386:"BSFL"
        // arm64:"RBIT","CLZ"
        // s390x:"FLOGR"
        // ppc64x/power8:"ANDN","POPCNTD"
@@ -322,6 +324,7 @@ func TrailingZeros64Subtract(n uint64) int {
 func TrailingZeros32(n uint32) int {
        // amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ"
        // amd64/v3:"TZCNTL"
+       // 386:"BSFL"
        // arm:"CLZ"
        // arm64:"RBITW","CLZW"
        // s390x:"FLOGR","MOVWZ"
@@ -345,6 +348,7 @@ func TrailingZeros16(n uint16) int {
 
 func TrailingZeros8(n uint8) int {
        // amd64:"BSFL","BTSL\\t\\$8"
+       // 386:"BSFL"
        // arm:"ORR\t\\$256","CLZ",-"MOVBU\tR"
        // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
        // s390x:"FLOGR","OR\t\\$256"