(Ctz16NonZero ...) => (BSFL ...)
(Ctz32 ...) => (LoweredCtz32 ...)
(Ctz32NonZero ...) => (BSFL ...)
+(Ctz64On32 ...) => (LoweredCtz64 ...) // two-argument form: arg0 is the low word, arg1 the high word of the 64-bit operand
// Lowering extension
(SignExt8to16 ...) => (MOVBLSX ...)
{name: "NOTL", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
- {name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
- {name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
- {name: "LoweredCtz32", argLength: 1, reg: gp11, clobberFlags: true}, // arg0 # of low-order zeroes
+ {name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
+ {name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW", clobberFlags: true}, // arg0 # of low-order zeroes ; undef if zero
+ {name: "LoweredCtz32", argLength: 1, reg: gp11, clobberFlags: true}, // arg0 # of low-order zeroes
+ {name: "LoweredCtz64", argLength: 2, reg: gp21, resultNotInArgs: true, clobberFlags: true}, // # of low-order zeroes of the 64-bit value arg1<<32+arg0 (arg0 = low word, arg1 = high word); 64 if both words are zero
{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW", clobberFlags: true}, // arg0 # of high-order zeroes ; undef if zero
{name: "Ctz16", argLength: 1}, // Count trailing (low order) zeroes (returns 0-16)
{name: "Ctz32", argLength: 1}, // Count trailing (low order) zeroes (returns 0-32)
{name: "Ctz64", argLength: 1}, // Count trailing (low order) zeroes (returns 0-64)
+ {name: "Ctz64On32", argLength: 2}, // Count trailing (low order) zeroes (returns 0-64) in the 64-bit value arg[1]<<32+arg[0], passed as separate low (arg[0]) and high (arg[1]) 32-bit words
{name: "Ctz8NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-7
{name: "Ctz16NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-15
{name: "Ctz32NonZero", argLength: 1}, // same as above, but arg[0] known to be non-zero, returns 0-31
Op386BSFL
Op386BSFW
Op386LoweredCtz32
+ Op386LoweredCtz64
Op386BSRL
Op386BSRW
Op386BSWAPL
OpCtz16
OpCtz32
OpCtz64
+ OpCtz64On32
OpCtz8NonZero
OpCtz16NonZero
OpCtz32NonZero
},
},
},
+ {
+ name: "LoweredCtz64",
+ argLen: 2, // arg0 = low 32 bits, arg1 = high 32 bits of the 64-bit operand
+ resultNotInArgs: true, // the result register must differ from both input registers
+ clobberFlags: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 239}, // AX CX DX BX BP SI DI
+ {1, 239}, // AX CX DX BX BP SI DI
+ },
+ outputs: []outputInfo{
+ {0, 239}, // AX CX DX BX BP SI DI
+ },
+ },
+ },
{
name: "BSRL",
argLen: 1,
argLen: 1,
generic: true,
},
+ {
+ name: "Ctz64On32",
+ argLen: 2, // arg[0] = low 32 bits, arg[1] = high 32 bits of the 64-bit operand
+ generic: true,
+ },
{
name: "Ctz8NonZero",
argLen: 1,
case OpCtz32NonZero:
v.Op = Op386BSFL
return true
+ case OpCtz64On32:
+ v.Op = Op386LoweredCtz64 // only the opcode is replaced; the two arguments are left untouched
+ return true
case OpCtz8:
return rewriteValue386_OpCtz8(v)
case OpCtz8NonZero:
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0])
},
- sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+ sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+ addF("math/bits", "TrailingZeros64", // separate registration for 386: builds a two-argument Ctz64On32 instead of Ctz64
+ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ lo := s.newValue1(ssa.OpInt64Lo, types.Types[types.TUINT32], args[0]) // low half of the 64-bit argument, typed uint32
+ hi := s.newValue1(ssa.OpInt64Hi, types.Types[types.TUINT32], args[0]) // high half of the 64-bit argument, typed uint32
+ return s.newValue2(ssa.OpCtz64On32, types.Types[types.TINT], lo, hi) // lo is arg[0], hi is arg[1], matching Ctz64On32's arg[1]<<32+arg[0] layout
+ },
+ sys.I386)
addF("math/bits", "TrailingZeros32",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0])
p2.To.Type = obj.TYPE_REG
p2.To.Reg = v.Reg()
+ // NOP (so the JNZ has somewhere to land)
+ nop := s.Prog(obj.ANOP)
+ p1.To.SetTarget(nop)
+ case ssa.Op386LoweredCtz64: // out = trailing-zero count of the 64-bit value arg1<<32+arg0 (64 if both words are zero)
+ if v.Args[0].Reg() == v.Reg() { // sanity check: the op is declared resultNotInArgs, so this should not happen
+ v.Fatalf("input[0] and output in the same register %s", v.LongString())
+ }
+ if v.Args[1].Reg() == v.Reg() { // the first BSFL below writes out before arg1 is read, so they must not share a register
+ v.Fatalf("input[1] and output in the same register %s", v.LongString())
+ }
+
+ // BSFL arg0, out: index of the lowest set bit in the low word (result undefined if arg0 is zero)
+ p := s.Prog(x86.ABSFL)
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = v.Args[0].Reg()
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = v.Reg()
+
+ // JNZ 5(PC): taken when arg0 was nonzero (BSFL sets ZF only for a zero source); out is final, skip to the NOP
+ p1 := s.Prog(x86.AJNE)
+ p1.To.Type = obj.TYPE_BRANCH
+
+ // BSFL arg1, out: the low word was zero, so scan the high word instead
+ p2 := s.Prog(x86.ABSFL)
+ p2.From.Type = obj.TYPE_REG
+ p2.From.Reg = v.Args[1].Reg()
+ p2.To.Type = obj.TYPE_REG
+ p2.To.Reg = v.Reg()
+
+ // JNZ 2(PC): taken when arg1 was nonzero; skip the MOVL and go straight to the ADDL
+ p3 := s.Prog(x86.AJNE)
+ p3.To.Type = obj.TYPE_BRANCH
+
+ // MOVL $32, out: both words are zero; together with the ADDL below this yields 64
+ p4 := s.Prog(x86.AMOVL)
+ p4.From.Type = obj.TYPE_CONST
+ p4.From.Offset = 32
+ p4.To.Type = obj.TYPE_REG
+ p4.To.Reg = v.Reg()
+
+ // ADDL $32, out: a bit position found in the high word is offset by 32 in the 64-bit value
+ p5 := s.Prog(x86.AADDL)
+ p5.From.Type = obj.TYPE_CONST
+ p5.From.Offset = 32
+ p5.To.Type = obj.TYPE_REG
+ p5.To.Reg = v.Reg()
+ p3.To.SetTarget(p5)
+
// NOP (so the JNZ has somewhere to land)
nop := s.Prog(obj.ANOP)
p1.To.SetTarget(nop)