Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: added some intrinsics to SSA back end
author David Chase <drchase@google.com>
Fri, 11 Mar 2016 05:10:52 +0000 (00:10 -0500)
committer David Chase <drchase@google.com>
Mon, 28 Mar 2016 16:29:59 +0000 (16:29 +0000)
One intrinsic was needed to help get the very best
performance out of a future GC; as long as that one was
being added, I also added Bswap since that is sometimes
a handy thing to have.  I had intended to fill out the
bit-scan intrinsic family, but the mismatch between the
"scan forward" instruction and "count leading zeroes"
was large enough to cause me to leave it out -- it poses
a dilemma that I'd rather dodge right now.
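
To make the dilemma concrete: BSR reports the bit index of the highest
set bit and is undefined for a zero input, so a count-leading-zeroes
lowering needs both extra arithmetic and a zero-input patch. A minimal
sketch of the fixup, with bsr64 as a hypothetical stand-in for the BSRQ
instruction:

func clz64(x uint64) uint64 {
	if x == 0 {
		return 64 // BSR is undefined here; a CMOV-style patch would be needed
	}
	return 63 - bsr64(x) // BSR yields an index, not a count
}

// bsr64 models BSRQ: index of the highest set bit, for x != 0.
func bsr64(x uint64) uint64 {
	n := uint64(0)
	for x > 1 {
		x >>= 1
		n++
	}
	return n
}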

These intrinsics are not exposed for general use.
That's a separate issue requiring an API change proposal
( https://github.com/golang/proposal )

All intrinsics are tested, both that they are substituted
on the appropriate architecture, and that they produce the
expected result.

Change-Id: I5848037cfd97de4f75bdc33bdd89bba00af4a8ee
Reviewed-on: https://go-review.googlesource.com/20564
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

15 files changed:
src/cmd/compile/internal/amd64/prog.go
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/gc/go.go
src/cmd/compile/internal/gc/inl.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/runtime/internal/sys/intrinsics.go [new file with mode: 0644]
test/intrinsic.dir/main.go [new file with mode: 0644]
test/intrinsic.go [new file with mode: 0644]
test/run.go

index 55ea7ee82a62606ece45135a7fa8c305780500cd..91b479be2217dad0c4b8ce67f42b3804e7c15f94 100644 (file)
@@ -36,26 +36,44 @@ var progtable = [x86.ALAST & obj.AMask]obj.ProgInfo{
 
        // NOP is an internal no-op that also stands
        // for USED and SET annotations, not the Intel opcode.
-       obj.ANOP:                   {Flags: gc.LeftRead | gc.RightWrite},
-       x86.AADCL & obj.AMask:      {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
-       x86.AADCQ & obj.AMask:      {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
-       x86.AADCW & obj.AMask:      {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
-       x86.AADDB & obj.AMask:      {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
-       x86.AADDL & obj.AMask:      {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
-       x86.AADDW & obj.AMask:      {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
-       x86.AADDQ & obj.AMask:      {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
-       x86.AADDSD & obj.AMask:     {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
-       x86.AADDSS & obj.AMask:     {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
-       x86.AANDB & obj.AMask:      {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
-       x86.AANDL & obj.AMask:      {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
-       x86.AANDQ & obj.AMask:      {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
-       x86.AANDW & obj.AMask:      {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
-       obj.ACALL & obj.AMask:      {Flags: gc.RightAddr | gc.Call | gc.KillCarry},
-       x86.ACDQ & obj.AMask:       {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
-       x86.ACQO & obj.AMask:       {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
-       x86.ACWD & obj.AMask:       {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
-       x86.ACLD & obj.AMask:       {Flags: gc.OK},
-       x86.ASTD & obj.AMask:       {Flags: gc.OK},
+       obj.ANOP:               {Flags: gc.LeftRead | gc.RightWrite},
+       x86.AADCL & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
+       x86.AADCQ & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
+       x86.AADCW & obj.AMask:  {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
+       x86.AADDB & obj.AMask:  {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
+       x86.AADDL & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
+       x86.AADDW & obj.AMask:  {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
+       x86.AADDQ & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
+       x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
+       x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
+       x86.AANDB & obj.AMask:  {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
+       x86.AANDL & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
+       x86.AANDQ & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
+       x86.AANDW & obj.AMask:  {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
+
+       x86.ABSFL & obj.AMask:   {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+       x86.ABSFQ & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+       x86.ABSFW & obj.AMask:   {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+       x86.ABSRL & obj.AMask:   {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+       x86.ABSRQ & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+       x86.ABSRW & obj.AMask:   {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry},
+       x86.ABSWAPL & obj.AMask: {Flags: gc.SizeL | RightRdwr},
+       x86.ABSWAPQ & obj.AMask: {Flags: gc.SizeQ | RightRdwr},
+
+       obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry},
+       x86.ACDQ & obj.AMask:  {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
+       x86.ACQO & obj.AMask:  {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
+       x86.ACWD & obj.AMask:  {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
+       x86.ACLD & obj.AMask:  {Flags: gc.OK},
+       x86.ASTD & obj.AMask:  {Flags: gc.OK},
+
+       x86.ACMOVLEQ & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry},
+       x86.ACMOVLNE & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry},
+       x86.ACMOVQEQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry},
+       x86.ACMOVQNE & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry},
+       x86.ACMOVWEQ & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry},
+       x86.ACMOVWNE & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry},
+
        x86.ACMPB & obj.AMask:      {Flags: gc.SizeB | gc.LeftRead | gc.RightRead | gc.SetCarry},
        x86.ACMPL & obj.AMask:      {Flags: gc.SizeL | gc.LeftRead | gc.RightRead | gc.SetCarry},
        x86.ACMPQ & obj.AMask:      {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead | gc.SetCarry},
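
(Note that the new BSF/BSR entries are marked gc.SetCarry even though the
instructions set ZF rather than CF: per the comment added to gc/go.go
below, the {Set,Use,Kill}Carry bits now conservatively cover the zero and
overflow flags as well.)
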
index 307ba28e5e94c9bb49679dab7dd7831c8dc506b4..dfacff6f40bd5f41029adfcd725394204327be02 100644 (file)
@@ -477,6 +477,33 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Offset = v.AuxInt2Int64()
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
+
+       case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst,
+               ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst:
+               r := gc.SSARegNum(v)
+               x := gc.SSARegNum(v.Args[0])
+               // Arg0 is in/out, move in to out if not already same
+               if r != x {
+                       p := gc.Prog(moveByType(v.Type))
+                       p.From.Type = obj.TYPE_REG
+                       p.From.Reg = x
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = r
+               }
+
+               // Constant into AX, after arg0 movement in case arg0 is in AX
+               p := gc.Prog(moveByType(v.Type))
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = v.AuxInt2Int64()
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = x86.REG_AX
+
+               p = gc.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = x86.REG_AX
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = r
+
        case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst:
                r := gc.SSARegNum(v)
                x := gc.SSARegNum(v.Args[0])
@@ -955,6 +982,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                        gc.Maxarg = v.AuxInt
                }
        case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB,
+               ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
                ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB:
                x := gc.SSARegNum(v.Args[0])
                r := gc.SSARegNum(v)
@@ -968,7 +996,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p := gc.Prog(v.Op.Asm())
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
-       case ssa.OpAMD64SQRTSD:
+       case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW,
+               ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW,
+               ssa.OpAMD64SQRTSD:
                p := gc.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
                p.From.Reg = gc.SSARegNum(v.Args[0])
@@ -1008,9 +1038,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX)
 
        case ssa.OpAMD64InvertFlags:
-               v.Fatalf("InvertFlags should never make it to codegen %v", v)
+               v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
        case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
-               v.Fatalf("Flag* ops should never make it to codegen %v", v)
+               v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
        case ssa.OpAMD64REPSTOSQ:
                gc.Prog(x86.AREP)
                gc.Prog(x86.ASTOSQ)
index 78c177e616fe290b887910ee3d29c1be2099e2b0..448a0fd3222036e0b6cd72ebf7d91a60154a1191 100644 (file)
@@ -55,8 +55,8 @@ func (v Val) Ctype() Ctype {
 }
 
 type Pkg struct {
-       Name     string // package name
-       Path     string // string literal used in import statement
+       Name     string // package name, e.g. "sys"
+       Path     string // string literal used in import statement, e.g. "runtime/internal/sys"
        Pathsym  *Sym
        Prefix   string // escaped path for use in symbol table
        Imported bool   // export data of this package was parsed
@@ -469,6 +469,9 @@ const (
 
        // Set, use, or kill of carry bit.
        // Kill means we never look at the carry bit after this kind of instruction.
+       // Originally for understanding ADC, RCR, and so on, but now also
+       // tracks set, use, and kill of the zero and overflow bits as well.
+       // TODO rename to {Set,Use,Kill}Flags
        SetCarry  = 1 << 24
        UseCarry  = 1 << 25
        KillCarry = 1 << 26
index ff0791c5377dae7b473ccf4d80128ab4f5bd0857..e25ce132daee7e947b156cb75a36d3e4c2d0dea7 100644 (file)
@@ -453,7 +453,7 @@ func inlnode(n *Node) *Node {
                if Debug['m'] > 3 {
                        fmt.Printf("%v:call to func %v\n", n.Line(), Nconv(n.Left, FmtSign))
                }
-               if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 { // normal case
+               if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 && !isIntrinsicCall1(n) { // normal case
                        n = mkinlcall(n, n.Left, n.Isddd)
                } else if n.Left.Op == ONAME && n.Left.Left != nil && n.Left.Left.Op == OTYPE && n.Left.Right != nil && n.Left.Right.Op == ONAME { // methods called as functions
                        if n.Left.Sym.Def != nil {
index 93b820b17ec755bc8aa5b36a5516be7e85ac7bd9..9b8ef20fed5ae9726fddb25de603133e85add592 100644 (file)
@@ -2052,7 +2052,13 @@ func (s *state) expr(n *Node) *ssa.Value {
                p, l, c := s.slice(n.Left.Type, v, i, j, k)
                return s.newValue3(ssa.OpSliceMake, n.Type, p, l, c)
 
-       case OCALLFUNC, OCALLINTER, OCALLMETH:
+       case OCALLFUNC:
+               if isIntrinsicCall1(n) {
+                       return s.intrinsicCall1(n)
+               }
+               fallthrough
+
+       case OCALLINTER, OCALLMETH:
                a := s.call(n, callNormal)
                return s.newValue2(ssa.OpLoad, n.Type, a, s.mem())
 
@@ -2373,6 +2379,75 @@ const (
        callGo
 )
 
+// isSSAIntrinsic1 reports whether s names a recognized 1-arg intrinsic
+// that can be handled by the SSA backend.
+// SSA uses this, but so does the front end to see if it should
+// refrain from inlining a function because it is a candidate for
+// intrinsic substitution.
+func isSSAIntrinsic1(s *Sym) bool {
+       // The test below is not quite accurate -- in the event that
+       // a function is disabled on a per-function basis, for example
+       // because of hash-keyed binary failure search, SSA might be
+       // disabled for that function but it would not be noted here,
+       // and thus an inlining would not occur (in practice, inlining
+       // so far has only been noticed for Bswap32 and the 16-bit count
+       // leading/trailing instructions, but heuristics might change
+       // in the future or on different architectures).
+       if !ssaEnabled || ssa.IntrinsicsDisable || Thearch.Thechar != '6' {
+               return false
+       }
+       if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/sys" {
+               switch s.Name {
+               case
+                       "Ctz64", "Ctz32", "Ctz16",
+                       "Bswap64", "Bswap32":
+                       return true
+               }
+       }
+       return false
+}
+
+func isIntrinsicCall1(n *Node) bool {
+       if n == nil || n.Left == nil {
+               return false
+       }
+       return isSSAIntrinsic1(n.Left.Sym)
+}
+
+// intrinsicFirstArg extracts the first argument from n.List and evaluates it.
+func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
+       x := n.List.First()
+       if x.Op == OAS {
+               x = x.Right
+       }
+       return s.expr(x)
+}
+
+// intrinsicCall1 converts a call to a recognized 1-arg intrinsic
+// into the intrinsic
+func (s *state) intrinsicCall1(n *Node) *ssa.Value {
+       var result *ssa.Value
+       switch n.Left.Sym.Name {
+       case "Ctz64":
+               result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
+       case "Ctz32":
+               result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
+       case "Ctz16":
+               result = s.newValue1(ssa.OpCtz16, Types[TUINT16], s.intrinsicFirstArg(n))
+       case "Bswap64":
+               result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
+       case "Bswap32":
+               result = s.newValue1(ssa.OpBswap32, Types[TUINT32], s.intrinsicFirstArg(n))
+       }
+       if result == nil {
+               Fatalf("Unknown special call: %v", n.Left.Sym)
+       }
+       if ssa.IntrinsicsDebug > 0 {
+               Warnl(n.Lineno, "intrinsic substitution for %v with %s", n.Left.Sym.Name, result.LongString())
+       }
+       return result
+}
+
 // Calls the function n using the specified call type.
 // Returns the address of the return value (or nil if none).
 func (s *state) call(n *Node, k callKind) *ssa.Value {
index b8e2b42c3e5733729f95f822fd6c4ae3d35c25ca..d6c2bf83efa180cd2af2b16728443240696d2e9f 100644 (file)
@@ -120,6 +120,10 @@ type pass struct {
 // Run consistency checker between each phase
 var checkEnabled = false
 
+// Debug output
+var IntrinsicsDebug int
+var IntrinsicsDisable bool
+
 // PhaseOption sets the specified flag in the specified ssa phase,
 // returning empty string if this was successful or a string explaining
 // the error if it was not.
@@ -157,6 +161,20 @@ func PhaseOption(phase, flag string, val int) string {
                }
        }
 
+       if phase == "intrinsics" {
+               switch flag {
+               case "on":
+                       IntrinsicsDisable = val == 0
+               case "off":
+                       IntrinsicsDisable = val != 0
+               case "debug":
+                       IntrinsicsDebug = val
+               default:
+                       return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
+               }
+               return ""
+       }
+
        underphase := strings.Replace(phase, "_", " ", -1)
        var re *regexp.Regexp
        if phase[0] == '~' {
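
The new phase options hang off the compiler's -d flag; the test added
below uses the debug form, and the off form follows from the switch above
(the value defaulting to 1 is an assumption):

	go tool compile -d=ssa/intrinsics/debug x.go   # report each substitution
	go tool compile -d=ssa/intrinsics/off x.go     # disable intrinsic substitution
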
index b595912cc670c4637b4a7bfa5dbf1462931ca18d..cc210978ef0eb157f74687bbd09f4d15041caf8d 100644 (file)
 (Com16 x) -> (NOTW x)
 (Com8 x) -> (NOTB x)
 
+// CMPQconst 0 below is redundant because BSF sets Z but how to remove?
+(Ctz64 <t> x) -> (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
+(Ctz32 <t> x) -> (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
+(Ctz16 <t> x) -> (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
+
+(CMOVQEQconst x (InvertFlags y) [c]) -> (CMOVQNEconst x y [c])
+(CMOVLEQconst x (InvertFlags y) [c]) -> (CMOVLNEconst x y [c])
+(CMOVWEQconst x (InvertFlags y) [c]) -> (CMOVWNEconst x y [c])
+
+(CMOVQEQconst _ (FlagEQ) [c]) -> (Const64 [c])
+(CMOVLEQconst _ (FlagEQ) [c]) -> (Const32 [c])
+(CMOVWEQconst _ (FlagEQ) [c]) -> (Const16 [c])
+
+(CMOVQEQconst x (FlagLT_ULT)) -> x
+(CMOVLEQconst x (FlagLT_ULT)) -> x
+(CMOVWEQconst x (FlagLT_ULT)) -> x
+
+(CMOVQEQconst x (FlagLT_UGT)) -> x
+(CMOVLEQconst x (FlagLT_UGT)) -> x
+(CMOVWEQconst x (FlagLT_UGT)) -> x
+
+(CMOVQEQconst x (FlagGT_ULT)) -> x
+(CMOVLEQconst x (FlagGT_ULT)) -> x
+(CMOVWEQconst x (FlagGT_ULT)) -> x
+
+(CMOVQEQconst x (FlagGT_UGT)) -> x
+(CMOVLEQconst x (FlagGT_UGT)) -> x
+(CMOVWEQconst x (FlagGT_UGT)) -> x
+
+(Bswap64 x) -> (BSWAPQ x)
+(Bswap32 x) -> (BSWAPL x)
+
 (Sqrt x) -> (SQRTSD x)
 
 // Note: we always extend to 64 bits even though some ops don't need that many result bits.
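
In Go terms, the Ctz64 rule above computes the following (a sketch of the
semantics only, not compiler code):

func ctz64Semantics(x uint64) uint64 {
	if x == 0 { // CMPQconst x [0] sets Z, so CMOVQEQconst selects the constant
		return 64
	}
	n := uint64(0) // BSFQ: index of the lowest set bit
	for x&1 == 0 {
		x >>= 1
		n++
	}
	return n
}
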
index 116e3ff9e3d1244be5cc3bd50746ee7e8cc02222..9dc09aab533175cb5ec54e434206a63198c9eb58 100644 (file)
@@ -103,9 +103,13 @@ func init() {
                gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx},
                        clobbers: ax | flags}
 
-               gp2flags  = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
-               gp1flags  = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
-               flagsgp   = regInfo{inputs: flagsonly, outputs: gponly}
+               gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
+               gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
+               flagsgp  = regInfo{inputs: flagsonly, outputs: gponly}
+
+               // for CMOVconst -- uses AX to hold constant temporary. AX input is moved before temp.
+               gp1flagsgp = regInfo{inputs: []regMask{gp, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
+
                readflags = regInfo{inputs: flagsonly, outputs: gponly}
                flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
 
@@ -307,6 +311,25 @@ func init() {
                {name: "NOTW", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
                {name: "NOTB", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
 
+               {name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ"}, // arg0 # of low-order zeroes ; undef if zero
+               {name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL"}, // arg0 # of low-order zeroes ; undef if zero
+               {name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW"}, // arg0 # of low-order zeroes ; undef if zero
+
+               {name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ"}, // arg0 # of high-order zeroes ; undef if zero
+               {name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL"}, // arg0 # of high-order zeroes ; undef if zero
+               {name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW"}, // arg0 # of high-order zeroes ; undef if zero
+
+               // Note ASM for ops moves whole register
+               {name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z set
+               {name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z set
+               {name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z set
+               {name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+               {name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+               {name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z not set
+
+               {name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true}, // arg0 swap bytes
+               {name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true}, // arg0 swap bytes
+
                {name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)
 
                {name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear.
index ab5e335765e5673f8ba98eb5312e45b1551289f6..6d92926e3aab8597dd6d3c48bb6d8ffb9a8734b1 100644 (file)
@@ -237,6 +237,17 @@ var genericOps = []opData{
        {name: "Com32", argLength: 1},
        {name: "Com64", argLength: 1},
 
+       {name: "Ctz16", argLength: 1}, // Count trailing (low  order) zeroes (returns 0-16)
+       {name: "Ctz32", argLength: 1}, // Count trailing zeroes (returns 0-32)
+       {name: "Ctz64", argLength: 1}, // Count trailing zeroes (returns 0-64)
+
+       {name: "Clz16", argLength: 1}, // Count leading (high order) zeroes (returns 0-16)
+       {name: "Clz32", argLength: 1}, // Count leading zeroes (returns 0-32)
+       {name: "Clz64", argLength: 1}, // Count leading zeroes (returns 0-64)
+
+       {name: "Bswap32", argLength: 1}, // Swap bytes
+       {name: "Bswap64", argLength: 1}, // Swap bytes
+
        {name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
 
        // Data movement, max argument length for Phi is indefinite so just pick
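
(The Clz16/32/64 ops are declared here and given opcodes in opGen.go, but
no AMD64 rules or intrinsic mappings use them yet, matching the commit
message's decision to leave the leading-zero family out for now.)
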
index 3ff2b5ac60f367ebdd361267ed1a048464e14658..e76efd40ca97fb77aff16141d989e6cbf24e0fa7 100644 (file)
@@ -237,6 +237,20 @@ const (
        OpAMD64NOTL
        OpAMD64NOTW
        OpAMD64NOTB
+       OpAMD64BSFQ
+       OpAMD64BSFL
+       OpAMD64BSFW
+       OpAMD64BSRQ
+       OpAMD64BSRL
+       OpAMD64BSRW
+       OpAMD64CMOVQEQconst
+       OpAMD64CMOVLEQconst
+       OpAMD64CMOVWEQconst
+       OpAMD64CMOVQNEconst
+       OpAMD64CMOVLNEconst
+       OpAMD64CMOVWNEconst
+       OpAMD64BSWAPQ
+       OpAMD64BSWAPL
        OpAMD64SQRTSD
        OpAMD64SBBQcarrymask
        OpAMD64SBBLcarrymask
@@ -521,6 +535,14 @@ const (
        OpCom16
        OpCom32
        OpCom64
+       OpCtz16
+       OpCtz32
+       OpCtz64
+       OpClz16
+       OpClz32
+       OpClz64
+       OpBswap32
+       OpBswap64
        OpSqrt
        OpPhi
        OpCopy
@@ -2803,6 +2825,222 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "BSFQ",
+               argLen: 1,
+               asm:    x86.ABSFQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934592, // FLAGS
+                       outputs: []regMask{
+                               65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:   "BSFL",
+               argLen: 1,
+               asm:    x86.ABSFL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934592, // FLAGS
+                       outputs: []regMask{
+                               65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:   "BSFW",
+               argLen: 1,
+               asm:    x86.ABSFW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934592, // FLAGS
+                       outputs: []regMask{
+                               65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:   "BSRQ",
+               argLen: 1,
+               asm:    x86.ABSRQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934592, // FLAGS
+                       outputs: []regMask{
+                               65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:   "BSRL",
+               argLen: 1,
+               asm:    x86.ABSRL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934592, // FLAGS
+                       outputs: []regMask{
+                               65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:   "BSRW",
+               argLen: 1,
+               asm:    x86.ABSRW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934592, // FLAGS
+                       outputs: []regMask{
+                               65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQEQconst",
+               auxType:      auxInt64,
+               argLen:       2,
+               resultInArg0: true,
+               asm:          x86.ACMOVQEQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 8589934592}, // FLAGS
+                               {0, 65519},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934593, // AX FLAGS
+                       outputs: []regMask{
+                               65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLEQconst",
+               auxType:      auxInt32,
+               argLen:       2,
+               resultInArg0: true,
+               asm:          x86.ACMOVLEQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 8589934592}, // FLAGS
+                               {0, 65519},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934593, // AX FLAGS
+                       outputs: []regMask{
+                               65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWEQconst",
+               auxType:      auxInt16,
+               argLen:       2,
+               resultInArg0: true,
+               asm:          x86.ACMOVLEQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 8589934592}, // FLAGS
+                               {0, 65519},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934593, // AX FLAGS
+                       outputs: []regMask{
+                               65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQNEconst",
+               auxType:      auxInt64,
+               argLen:       2,
+               resultInArg0: true,
+               asm:          x86.ACMOVQNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 8589934592}, // FLAGS
+                               {0, 65519},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934593, // AX FLAGS
+                       outputs: []regMask{
+                               65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLNEconst",
+               auxType:      auxInt32,
+               argLen:       2,
+               resultInArg0: true,
+               asm:          x86.ACMOVLNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 8589934592}, // FLAGS
+                               {0, 65519},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934593, // AX FLAGS
+                       outputs: []regMask{
+                               65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWNEconst",
+               auxType:      auxInt16,
+               argLen:       2,
+               resultInArg0: true,
+               asm:          x86.ACMOVLNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 8589934592}, // FLAGS
+                               {0, 65519},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934593, // AX FLAGS
+                       outputs: []regMask{
+                               65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "BSWAPQ",
+               argLen:       1,
+               resultInArg0: true,
+               asm:          x86.ABSWAPQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934592, // FLAGS
+                       outputs: []regMask{
+                               65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "BSWAPL",
+               argLen:       1,
+               resultInArg0: true,
+               asm:          x86.ABSWAPL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 8589934592, // FLAGS
+                       outputs: []regMask{
+                               65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
        {
                name:   "SQRTSD",
                argLen: 1,
@@ -4981,6 +5219,46 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "Ctz16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "Ctz32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "Ctz64",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "Clz16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "Clz32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "Clz64",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "Bswap32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "Bswap64",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "Sqrt",
                argLen:  1,
index 046973859a009ecfeaff6c9a7797a954ca11cb37..8dd1b15f135a878f909e3958edbe6acc1e6481cd 100644 (file)
@@ -66,6 +66,16 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpAnd8(v, config)
        case OpAvg64u:
                return rewriteValueAMD64_OpAvg64u(v, config)
+       case OpBswap32:
+               return rewriteValueAMD64_OpBswap32(v, config)
+       case OpBswap64:
+               return rewriteValueAMD64_OpBswap64(v, config)
+       case OpAMD64CMOVLEQconst:
+               return rewriteValueAMD64_OpAMD64CMOVLEQconst(v, config)
+       case OpAMD64CMOVQEQconst:
+               return rewriteValueAMD64_OpAMD64CMOVQEQconst(v, config)
+       case OpAMD64CMOVWEQconst:
+               return rewriteValueAMD64_OpAMD64CMOVWEQconst(v, config)
        case OpAMD64CMPB:
                return rewriteValueAMD64_OpAMD64CMPB(v, config)
        case OpAMD64CMPBconst:
@@ -110,6 +120,12 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
                return rewriteValueAMD64_OpConstNil(v, config)
        case OpConvert:
                return rewriteValueAMD64_OpConvert(v, config)
+       case OpCtz16:
+               return rewriteValueAMD64_OpCtz16(v, config)
+       case OpCtz32:
+               return rewriteValueAMD64_OpCtz32(v, config)
+       case OpCtz64:
+               return rewriteValueAMD64_OpCtz64(v, config)
        case OpCvt32Fto32:
                return rewriteValueAMD64_OpCvt32Fto32(v, config)
        case OpCvt32Fto64:
@@ -2119,6 +2135,307 @@ func rewriteValueAMD64_OpAvg64u(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpBswap32(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Bswap32 x)
+       // cond:
+       // result: (BSWAPL x)
+       for {
+               x := v.Args[0]
+               v.reset(OpAMD64BSWAPL)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpBswap64(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Bswap64 x)
+       // cond:
+       // result: (BSWAPQ x)
+       for {
+               x := v.Args[0]
+               v.reset(OpAMD64BSWAPQ)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLEQconst(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (CMOVLEQconst x (InvertFlags y) [c])
+       // cond:
+       // result: (CMOVLNEconst x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64InvertFlags {
+                       break
+               }
+               y := v_1.Args[0]
+               c := v.AuxInt
+               v.reset(OpAMD64CMOVLNEconst)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AuxInt = c
+               return true
+       }
+       // match: (CMOVLEQconst _ (FlagEQ) [c])
+       // cond:
+       // result: (Const32 [c])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagEQ {
+                       break
+               }
+               c := v.AuxInt
+               v.reset(OpConst32)
+               v.AuxInt = c
+               return true
+       }
+       // match: (CMOVLEQconst x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVLEQconst x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVLEQconst x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVLEQconst x (FlagGT_UGT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVQEQconst(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (CMOVQEQconst x (InvertFlags y) [c])
+       // cond:
+       // result: (CMOVQNEconst x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64InvertFlags {
+                       break
+               }
+               y := v_1.Args[0]
+               c := v.AuxInt
+               v.reset(OpAMD64CMOVQNEconst)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AuxInt = c
+               return true
+       }
+       // match: (CMOVQEQconst _ (FlagEQ) [c])
+       // cond:
+       // result: (Const64 [c])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagEQ {
+                       break
+               }
+               c := v.AuxInt
+               v.reset(OpConst64)
+               v.AuxInt = c
+               return true
+       }
+       // match: (CMOVQEQconst x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQEQconst x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQEQconst x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQEQconst x (FlagGT_UGT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWEQconst(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (CMOVWEQconst x (InvertFlags y) [c])
+       // cond:
+       // result: (CMOVWNEconst x y [c])
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64InvertFlags {
+                       break
+               }
+               y := v_1.Args[0]
+               c := v.AuxInt
+               v.reset(OpAMD64CMOVWNEconst)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AuxInt = c
+               return true
+       }
+       // match: (CMOVWEQconst _ (FlagEQ) [c])
+       // cond:
+       // result: (Const16 [c])
+       for {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagEQ {
+                       break
+               }
+               c := v.AuxInt
+               v.reset(OpConst16)
+               v.AuxInt = c
+               return true
+       }
+       // match: (CMOVWEQconst x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWEQconst x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWEQconst x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWEQconst x (FlagGT_UGT))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool {
        b := v.Block
        _ = b
@@ -3026,6 +3343,72 @@ func rewriteValueAMD64_OpConvert(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpCtz16(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Ctz16 <t> x)
+       // cond:
+       // result: (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v.reset(OpAMD64CMOVWEQconst)
+               v0 := b.NewValue0(v.Line, OpAMD64BSFW, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Line, OpAMD64CMPWconst, TypeFlags)
+               v1.AddArg(x)
+               v1.AuxInt = 0
+               v.AddArg(v1)
+               v.AuxInt = 16
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpCtz32(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Ctz32 <t> x)
+       // cond:
+       // result: (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v.reset(OpAMD64CMOVLEQconst)
+               v0 := b.NewValue0(v.Line, OpAMD64BSFL, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Line, OpAMD64CMPLconst, TypeFlags)
+               v1.AddArg(x)
+               v1.AuxInt = 0
+               v.AddArg(v1)
+               v.AuxInt = 32
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpCtz64(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Ctz64 <t> x)
+       // cond:
+       // result: (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v.reset(OpAMD64CMOVQEQconst)
+               v0 := b.NewValue0(v.Line, OpAMD64BSFQ, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Line, OpAMD64CMPQconst, TypeFlags)
+               v1.AddArg(x)
+               v1.AuxInt = 0
+               v.AddArg(v1)
+               v.AuxInt = 64
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpCvt32Fto32(v *Value, config *Config) bool {
        b := v.Block
        _ = b
diff --git a/src/runtime/internal/sys/intrinsics.go b/src/runtime/internal/sys/intrinsics.go
new file mode 100644 (file)
index 0000000..8feb754
--- /dev/null
@@ -0,0 +1,105 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sys
+
+// Ctz64 counts trailing (low-order) zeroes,
+// and if all are zero, then 64.
+func Ctz64(x uint64) uint64 {
+       if x&0xffffffff == 0 {
+               return 32 + uint64(Ctz32(uint32(x>>32)))
+       }
+       return uint64(Ctz32(uint32(x)))
+
+}
+
+// Ctz32 counts trailing (low-order) zeroes,
+// and if all are zero, then 32.
+func Ctz32(x uint32) uint32 {
+       if x&0xffff == 0 {
+               return 16 + uint32(Ctz16(uint16(x>>16)))
+       }
+       return uint32(Ctz16(uint16(x)))
+}
+
+// Ctz16 counts trailing (low-order) zeroes,
+// and if all are zero, then 16.
+func Ctz16(x uint16) uint16 {
+       if x&0xff == 0 {
+               return 8 + uint16(Ctz8(uint8(x>>8)))
+       }
+       return uint16(Ctz8(uint8(x)))
+}
+
+// Ctz8 counts trailing (low-order) zeroes,
+// and if all are zero, then 8.
+func Ctz8(x uint8) uint8 {
+       return ctzVals[x]
+}
+
+var ctzVals = [256]uint8{
+       8, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       5, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       6, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       5, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       7, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       5, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       6, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       5, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0,
+       4, 0, 1, 0, 2, 0, 1, 0,
+       3, 0, 1, 0, 2, 0, 1, 0}
+
+// Bswap64 returns its input with byte order reversed
+// 0x0102030405060708 -> 0x0807060504030201
+func Bswap64(x uint64) uint64 {
+       c8 := uint64(0xff00ff00ff00ff00)
+       a := (x & c8) >> 8
+       b := (x &^ c8) << 8
+       x = a | b
+       c16 := uint64(0xffff0000ffff0000)
+       a = (x & c16) >> 16
+       b = (x &^ c16) << 16
+       x = a | b
+       c32 := uint64(0xffffffff00000000)
+       a = (x & c32) >> 32
+       b = (x &^ c32) << 32
+       x = a | b
+       return x
+}
+
+// Bswap32 returns its input with byte order reversed
+// 0x01020304 -> 0x04030201
+func Bswap32(x uint32) uint32 {
+       c8 := uint32(0xff00ff00)
+       a := (x & c8) >> 8
+       b := (x &^ c8) << 8
+       x = a | b
+       c16 := uint32(0xffff0000)
+       a = (x & c16) >> 16
+       b = (x &^ c16) << 16
+       x = a | b
+       return x
+}
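
A worked example of the two mask-and-shift rounds in Bswap32, with the
intermediate values computed by hand:

// x = 0x01020304
// 8-bit round:  a = 0x00010003, b = 0x02000400 -> x = 0x02010403
// 16-bit round: a = 0x00000201, b = 0x04030000 -> x = 0x04030201
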
diff --git a/test/intrinsic.dir/main.go b/test/intrinsic.dir/main.go
new file mode 100644 (file)
index 0000000..46e6cb3
--- /dev/null
@@ -0,0 +1,109 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+       "fmt"
+       T "runtime/internal/sys"
+)
+
+var A = []uint64{0x0102030405060708, 0x1122334455667788}
+var B = []uint64{0x0807060504030201, 0x8877665544332211}
+
+var errors int
+
+func logf(f string, args ...interface{}) {
+       errors++
+       fmt.Printf(f, args...)
+       if errors > 100 { // 100 is enough spewage
+                       panic("100 errors is plenty")
+       }
+}
+
+func test(i, x uint64) {
+       t := T.Ctz64(x) // ERROR "intrinsic substitution for Ctz64"
+       if i != t {
+               logf("Ctz64(0x%x) expected %d but got %d\n", x, i, t)
+       }
+       x = -x
+       t = T.Ctz64(x) // ERROR "intrinsic substitution for Ctz64"
+       if i != t {
+               logf("Ctz64(0x%x) expected %d but got %d\n", x, i, t)
+       }
+
+       if i <= 32 {
+               x32 := uint32(x)
+               t32 := T.Ctz32(x32) // ERROR "intrinsic substitution for Ctz32"
+               if uint32(i) != t32 {
+                       logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
+               }
+               x32 = -x32
+               t32 = T.Ctz32(x32) // ERROR "intrinsic substitution for Ctz32"
+               if uint32(i) != t32 {
+                       logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
+               }
+       }
+       if i <= 16 {
+               x16 := uint16(x)
+               t16 := T.Ctz16(x16) // ERROR "intrinsic substitution for Ctz16"
+               if uint16(i) != t16 {
+                       logf("Ctz16(0x%x) expected %d but got %d\n", x16, i, t16)
+               }
+               x16 = -x16
+               t16 = T.Ctz16(x16) // ERROR "intrinsic substitution for Ctz16"
+               if uint16(i) != t16 {
+                       logf("Ctz16(0x%x) expected %d but got %d\n", x16, i, t16)
+               }
+       }
+}
+
+func main() {
+       // Test Bswap first because the other test relies on it
+       // working correctly (to implement bit reversal).
+       for i := range A {
+               x := A[i]
+               y := B[i]
+               X := T.Bswap64(x) // ERROR "intrinsic substitution for Bswap64"
+               Y := T.Bswap64(y) // ERROR "intrinsic substitution for Bswap64"
+               if y != X {
+                       logf("Bswap64(0x%08x) expected 0x%08x but got 0x%08x\n", x, y, X)
+               }
+               if x != Y {
+                       logf("Bswap64(0x%08x) expected 0x%08x but got 0x%08x\n", y, x, Y)
+               }
+
+               x32 := uint32(X)
+               y32 := uint32(Y >> 32)
+
+               X32 := T.Bswap32(x32) // ERROR "intrinsic substitution for Bswap32"
+               Y32 := T.Bswap32(y32) // ERROR "intrinsic substitution for Bswap32"
+               if y32 != X32 {
+                       logf("Bswap32(0x%08x) expected 0x%08x but got 0x%08x\n", x32, y32, X32)
+               }
+               if x32 != Y32 {
+                       logf("Bswap32(0x%08x) expected 0x%08x but got 0x%08x\n", y32, x32, Y32)
+               }
+       }
+
+       // Zero is a special case, be sure it is done right.
+       if T.Ctz16(0) != 16 { // ERROR "intrinsic substitution for Ctz16"
+               logf("ctz16(0) != 16")
+       }
+       if T.Ctz32(0) != 32 { // ERROR "intrinsic substitution for Ctz32"
+               logf("ctz32(0) != 32")
+       }
+       if T.Ctz64(0) != 64 { // ERROR "intrinsic substitution for Ctz64"
+               logf("ctz64(0) != 64")
+       }
+
+       for i := uint64(0); i <= 64; i++ {
+               for j := uint64(1); j <= 255; j += 2 {
+                       for k := uint64(1); k <= 65537; k += 128 {
+                               x := (j * k) << i
+                               test(i, x)
+                       }
+               }
+       }
+}
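
(In the final loop, j and k are always odd, so j*k is odd and
x = (j*k)<<i has exactly i trailing zero bits; at i == 64 the shift
yields 0, which exercises the Ctz64 zero case once more.)
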
diff --git a/test/intrinsic.go b/test/intrinsic.go
new file mode 100644 (file)
index 0000000..f774128
--- /dev/null
@@ -0,0 +1,8 @@
+// errorcheckandrundir -0 -d=ssa/intrinsics/debug
+// +build !ppc64,!ppc64le,amd64
+
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ignored
index 53a859dda975596055a367a6a6ae2727caced8bb..ffaf4d992ab3d1a30a0b867db4eaa2ec4b81c5e9 100644 (file)
@@ -34,6 +34,7 @@ import (
 
 var (
        verbose        = flag.Bool("v", false, "verbose. if set, parallelism is set to 1.")
+       keep           = flag.Bool("k", false, "keep. keep temporary directory.")
        numParallel    = flag.Int("n", runtime.NumCPU(), "number of parallel tests to run")
        summary        = flag.Bool("summary", false, "show summary of results")
        showSkips      = flag.Bool("show_skips", false, "show skipped tests")
@@ -201,8 +202,9 @@ func compileFile(runcmd runCmd, longname string) (out []byte, err error) {
        return runcmd(cmd...)
 }
 
-func compileInDir(runcmd runCmd, dir string, names ...string) (out []byte, err error) {
+func compileInDir(runcmd runCmd, dir string, flags []string, names ...string) (out []byte, err error) {
        cmd := []string{"go", "tool", "compile", "-e", "-D", ".", "-I", "."}
+       cmd = append(cmd, flags...)
        if *linkshared {
                cmd = append(cmd, "-dynlink", "-installsuffix=dynlink")
        }
@@ -477,6 +479,9 @@ func (t *test) run() {
                fallthrough
        case "compile", "compiledir", "build", "run", "runoutput", "rundir":
                t.action = action
+       case "errorcheckandrundir":
+               wantError = false // should be no error if also will run
+               fallthrough
        case "errorcheck", "errorcheckdir", "errorcheckoutput":
                t.action = action
                wantError = true
@@ -501,7 +506,9 @@ func (t *test) run() {
        }
 
        t.makeTempDir()
-       defer os.RemoveAll(t.tempDir)
+       if !*keep {
+               defer os.RemoveAll(t.tempDir)
+       }
 
        err = ioutil.WriteFile(filepath.Join(t.tempDir, t.gofile), srcBytes, 0644)
        check(err)
@@ -577,13 +584,13 @@ func (t *test) run() {
                        return
                }
                for _, gofiles := range pkgs {
-                       _, t.err = compileInDir(runcmd, longdir, gofiles...)
+                       _, t.err = compileInDir(runcmd, longdir, flags, gofiles...)
                        if t.err != nil {
                                return
                        }
                }
 
-       case "errorcheckdir":
+       case "errorcheckdir", "errorcheckandrundir":
                // errorcheck all files in lexicographic order
                // useful for finding importing errors
                longdir := filepath.Join(cwd, t.goDirName())
@@ -593,7 +600,7 @@ func (t *test) run() {
                        return
                }
                for i, gofiles := range pkgs {
-                       out, err := compileInDir(runcmd, longdir, gofiles...)
+                       out, err := compileInDir(runcmd, longdir, flags, gofiles...)
                        if i == len(pkgs)-1 {
                                if wantError && err == nil {
                                        t.err = fmt.Errorf("compilation succeeded unexpectedly\n%s", out)
@@ -615,6 +622,10 @@ func (t *test) run() {
                                break
                        }
                }
+               if action == "errorcheckdir" {
+                       return
+               }
+               fallthrough
 
        case "rundir":
                // Compile all files in the directory in lexicographic order.
@@ -626,7 +637,7 @@ func (t *test) run() {
                        return
                }
                for i, gofiles := range pkgs {
-                       _, err := compileInDir(runcmd, longdir, gofiles...)
+                       _, err := compileInDir(runcmd, longdir, flags, gofiles...)
                        if err != nil {
                                t.err = err
                                return
@@ -774,6 +785,9 @@ func (t *test) makeTempDir() {
        var err error
        t.tempDir, err = ioutil.TempDir("", "")
        check(err)
+       if *keep {
+               log.Printf("Temporary directory is %s", t.tempDir)
+       }
 }
 
 func (t *test) expectedOutput() string {
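
(The new errorcheckandrundir action therefore errorchecks every package
in the test directory against its // ERROR comments using the given
flags, then falls through to rundir, so the same files must both produce
the expected diagnostics and build and run cleanly.)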