cmd/compile: move constant divide strength reduction to SSA rules
author Keith Randall <khr@golang.org>
Tue, 14 Feb 2017 00:00:09 +0000 (16:00 -0800)
committer Keith Randall <khr@golang.org>
Fri, 17 Feb 2017 06:16:44 +0000 (06:16 +0000)
Currently the conversion from constant divides to multiplies is mostly
done during the walk pass.  This is suboptimal because SSA can
determine that the value being divided by is constant more often
(e.g. after inlining).
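
As a hedged illustration (hypothetical code, not from this CL) of a divisor
that only becomes constant after inlining, so the walk pass misses it but
SSA does not:

```go
package main

// idx divides by a divisor that is not constant at this call site.
func idx(a []int, d int) int {
	return len(a) / d // d becomes a constant only after idx is inlined
}

func main() {
	// After inlining, SSA sees len(a) / 4 and can strength-reduce it.
	println(idx(make([]int, 10), 4))
}
```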

Change-Id: If1a9b993edd71be37396b9167f77da271966f85f
Reviewed-on: https://go-review.googlesource.com/37015
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
26 files changed:
src/cmd/compile/fmt_test.go
src/cmd/compile/internal/gc/subr.go
src/cmd/compile/internal/gc/walk.go
src/cmd/compile/internal/ssa/gen/386.rules
src/cmd/compile/internal/ssa/gen/386Ops.go
src/cmd/compile/internal/ssa/gen/ARM.rules
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/MIPS.rules
src/cmd/compile/internal/ssa/gen/MIPS64.rules
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/S390X.rules
src/cmd/compile/internal/ssa/gen/generic.rules
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/magic.go
src/cmd/compile/internal/ssa/magic_test.go [new file with mode: 0644]
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite.go
src/cmd/compile/internal/ssa/rewrite386.go
src/cmd/compile/internal/ssa/rewriteARM.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/compile/internal/ssa/rewriteMIPS.go
src/cmd/compile/internal/ssa/rewriteMIPS64.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/compile/internal/ssa/rewriteS390X.go
src/cmd/compile/internal/ssa/rewritegeneric.go
src/cmd/compile/internal/x86/ssa.go

index 4170adf35246e919c752a704f8e5330ec8643b94..fa5ea3861241081f3d0ee57d828a6f252cf9cbe1 100644 (file)
@@ -596,6 +596,7 @@ var knownFormats = map[string]string{
        "*cmd/internal/obj.Prog %s":                       "",
        "*cmd/internal/obj.Prog %v":                       "",
        "*math/big.Int %#x":                               "",
+       "*math/big.Int %s":                                "",
        "[16]byte %x":                                     "",
        "[]*cmd/compile/internal/gc.Node %v":              "",
        "[]*cmd/compile/internal/gc.Sig %#v":              "",
index 98aebc528e8d22da46b461fdcdcdbb6a565bbbf8..884b87971492d2fda96bb581b7a2ffd1da0c45dd 100644 (file)
@@ -1964,42 +1964,6 @@ func liststmt(l []*Node) *Node {
        return n
 }
 
-// return power of 2 of the constant
-// operand. -1 if it is not a power of 2.
-// 1000+ if it is a -(power of 2)
-func powtwo(n *Node) int {
-       if n == nil || n.Op != OLITERAL || n.Type == nil {
-               return -1
-       }
-       if !n.Type.IsInteger() {
-               return -1
-       }
-
-       v := uint64(n.Int64())
-       b := uint64(1)
-       for i := 0; i < 64; i++ {
-               if b == v {
-                       return i
-               }
-               b = b << 1
-       }
-
-       if !n.Type.IsSigned() {
-               return -1
-       }
-
-       v = -v
-       b = 1
-       for i := 0; i < 64; i++ {
-               if b == v {
-                       return i + 1000
-               }
-               b = b << 1
-       }
-
-       return -1
-}
-
 func ngotype(n *Node) *Sym {
        if n.Type != nil {
                return typenamesym(n.Type)
index b82618af6bb6a9669af32d7be8ed8ea7bc542cde..f728943b83ff746be6799fd9a74fe042981a0f04 100644 (file)
@@ -1071,15 +1071,28 @@ opswitch:
                        break
                }
 
-               // Try rewriting as shifts or magic multiplies.
-               n = walkdiv(n, init)
-
-               // rewrite 64-bit div and mod into function calls
-               // on 32-bit architectures.
-               switch n.Op {
-               case OMOD, ODIV:
-                       if Widthreg >= 8 || (et != TUINT64 && et != TINT64) {
-                               break opswitch
+               // rewrite 64-bit div and mod on 32-bit architectures.
+               // TODO: Remove this code once we can introduce
+               // runtime calls late in SSA processing.
+               if Widthreg < 8 && (et == TINT64 || et == TUINT64) {
+                       if n.Right.Op == OLITERAL {
+                               // Leave div/mod by constant powers of 2.
+                               // The SSA backend will handle those.
+                               switch et {
+                               case TINT64:
+                                       c := n.Right.Int64()
+                                       if c < 0 {
+                                               c = -c
+                                       }
+                                       if c != 0 && c&(c-1) == 0 {
+                                               break opswitch
+                                       }
+                               case TUINT64:
+                                       c := uint64(n.Right.Int64())
+                                       if c != 0 && c&(c-1) == 0 {
+                                               break opswitch
+                                       }
+                               }
                        }
                        var fn string
                        if et == TINT64 {
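
The power-of-two check above uses a standard bit trick: a nonzero c is a
power of two exactly when clearing its lowest set bit leaves zero. A minimal
sketch mirroring the isPowerOfTwo helper the SSA rules rely on (body assumed
from its use in the rules):

```go
// isPowerOfTwo reports whether a positive c is a power of two:
// c&(c-1) clears the lowest set bit, leaving 0 only for powers of two.
func isPowerOfTwo(c int64) bool {
	return c > 0 && c&(c-1) == 0
}
```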
@@ -3324,263 +3337,6 @@ func walkinrange(n *Node, init *Nodes) *Node {
        return cmp
 }
 
-// walkdiv rewrites division by a constant as less expensive
-// operations.
-// The result of walkdiv MUST be assigned back to n, e.g.
-//     n.Left = walkdiv(n.Left, init)
-func walkdiv(n *Node, init *Nodes) *Node {
-       // if >= 0, nr is 1<<pow // 1 if nr is negative.
-
-       if n.Right.Op != OLITERAL {
-               return n
-       }
-
-       // nr is a constant.
-       nl := cheapexpr(n.Left, init)
-
-       nr := n.Right
-
-       // special cases of mod/div
-       // by a constant
-       w := int(nl.Type.Width * 8)
-
-       s := 0            // 1 if nr is negative.
-       pow := powtwo(nr) // if >= 0, nr is 1<<pow
-       if pow >= 1000 {
-               // negative power of 2
-               s = 1
-
-               pow -= 1000
-       }
-
-       if pow+1 >= w {
-               // divisor too large.
-               return n
-       }
-
-       if pow < 0 {
-               // try to do division by multiply by (2^w)/d
-               // see hacker's delight chapter 10
-               // TODO: support 64-bit magic multiply here.
-               var m Magic
-               m.W = w
-
-               if nl.Type.IsSigned() {
-                       m.Sd = nr.Int64()
-                       smagic(&m)
-               } else {
-                       m.Ud = uint64(nr.Int64())
-                       umagic(&m)
-               }
-
-               if m.Bad != 0 {
-                       return n
-               }
-
-               // We have a quick division method so use it
-               // for modulo too.
-               if n.Op == OMOD {
-                       // rewrite as A%B = A - (A/B*B).
-                       n1 := nod(ODIV, nl, nr)
-
-                       n2 := nod(OMUL, n1, nr)
-                       n = nod(OSUB, nl, n2)
-                       goto ret
-               }
-
-               switch simtype[nl.Type.Etype] {
-               default:
-                       return n
-
-                       // n1 = nl * magic >> w (HMUL)
-               case TUINT8, TUINT16, TUINT32:
-                       var nc Node
-
-                       nodconst(&nc, nl.Type, int64(m.Um))
-                       n1 := nod(OHMUL, nl, &nc)
-                       n1 = typecheck(n1, Erv)
-                       if m.Ua != 0 {
-                               // Select a Go type with (at least) twice the width.
-                               var twide *Type
-                               switch simtype[nl.Type.Etype] {
-                               default:
-                                       return n
-
-                               case TUINT8, TUINT16:
-                                       twide = Types[TUINT32]
-
-                               case TUINT32:
-                                       twide = Types[TUINT64]
-
-                               case TINT8, TINT16:
-                                       twide = Types[TINT32]
-
-                               case TINT32:
-                                       twide = Types[TINT64]
-                               }
-
-                               // add numerator (might overflow).
-                               // n2 = (n1 + nl)
-                               n2 := nod(OADD, conv(n1, twide), conv(nl, twide))
-
-                               // shift by m.s
-                               var nc Node
-
-                               nodconst(&nc, Types[TUINT], int64(m.S))
-                               n = conv(nod(ORSH, n2, &nc), nl.Type)
-                       } else {
-                               // n = n1 >> m.s
-                               var nc Node
-
-                               nodconst(&nc, Types[TUINT], int64(m.S))
-                               n = nod(ORSH, n1, &nc)
-                       }
-
-                       // n1 = nl * magic >> w
-               case TINT8, TINT16, TINT32:
-                       var nc Node
-
-                       nodconst(&nc, nl.Type, m.Sm)
-                       n1 := nod(OHMUL, nl, &nc)
-                       n1 = typecheck(n1, Erv)
-                       if m.Sm < 0 {
-                               // add the numerator.
-                               n1 = nod(OADD, n1, nl)
-                       }
-
-                       // shift by m.s
-                       var ns Node
-
-                       nodconst(&ns, Types[TUINT], int64(m.S))
-                       n2 := conv(nod(ORSH, n1, &ns), nl.Type)
-
-                       // add 1 iff n1 is negative.
-                       var nneg Node
-
-                       nodconst(&nneg, Types[TUINT], int64(w)-1)
-                       n3 := nod(ORSH, nl, &nneg) // n4 = -1 iff n1 is negative.
-                       n = nod(OSUB, n2, n3)
-
-                       // apply sign.
-                       if m.Sd < 0 {
-                               n = nod(OMINUS, n, nil)
-                       }
-               }
-
-               goto ret
-       }
-
-       switch pow {
-       case 0:
-               if n.Op == OMOD {
-                       // nl % 1 is zero.
-                       nodconst(n, n.Type, 0)
-               } else if s != 0 {
-                       // divide by -1
-                       n.Op = OMINUS
-
-                       n.Right = nil
-               } else {
-                       // divide by 1
-                       n = nl
-               }
-
-       default:
-               if n.Type.IsSigned() {
-                       if n.Op == OMOD {
-                               // signed modulo 2^pow is like ANDing
-                               // with the last pow bits, but if nl < 0,
-                               // nl & (2^pow-1) is (nl+1)%2^pow - 1.
-                               var nc Node
-
-                               nodconst(&nc, Types[simtype[TUINT]], int64(w)-1)
-                               n1 := nod(ORSH, nl, &nc) // n1 = -1 iff nl < 0.
-                               if pow == 1 {
-                                       n1 = typecheck(n1, Erv)
-                                       n1 = cheapexpr(n1, init)
-
-                                       // n = (nl+ε)&1 -ε where ε=1 iff nl<0.
-                                       n2 := nod(OSUB, nl, n1)
-
-                                       var nc Node
-                                       nodconst(&nc, nl.Type, 1)
-                                       n3 := nod(OAND, n2, &nc)
-                                       n = nod(OADD, n3, n1)
-                               } else {
-                                       // n = (nl+ε)&(nr-1) - ε where ε=2^pow-1 iff nl<0.
-                                       var nc Node
-
-                                       nodconst(&nc, nl.Type, (1<<uint(pow))-1)
-                                       n2 := nod(OAND, n1, &nc) // n2 = 2^pow-1 iff nl<0.
-                                       n2 = typecheck(n2, Erv)
-                                       n2 = cheapexpr(n2, init)
-
-                                       n3 := nod(OADD, nl, n2)
-                                       n4 := nod(OAND, n3, &nc)
-                                       n = nod(OSUB, n4, n2)
-                               }
-
-                               break
-                       } else {
-                               // arithmetic right shift does not give the correct rounding.
-                               // if nl >= 0, nl >> n == nl / nr
-                               // if nl < 0, we want to add 2^n-1 first.
-                               var nc Node
-
-                               nodconst(&nc, Types[simtype[TUINT]], int64(w)-1)
-                               n1 := nod(ORSH, nl, &nc) // n1 = -1 iff nl < 0.
-                               if pow == 1 {
-                                       // nl+1 is nl-(-1)
-                                       n.Left = nod(OSUB, nl, n1)
-                               } else {
-                                       // Do a logical right shift on -1 to keep pow bits.
-                                       var nc Node
-
-                                       nodconst(&nc, Types[simtype[TUINT]], int64(w)-int64(pow))
-                                       n2 := nod(ORSH, conv(n1, nl.Type.toUnsigned()), &nc)
-                                       n.Left = nod(OADD, nl, conv(n2, nl.Type))
-                               }
-
-                               // n = (nl + 2^pow-1) >> pow
-                               n.Op = ORSH
-
-                               var n2 Node
-                               nodconst(&n2, Types[simtype[TUINT]], int64(pow))
-                               n.Right = &n2
-                               n.Typecheck = 0
-                       }
-
-                       if s != 0 {
-                               n = nod(OMINUS, n, nil)
-                       }
-                       break
-               }
-
-               var nc Node
-               if n.Op == OMOD {
-                       // n = nl & (nr-1)
-                       n.Op = OAND
-
-                       nodconst(&nc, nl.Type, nr.Int64()-1)
-               } else {
-                       // n = nl >> pow
-                       n.Op = ORSH
-
-                       nodconst(&nc, Types[simtype[TUINT]], int64(pow))
-               }
-
-               n.Typecheck = 0
-               n.Right = &nc
-       }
-
-       goto ret
-
-ret:
-       n = typecheck(n, Erv)
-       n = walkexpr(n, init)
-       return n
-}
-
 // return 1 if integer n must be in range [0, max), 0 otherwise
 func bounded(n *Node, max int64) bool {
        if n.Type == nil || !n.Type.IsInteger() {
index 2c5357553c12af33faa2b88f1a85c9413523fbd6..c3503860d8a41b11ca545e67043d5c9883acc7a4 100644 (file)
@@ -31,6 +31,8 @@
 
 (Mul32uhilo x y) -> (MULLQU x y)
 
+(Avg32u x y) -> (AVGLU x y)
+
 (Div32F x y) -> (DIVSS x y)
 (Div64F x y) -> (DIVSD x y)
 
index bcbf2cbed3c802c535fd8824105426e296c9ea7e..5562c88c4b782bcae36ad12f41581600973c1b39 100644 (file)
@@ -202,6 +202,8 @@ func init() {
 
                {name: "MULLQU", argLength: 2, reg: gp21mul, asm: "MULL", clobberFlags: true}, // arg0 * arg1, high 32 in result[0], low 32 in result[1]
 
+               {name: "AVGLU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true, clobberFlags: true}, // (arg0 + arg1) / 2 as unsigned, all 32 result bits
+
                {name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL", clobberFlags: true}, // arg0 / arg1
                {name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW", clobberFlags: true}, // arg0 / arg1
                {name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL", clobberFlags: true}, // arg0 / arg1
index a81f060ef3c306a27ba4219b7c5d7239610a5dec..7f5bc9e510215f20eef34409204788064ad3e110 100644 (file)
@@ -63,6 +63,9 @@
 (Mod8 x y) -> (Mod32 (SignExt8to32 x) (SignExt8to32 y))
 (Mod8u x y) -> (Mod32u (ZeroExt8to32 x) (ZeroExt8to32 y))
 
+// (x + y) / 2 with x>=y -> (x - y) / 2 + y
+(Avg32u <t> x y) -> (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
+
 (And32 x y) -> (AND x y)
 (And16 x y) -> (AND x y)
 (And8 x y) -> (AND x y)
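
The comment encodes an overflow-free average: when x >= y, (x+y)/2 equals
(x-y)/2 + y, and x-y cannot overflow, so the 33-bit intermediate sum is
avoided. A sketch under that precondition (hypothetical helper, not part of
the CL):

```go
// avg32u computes (x + y) / 2 without a 33-bit intermediate,
// valid only when x >= y (as the Avg32u op is allowed to assume).
func avg32u(x, y uint32) uint32 {
	return (x-y)>>1 + y
}
```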
index a09620d4e143dd80b7ae17a0e950ff9aabc9bfd0..7d2a9a5a120a9a6db0c15037f755816369327b42 100644 (file)
@@ -54,7 +54,8 @@
 (Mod8 x y) -> (MODW (SignExt8to32 x) (SignExt8to32 y))
 (Mod8u x y) -> (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y))
 
-(Avg64u <t> x y) -> (ADD (ADD <t> (SRLconst <t> x [1]) (SRLconst <t> y [1])) (AND <t> (AND <t> x y) (MOVDconst [1])))
+// (x + y) / 2 with x>=y -> (x - y) / 2 + y
+(Avg64u <t> x y) -> (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
 
 (And64 x y) -> (AND x y)
 (And32 x y) -> (AND x y)
index e4aba36b4308467c5d555f3a9f4e8a34f7d0db62..1baa0028e0ac5e493c20a4c1f16d81fe3f605eb0 100644 (file)
@@ -55,6 +55,9 @@
 (Mod8 x y) -> (Select0 (DIV (SignExt8to32 x) (SignExt8to32 y)))
 (Mod8u x y) -> (Select0 (DIVU (ZeroExt8to32 x) (ZeroExt8to32 y)))
 
+// (x + y) / 2 with x>=y -> (x - y) / 2 + y
+(Avg32u <t> x y) -> (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
+
 (And32 x y) -> (AND x y)
 (And16 x y) -> (AND x y)
 (And8 x y) -> (AND x y)
index efa14ef3e27f9091a3b7577a3f697165c7010370..47487bff36eff3333b8158d17ba232838c049d7d 100644 (file)
@@ -54,7 +54,8 @@
 (Mod8 x y) -> (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y)))
 (Mod8u x y) -> (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)))
 
-(Avg64u <t> x y) -> (ADDV (ADDV <t> (SRLVconst <t> x [1]) (SRLVconst <t> y [1])) (AND <t> (AND <t> x y) (MOVVconst [1])))
+// (x + y) / 2 with x>=y -> (x - y) / 2 + y
+(Avg64u <t> x y) -> (ADDV (SRLVconst <t> (SUBV <t> x y) [1]) y)
 
 (And64 x y) -> (AND x y)
 (And32 x y) -> (AND x y)
index 23ddead3c42acec220319c64923a8ef8ff2e9a7f..56605dc1a06f60831da54b102e811a9a768437bd 100644 (file)
@@ -28,7 +28,8 @@
 (Mod32 x y) -> (SUB x (MULLW y (DIVW x y)))
 (Mod32u x y) -> (SUB x (MULLW y (DIVWU x y)))
 
-(Avg64u <t> x y) -> (ADD (ADD <t> (SRD <t> x (MOVDconst <t> [1])) (SRD <t> y (MOVDconst <t> [1]))) (ANDconst <t> (AND <t> x y) [1]))
+// (x + y) / 2 with x>=y -> (x - y) / 2 + y
+(Avg64u <t> x y) -> (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
 
 (Mul64  x y) -> (MULLD  x y)
 (Mul32  x y) -> (MULLW  x y)
index abca8bf519aff039779961dbadcf00ee18f55502..c2bbc3dee0e846a68cf1344be7e47242e7b2ebed 100644 (file)
@@ -60,7 +60,8 @@
 (Mod8   x y) -> (MODW  (MOVBreg x) (MOVBreg y))
 (Mod8u  x y) -> (MODWU (MOVBZreg x) (MOVBZreg y))
 
-(Avg64u <t> x y) -> (ADD (ADD <t> (SRDconst <t> x [1]) (SRDconst <t> y [1])) (ANDconst <t> (AND <t> x y) [1]))
+// (x + y) / 2 with x>=y -> (x - y) / 2 + y
+(Avg64u <t> x y) -> (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
 
 (And64 x y) -> (AND x y)
 (And32 x y) -> (ANDW x y)
index a59d24654ba7a64b20c99a4ecd98783143bd30bc..b8028b84432fcc7399ab67cfcd8511e1b8870817 100644 (file)
         (Const32F [f2i(float64(i2f32(c) * i2f32(d)))])
 (Mul64F (Const64F [c]) (Const64F [d])) -> (Const64F [f2i(i2f(c) * i2f(d))])
 
-// Convert x * -1 to -x. The front-end catches some but not all of these.
+(Div8   (Const8  [c])  (Const8  [d])) && d != 0 -> (Const8  [int64(int8(c)/int8(d))])
+(Div16  (Const16 [c])  (Const16 [d])) && d != 0 -> (Const16 [int64(int16(c)/int16(d))])
+(Div32  (Const32 [c])  (Const32 [d])) && d != 0 -> (Const32 [int64(int32(c)/int32(d))])
+(Div64  (Const64 [c])  (Const64 [d])) && d != 0 -> (Const64 [c/d])
+(Div8u  (Const8  [c])  (Const8  [d])) && d != 0 -> (Const8  [int64(int8(uint8(c)/uint8(d)))])
+(Div16u (Const16 [c])  (Const16 [d])) && d != 0 -> (Const16 [int64(int16(uint16(c)/uint16(d)))])
+(Div32u (Const32 [c])  (Const32 [d])) && d != 0 -> (Const32 [int64(int32(uint32(c)/uint32(d)))])
+(Div64u (Const64 [c])  (Const64 [d])) && d != 0 -> (Const64 [int64(uint64(c)/uint64(d))])
+(Div32F (Const32F [c]) (Const32F [d])) -> (Const32F [f2i(float64(i2f32(c) / i2f32(d)))])
+(Div64F (Const64F [c]) (Const64F [d])) -> (Const64F [f2i(i2f(c) / i2f(d))])
+
+// Convert x * 1 to x.
+(Mul8  (Const8  [1]) x) -> x
+(Mul16 (Const16 [1]) x) -> x
+(Mul32 (Const32 [1]) x) -> x
+(Mul64 (Const64 [1]) x) -> x
+
+// Convert x * -1 to -x.
 (Mul8  (Const8  [-1]) x) -> (Neg8  x)
 (Mul16 (Const16 [-1]) x) -> (Neg16 x)
 (Mul32 (Const32 [-1]) x) -> (Neg32 x)
   (ArrayMake1 (Arg <t.ElemType()> {n} [off]))
 
 // strength reduction of divide by a constant.
-// Note: frontend does <=32 bits. We only need to do 64 bits here.
-// TODO: Do them all here?
+// See ../magic.go for a detailed description of these algorithms.
 
-// Div/mod by 1.  Currently handled by frontend.
-//(Div64 n (Const64 [1])) -> n
-//(Div64u n (Const64 [1])) -> n
-//(Mod64 n (Const64 [1])) -> (Const64 [0])
-//(Mod64u n (Const64 [1])) -> (Const64 [0])
-
-// Unsigned divide by power of 2.
-(Div64u <t> n (Const64 [c])) && isPowerOfTwo(c) -> (Rsh64Ux64 n (Const64 <t> [log2(c)]))
-(Mod64u <t> n (Const64 [c])) && isPowerOfTwo(c) -> (And64 n (Const64 <t> [c-1]))
-
-// Signed divide by power of 2.  Currently handled by frontend.
-// n / c = n >> log(c)       if n >= 0
-//       = (n+c-1) >> log(c) if n < 0
-// We conditionally add c-1 by adding n>>63>>(64-log(c)) (first shift signed, second shift unsigned).
-//(Div64 <t> n (Const64 [c])) && isPowerOfTwo(c) ->
-//  (Rsh64x64
-//    (Add64 <t>
-//      n
-//      (Rsh64Ux64 <t>
-//        (Rsh64x64 <t> n (Const64 <t> [63]))
-//        (Const64 <t> [64-log2(c)])))
-//    (Const64 <t> [log2(c)]))
+// Unsigned divide by power of 2.  Strength reduce to a shift.
+(Div8u  n (Const8  [c])) && isPowerOfTwo(c&0xff)       -> (Rsh8Ux64 n  (Const64 <config.fe.TypeUInt64()> [log2(c&0xff)]))
+(Div16u n (Const16 [c])) && isPowerOfTwo(c&0xffff)     -> (Rsh16Ux64 n (Const64 <config.fe.TypeUInt64()> [log2(c&0xffff)]))
+(Div32u n (Const32 [c])) && isPowerOfTwo(c&0xffffffff) -> (Rsh32Ux64 n (Const64 <config.fe.TypeUInt64()> [log2(c&0xffffffff)]))
+(Div64u n (Const64 [c])) && isPowerOfTwo(c)            -> (Rsh64Ux64 n (Const64 <config.fe.TypeUInt64()> [log2(c)]))
 
 // Unsigned divide, not a power of 2.  Strength reduce to a multiply.
-(Div64u <t> x (Const64 [c])) && umagic64ok(c) && !umagic64a(c) ->
-  (Rsh64Ux64
-    (Hmul64u <t>
-      (Const64 <t> [umagic64m(c)])
+// For 8-bit divides, we just do a direct 9-bit by 8-bit multiply.
+(Div8u x (Const8 [c])) && umagicOK(8, c) ->
+  (Trunc32to8
+    (Rsh32Ux64 <config.fe.TypeUInt32()>
+      (Mul32 <config.fe.TypeUInt32()>
+        (Const32 <config.fe.TypeUInt32()> [int64(1<<8+umagic(8,c).m)])
+        (ZeroExt8to32 x))
+      (Const64 <config.fe.TypeUInt64()> [8+umagic(8,c).s])))
+
+// For 16-bit divides on 64-bit machines, we do a direct 17-bit by 16-bit multiply.
+(Div16u x (Const16 [c])) && umagicOK(16, c) && config.RegSize == 8 ->
+  (Trunc64to16
+    (Rsh64Ux64 <config.fe.TypeUInt64()>
+      (Mul64 <config.fe.TypeUInt64()>
+        (Const64 <config.fe.TypeUInt64()> [int64(1<<16+umagic(16,c).m)])
+        (ZeroExt16to64 x))
+      (Const64 <config.fe.TypeUInt64()> [16+umagic(16,c).s])))
+
+// For 16-bit divides on 32-bit machines
+(Div16u x (Const16 [c])) && umagicOK(16, c) && config.RegSize == 4 && umagic(16,c).m&1 == 0 ->
+  (Trunc32to16
+    (Rsh32Ux64 <config.fe.TypeUInt32()>
+      (Mul32 <config.fe.TypeUInt32()>
+        (Const32 <config.fe.TypeUInt32()> [int64(1<<15+umagic(16,c).m/2)])
+        (ZeroExt16to32 x))
+      (Const64 <config.fe.TypeUInt64()> [16+umagic(16,c).s-1])))
+(Div16u x (Const16 [c])) && umagicOK(16, c) && config.RegSize == 4 && c&1 == 0 ->
+  (Trunc32to16
+    (Rsh32Ux64 <config.fe.TypeUInt32()>
+      (Mul32 <config.fe.TypeUInt32()>
+        (Const32 <config.fe.TypeUInt32()> [int64(1<<15+(umagic(16,c).m+1)/2)])
+        (Rsh32Ux64 <config.fe.TypeUInt32()> (ZeroExt16to32 x) (Const64 <config.fe.TypeUInt64()> [1])))
+      (Const64 <config.fe.TypeUInt64()> [16+umagic(16,c).s-2])))
+(Div16u x (Const16 [c])) && umagicOK(16, c) && config.RegSize == 4 ->
+  (Trunc32to16
+    (Rsh32Ux64 <config.fe.TypeUInt32()>
+      (Avg32u
+        (Lsh32x64 <config.fe.TypeUInt32()> (ZeroExt16to32 x) (Const64 <config.fe.TypeUInt64()> [16]))
+        (Mul32 <config.fe.TypeUInt32()>
+          (Const32 <config.fe.TypeUInt32()> [int64(umagic(16,c).m)])
+          (ZeroExt16to32 x)))
+      (Const64 <config.fe.TypeUInt64()> [16+umagic(16,c).s-1])))
+
+// For 32-bit divides on 32-bit machines
+(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 4 && umagic(32,c).m&1 == 0 ->
+  (Rsh32Ux64 <config.fe.TypeUInt32()>
+    (Hmul32u <config.fe.TypeUInt32()>
+      (Const32 <config.fe.TypeUInt32()> [int64(int32(1<<31+umagic(32,c).m/2))])
       x)
-    (Const64 <t> [umagic64s(c)]))
-(Div64u <t> x (Const64 [c])) && umagic64ok(c) && umagic64a(c) ->
-  (Rsh64Ux64
-    (Avg64u <t>
-      (Hmul64u <t>
-        x
-        (Const64 <t> [umagic64m(c)]))
+    (Const64 <config.fe.TypeUInt64()> [umagic(32,c).s-1]))
+(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 4 && c&1 == 0 ->
+  (Rsh32Ux64 <config.fe.TypeUInt32()>
+    (Hmul32u <config.fe.TypeUInt32()>
+      (Const32 <config.fe.TypeUInt32()> [int64(int32(1<<31+(umagic(32,c).m+1)/2))])
+      (Rsh32Ux64 <config.fe.TypeUInt32()> x (Const64 <config.fe.TypeUInt64()> [1])))
+    (Const64 <config.fe.TypeUInt64()> [umagic(32,c).s-2]))
+(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 4 ->
+  (Rsh32Ux64 <config.fe.TypeUInt32()>
+    (Avg32u
+      x
+      (Hmul32u <config.fe.TypeUInt32()>
+        (Const32 <config.fe.TypeUInt32()> [int64(int32(umagic(32,c).m))])
+        x))
+    (Const64 <config.fe.TypeUInt64()> [umagic(32,c).s-1]))
+
+// For 32-bit divides on 64-bit machines
+// We'll use a regular (non-hi) multiply for this case.
+(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 8 && umagic(32,c).m&1 == 0 ->
+  (Trunc64to32
+    (Rsh64Ux64 <config.fe.TypeUInt64()>
+      (Mul64 <config.fe.TypeUInt64()>
+        (Const64 <config.fe.TypeUInt64()> [int64(1<<31+umagic(32,c).m/2)])
+        (ZeroExt32to64 x))
+      (Const64 <config.fe.TypeUInt64()> [32+umagic(32,c).s-1])))
+(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 8 && c&1 == 0 ->
+  (Trunc64to32
+    (Rsh64Ux64 <config.fe.TypeUInt64()>
+      (Mul64 <config.fe.TypeUInt64()>
+        (Const64 <config.fe.TypeUInt64()> [int64(1<<31+(umagic(32,c).m+1)/2)])
+        (Rsh64Ux64 <config.fe.TypeUInt64()> (ZeroExt32to64 x) (Const64 <config.fe.TypeUInt64()> [1])))
+      (Const64 <config.fe.TypeUInt64()> [32+umagic(32,c).s-2])))
+(Div32u x (Const32 [c])) && umagicOK(32, c) && config.RegSize == 8 ->
+  (Trunc64to32
+    (Rsh64Ux64 <config.fe.TypeUInt64()>
+      (Avg64u
+        (Lsh64x64 <config.fe.TypeUInt64()> (ZeroExt32to64 x) (Const64 <config.fe.TypeUInt64()> [32]))
+        (Mul64 <config.fe.TypeUInt32()>
+          (Const64 <config.fe.TypeUInt32()> [int64(umagic(32,c).m)])
+          (ZeroExt32to64 x)))
+      (Const64 <config.fe.TypeUInt64()> [32+umagic(32,c).s-1])))
+
+// For 64-bit divides on 64-bit machines
+// (64-bit divides on 32-bit machines are lowered to a runtime call by the walk pass.)
+(Div64u x (Const64 [c])) && umagicOK(64, c) && config.RegSize == 8 && umagic(64,c).m&1 == 0 ->
+  (Rsh64Ux64 <config.fe.TypeUInt64()>
+    (Hmul64u <config.fe.TypeUInt64()>
+      (Const64 <config.fe.TypeUInt64()> [int64(1<<63+umagic(64,c).m/2)])
       x)
-    (Const64 <t> [umagic64s(c)-1]))
+    (Const64 <config.fe.TypeUInt64()> [umagic(64,c).s-1]))
+(Div64u x (Const64 [c])) && umagicOK(64, c) && config.RegSize == 8 && c&1 == 0 ->
+  (Rsh64Ux64 <config.fe.TypeUInt64()>
+    (Hmul64u <config.fe.TypeUInt64()>
+      (Const64 <config.fe.TypeUInt64()> [int64(1<<63+(umagic(64,c).m+1)/2)])
+      (Rsh64Ux64 <config.fe.TypeUInt64()> x (Const64 <config.fe.TypeUInt64()> [1])))
+    (Const64 <config.fe.TypeUInt64()> [umagic(64,c).s-2]))
+(Div64u x (Const64 [c])) && umagicOK(64, c) && config.RegSize == 8 ->
+  (Rsh64Ux64 <config.fe.TypeUInt64()>
+    (Avg64u
+      x
+      (Hmul64u <config.fe.TypeUInt64()>
+        (Const64 <config.fe.TypeUInt64()> [int64(umagic(64,c).m)])
+        x))
+    (Const64 <config.fe.TypeUInt64()> [umagic(64,c).s-1]))
+
+// Signed divide by a negative constant.  Rewrite to divide by a positive constant.
+(Div8  <t> n (Const8  [c])) && c < 0 && c != -1<<7  -> (Neg8  (Div8  <t> n (Const8  <t> [-c])))
+(Div16 <t> n (Const16 [c])) && c < 0 && c != -1<<15 -> (Neg16 (Div16 <t> n (Const16 <t> [-c])))
+(Div32 <t> n (Const32 [c])) && c < 0 && c != -1<<31 -> (Neg32 (Div32 <t> n (Const32 <t> [-c])))
+(Div64 <t> n (Const64 [c])) && c < 0 && c != -1<<63 -> (Neg64 (Div64 <t> n (Const64 <t> [-c])))
+
+// Dividing by the most-negative number.  Result is always 0 except
+// if the input is also the most-negative number.
+// We can detect that using the sign bit of x & -x.
+(Div8  <t> x (Const8  [-1<<7 ])) -> (Rsh8Ux64  (And8  <t> x (Neg8  <t> x)) (Const64 <config.fe.TypeUInt64()> [7 ]))
+(Div16 <t> x (Const16 [-1<<15])) -> (Rsh16Ux64 (And16 <t> x (Neg16 <t> x)) (Const64 <config.fe.TypeUInt64()> [15]))
+(Div32 <t> x (Const32 [-1<<31])) -> (Rsh32Ux64 (And32 <t> x (Neg32 <t> x)) (Const64 <config.fe.TypeUInt64()> [31]))
+(Div64 <t> x (Const64 [-1<<63])) -> (Rsh64Ux64 (And64 <t> x (Neg64 <t> x)) (Const64 <config.fe.TypeUInt64()> [63]))
+
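
A sketch of that sign-bit trick for the 8-bit case (hypothetical helper):
x & -x isolates the lowest set bit, and its sign bit is set only when x is
itself the most-negative value.

```go
// divByMinInt8 computes x / (-1 << 7): the quotient is 1 iff
// x == -1<<7, which is exactly when the sign bit of x & -x is set.
func divByMinInt8(x int8) int8 {
	return int8(uint8(x&-x) >> 7)
}
```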
+// Signed divide by power of 2.
+// n / c =       n >> log(c) if n >= 0
+//       = (n+c-1) >> log(c) if n < 0
+// We conditionally add c-1 by adding n>>63>>(64-log(c)) (first shift signed, second shift unsigned).
+(Div8  <t> n (Const8  [c])) && isPowerOfTwo(c) ->
+  (Rsh8x64
+    (Add8  <t> n (Rsh8Ux64  <t> (Rsh8x64  <t> n (Const64 <config.fe.TypeUInt64()> [ 7])) (Const64 <config.fe.TypeUInt64()> [ 8-log2(c)])))
+    (Const64 <config.fe.TypeUInt64()> [log2(c)]))
+(Div16 <t> n (Const16 [c])) && isPowerOfTwo(c) ->
+  (Rsh16x64
+    (Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <config.fe.TypeUInt64()> [15])) (Const64 <config.fe.TypeUInt64()> [16-log2(c)])))
+    (Const64 <config.fe.TypeUInt64()> [log2(c)]))
+(Div32 <t> n (Const32 [c])) && isPowerOfTwo(c) ->
+  (Rsh32x64
+    (Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <config.fe.TypeUInt64()> [31])) (Const64 <config.fe.TypeUInt64()> [32-log2(c)])))
+    (Const64 <config.fe.TypeUInt64()> [log2(c)]))
+(Div64 <t> n (Const64 [c])) && isPowerOfTwo(c) ->
+  (Rsh64x64
+    (Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <config.fe.TypeUInt64()> [63])) (Const64 <config.fe.TypeUInt64()> [64-log2(c)])))
+    (Const64 <config.fe.TypeUInt64()> [log2(c)]))
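
A concrete instance of the rule above (hypothetical helper, c = 8): the bias
n>>63, shifted unsigned by 64-log2(c), contributes c-1 only when n is
negative, which makes the final arithmetic shift round toward zero.

```go
// div8 computes n / 8 for int64 using the conditional-bias scheme:
// n>>63 is 0 or -1; shifting that unsigned by 64-3 leaves 0 or 7.
func div8(n int64) int64 {
	bias := int64(uint64(n>>63) >> (64 - 3))
	return (n + bias) >> 3
}
```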
 
 // Signed divide, not a power of 2.  Strength reduce to a multiply.
-(Div64 <t> x (Const64 [c])) && c > 0 && smagic64ok(c) && smagic64m(c) > 0 ->
+(Div8 <t> x (Const8 [c])) && smagicOK(8,c) ->
+  (Sub8 <t>
+    (Rsh32x64 <t>
+      (Mul32 <config.fe.TypeUInt32()>
+        (Const32 <config.fe.TypeUInt32()> [int64(smagic(8,c).m)])
+        (SignExt8to32 x))
+      (Const64 <config.fe.TypeUInt64()> [8+smagic(8,c).s]))
+    (Rsh32x64 <t>
+      (SignExt8to32 x)
+      (Const64 <config.fe.TypeUInt64()> [31])))
+(Div16 <t> x (Const16 [c])) && smagicOK(16,c) ->
+  (Sub16 <t>
+    (Rsh32x64 <t>
+      (Mul32 <config.fe.TypeUInt32()>
+        (Const32 <config.fe.TypeUInt32()> [int64(smagic(16,c).m)])
+        (SignExt16to32 x))
+      (Const64 <config.fe.TypeUInt64()> [16+smagic(16,c).s]))
+    (Rsh32x64 <t>
+      (SignExt16to32 x)
+      (Const64 <config.fe.TypeUInt64()> [31])))
+(Div32 <t> x (Const32 [c])) && smagicOK(32,c) && config.RegSize == 8 ->
+  (Sub32 <t>
+    (Rsh64x64 <t>
+      (Mul64 <config.fe.TypeUInt64()>
+        (Const64 <config.fe.TypeUInt64()> [int64(smagic(32,c).m)])
+        (SignExt32to64 x))
+      (Const64 <config.fe.TypeUInt64()> [32+smagic(32,c).s]))
+    (Rsh64x64 <t>
+      (SignExt32to64 x)
+      (Const64 <config.fe.TypeUInt64()> [63])))
+(Div32 <t> x (Const32 [c])) && smagicOK(32,c) && config.RegSize == 4 && smagic(32,c).m&1 == 0 ->
+  (Sub32 <t>
+    (Rsh32x64 <t>
+      (Hmul32 <t>
+        (Const32 <config.fe.TypeUInt32()> [int64(int32(smagic(32,c).m/2))])
+        x)
+      (Const64 <config.fe.TypeUInt64()> [smagic(32,c).s-1]))
+    (Rsh32x64 <t>
+      x
+      (Const64 <config.fe.TypeUInt64()> [31])))
+(Div32 <t> x (Const32 [c])) && smagicOK(32,c) && config.RegSize == 4 && smagic(32,c).m&1 != 0 ->
+  (Sub32 <t>
+    (Rsh32x64 <t>
+      (Add32 <t>
+        (Hmul32 <t>
+          (Const32 <config.fe.TypeUInt32()> [int64(int32(smagic(32,c).m))])
+          x)
+        x)
+      (Const64 <config.fe.TypeUInt64()> [smagic(32,c).s]))
+    (Rsh32x64 <t>
+      x
+      (Const64 <config.fe.TypeUInt64()> [31])))
+(Div64 <t> x (Const64 [c])) && smagicOK(64,c) && smagic(64,c).m&1 == 0 ->
   (Sub64 <t>
     (Rsh64x64 <t>
       (Hmul64 <t>
-        (Const64 <t> [smagic64m(c)])
+        (Const64 <config.fe.TypeUInt64()> [int64(smagic(64,c).m/2)])
         x)
-      (Const64 <t> [smagic64s(c)]))
+      (Const64 <config.fe.TypeUInt64()> [smagic(64,c).s-1]))
     (Rsh64x64 <t>
       x
-      (Const64 <t> [63])))
-(Div64 <t> x (Const64 [c])) && c > 0 && smagic64ok(c) && smagic64m(c) < 0 ->
+      (Const64 <config.fe.TypeUInt64()> [63])))
+(Div64 <t> x (Const64 [c])) && smagicOK(64,c) && smagic(64,c).m&1 != 0 ->
   (Sub64 <t>
     (Rsh64x64 <t>
       (Add64 <t>
         (Hmul64 <t>
-          (Const64 <t> [smagic64m(c)])
+          (Const64 <config.fe.TypeUInt64()> [int64(smagic(64,c).m)])
           x)
         x)
-      (Const64 <t> [smagic64s(c)]))
+      (Const64 <config.fe.TypeUInt64()> [smagic(64,c).s]))
     (Rsh64x64 <t>
       x
-      (Const64 <t> [63])))
-(Div64 <t> x (Const64 [c])) && c < 0 && smagic64ok(c) && smagic64m(c) > 0 ->
-  (Neg64 <t>
-    (Sub64 <t>
-      (Rsh64x64 <t>
-        (Hmul64 <t>
-          (Const64 <t> [smagic64m(c)])
-          x)
-        (Const64 <t> [smagic64s(c)]))
-      (Rsh64x64 <t>
-        x
-        (Const64 <t> [63]))))
-(Div64 <t> x (Const64 [c])) && c < 0 && smagic64ok(c) && smagic64m(c) < 0 ->
-  (Neg64 <t>
-    (Sub64 <t>
-      (Rsh64x64 <t>
-        (Add64 <t>
-          (Hmul64 <t>
-            (Const64 <t> [smagic64m(c)])
-            x)
-          x)
-        (Const64 <t> [smagic64s(c)]))
-      (Rsh64x64 <t>
-        x
-        (Const64 <t> [63]))))
+      (Const64 <config.fe.TypeUInt64()> [63])))
+
+// Unsigned mod by power of 2 constant.
+(Mod8u  <t> n (Const8  [c])) && isPowerOfTwo(c&0xff)       -> (And8 n (Const8 <t> [(c&0xff)-1]))
+(Mod16u <t> n (Const16 [c])) && isPowerOfTwo(c&0xffff)     -> (And16 n (Const16 <t> [(c&0xffff)-1]))
+(Mod32u <t> n (Const32 [c])) && isPowerOfTwo(c&0xffffffff) -> (And32 n (Const32 <t> [(c&0xffffffff)-1]))
+(Mod64u <t> n (Const64 [c])) && isPowerOfTwo(c)            -> (And64 n (Const64 <t> [c-1]))
+
+// Signed mod by negative constant.
+(Mod8  <t> n (Const8  [c])) && c < 0 && c != -1<<7  -> (Mod8  <t> n (Const8  <t> [-c]))
+(Mod16 <t> n (Const16 [c])) && c < 0 && c != -1<<15 -> (Mod16 <t> n (Const16 <t> [-c]))
+(Mod32 <t> n (Const32 [c])) && c < 0 && c != -1<<31 -> (Mod32 <t> n (Const32 <t> [-c]))
+(Mod64 <t> n (Const64 [c])) && c < 0 && c != -1<<63 -> (Mod64 <t> n (Const64 <t> [-c]))
 
-// A%B = A-(A/B*B).
+// All other mods by constants, do A%B = A-(A/B*B).
 // This implements % with two * and a bunch of ancillary ops.
 // One of the * is free if the user's code also computes A/B.
-(Mod64  <t> x (Const64 [c])) && x.Op != OpConst64 && smagic64ok(c)
+(Mod8   <t> x (Const8  [c])) && x.Op != OpConst8  && (c > 0 || c == -1<<7)
+  -> (Sub8  x (Mul8  <t> (Div8   <t> x (Const8  <t> [c])) (Const8  <t> [c])))
+(Mod16  <t> x (Const16 [c])) && x.Op != OpConst16 && (c > 0 || c == -1<<15)
+  -> (Sub16 x (Mul16 <t> (Div16  <t> x (Const16 <t> [c])) (Const16 <t> [c])))
+(Mod32  <t> x (Const32 [c])) && x.Op != OpConst32 && (c > 0 || c == -1<<31)
+  -> (Sub32 x (Mul32 <t> (Div32  <t> x (Const32 <t> [c])) (Const32 <t> [c])))
+(Mod64  <t> x (Const64 [c])) && x.Op != OpConst64 && (c > 0 || c == -1<<63)
   -> (Sub64 x (Mul64 <t> (Div64  <t> x (Const64 <t> [c])) (Const64 <t> [c])))
-(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && umagic64ok(c)
+(Mod8u  <t> x (Const8  [c])) && x.Op != OpConst8  && c > 0 && umagicOK(8 ,c)
+  -> (Sub8  x (Mul8  <t> (Div8u  <t> x (Const8  <t> [c])) (Const8  <t> [c])))
+(Mod16u <t> x (Const16 [c])) && x.Op != OpConst16 && c > 0 && umagicOK(16,c)
+  -> (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
+(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK(32,c)
+  -> (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
+(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK(64,c)
   -> (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
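
For example (an assumed sketch, not from the CL), a%7 falls out of the
strength-reduced a/7 at the cost of one multiply and one subtract; when the
program computes both a/7 and a%7, the quotient is shared:

```go
// mod7 illustrates the A%B = A - (A/B)*B rewrite: the division is
// itself strength-reduced to a multiply, so no divide instruction runs.
func mod7(a int64) int64 {
	q := a / 7
	return a - q*7
}
```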
 
 // floating point optimizations
index b825f1347506d68a312e5f83bc52e2cae43a1aed..f39598e9afeb1116bd8c036f5af04360d6efde9d 100644 (file)
@@ -61,8 +61,12 @@ var genericOps = []opData{
        {name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
        {name: "Mul64uhilo", argLength: 2, typ: "(UInt64,UInt64)"}, // arg0 * arg1, returns (hi, lo)
 
-       // Weird special instruction for strength reduction of divides.
-       {name: "Avg64u", argLength: 2}, // (uint64(arg0) + uint64(arg1)) / 2, correct to all 64 bits.
+       // Weird special instructions for use in the strength reduction of divides.
+       // These ops compute unsigned (arg0 + arg1) / 2, correct to all
+       // 32/64 bits, even when the intermediate result of the add has 33/65 bits.
+       // These ops can assume arg0 >= arg1.
+       {name: "Avg32u", argLength: 2, typ: "UInt32"}, // 32-bit platforms only
+       {name: "Avg64u", argLength: 2, typ: "UInt64"}, // 64-bit platforms only
 
        {name: "Div8", argLength: 2},  // arg0 / arg1, signed
        {name: "Div8u", argLength: 2}, // arg0 / arg1, unsigned
@@ -263,11 +267,13 @@ var genericOps = []opData{
        {name: "Const8", aux: "Int8"},        // auxint is sign-extended 8 bits
        {name: "Const16", aux: "Int16"},      // auxint is sign-extended 16 bits
        {name: "Const32", aux: "Int32"},      // auxint is sign-extended 32 bits
-       {name: "Const64", aux: "Int64"},      // value is auxint
-       {name: "Const32F", aux: "Float32"},   // value is math.Float64frombits(uint64(auxint)) and is exactly prepresentable as float 32
-       {name: "Const64F", aux: "Float64"},   // value is math.Float64frombits(uint64(auxint))
-       {name: "ConstInterface"},             // nil interface
-       {name: "ConstSlice"},                 // nil slice
+       // Note: ConstX are sign-extended even when the type of the value is unsigned.
+       // For instance, uint8(0xaa) is stored as auxint=0xffffffffffffffaa.
+       {name: "Const64", aux: "Int64"},    // value is auxint
+       {name: "Const32F", aux: "Float32"}, // value is math.Float64frombits(uint64(auxint)) and is exactly prepresentable as float 32
+       {name: "Const64F", aux: "Float64"}, // value is math.Float64frombits(uint64(auxint))
+       {name: "ConstInterface"},           // nil interface
+       {name: "ConstSlice"},               // nil slice
 
        // Constant-like things
        {name: "InitMem"},            // memory input to the function.
index f6297fdfa541a11ba6d8be1f1a5c50dda8c34ea7..0457e90b53c935b085f67ee3854b16d4a98167c7 100644 (file)
 
 package ssa
 
-// A copy of the code in ../gc/subr.go.
-// We can't use it directly because it would generate
-// an import cycle. TODO: move to a common support package.
-
-// argument passing to/from
-// smagic and umagic
-type magic struct {
-       W   int // input for both - width
-       S   int // output for both - shift
-       Bad int // output for both - unexpected failure
-
-       // magic multiplier for signed literal divisors
-       Sd int64 // input - literal divisor
-       Sm int64 // output - multiplier
-
-       // magic multiplier for unsigned literal divisors
-       Ud uint64 // input - literal divisor
-       Um uint64 // output - multiplier
-       Ua int    // output - adder
+import "math/big"
+
+// So you want to compute x / c for some constant c?
+// Machine division instructions are slow, so we try to
+// compute this division with a multiplication + a few
+// other cheap instructions instead.
+// (We assume here that c != 0, +/- 1, or +/- 2^i.  Those
+// cases are easy to handle in different ways).
+
+// Technique from https://gmplib.org/~tege/divcnst-pldi94.pdf
+
+// First consider unsigned division.
+// Our strategy is to precompute 1/c then do
+//   ⎣x / c⎦ = ⎣x * (1/c)⎦.
+// 1/c is less than 1, so we can't compute it directly in
+// integer arithmetic.  Let's instead compute 2^e/c
+// for a value of e TBD (^ = exponentiation).  Then
+//   ⎣x / c⎦ = ⎣x * (2^e/c) / 2^e⎦.
+// Dividing by 2^e is easy.  2^e/c isn't an integer, unfortunately.
+// So we must approximate it.  Let's call its approximation m.
+// We'll then compute
+//   ⎣x * m / 2^e⎦
+// Which we want to be equal to ⎣x / c⎦ for 0 <= x < 2^n-1
+// where n is the word size.
+// Setting x = c gives us c * m >= 2^e.
+// We'll choose m = ⎡2^e/c⎤ to satisfy that equation.
+// What remains is to choose e.
+// Let m = 2^e/c + delta, 0 <= delta < 1
+//   ⎣x * (2^e/c + delta) / 2^e⎦
+//   ⎣x / c + x * delta / 2^e⎦
+// We must have x * delta / 2^e < 1/c so that this
+// additional term never rounds differently than ⎣x / c⎦ does.
+// Rearranging,
+//   2^e > x * delta * c
+// x can be at most 2^n-1 and delta can be at most 1.
+// So it is sufficient to have 2^e >= 2^n*c.
+// So we'll choose e = n + s, with s = ⎡log2(c)⎤.
+//
+// An additional complication arises because m has n+1 bits in it.
+// Hardware restricts us to n bit by n bit multiplies.
+// We divide into 3 cases:
+//
+// Case 1: m is even.
+//   ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
+//   ⎣x / c⎦ = ⎣x * (m/2) / 2^(n+s-1)⎦
+//   ⎣x / c⎦ = ⎣x * (m/2) / 2^n / 2^(s-1)⎦
+//   ⎣x / c⎦ = ⎣⎣x * (m/2) / 2^n⎦ / 2^(s-1)⎦
+//   multiply + shift
+//
+// Case 2: c is even.
+//   ⎣x / c⎦ = ⎣(x/2) / (c/2)⎦
+//   ⎣x / c⎦ = ⎣⎣x/2⎦ / (c/2)⎦
+//     This is just the original problem, with x' = ⎣x/2⎦, c' = c/2, n' = n-1.
+//       s' = s-1
+//       m' = ⎡2^(n'+s')/c'⎤
+//          = ⎡2^(n+s-1)/c⎤
+//          = ⎡m/2⎤
+//   ⎣x / c⎦ = ⎣x' * m' / 2^(n'+s')⎦
+//   ⎣x / c⎦ = ⎣⎣x/2⎦ * ⎡m/2⎤ / 2^(n+s-2)⎦
+//   ⎣x / c⎦ = ⎣⎣⎣x/2⎦ * ⎡m/2⎤ / 2^n⎦ / 2^(s-2)⎦
+//   shift + multiply + shift
+//
+// Case 3: everything else
+//   let k = m - 2^n. k fits in n bits.
+//   ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
+//   ⎣x / c⎦ = ⎣x * (2^n + k) / 2^(n+s)⎦
+//   ⎣x / c⎦ = ⎣(x + x * k / 2^n) / 2^s⎦
+//   ⎣x / c⎦ = ⎣(x + ⎣x * k / 2^n⎦) / 2^s⎦
+//   ⎣x / c⎦ = ⎣⎣(x + ⎣x * k / 2^n⎦) / 2⎦ / 2^(s-1)⎦
+//   multiply + avg + shift
+//
+// These can be implemented in hardware using:
+//  ⎣a * b / 2^n⎦ - aka high n bits of an n-bit by n-bit multiply.
+//  ⎣(a+b) / 2⎦   - aka "average" of two n-bit numbers.
+//                  (Not just a regular add & shift because the intermediate result
+//                   a+b has n+1 bits in it.  Nevertheless, can be done
+//                   in 2 instructions on x86.)
+
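
A quick standalone check of the recipe above (not part of the CL; n and c
chosen so the full multiply fits in a uint64): with the full (n+1)-bit
multiplier M = ⎡2^(n+s)/c⎤ (umagicData stores M with its top bit stripped),
the identity ⎣x/c⎦ = ⎣x*M / 2^(n+s)⎦ holds over the whole 16-bit range.

```go
package main

import "math/bits"

func main() {
	const n = 16
	const c = 7
	s := uint(bits.Len64(c))            // ⎡log2(c)⎤, since c is not a power of two
	M := (uint64(1)<<(n+s) + c - 1) / c // ⎡2^(n+s)/c⎤, an (n+1)-bit value
	for x := uint64(0); x < 1<<n; x++ {
		if x*M>>(n+s) != x/c {
			panic("magic multiplier is wrong")
		}
	}
	println("ok")
}
```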
+// umagicOK returns whether we should strength reduce an n-bit divide by c.
+func umagicOK(n uint, c int64) bool {
+       // Convert from ConstX auxint values to the real uint64 constant they represent.
+       d := uint64(c) << (64 - n) >> (64 - n)
+
+       // Doesn't work for 0.
+       // Don't use for powers of 2.
+       return d&(d-1) != 0
 }
 
-// magic number for signed division
-// see hacker's delight chapter 10
-func smagic(m *magic) {
-       var mask uint64
-
-       m.Bad = 0
-       switch m.W {
-       default:
-               m.Bad = 1
-               return
-
-       case 8:
-               mask = 0xff
-
-       case 16:
-               mask = 0xffff
-
-       case 32:
-               mask = 0xffffffff
-
-       case 64:
-               mask = 0xffffffffffffffff
-       }
-
-       two31 := mask ^ (mask >> 1)
-
-       p := m.W - 1
-       ad := uint64(m.Sd)
-       if m.Sd < 0 {
-               ad = -uint64(m.Sd)
-       }
-
-       // bad denominators
-       if ad == 0 || ad == 1 || ad == two31 {
-               m.Bad = 1
-               return
-       }
-
-       t := two31
-       ad &= mask
-
-       anc := t - 1 - t%ad
-       anc &= mask
-
-       q1 := two31 / anc
-       r1 := two31 - q1*anc
-       q1 &= mask
-       r1 &= mask
-
-       q2 := two31 / ad
-       r2 := two31 - q2*ad
-       q2 &= mask
-       r2 &= mask
-
-       var delta uint64
-       for {
-               p++
-               q1 <<= 1
-               r1 <<= 1
-               q1 &= mask
-               r1 &= mask
-               if r1 >= anc {
-                       q1++
-                       r1 -= anc
-                       q1 &= mask
-                       r1 &= mask
-               }
-
-               q2 <<= 1
-               r2 <<= 1
-               q2 &= mask
-               r2 &= mask
-               if r2 >= ad {
-                       q2++
-                       r2 -= ad
-                       q2 &= mask
-                       r2 &= mask
-               }
-
-               delta = ad - r2
-               delta &= mask
-               if q1 < delta || (q1 == delta && r1 == 0) {
-                       continue
-               }
-
-               break
-       }
-
-       m.Sm = int64(q2 + 1)
-       if uint64(m.Sm)&two31 != 0 {
-               m.Sm |= ^int64(mask)
-       }
-       m.S = p - m.W
+type umagicData struct {
+       s int64  // ⎡log2(c)⎤
+       m uint64 // ⎡2^(n+s)/c⎤ - 2^n
 }
 
-// magic number for unsigned division
-// see hacker's delight chapter 10
-func umagic(m *magic) {
-       var mask uint64
-
-       m.Bad = 0
-       m.Ua = 0
-
-       switch m.W {
-       default:
-               m.Bad = 1
-               return
-
-       case 8:
-               mask = 0xff
-
-       case 16:
-               mask = 0xffff
-
-       case 32:
-               mask = 0xffffffff
-
-       case 64:
-               mask = 0xffffffffffffffff
-       }
-
-       two31 := mask ^ (mask >> 1)
-
-       m.Ud &= mask
-       if m.Ud == 0 || m.Ud == two31 {
-               m.Bad = 1
-               return
+// umagic computes the constants needed to strength reduce unsigned n-bit divides by the constant uint64(c).
+// The return values satisfy for all 0 <= x < 2^n
+//  floor(x / uint64(c)) = x * (m + 2^n) >> (n+s)
+func umagic(n uint, c int64) umagicData {
+       // Convert from ConstX auxint values to the real uint64 constant they represent.
+       d := uint64(c) << (64 - n) >> (64 - n)
+
+       C := new(big.Int).SetUint64(d)
+       s := C.BitLen()
+       M := big.NewInt(1)
+       M.Lsh(M, n+uint(s))     // 2^(n+s)
+       M.Add(M, C)             // 2^(n+s)+c
+       M.Sub(M, big.NewInt(1)) // 2^(n+s)+c-1
+       M.Div(M, C)             // ⎡2^(n+s)/c⎤
+       if M.Bit(int(n)) != 1 {
+               panic("n+1st bit isn't set")
        }
+       M.SetBit(M, int(n), 0)
+       m := M.Uint64()
+       return umagicData{s: int64(s), m: m}
+}
 
-       nc := mask - (-m.Ud&mask)%m.Ud
-       p := m.W - 1
-
-       q1 := two31 / nc
-       r1 := two31 - q1*nc
-       q1 &= mask
-       r1 &= mask
-
-       q2 := (two31 - 1) / m.Ud
-       r2 := (two31 - 1) - q2*m.Ud
-       q2 &= mask
-       r2 &= mask
-
-       var delta uint64
-       for {
-               p++
-               if r1 >= nc-r1 {
-                       q1 <<= 1
-                       q1++
-                       r1 <<= 1
-                       r1 -= nc
-               } else {
-                       q1 <<= 1
-                       r1 <<= 1
-               }
-
-               q1 &= mask
-               r1 &= mask
-               if r2+1 >= m.Ud-r2 {
-                       if q2 >= two31-1 {
-                               m.Ua = 1
-                       }
-
-                       q2 <<= 1
-                       q2++
-                       r2 <<= 1
-                       r2++
-                       r2 -= m.Ud
-               } else {
-                       if q2 >= two31 {
-                               m.Ua = 1
-                       }
-
-                       q2 <<= 1
-                       r2 <<= 1
-                       r2++
-               }
-
-               q2 &= mask
-               r2 &= mask
-
-               delta = m.Ud - 1 - r2
-               delta &= mask
-
-               if p < m.W+m.W {
-                       if q1 < delta || (q1 == delta && r1 == 0) {
-                               continue
-                       }
-               }
-
-               break
+// For signed division, we use a similar strategy.
+// First, we enforce a positive c.
+//   x / c = -(x / (-c))
+// This will require an additional Neg op for c<0.
+//
+// If x is positive we're in a very similar state
+// to the unsigned case above.  We define:
+//   s = ⎡log2(c)⎤-1
+//   m = ⎡2^(n+s)/c⎤
+// Then
+//   ⎣x / c⎦ = ⎣x * m / 2^(n+s)⎦
+// If x is negative we have
+//   ⎡x / c⎤ = ⎣x * m / 2^(n+s)⎦ + 1
+// (TODO: derivation?)
+//
+// The multiply is a bit odd, as it is a signed n-bit value
+// times an unsigned n-bit value.  For n smaller than the
+// word size, we can extend x and m appropriately and use the
+// signed multiply instruction.  For n == word size,
+// we must use the signed multiply high and correct
+// the result by adding x*2^n.
+//
+// Adding 1 if x<0 is done by subtracting x>>(n-1).
+
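
And a matching standalone check of the signed recipe (not part of the CL):
with s = ⎡log2(c)⎤-1 and m = ⎡2^(n+s)/c⎤, truncating division is the
arithmetic shift plus one when x is negative.

```go
package main

import "math/bits"

func main() {
	const n = 16
	const c = int64(7)
	s := uint(bits.Len64(uint64(c))) - 1 // ⎡log2(c)⎤ - 1
	m := (int64(1)<<(n+s) + c - 1) / c   // ⎡2^(n+s)/c⎤
	for x := int64(-1 << (n - 1)); x < 1<<(n-1); x++ {
		got := x * m >> (n + s) // arithmetic shift: floor division by 2^(n+s)
		if x < 0 {
			got++ // ⎡x/c⎤ = ⎣x*m/2^(n+s)⎦ + 1 for negative x
		}
		if got != x/c {
			panic("signed magic multiplier is wrong")
		}
	}
	println("ok")
}
```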
+func smagicOK(n uint, c int64) bool {
+       if c < 0 {
+               // Doesn't work for negative c.
+               return false
        }
-
-       m.Um = q2 + 1
-       m.S = p - m.W
+       // Doesn't work for 0.
+       // Don't use it for powers of 2.
+       return c&(c-1) != 0
 }
 
-// adaptors for use by rewrite rules
-func smagic64ok(d int64) bool {
-       m := magic{W: 64, Sd: d}
-       smagic(&m)
-       return m.Bad == 0
-}
-func smagic64m(d int64) int64 {
-       m := magic{W: 64, Sd: d}
-       smagic(&m)
-       return m.Sm
-}
-func smagic64s(d int64) int64 {
-       m := magic{W: 64, Sd: d}
-       smagic(&m)
-       return int64(m.S)
+type smagicData struct {
+       s int64  // ⎡log2(c)⎤-1
+       m uint64 // ⎡2^(n+s)/c⎤
 }
 
-func umagic64ok(d int64) bool {
-       m := magic{W: 64, Ud: uint64(d)}
-       umagic(&m)
-       return m.Bad == 0
-}
-func umagic64m(d int64) int64 {
-       m := magic{W: 64, Ud: uint64(d)}
-       umagic(&m)
-       return int64(m.Um)
-}
-func umagic64s(d int64) int64 {
-       m := magic{W: 64, Ud: uint64(d)}
-       umagic(&m)
-       return int64(m.S)
-}
-func umagic64a(d int64) bool {
-       m := magic{W: 64, Ud: uint64(d)}
-       umagic(&m)
-       return m.Ua != 0
+// smagic computes the constants needed to strength reduce signed n-bit divides by the constant c.
+// Must have c>0.
+// The return values satisfy for all -2^(n-1) <= x < 2^(n-1)
+//  trunc(x / c) = x * m >> (n+s) + (x < 0 ? 1 : 0)
+func smagic(n uint, c int64) smagicData {
+       C := new(big.Int).SetInt64(c)
+       s := C.BitLen() - 1
+       M := big.NewInt(1)
+       M.Lsh(M, n+uint(s))     // 2^(n+s)
+       M.Add(M, C)             // 2^(n+s)+c
+       M.Sub(M, big.NewInt(1)) // 2^(n+s)+c-1
+       M.Div(M, C)             // ⎡2^(n+s)/c⎤
+       if M.Bit(int(n)) != 0 {
+               panic("n+1st bit is set")
+       }
+       if M.Bit(int(n-1)) == 0 {
+               panic("nth bit is not set")
+       }
+       m := M.Uint64()
+       return smagicData{s: int64(s), m: m}
 }
diff --git a/src/cmd/compile/internal/ssa/magic_test.go b/src/cmd/compile/internal/ssa/magic_test.go
new file mode 100644 (file)
index 0000000..9599524
--- /dev/null
@@ -0,0 +1,205 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+       "math/big"
+       "testing"
+)
+
+func TestMagicExhaustive8(t *testing.T) {
+       testMagicExhaustive(t, 8)
+}
+func TestMagicExhaustive8U(t *testing.T) {
+       testMagicExhaustiveU(t, 8)
+}
+func TestMagicExhaustive16(t *testing.T) {
+       if testing.Short() {
+               t.Skip("slow test; skipping")
+       }
+       testMagicExhaustive(t, 16)
+}
+func TestMagicExhaustive16U(t *testing.T) {
+       if testing.Short() {
+               t.Skip("slow test; skipping")
+       }
+       testMagicExhaustiveU(t, 16)
+}
+
+// exhaustive test of magic for n bits
+func testMagicExhaustive(t *testing.T, n uint) {
+       min := -int64(1) << (n - 1)
+       max := int64(1) << (n - 1)
+       for c := int64(1); c < max; c++ {
+               if !smagicOK(n, int64(c)) {
+                       continue
+               }
+               m := int64(smagic(n, c).m)
+               s := smagic(n, c).s
+               for i := min; i < max; i++ {
+                       want := i / c
+                       got := (i * m) >> (n + uint(s))
+                       if i < 0 {
+                               got++
+                       }
+                       if want != got {
+                               t.Errorf("signed magic wrong for %d / %d: got %d, want %d (m=%d,s=%d)\n", i, c, got, want, m, s)
+                       }
+               }
+       }
+}
+func testMagicExhaustiveU(t *testing.T, n uint) {
+       max := uint64(1) << n
+       for c := uint64(1); c < max; c++ {
+               if !umagicOK(n, int64(c)) {
+                       continue
+               }
+               m := umagic(n, int64(c)).m
+               s := umagic(n, int64(c)).s
+               for i := uint64(0); i < max; i++ {
+                       want := i / c
+                       got := (i * (max + m)) >> (n + uint(s))
+                       if want != got {
+                               t.Errorf("unsigned magic wrong for %d / %d: got %d, want %d (m=%d,s=%d)\n", i, c, got, want, m, s)
+                       }
+               }
+       }
+}
+
+func TestMagicUnsigned(t *testing.T) {
+       One := new(big.Int).SetUint64(1)
+       for _, n := range [...]uint{8, 16, 32, 64} {
+               TwoN := new(big.Int).Lsh(One, n)
+               Max := new(big.Int).Sub(TwoN, One)
+               for _, c := range [...]uint64{
+                       3,
+                       5,
+                       6,
+                       7,
+                       9,
+                       10,
+                       11,
+                       12,
+                       13,
+                       14,
+                       15,
+                       17,
+                       1<<8 - 1,
+                       1<<8 + 1,
+                       1<<16 - 1,
+                       1<<16 + 1,
+                       1<<32 - 1,
+                       1<<32 + 1,
+                       1<<64 - 1,
+               } {
+                       if c>>n != 0 {
+                               continue // not appropriate for the given n.
+                       }
+                       if !umagicOK(n, int64(c)) {
+                               t.Errorf("expected n=%d c=%d to pass\n", n, c)
+                       }
+                       m := umagic(n, int64(c)).m
+                       s := umagic(n, int64(c)).s
+
+                       C := new(big.Int).SetUint64(c)
+                       M := new(big.Int).SetUint64(m)
+                       M.Add(M, TwoN)
+
+                       // Find largest multiple of c.
+                       Mul := new(big.Int).Div(Max, C)
+                       Mul.Mul(Mul, C)
+                       mul := Mul.Uint64()
+
+                       // Try some input values, mostly around multiples of c.
+                       for _, x := range [...]uint64{0, 1,
+                               c - 1, c, c + 1,
+                               2*c - 1, 2 * c, 2*c + 1,
+                               mul - 1, mul, mul + 1,
+                               uint64(1)<<n - 1,
+                       } {
+                               X := new(big.Int).SetUint64(x)
+                               if X.Cmp(Max) > 0 {
+                                       continue
+                               }
+                               Want := new(big.Int).Quo(X, C)
+                               Got := new(big.Int).Mul(X, M)
+                               Got.Rsh(Got, n+uint(s))
+                               if Want.Cmp(Got) != 0 {
+                                       t.Errorf("umagic for %d/%d n=%d doesn't work, got=%s, want %s\n", x, c, n, Got, Want)
+                               }
+                       }
+               }
+       }
+}
+
+func TestMagicSigned(t *testing.T) {
+       One := new(big.Int).SetInt64(1)
+       for _, n := range [...]uint{8, 16, 32, 64} {
+               TwoNMinusOne := new(big.Int).Lsh(One, n-1)
+               Max := new(big.Int).Sub(TwoNMinusOne, One)
+               Min := new(big.Int).Neg(TwoNMinusOne)
+               for _, c := range [...]int64{
+                       3,
+                       5,
+                       6,
+                       7,
+                       9,
+                       10,
+                       11,
+                       12,
+                       13,
+                       14,
+                       15,
+                       17,
+                       1<<7 - 1,
+                       1<<7 + 1,
+                       1<<15 - 1,
+                       1<<15 + 1,
+                       1<<31 - 1,
+                       1<<31 + 1,
+                       1<<63 - 1,
+               } {
+                       if c>>(n-1) != 0 {
+                               continue // not appropriate for the given n.
+                       }
+                       if !smagicOK(n, int64(c)) {
+                               t.Errorf("expected n=%d c=%d to pass\n", n, c)
+                       }
+                       m := smagic(n, int64(c)).m
+                       s := smagic(n, int64(c)).s
+
+                       C := new(big.Int).SetInt64(c)
+                       M := new(big.Int).SetUint64(m)
+
+                       // Find largest multiple of c.
+                       Mul := new(big.Int).Div(Max, C)
+                       Mul.Mul(Mul, C)
+                       mul := Mul.Int64()
+
+                       // Try some input values, mostly around multiples of c.
+                       for _, x := range [...]int64{
+                               -1, 1,
+                               -c - 1, -c, -c + 1, c - 1, c, c + 1,
+                               -2*c - 1, -2 * c, -2*c + 1, 2*c - 1, 2 * c, 2*c + 1,
+                               -mul - 1, -mul, -mul + 1, mul - 1, mul, mul + 1,
+                               int64(1)<<n - 1, -int64(1)<<n + 1,
+                       } {
+                               X := new(big.Int).SetInt64(x)
+                               if X.Cmp(Min) < 0 || X.Cmp(Max) > 0 {
+                                       continue
+                               }
+                               Want := new(big.Int).Quo(X, C)
+                               Got := new(big.Int).Mul(X, M)
+                               Got.Rsh(Got, n+uint(s))
+                               if x < 0 {
+                                       Got.Add(Got, One)
+                               }
+                               if Want.Cmp(Got) != 0 {
+                                       t.Errorf("smagic for %d/%d n=%d doesn't work, got=%s, want %s\n", x, c, n, Got, Want)
+                               }
+                       }
+               }
+       }
+}
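
The identity these tests exercise is x/c == (x * (2^n + m)) >> (n + s), with an extra +1 for negative dividends in the signed case. A standalone sketch for n=16, c=7; the pair m=9363, s=3 is worked out by hand so that 1<<16 + m == 74899 == ceil(1<<19 / 7), the same relation the big.Int checks above verify, and is not taken from umagic itself:

        package main

        import "fmt"

        // udiv7 divides a 16-bit value by 7 with one multiply and one shift,
        // doing the arithmetic in 64 bits so the product cannot overflow.
        func udiv7(x uint16) uint16 {
                const m, s = 9363, 3 // 1<<16 + m == ceil(1<<19 / 7)
                return uint16((uint64(x) * (1<<16 + m)) >> (16 + s))
        }

        func main() {
                for x := 0; x < 1<<16; x++ {
                        if got, want := udiv7(uint16(x)), uint16(x/7); got != want {
                                fmt.Printf("x=%d: got %d, want %d\n", x, got, want)
                        }
                }
        }
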
index e30a08d361c435729c0ef0916206b7a1cd1242cf..0105c37cd594d05d0a526dd0552ec7470c07d14b 100644 (file)
@@ -273,6 +273,7 @@ const (
        Op386HMULWU
        Op386HMULBU
        Op386MULLQU
+       Op386AVGLU
        Op386DIVL
        Op386DIVW
        Op386DIVLU
@@ -1595,6 +1596,7 @@ const (
        OpHmul64u
        OpMul32uhilo
        OpMul64uhilo
+       OpAvg32u
        OpAvg64u
        OpDiv8
        OpDiv8u
@@ -2547,6 +2549,22 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "AVGLU",
+               argLen:       2,
+               commutative:  true,
+               resultInArg0: true,
+               clobberFlags: true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                               {1, 239}, // AX CX DX BX BP SI DI
+                       },
+                       outputs: []outputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                       },
+               },
+       },
        {
                name:         "DIVL",
                argLen:       2,
@@ -19967,6 +19985,11 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "Avg32u",
+               argLen:  2,
+               generic: true,
+       },
        {
                name:    "Avg64u",
                argLen:  2,
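
The 386 entry above carries the usual two-operand x86 constraints: resultInArg0 ties the destination register to the first input, clobberFlags records that the lowering trashes the condition codes, and mask 239 is the general-register set minus SP. Semantically, Avg32u (like Avg64u) is an unsigned average whose intermediate sum is one bit wider than its operands; a hypothetical reference implementation, not part of the CL:

        package main

        import "fmt"

        // avg32u shows the semantics AVGLU must preserve: form the sum in
        // 64 bits so its 33rd bit survives the halving.
        func avg32u(x, y uint32) uint32 {
                return uint32((uint64(x) + uint64(y)) / 2)
        }

        func main() {
                x := uint32(1<<32 - 1)
                fmt.Println(avg32u(x, x)) // 4294967295; a 32-bit x+y would wrap
        }
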
index a0e278fb159672daca79deb3f17b98c37f8dbab8..f7d256df1625ea622cc7484e122ce3425030ae43 100644 (file)
@@ -5,10 +5,12 @@
 package ssa
 
 import (
+       "crypto/sha1"
        "fmt"
        "math"
        "os"
        "path/filepath"
+       "strings"
 )
 
 func applyRewrite(f *Func, rb func(*Block, *Config) bool, rv func(*Value, *Config) bool) {
@@ -298,7 +300,7 @@ func nto(x int64) int64 {
        return ntz(^x)
 }
 
-// log2 returns logarithm in base of uint64(n), with log2(0) = -1.
+// log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
 // Rounds down.
 func log2(n int64) (l int64) {
        l = -1
@@ -525,3 +527,20 @@ func min(x, y int64) int64 {
        }
        return y
 }
+
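+// experiment is a temporary debugging gate: it hashes f's name so a
+// misbehaving rewrite can be bisected by hash suffix, though as written
+// it enables the change only for (*fmt).fmt_integer.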
+func experiment(f *Func) bool {
+       hstr := ""
+       for _, b := range sha1.Sum([]byte(f.Name)) {
+               hstr += fmt.Sprintf("%08b", b)
+       }
+       r := strings.HasSuffix(hstr, "00011")
+       _ = r
+       r = f.Name == "(*fmt).fmt_integer"
+       if r {
+               fmt.Printf("             enabled for %s\n", f.Name)
+       }
+       return r
+}
index 7d9f56922d9ea2ded8c273491d8365913f016727..a396ec1976838aa7754cb80a119772da75783d90 100644 (file)
@@ -236,6 +236,8 @@ func rewriteValue386(v *Value, config *Config) bool {
                return rewriteValue386_OpAnd8(v, config)
        case OpAndB:
                return rewriteValue386_OpAndB(v, config)
+       case OpAvg32u:
+               return rewriteValue386_OpAvg32u(v, config)
        case OpBswap32:
                return rewriteValue386_OpBswap32(v, config)
        case OpClosureCall:
@@ -9714,6 +9716,21 @@ func rewriteValue386_OpAndB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValue386_OpAvg32u(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Avg32u x y)
+       // cond:
+       // result: (AVGLU x y)
+       for {
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(Op386AVGLU)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+}
 func rewriteValue386_OpBswap32(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index f76299e8d3207886ddea938a990e5b2e1a141bec..2ad662f8fe2947d246c1689a8cb9c421c10ab60b 100644 (file)
@@ -360,6 +360,8 @@ func rewriteValueARM(v *Value, config *Config) bool {
                return rewriteValueARM_OpAnd8(v, config)
        case OpAndB:
                return rewriteValueARM_OpAndB(v, config)
+       case OpAvg32u:
+               return rewriteValueARM_OpAvg32u(v, config)
        case OpBswap32:
                return rewriteValueARM_OpBswap32(v, config)
        case OpClosureCall:
@@ -13018,6 +13020,28 @@ func rewriteValueARM_OpAndB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueARM_OpAvg32u(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Avg32u <t> x y)
+       // cond:
+       // result: (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpARMADD)
+               v0 := b.NewValue0(v.Pos, OpARMSRLconst, t)
+               v0.AuxInt = 1
+               v1 := b.NewValue0(v.Pos, OpARMSUB, t)
+               v1.AddArg(x)
+               v1.AddArg(y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v.AddArg(y)
+               return true
+       }
+}
 func rewriteValueARM_OpBswap32(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index a39554c045701fda8090dc5d617501a271d717ae..19acc61e094a7660a61946bcf87ace5f488d25f6 100644 (file)
@@ -9647,31 +9647,20 @@ func rewriteValueARM64_OpAvg64u(v *Value, config *Config) bool {
        _ = b
        // match: (Avg64u <t> x y)
        // cond:
-       // result: (ADD (ADD <t> (SRLconst <t> x [1]) (SRLconst <t> y [1])) (AND <t> (AND <t> x y) (MOVDconst [1])))
+       // result: (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
        for {
                t := v.Type
                x := v.Args[0]
                y := v.Args[1]
                v.reset(OpARM64ADD)
-               v0 := b.NewValue0(v.Pos, OpARM64ADD, t)
-               v1 := b.NewValue0(v.Pos, OpARM64SRLconst, t)
-               v1.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpARM64SRLconst, t)
+               v0.AuxInt = 1
+               v1 := b.NewValue0(v.Pos, OpARM64SUB, t)
                v1.AddArg(x)
+               v1.AddArg(y)
                v0.AddArg(v1)
-               v2 := b.NewValue0(v.Pos, OpARM64SRLconst, t)
-               v2.AuxInt = 1
-               v2.AddArg(y)
-               v0.AddArg(v2)
                v.AddArg(v0)
-               v3 := b.NewValue0(v.Pos, OpARM64AND, t)
-               v4 := b.NewValue0(v.Pos, OpARM64AND, t)
-               v4.AddArg(x)
-               v4.AddArg(y)
-               v3.AddArg(v4)
-               v5 := b.NewValue0(v.Pos, OpARM64MOVDconst, config.fe.TypeUInt64())
-               v5.AuxInt = 1
-               v3.AddArg(v5)
-               v.AddArg(v3)
+               v.AddArg(y)
                return true
        }
 }
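
Here, and in the parallel ARM, MIPS, MIPS64, PPC64 and S390X rules, the four-operation average x>>1 + y>>1 + (x&y&1) becomes y + (x-y)>>1. The shorter form still never drops the carry, but it equals floor((x+y)/2) only when x >= y; that appears to hold for every operand pair the divide expansions create, since the first argument is the scaled dividend and the second a high product strictly below it. A quick 8-bit check of the identity, assuming only x >= y:

        package main

        import "fmt"

        func main() {
                for x := 0; x < 256; x++ {
                        for y := 0; y <= x; y++ {
                                want := uint8((x + y) / 2)               // wide reference
                                got := uint8(y) + (uint8(x)-uint8(y))>>1 // rewritten form, 8-bit only
                                if got != want {
                                        fmt.Printf("x=%d y=%d: got %d, want %d\n", x, y, got, want)
                                }
                        }
                }
        }
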
index 37b4d0a7c5e92b40eadd465fa44a7fcaaf46c777..2c320a9216fadae3b44e8a26288bf56ce428db5f 100644 (file)
@@ -50,6 +50,8 @@ func rewriteValueMIPS(v *Value, config *Config) bool {
                return rewriteValueMIPS_OpAtomicStore32(v, config)
        case OpAtomicStorePtrNoWB:
                return rewriteValueMIPS_OpAtomicStorePtrNoWB(v, config)
+       case OpAvg32u:
+               return rewriteValueMIPS_OpAvg32u(v, config)
        case OpClosureCall:
                return rewriteValueMIPS_OpClosureCall(v, config)
        case OpCom16:
@@ -991,6 +993,28 @@ func rewriteValueMIPS_OpAtomicStorePtrNoWB(v *Value, config *Config) bool {
                return true
        }
 }
+func rewriteValueMIPS_OpAvg32u(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Avg32u <t> x y)
+       // cond:
+       // result: (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
+       for {
+               t := v.Type
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpMIPSADD)
+               v0 := b.NewValue0(v.Pos, OpMIPSSRLconst, t)
+               v0.AuxInt = 1
+               v1 := b.NewValue0(v.Pos, OpMIPSSUB, t)
+               v1.AddArg(x)
+               v1.AddArg(y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v.AddArg(y)
+               return true
+       }
+}
 func rewriteValueMIPS_OpClosureCall(v *Value, config *Config) bool {
        b := v.Block
        _ = b
index 0748013d4f8edca0a5f3547ac9801e74fcda9fd3..f3d0fe3aa63f1b591884dcf982989d8aa75b862d 100644 (file)
@@ -773,31 +773,20 @@ func rewriteValueMIPS64_OpAvg64u(v *Value, config *Config) bool {
        _ = b
        // match: (Avg64u <t> x y)
        // cond:
-       // result: (ADDV (ADDV <t> (SRLVconst <t> x [1]) (SRLVconst <t> y [1])) (AND <t> (AND <t> x y) (MOVVconst [1])))
+       // result: (ADDV (SRLVconst <t> (SUBV <t> x y) [1]) y)
        for {
                t := v.Type
                x := v.Args[0]
                y := v.Args[1]
                v.reset(OpMIPS64ADDV)
-               v0 := b.NewValue0(v.Pos, OpMIPS64ADDV, t)
-               v1 := b.NewValue0(v.Pos, OpMIPS64SRLVconst, t)
-               v1.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpMIPS64SRLVconst, t)
+               v0.AuxInt = 1
+               v1 := b.NewValue0(v.Pos, OpMIPS64SUBV, t)
                v1.AddArg(x)
+               v1.AddArg(y)
                v0.AddArg(v1)
-               v2 := b.NewValue0(v.Pos, OpMIPS64SRLVconst, t)
-               v2.AuxInt = 1
-               v2.AddArg(y)
-               v0.AddArg(v2)
                v.AddArg(v0)
-               v3 := b.NewValue0(v.Pos, OpMIPS64AND, t)
-               v4 := b.NewValue0(v.Pos, OpMIPS64AND, t)
-               v4.AddArg(x)
-               v4.AddArg(y)
-               v3.AddArg(v4)
-               v5 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, config.fe.TypeUInt64())
-               v5.AuxInt = 1
-               v3.AddArg(v5)
-               v.AddArg(v3)
+               v.AddArg(y)
                return true
        }
 }
index 1c0ae0ab682795f5158e6a9159c5ab7c8159cff1..2a8bc65d1b5a3bd8967261cbfdbdd04e936b250d 100644 (file)
@@ -771,33 +771,20 @@ func rewriteValuePPC64_OpAvg64u(v *Value, config *Config) bool {
        _ = b
        // match: (Avg64u <t> x y)
        // cond:
-       // result: (ADD (ADD <t> (SRD <t> x (MOVDconst <t> [1])) (SRD <t> y (MOVDconst <t> [1]))) (ANDconst <t> (AND <t> x y) [1]))
+       // result: (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
        for {
                t := v.Type
                x := v.Args[0]
                y := v.Args[1]
                v.reset(OpPPC64ADD)
-               v0 := b.NewValue0(v.Pos, OpPPC64ADD, t)
-               v1 := b.NewValue0(v.Pos, OpPPC64SRD, t)
+               v0 := b.NewValue0(v.Pos, OpPPC64SRDconst, t)
+               v0.AuxInt = 1
+               v1 := b.NewValue0(v.Pos, OpPPC64SUB, t)
                v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, t)
-               v2.AuxInt = 1
-               v1.AddArg(v2)
+               v1.AddArg(y)
                v0.AddArg(v1)
-               v3 := b.NewValue0(v.Pos, OpPPC64SRD, t)
-               v3.AddArg(y)
-               v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, t)
-               v4.AuxInt = 1
-               v3.AddArg(v4)
-               v0.AddArg(v3)
                v.AddArg(v0)
-               v5 := b.NewValue0(v.Pos, OpPPC64ANDconst, t)
-               v5.AuxInt = 1
-               v6 := b.NewValue0(v.Pos, OpPPC64AND, t)
-               v6.AddArg(x)
-               v6.AddArg(y)
-               v5.AddArg(v6)
-               v.AddArg(v5)
+               v.AddArg(y)
                return true
        }
 }
index 23fb6756369511101eac56d7d71680fed6b3a609..5ee0ee62eb128e835b8456d5cf2a248d9aaa6603 100644 (file)
@@ -1117,29 +1117,20 @@ func rewriteValueS390X_OpAvg64u(v *Value, config *Config) bool {
        _ = b
        // match: (Avg64u <t> x y)
        // cond:
-       // result: (ADD (ADD <t> (SRDconst <t> x [1]) (SRDconst <t> y [1])) (ANDconst <t> (AND <t> x y) [1]))
+       // result: (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
        for {
                t := v.Type
                x := v.Args[0]
                y := v.Args[1]
                v.reset(OpS390XADD)
-               v0 := b.NewValue0(v.Pos, OpS390XADD, t)
-               v1 := b.NewValue0(v.Pos, OpS390XSRDconst, t)
-               v1.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpS390XSRDconst, t)
+               v0.AuxInt = 1
+               v1 := b.NewValue0(v.Pos, OpS390XSUB, t)
                v1.AddArg(x)
+               v1.AddArg(y)
                v0.AddArg(v1)
-               v2 := b.NewValue0(v.Pos, OpS390XSRDconst, t)
-               v2.AuxInt = 1
-               v2.AddArg(y)
-               v0.AddArg(v2)
                v.AddArg(v0)
-               v3 := b.NewValue0(v.Pos, OpS390XANDconst, t)
-               v3.AuxInt = 1
-               v4 := b.NewValue0(v.Pos, OpS390XAND, t)
-               v4.AddArg(x)
-               v4.AddArg(y)
-               v3.AddArg(v4)
-               v.AddArg(v3)
+               v.AddArg(y)
                return true
        }
 }
index 5c4f7ceeaaf0829b0d004741263ebde015f8cdab..d24ceff40717edf8c9a4543e69b2daa1ee4fc9fc 100644 (file)
@@ -54,14 +54,26 @@ func rewriteValuegeneric(v *Value, config *Config) bool {
                return rewriteValuegeneric_OpCvt32Fto64F(v, config)
        case OpCvt64Fto32F:
                return rewriteValuegeneric_OpCvt64Fto32F(v, config)
+       case OpDiv16:
+               return rewriteValuegeneric_OpDiv16(v, config)
+       case OpDiv16u:
+               return rewriteValuegeneric_OpDiv16u(v, config)
+       case OpDiv32:
+               return rewriteValuegeneric_OpDiv32(v, config)
        case OpDiv32F:
                return rewriteValuegeneric_OpDiv32F(v, config)
+       case OpDiv32u:
+               return rewriteValuegeneric_OpDiv32u(v, config)
        case OpDiv64:
                return rewriteValuegeneric_OpDiv64(v, config)
        case OpDiv64F:
                return rewriteValuegeneric_OpDiv64F(v, config)
        case OpDiv64u:
                return rewriteValuegeneric_OpDiv64u(v, config)
+       case OpDiv8:
+               return rewriteValuegeneric_OpDiv8(v, config)
+       case OpDiv8u:
+               return rewriteValuegeneric_OpDiv8u(v, config)
        case OpEq16:
                return rewriteValuegeneric_OpEq16(v, config)
        case OpEq32:
@@ -2006,314 +2018,1427 @@ func rewriteValuegeneric_OpCvt64Fto32F(v *Value, config *Config) bool {
        }
        return false
 }
+func rewriteValuegeneric_OpDiv16(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Div16  (Const16 [c])  (Const16 [d]))
+       // cond: d != 0
+       // result: (Const16 [int64(int16(c)/int16(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(d != 0) {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(c) / int16(d))
+               return true
+       }
+       // match: (Div16 <t> n (Const16 [c]))
+       // cond: c < 0 && c != -1<<15
+       // result: (Neg16 (Div16 <t> n (Const16 <t> [-c])))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c < 0 && c != -1<<15) {
+                       break
+               }
+               v.reset(OpNeg16)
+               v0 := b.NewValue0(v.Pos, OpDiv16, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst16, t)
+               v1.AuxInt = -c
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div16 <t> x (Const16 [-1<<15]))
+       // cond:
+       // result: (Rsh16Ux64 (And16 <t> x (Neg16 <t> x)) (Const64 <config.fe.TypeUInt64()> [15]))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               if v_1.AuxInt != -1<<15 {
+                       break
+               }
+               v.reset(OpRsh16Ux64)
+               v0 := b.NewValue0(v.Pos, OpAnd16, t)
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpNeg16, t)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = 15
+               v.AddArg(v2)
+               return true
+       }
+       // match: (Div16 <t> n (Const16 [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (Rsh16x64     (Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <config.fe.TypeUInt64()> [15])) (Const64 <config.fe.TypeUInt64()> [16-log2(c)])))     (Const64 <config.fe.TypeUInt64()> [log2(c)]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpRsh16x64)
+               v0 := b.NewValue0(v.Pos, OpAdd16, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpRsh16Ux64, t)
+               v2 := b.NewValue0(v.Pos, OpRsh16x64, t)
+               v2.AddArg(n)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = 15
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 16 - log2(c)
+               v1.AddArg(v4)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v5 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v5.AuxInt = log2(c)
+               v.AddArg(v5)
+               return true
+       }
+       // match: (Div16 <t> x (Const16 [c]))
+       // cond: smagicOK(16,c)
+       // result: (Sub16 <t>     (Rsh32x64 <t>       (Mul32 <config.fe.TypeUInt32()>         (Const32 <config.fe.TypeUInt32()> [int64(smagic(16,c).m)])         (SignExt16to32 x))       (Const64 <config.fe.TypeUInt64()> [16+smagic(16,c).s]))     (Rsh32x64 <t>       (SignExt16to32 x)       (Const64 <config.fe.TypeUInt64()> [31])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(smagicOK(16, c)) {
+                       break
+               }
+               v.reset(OpSub16)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpRsh32x64, t)
+               v1 := b.NewValue0(v.Pos, OpMul32, config.fe.TypeUInt32())
+               v2 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v2.AuxInt = int64(smagic(16, c).m)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpSignExt16to32, config.fe.TypeInt32())
+               v3.AddArg(x)
+               v1.AddArg(v3)
+               v0.AddArg(v1)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 16 + smagic(16, c).s
+               v0.AddArg(v4)
+               v.AddArg(v0)
+               v5 := b.NewValue0(v.Pos, OpRsh32x64, t)
+               v6 := b.NewValue0(v.Pos, OpSignExt16to32, config.fe.TypeInt32())
+               v6.AddArg(x)
+               v5.AddArg(v6)
+               v7 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v7.AuxInt = 31
+               v5.AddArg(v7)
+               v.AddArg(v5)
+               return true
+       }
+       return false
+}
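
Two notes on the signed rules above. Division by -1<<15 cannot be reduced by negating the divisor (its absolute value is not representable), but x & -x isolates the lowest set bit of x, so bit 15 of that value is set exactly when x itself is -1<<15; the unsigned shift by 15 therefore yields the correct quotient, 1 or 0. For the isPowerOfTwo rule, a bare arithmetic shift would round toward minus infinity, so the inner shifts build a bias of 1<<k - 1 out of the sign bit, making negative dividends round toward zero. A sketch of that fix-up (helper name hypothetical):

        package main

        import "fmt"

        // div16ByPow2 computes n / (1<<k) for int16 n, rounding toward zero,
        // mirroring the (Div16 n (Const16 [c])) && isPowerOfTwo(c) rule.
        func div16ByPow2(n int16, k uint) int16 {
                bias := int16(uint16(n>>15) >> (16 - k)) // 1<<k - 1 if n < 0, else 0
                return (n + bias) >> k
        }

        func main() {
                for _, n := range []int16{-9, -8, -7, -1, 0, 1, 7, 8, 9} {
                        fmt.Println(n, "/ 8 =", div16ByPow2(n, 3), "want", n/8)
                }
        }
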
+func rewriteValuegeneric_OpDiv16u(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Div16u (Const16 [c])  (Const16 [d]))
+       // cond: d != 0
+       // result: (Const16 [int64(int16(uint16(c)/uint16(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(d != 0) {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = int64(int16(uint16(c) / uint16(d)))
+               return true
+       }
+       // match: (Div16u n (Const16 [c]))
+       // cond: isPowerOfTwo(c&0xffff)
+       // result: (Rsh16Ux64 n (Const64 <config.fe.TypeUInt64()> [log2(c&0xffff)]))
+       for {
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c & 0xffff)) {
+                       break
+               }
+               v.reset(OpRsh16Ux64)
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v0.AuxInt = log2(c & 0xffff)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div16u x (Const16 [c]))
+       // cond: umagicOK(16, c) && config.RegSize == 8
+       // result: (Trunc64to16     (Rsh64Ux64 <config.fe.TypeUInt64()>       (Mul64 <config.fe.TypeUInt64()>         (Const64 <config.fe.TypeUInt64()> [int64(1<<16+umagic(16,c).m)])         (ZeroExt16to64 x))       (Const64 <config.fe.TypeUInt64()> [16+umagic(16,c).s])))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(16, c) && config.RegSize == 8) {
+                       break
+               }
+               v.reset(OpTrunc64to16)
+               v0 := b.NewValue0(v.Pos, OpRsh64Ux64, config.fe.TypeUInt64())
+               v1 := b.NewValue0(v.Pos, OpMul64, config.fe.TypeUInt64())
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = int64(1<<16 + umagic(16, c).m)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpZeroExt16to64, config.fe.TypeUInt64())
+               v3.AddArg(x)
+               v1.AddArg(v3)
+               v0.AddArg(v1)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 16 + umagic(16, c).s
+               v0.AddArg(v4)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div16u x (Const16 [c]))
+       // cond: umagicOK(16, c) && config.RegSize == 4 && umagic(16,c).m&1 == 0
+       // result: (Trunc32to16     (Rsh32Ux64 <config.fe.TypeUInt32()>       (Mul32 <config.fe.TypeUInt32()>         (Const32 <config.fe.TypeUInt32()> [int64(1<<15+umagic(16,c).m/2)])         (ZeroExt16to32 x))       (Const64 <config.fe.TypeUInt64()> [16+umagic(16,c).s-1])))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(16, c) && config.RegSize == 4 && umagic(16, c).m&1 == 0) {
+                       break
+               }
+               v.reset(OpTrunc32to16)
+               v0 := b.NewValue0(v.Pos, OpRsh32Ux64, config.fe.TypeUInt32())
+               v1 := b.NewValue0(v.Pos, OpMul32, config.fe.TypeUInt32())
+               v2 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v2.AuxInt = int64(1<<15 + umagic(16, c).m/2)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpZeroExt16to32, config.fe.TypeUInt32())
+               v3.AddArg(x)
+               v1.AddArg(v3)
+               v0.AddArg(v1)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 16 + umagic(16, c).s - 1
+               v0.AddArg(v4)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div16u x (Const16 [c]))
+       // cond: umagicOK(16, c) && config.RegSize == 4 && c&1 == 0
+       // result: (Trunc32to16     (Rsh32Ux64 <config.fe.TypeUInt32()>       (Mul32 <config.fe.TypeUInt32()>         (Const32 <config.fe.TypeUInt32()> [int64(1<<15+(umagic(16,c).m+1)/2)])         (Rsh32Ux64 <config.fe.TypeUInt32()> (ZeroExt16to32 x) (Const64 <config.fe.TypeUInt64()> [1])))       (Const64 <config.fe.TypeUInt64()> [16+umagic(16,c).s-2])))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(16, c) && config.RegSize == 4 && c&1 == 0) {
+                       break
+               }
+               v.reset(OpTrunc32to16)
+               v0 := b.NewValue0(v.Pos, OpRsh32Ux64, config.fe.TypeUInt32())
+               v1 := b.NewValue0(v.Pos, OpMul32, config.fe.TypeUInt32())
+               v2 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v2.AuxInt = int64(1<<15 + (umagic(16, c).m+1)/2)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpRsh32Ux64, config.fe.TypeUInt32())
+               v4 := b.NewValue0(v.Pos, OpZeroExt16to32, config.fe.TypeUInt32())
+               v4.AddArg(x)
+               v3.AddArg(v4)
+               v5 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v5.AuxInt = 1
+               v3.AddArg(v5)
+               v1.AddArg(v3)
+               v0.AddArg(v1)
+               v6 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v6.AuxInt = 16 + umagic(16, c).s - 2
+               v0.AddArg(v6)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div16u x (Const16 [c]))
+       // cond: umagicOK(16, c) && config.RegSize == 4
+       // result: (Trunc32to16     (Rsh32Ux64 <config.fe.TypeUInt32()>       (Avg32u         (Lsh32x64 <config.fe.TypeUInt32()> (ZeroExt16to32 x) (Const64 <config.fe.TypeUInt64()> [16]))         (Mul32 <config.fe.TypeUInt32()>           (Const32 <config.fe.TypeUInt32()> [int64(umagic(16,c).m)])           (ZeroExt16to32 x)))       (Const64 <config.fe.TypeUInt64()> [16+umagic(16,c).s-1])))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(16, c) && config.RegSize == 4) {
+                       break
+               }
+               v.reset(OpTrunc32to16)
+               v0 := b.NewValue0(v.Pos, OpRsh32Ux64, config.fe.TypeUInt32())
+               v1 := b.NewValue0(v.Pos, OpAvg32u, config.fe.TypeUInt32())
+               v2 := b.NewValue0(v.Pos, OpLsh32x64, config.fe.TypeUInt32())
+               v3 := b.NewValue0(v.Pos, OpZeroExt16to32, config.fe.TypeUInt32())
+               v3.AddArg(x)
+               v2.AddArg(v3)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 16
+               v2.AddArg(v4)
+               v1.AddArg(v2)
+               v5 := b.NewValue0(v.Pos, OpMul32, config.fe.TypeUInt32())
+               v6 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v6.AuxInt = int64(umagic(16, c).m)
+               v5.AddArg(v6)
+               v7 := b.NewValue0(v.Pos, OpZeroExt16to32, config.fe.TypeUInt32())
+               v7.AddArg(x)
+               v5.AddArg(v7)
+               v1.AddArg(v5)
+               v0.AddArg(v1)
+               v8 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v8.AuxInt = 16 + umagic(16, c).s - 1
+               v0.AddArg(v8)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
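
A 16-bit unsigned divide on a 64-bit target is the easy case above: widen, multiply by 1<<16 + m, shift right by 16+s. The three RegSize == 4 rules are fallbacks that keep the product inside 32 bits: halve the magic constant when m is even, pre-shift the dividend when c is even, and otherwise recover the lost 33rd bit with Avg32u. A sketch of the even-m case for c=3, with the pair m=21846, s=2 worked out by hand (so the multiplier is 1<<15 + m/2 == 43691 == ceil(1<<18 / 3)):

        package main

        import "fmt"

        // udiv3 divides a 16-bit value by 3 using only 32-bit arithmetic,
        // mirroring the umagic(16,c).m&1 == 0 rule above.
        func udiv3(x uint16) uint16 {
                const mul, shift = 1<<15 + 21846/2, 16 + 2 - 1 // 43691, 17
                return uint16(uint32(x) * mul >> shift)
        }

        func main() {
                for _, x := range []uint16{0, 1, 2, 3, 4, 65533, 65534, 65535} {
                        fmt.Println(x, "/ 3 =", udiv3(x), "want", x/3)
                }
        }
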
+func rewriteValuegeneric_OpDiv32(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Div32  (Const32 [c])  (Const32 [d]))
+       // cond: d != 0
+       // result: (Const32 [int64(int32(c)/int32(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(d != 0) {
+                       break
+               }
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(c) / int32(d))
+               return true
+       }
+       // match: (Div32 <t> n (Const32 [c]))
+       // cond: c < 0 && c != -1<<31
+       // result: (Neg32 (Div32 <t> n (Const32 <t> [-c])))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c < 0 && c != -1<<31) {
+                       break
+               }
+               v.reset(OpNeg32)
+               v0 := b.NewValue0(v.Pos, OpDiv32, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst32, t)
+               v1.AuxInt = -c
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div32 <t> x (Const32 [-1<<31]))
+       // cond:
+       // result: (Rsh32Ux64 (And32 <t> x (Neg32 <t> x)) (Const64 <config.fe.TypeUInt64()> [31]))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               if v_1.AuxInt != -1<<31 {
+                       break
+               }
+               v.reset(OpRsh32Ux64)
+               v0 := b.NewValue0(v.Pos, OpAnd32, t)
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpNeg32, t)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = 31
+               v.AddArg(v2)
+               return true
+       }
+       // match: (Div32 <t> n (Const32 [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (Rsh32x64     (Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <config.fe.TypeUInt64()> [31])) (Const64 <config.fe.TypeUInt64()> [32-log2(c)])))     (Const64 <config.fe.TypeUInt64()> [log2(c)]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpRsh32x64)
+               v0 := b.NewValue0(v.Pos, OpAdd32, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpRsh32Ux64, t)
+               v2 := b.NewValue0(v.Pos, OpRsh32x64, t)
+               v2.AddArg(n)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = 31
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 32 - log2(c)
+               v1.AddArg(v4)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v5 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v5.AuxInt = log2(c)
+               v.AddArg(v5)
+               return true
+       }
+       // match: (Div32 <t> x (Const32 [c]))
+       // cond: smagicOK(32,c) && config.RegSize == 8
+       // result: (Sub32 <t>     (Rsh64x64 <t>       (Mul64 <config.fe.TypeUInt64()>         (Const64 <config.fe.TypeUInt64()> [int64(smagic(32,c).m)])         (SignExt32to64 x))       (Const64 <config.fe.TypeUInt64()> [32+smagic(32,c).s]))     (Rsh64x64 <t>       (SignExt32to64 x)       (Const64 <config.fe.TypeUInt64()> [63])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(smagicOK(32, c) && config.RegSize == 8) {
+                       break
+               }
+               v.reset(OpSub32)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpRsh64x64, t)
+               v1 := b.NewValue0(v.Pos, OpMul64, config.fe.TypeUInt64())
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = int64(smagic(32, c).m)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpSignExt32to64, config.fe.TypeInt64())
+               v3.AddArg(x)
+               v1.AddArg(v3)
+               v0.AddArg(v1)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 32 + smagic(32, c).s
+               v0.AddArg(v4)
+               v.AddArg(v0)
+               v5 := b.NewValue0(v.Pos, OpRsh64x64, t)
+               v6 := b.NewValue0(v.Pos, OpSignExt32to64, config.fe.TypeInt64())
+               v6.AddArg(x)
+               v5.AddArg(v6)
+               v7 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v7.AuxInt = 63
+               v5.AddArg(v7)
+               v.AddArg(v5)
+               return true
+       }
+       // match: (Div32 <t> x (Const32 [c]))
+       // cond: smagicOK(32,c) && config.RegSize == 4 && smagic(32,c).m&1 == 0
+       // result: (Sub32 <t>     (Rsh32x64 <t>       (Hmul32 <t>         (Const32 <config.fe.TypeUInt32()> [int64(int32(smagic(32,c).m/2))])         x)       (Const64 <config.fe.TypeUInt64()> [smagic(32,c).s-1]))     (Rsh32x64 <t>       x       (Const64 <config.fe.TypeUInt64()> [31])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(smagicOK(32, c) && config.RegSize == 4 && smagic(32, c).m&1 == 0) {
+                       break
+               }
+               v.reset(OpSub32)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpRsh32x64, t)
+               v1 := b.NewValue0(v.Pos, OpHmul32, t)
+               v2 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v2.AuxInt = int64(int32(smagic(32, c).m / 2))
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = smagic(32, c).s - 1
+               v0.AddArg(v3)
+               v.AddArg(v0)
+               v4 := b.NewValue0(v.Pos, OpRsh32x64, t)
+               v4.AddArg(x)
+               v5 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v5.AuxInt = 31
+               v4.AddArg(v5)
+               v.AddArg(v4)
+               return true
+       }
+       // match: (Div32 <t> x (Const32 [c]))
+       // cond: smagicOK(32,c) && config.RegSize == 4 && smagic(32,c).m&1 != 0
+       // result: (Sub32 <t>     (Rsh32x64 <t>       (Add32 <t>         (Hmul32 <t>           (Const32 <config.fe.TypeUInt32()> [int64(int32(smagic(32,c).m))])           x)         x)       (Const64 <config.fe.TypeUInt64()> [smagic(32,c).s]))     (Rsh32x64 <t>       x       (Const64 <config.fe.TypeUInt64()> [31])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(smagicOK(32, c) && config.RegSize == 4 && smagic(32, c).m&1 != 0) {
+                       break
+               }
+               v.reset(OpSub32)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpRsh32x64, t)
+               v1 := b.NewValue0(v.Pos, OpAdd32, t)
+               v2 := b.NewValue0(v.Pos, OpHmul32, t)
+               v3 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v3.AuxInt = int64(int32(smagic(32, c).m))
+               v2.AddArg(v3)
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = smagic(32, c).s
+               v0.AddArg(v4)
+               v.AddArg(v0)
+               v5 := b.NewValue0(v.Pos, OpRsh32x64, t)
+               v5.AddArg(x)
+               v6 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v6.AuxInt = 31
+               v5.AddArg(v6)
+               v.AddArg(v5)
+               return true
+       }
+       return false
+}
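
With 64-bit registers the signed 32-bit expansion needs no high-multiply: sign-extend, multiply by the magic constant, shift by 32+s, then subtract x's sign (0 or -1) to turn the floor division into truncation. Spelled out for c=7, with smagic(32,7) worked out by hand as m=2454267027, s=2:

        package main

        import "fmt"

        // sdiv7 mirrors the smagicOK(32,c) && config.RegSize == 8 rule above.
        func sdiv7(x int32) int32 {
                const m, s = 2454267027, 2 // m == ceil(1<<34 / 7)
                q := int32((int64(x) * m) >> (32 + s))
                return q - x>>31 // x>>31 is -1 for negative x: the +1 fix-up
        }

        func main() {
                for _, x := range []int32{-22, -21, -8, -7, -1, 0, 1, 7, 1<<31 - 1, -1 << 31} {
                        fmt.Println(x, "/ 7 =", sdiv7(x), "want", x/7)
                }
        }
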
 func rewriteValuegeneric_OpDiv32F(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (Div32F x (Const32F [f2i(1)]))
+       // match: (Div32F (Const32F [c]) (Const32F [d]))
+       // cond:
+       // result: (Const32F [f2i(float64(i2f32(c) / i2f32(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32F {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConst32F)
+               v.AuxInt = f2i(float64(i2f32(c) / i2f32(d)))
+               return true
+       }
+       // match: (Div32F x (Const32F [f2i(1)]))
+       // cond:
+       // result: x
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               if v_1.AuxInt != f2i(1) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Div32F x (Const32F [f2i(-1)]))
+       // cond:
+       // result: (Neg32F x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32F {
+                       break
+               }
+               if v_1.AuxInt != f2i(-1) {
+                       break
+               }
+               v.reset(OpNeg32F)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
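
Note the double conversion in the new constant-fold rule: Const32F stores its value as a float64 bit pattern, so the quotient is computed at float32 precision and only then widened back; folding directly in float64 could round differently. A sketch of the fold, assuming i2f32 and f2i have their usual rewrite.go meanings:

        package main

        import (
                "fmt"
                "math"
        )

        // foldDiv32F mirrors the (Div32F (Const32F [c]) (Const32F [d])) fold:
        // divide at float32 precision, store the widened float64 bit pattern.
        func foldDiv32F(c, d int64) int64 {
                cf := float32(math.Float64frombits(uint64(c)))   // i2f32(c)
                df := float32(math.Float64frombits(uint64(d)))   // i2f32(d)
                return int64(math.Float64bits(float64(cf / df))) // f2i(...)
        }

        func main() {
                one := int64(math.Float64bits(1))
                three := int64(math.Float64bits(3))
                fmt.Printf("%#x\n", foldDiv32F(one, three)) // float32(1.0/3.0), widened
        }
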
+func rewriteValuegeneric_OpDiv32u(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Div32u (Const32 [c])  (Const32 [d]))
+       // cond: d != 0
+       // result: (Const32 [int64(int32(uint32(c)/uint32(d)))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(d != 0) {
+                       break
+               }
+               v.reset(OpConst32)
+               v.AuxInt = int64(int32(uint32(c) / uint32(d)))
+               return true
+       }
+       // match: (Div32u n (Const32 [c]))
+       // cond: isPowerOfTwo(c&0xffffffff)
+       // result: (Rsh32Ux64 n (Const64 <config.fe.TypeUInt64()> [log2(c&0xffffffff)]))
+       for {
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c & 0xffffffff)) {
+                       break
+               }
+               v.reset(OpRsh32Ux64)
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v0.AuxInt = log2(c & 0xffffffff)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div32u x (Const32 [c]))
+       // cond: umagicOK(32, c) && config.RegSize == 4 && umagic(32,c).m&1 == 0
+       // result: (Rsh32Ux64 <config.fe.TypeUInt32()>     (Hmul32u <config.fe.TypeUInt32()>       (Const32 <config.fe.TypeUInt32()> [int64(int32(1<<31+umagic(32,c).m/2))])       x)     (Const64 <config.fe.TypeUInt64()> [umagic(32,c).s-1]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(32, c) && config.RegSize == 4 && umagic(32, c).m&1 == 0) {
+                       break
+               }
+               v.reset(OpRsh32Ux64)
+               v.Type = config.fe.TypeUInt32()
+               v0 := b.NewValue0(v.Pos, OpHmul32u, config.fe.TypeUInt32())
+               v1 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v1.AuxInt = int64(int32(1<<31 + umagic(32, c).m/2))
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = umagic(32, c).s - 1
+               v.AddArg(v2)
+               return true
+       }
+       // match: (Div32u x (Const32 [c]))
+       // cond: umagicOK(32, c) && config.RegSize == 4 && c&1 == 0
+       // result: (Rsh32Ux64 <config.fe.TypeUInt32()>     (Hmul32u <config.fe.TypeUInt32()>       (Const32 <config.fe.TypeUInt32()> [int64(int32(1<<31+(umagic(32,c).m+1)/2))])       (Rsh32Ux64 <config.fe.TypeUInt32()> x (Const64 <config.fe.TypeUInt64()> [1])))     (Const64 <config.fe.TypeUInt64()> [umagic(32,c).s-2]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(32, c) && config.RegSize == 4 && c&1 == 0) {
+                       break
+               }
+               v.reset(OpRsh32Ux64)
+               v.Type = config.fe.TypeUInt32()
+               v0 := b.NewValue0(v.Pos, OpHmul32u, config.fe.TypeUInt32())
+               v1 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v1.AuxInt = int64(int32(1<<31 + (umagic(32, c).m+1)/2))
+               v0.AddArg(v1)
+               v2 := b.NewValue0(v.Pos, OpRsh32Ux64, config.fe.TypeUInt32())
+               v2.AddArg(x)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = 1
+               v2.AddArg(v3)
+               v0.AddArg(v2)
+               v.AddArg(v0)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = umagic(32, c).s - 2
+               v.AddArg(v4)
+               return true
+       }
+       // match: (Div32u x (Const32 [c]))
+       // cond: umagicOK(32, c) && config.RegSize == 4
+       // result: (Rsh32Ux64 <config.fe.TypeUInt32()>     (Avg32u       x       (Hmul32u <config.fe.TypeUInt32()>         (Const32 <config.fe.TypeUInt32()> [int64(int32(umagic(32,c).m))])         x))     (Const64 <config.fe.TypeUInt64()> [umagic(32,c).s-1]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(32, c) && config.RegSize == 4) {
+                       break
+               }
+               v.reset(OpRsh32Ux64)
+               v.Type = config.fe.TypeUInt32()
+               v0 := b.NewValue0(v.Pos, OpAvg32u, config.fe.TypeUInt32())
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpHmul32u, config.fe.TypeUInt32())
+               v2 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v2.AuxInt = int64(int32(umagic(32, c).m))
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = umagic(32, c).s - 1
+               v.AddArg(v3)
+               return true
+       }
+       // match: (Div32u x (Const32 [c]))
+       // cond: umagicOK(32, c) && config.RegSize == 8 && umagic(32,c).m&1 == 0
+       // result: (Trunc64to32     (Rsh64Ux64 <config.fe.TypeUInt64()>       (Mul64 <config.fe.TypeUInt64()>         (Const64 <config.fe.TypeUInt64()> [int64(1<<31+umagic(32,c).m/2)])         (ZeroExt32to64 x))       (Const64 <config.fe.TypeUInt64()> [32+umagic(32,c).s-1])))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(32, c) && config.RegSize == 8 && umagic(32, c).m&1 == 0) {
+                       break
+               }
+               v.reset(OpTrunc64to32)
+               v0 := b.NewValue0(v.Pos, OpRsh64Ux64, config.fe.TypeUInt64())
+               v1 := b.NewValue0(v.Pos, OpMul64, config.fe.TypeUInt64())
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = int64(1<<31 + umagic(32, c).m/2)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpZeroExt32to64, config.fe.TypeUInt64())
+               v3.AddArg(x)
+               v1.AddArg(v3)
+               v0.AddArg(v1)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 32 + umagic(32, c).s - 1
+               v0.AddArg(v4)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div32u x (Const32 [c]))
+       // cond: umagicOK(32, c) && config.RegSize == 8 && c&1 == 0
+       // result: (Trunc64to32     (Rsh64Ux64 <config.fe.TypeUInt64()>       (Mul64 <config.fe.TypeUInt64()>         (Const64 <config.fe.TypeUInt64()> [int64(1<<31+(umagic(32,c).m+1)/2)])         (Rsh64Ux64 <config.fe.TypeUInt64()> (ZeroExt32to64 x) (Const64 <config.fe.TypeUInt64()> [1])))       (Const64 <config.fe.TypeUInt64()> [32+umagic(32,c).s-2])))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(32, c) && config.RegSize == 8 && c&1 == 0) {
+                       break
+               }
+               v.reset(OpTrunc64to32)
+               v0 := b.NewValue0(v.Pos, OpRsh64Ux64, config.fe.TypeUInt64())
+               v1 := b.NewValue0(v.Pos, OpMul64, config.fe.TypeUInt64())
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = int64(1<<31 + (umagic(32, c).m+1)/2)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpRsh64Ux64, config.fe.TypeUInt64())
+               v4 := b.NewValue0(v.Pos, OpZeroExt32to64, config.fe.TypeUInt64())
+               v4.AddArg(x)
+               v3.AddArg(v4)
+               v5 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v5.AuxInt = 1
+               v3.AddArg(v5)
+               v1.AddArg(v3)
+               v0.AddArg(v1)
+               v6 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v6.AuxInt = 32 + umagic(32, c).s - 2
+               v0.AddArg(v6)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div32u x (Const32 [c]))
+       // cond: umagicOK(32, c) && config.RegSize == 8
+       // result: (Trunc64to32     (Rsh64Ux64 <config.fe.TypeUInt64()>       (Avg64u         (Lsh64x64 <config.fe.TypeUInt64()> (ZeroExt32to64 x) (Const64 <config.fe.TypeUInt64()> [32]))         (Mul64 <config.fe.TypeUInt32()>           (Const64 <config.fe.TypeUInt32()> [int64(umagic(32,c).m)])           (ZeroExt32to64 x)))       (Const64 <config.fe.TypeUInt64()> [32+umagic(32,c).s-1])))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(32, c) && config.RegSize == 8) {
+                       break
+               }
+               v.reset(OpTrunc64to32)
+               v0 := b.NewValue0(v.Pos, OpRsh64Ux64, config.fe.TypeUInt64())
+               v1 := b.NewValue0(v.Pos, OpAvg64u, config.fe.TypeUInt64())
+               v2 := b.NewValue0(v.Pos, OpLsh64x64, config.fe.TypeUInt64())
+               v3 := b.NewValue0(v.Pos, OpZeroExt32to64, config.fe.TypeUInt64())
+               v3.AddArg(x)
+               v2.AddArg(v3)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 32
+               v2.AddArg(v4)
+               v1.AddArg(v2)
+               v5 := b.NewValue0(v.Pos, OpMul64, config.fe.TypeUInt32())
+               v6 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt32())
+               v6.AuxInt = int64(umagic(32, c).m)
+               v5.AddArg(v6)
+               v7 := b.NewValue0(v.Pos, OpZeroExt32to64, config.fe.TypeUInt64())
+               v7.AddArg(x)
+               v5.AddArg(v7)
+               v1.AddArg(v5)
+               v0.AddArg(v1)
+               v8 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v8.AuxInt = 32 + umagic(32, c).s - 1
+               v0.AddArg(v8)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
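
The last Div32u rule covers magic pairs where neither m nor c is even, so the full 33-bit product x*(1<<32 + m) is needed; Avg64u supplies its top bit by averaging x<<32 with m*x, which is exactly x*(1<<32+m)/2 and cannot overflow 64 bits. A sketch for c=7, with umagic(32,7) worked out by hand as m=613566757, s=3, and the average written the way the machine rules lower it:

        package main

        import "fmt"

        // udiv7 mirrors the general umagicOK(32,c) && config.RegSize == 8 rule.
        func udiv7(x uint32) uint32 {
                const m, s = 613566757, 3 // 1<<32 + m == ceil(1<<35 / 7)
                a := uint64(x) << 32      // x * 2^32
                b := uint64(x) * m        // a >= b because m < 1<<32
                avg := b + (a-b)>>1       // floor((a+b)/2), no 64-bit overflow
                return uint32(avg >> (32 + s - 1))
        }

        func main() {
                for _, x := range []uint32{0, 1, 6, 7, 8, 1<<32 - 2, 1<<32 - 1} {
                        fmt.Println(x, "/ 7 =", udiv7(x), "want", x/7)
                }
        }
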
+func rewriteValuegeneric_OpDiv64(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Div64  (Const64 [c])  (Const64 [d]))
+       // cond: d != 0
+       // result: (Const64 [c/d])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(d != 0) {
+                       break
+               }
+               v.reset(OpConst64)
+               v.AuxInt = c / d
+               return true
+       }
+       // match: (Div64 <t> n (Const64 [c]))
+       // cond: c < 0 && c != -1<<63
+       // result: (Neg64 (Div64 <t> n (Const64 <t> [-c])))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c < 0 && c != -1<<63) {
+                       break
+               }
+               v.reset(OpNeg64)
+               v0 := b.NewValue0(v.Pos, OpDiv64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst64, t)
+               v1.AuxInt = -c
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div64 <t> x (Const64 [-1<<63]))
+       // cond:
+       // result: (Rsh64Ux64 (And64 <t> x (Neg64 <t> x)) (Const64 <config.fe.TypeUInt64()> [63]))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               if v_1.AuxInt != -1<<63 {
+                       break
+               }
+               v.reset(OpRsh64Ux64)
+               v0 := b.NewValue0(v.Pos, OpAnd64, t)
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpNeg64, t)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = 63
+               v.AddArg(v2)
+               return true
+       }
+       // match: (Div64 <t> n (Const64 [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (Rsh64x64     (Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <config.fe.TypeUInt64()> [63])) (Const64 <config.fe.TypeUInt64()> [64-log2(c)])))     (Const64 <config.fe.TypeUInt64()> [log2(c)]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpRsh64x64)
+               v0 := b.NewValue0(v.Pos, OpAdd64, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpRsh64Ux64, t)
+               v2 := b.NewValue0(v.Pos, OpRsh64x64, t)
+               v2.AddArg(n)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = 63
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 64 - log2(c)
+               v1.AddArg(v4)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v5 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v5.AuxInt = log2(c)
+               v.AddArg(v5)
+               return true
+       }
+       // match: (Div64 <t> x (Const64 [c]))
+       // cond: smagicOK(64,c) && smagic(64,c).m&1 == 0
+       // result: (Sub64 <t>     (Rsh64x64 <t>       (Hmul64 <t>         (Const64 <config.fe.TypeUInt64()> [int64(smagic(64,c).m/2)])         x)       (Const64 <config.fe.TypeUInt64()> [smagic(64,c).s-1]))     (Rsh64x64 <t>       x       (Const64 <config.fe.TypeUInt64()> [63])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(smagicOK(64, c) && smagic(64, c).m&1 == 0) {
+                       break
+               }
+               v.reset(OpSub64)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpRsh64x64, t)
+               v1 := b.NewValue0(v.Pos, OpHmul64, t)
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = int64(smagic(64, c).m / 2)
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = smagic(64, c).s - 1
+               v0.AddArg(v3)
+               v.AddArg(v0)
+               v4 := b.NewValue0(v.Pos, OpRsh64x64, t)
+               v4.AddArg(x)
+               v5 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v5.AuxInt = 63
+               v4.AddArg(v5)
+               v.AddArg(v4)
+               return true
+       }
+       // match: (Div64 <t> x (Const64 [c]))
+       // cond: smagicOK(64,c) && smagic(64,c).m&1 != 0
+       // result: (Sub64 <t>     (Rsh64x64 <t>       (Add64 <t>         (Hmul64 <t>           (Const64 <config.fe.TypeUInt64()> [int64(smagic(64,c).m)])           x)         x)       (Const64 <config.fe.TypeUInt64()> [smagic(64,c).s]))     (Rsh64x64 <t>       x       (Const64 <config.fe.TypeUInt64()> [63])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(smagicOK(64, c) && smagic(64, c).m&1 != 0) {
+                       break
+               }
+               v.reset(OpSub64)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpRsh64x64, t)
+               v1 := b.NewValue0(v.Pos, OpAdd64, t)
+               v2 := b.NewValue0(v.Pos, OpHmul64, t)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = int64(smagic(64, c).m)
+               v2.AddArg(v3)
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = smagic(64, c).s
+               v0.AddArg(v4)
+               v.AddArg(v0)
+               v5 := b.NewValue0(v.Pos, OpRsh64x64, t)
+               v5.AddArg(x)
+               v6 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v6.AuxInt = 63
+               v5.AddArg(v6)
+               v.AddArg(v5)
+               return true
+       }
+       return false
+}
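
Aside: the isPowerOfTwo rule above is the classic bias trick for truncated signed division: add c-1 when the dividend is negative, then shift arithmetically. A minimal runnable sketch of the identity (the helper name div64pow2 is hypothetical; the divisor is fixed at 1<<3):

package main

import "fmt"

// div64pow2 mirrors the rule: the bias is c-1 when n is negative (the sign
// mask n>>63, shifted right unsigned by 64-log2(c)) and 0 otherwise, so the
// arithmetic shift rounds toward zero the way Go's / does.
func div64pow2(n int64, k uint) int64 {
	bias := int64(uint64(n>>63) >> (64 - k))
	return (n + bias) >> k
}

func main() {
	for _, n := range []int64{-9, -8, -7, -1, 0, 1, 7, 9} {
		fmt.Println(n, div64pow2(n, 3) == n/8) // all true
	}
}
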
+func rewriteValuegeneric_OpDiv64F(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Div64F (Const64F [c]) (Const64F [d]))
+       // cond:
+       // result: (Const64F [f2i(i2f(c) / i2f(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64F {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64F {
+                       break
+               }
+               d := v_1.AuxInt
+               v.reset(OpConst64F)
+               v.AuxInt = f2i(i2f(c) / i2f(d))
+               return true
+       }
+       // match: (Div64F x (Const64F [f2i(1)]))
        // cond:
        // result: x
        for {
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32F {
+               if v_1.Op != OpConst64F {
                        break
                }
                if v_1.AuxInt != f2i(1) {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (Div32F x (Const32F [f2i(-1)]))
-       // cond:
-       // result: (Neg32F x)
-       for {
-               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (Div64F x (Const64F [f2i(-1)]))
+       // cond:
+       // result: (Neg64F x)
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64F {
+                       break
+               }
+               if v_1.AuxInt != f2i(-1) {
+                       break
+               }
+               v.reset(OpNeg64F)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
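
The float rules lean on the f2i/i2f helpers, which round-trip a float64 through its IEEE-754 bit pattern so the constant fits the integer AuxInt field. Equivalent definitions, sketched:

package main

import (
	"fmt"
	"math"
)

// f2i/i2f reinterpret a float64 as its IEEE-754 bits and back, so a float
// constant can be carried in an int64 AuxInt without losing precision.
func f2i(f float64) int64 { return int64(math.Float64bits(f)) }
func i2f(i int64) float64 { return math.Float64frombits(uint64(i)) }

func main() {
	c, d := f2i(1.0), f2i(0.25)
	fmt.Println(i2f(c) / i2f(d)) // the Div64F constant fold yields f2i(4)
}
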
+func rewriteValuegeneric_OpDiv64u(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Div64u (Const64 [c])  (Const64 [d]))
+       // cond: d != 0
+       // result: (Const64 [int64(uint64(c)/uint64(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(d != 0) {
+                       break
+               }
+               v.reset(OpConst64)
+               v.AuxInt = int64(uint64(c) / uint64(d))
+               return true
+       }
+       // match: (Div64u n (Const64 [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (Rsh64Ux64 n (Const64 <config.fe.TypeUInt64()> [log2(c)]))
+       for {
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c)) {
+                       break
+               }
+               v.reset(OpRsh64Ux64)
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v0.AuxInt = log2(c)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Div64u x (Const64 [c]))
+       // cond: umagicOK(64, c) && config.RegSize == 8 && umagic(64,c).m&1 == 0
+       // result: (Rsh64Ux64 <config.fe.TypeUInt64()>     (Hmul64u <config.fe.TypeUInt64()>       (Const64 <config.fe.TypeUInt64()> [int64(1<<63+umagic(64,c).m/2)])       x)     (Const64 <config.fe.TypeUInt64()> [umagic(64,c).s-1]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(64, c) && config.RegSize == 8 && umagic(64, c).m&1 == 0) {
+                       break
+               }
+               v.reset(OpRsh64Ux64)
+               v.Type = config.fe.TypeUInt64()
+               v0 := b.NewValue0(v.Pos, OpHmul64u, config.fe.TypeUInt64())
+               v1 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v1.AuxInt = int64(1<<63 + umagic(64, c).m/2)
+               v0.AddArg(v1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = umagic(64, c).s - 1
+               v.AddArg(v2)
+               return true
+       }
+       // match: (Div64u x (Const64 [c]))
+       // cond: umagicOK(64, c) && config.RegSize == 8 && c&1 == 0
+       // result: (Rsh64Ux64 <config.fe.TypeUInt64()>     (Hmul64u <config.fe.TypeUInt64()>       (Const64 <config.fe.TypeUInt64()> [int64(1<<63+(umagic(64,c).m+1)/2)])       (Rsh64Ux64 <config.fe.TypeUInt64()> x (Const64 <config.fe.TypeUInt64()> [1])))     (Const64 <config.fe.TypeUInt64()> [umagic(64,c).s-2]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(64, c) && config.RegSize == 8 && c&1 == 0) {
+                       break
+               }
+               v.reset(OpRsh64Ux64)
+               v.Type = config.fe.TypeUInt64()
+               v0 := b.NewValue0(v.Pos, OpHmul64u, config.fe.TypeUInt64())
+               v1 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v1.AuxInt = int64(1<<63 + (umagic(64, c).m+1)/2)
+               v0.AddArg(v1)
+               v2 := b.NewValue0(v.Pos, OpRsh64Ux64, config.fe.TypeUInt64())
+               v2.AddArg(x)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = 1
+               v2.AddArg(v3)
+               v0.AddArg(v2)
+               v.AddArg(v0)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = umagic(64, c).s - 2
+               v.AddArg(v4)
+               return true
+       }
+       // match: (Div64u x (Const64 [c]))
+       // cond: umagicOK(64, c) && config.RegSize == 8
+       // result: (Rsh64Ux64 <config.fe.TypeUInt64()>     (Avg64u       x       (Hmul64u <config.fe.TypeUInt64()>         (Const64 <config.fe.TypeUInt64()> [int64(umagic(64,c).m)])         x))     (Const64 <config.fe.TypeUInt64()> [umagic(64,c).s-1]))
+       for {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(umagicOK(64, c) && config.RegSize == 8) {
+                       break
+               }
+               v.reset(OpRsh64Ux64)
+               v.Type = config.fe.TypeUInt64()
+               v0 := b.NewValue0(v.Pos, OpAvg64u, config.fe.TypeUInt64())
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpHmul64u, config.fe.TypeUInt64())
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = int64(umagic(64, c).m)
+               v1.AddArg(v2)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = umagic(64, c).s - 1
+               v.AddArg(v3)
+               return true
+       }
+       return false
+}
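
The final Div64u rule is the round-up magic-number method. The exact multiplier M = ceil(2^(64+s)/c) needs 65 bits, so the generated code keeps only the low 64 bits m and recovers the missing top bit through Avg64u, which halves x + hi(m*x) without dropping the carry. A sketch under those definitions (div64u is a hypothetical stand-in; m and s are recomputed the way magic.go derives them):

package main

import (
	"fmt"
	"math/big"
	"math/bits"
)

// div64u mirrors the Avg64u rule: with M = ceil(2^(64+s)/c) = 2^64 + m,
// x/c == floor((x + hi(m*x)) / 2) >> (s-1), where the halving is done
// overflow-free so the 65th bit of the sum is not lost.
func div64u(x, c uint64) uint64 {
	s := uint(bits.Len64(c))
	M := new(big.Int).Lsh(big.NewInt(1), 64+s)
	M.Add(M, new(big.Int).SetUint64(c-1))
	M.Div(M, new(big.Int).SetUint64(c)) // M = ceil(2^(64+s)/c), in [2^64, 2^65)
	m := M.Sub(M, new(big.Int).Lsh(big.NewInt(1), 64)).Uint64()
	hi, _ := bits.Mul64(m, x)
	avg := x&hi + (x^hi)>>1 // floor((x+hi)/2) without overflow
	return avg >> (s - 1)
}

func main() {
	for _, x := range []uint64{0, 6, 7, 8, 1<<64 - 1} {
		fmt.Println(x, div64u(x, 7) == x/7) // c=7 is not a power of two
	}
}
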
+func rewriteValuegeneric_OpDiv8(v *Value, config *Config) bool {
+       b := v.Block
+       _ = b
+       // match: (Div8   (Const8  [c])  (Const8  [d]))
+       // cond: d != 0
+       // result: (Const8  [int64(int8(c)/int8(d))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst32F {
+               if v_1.Op != OpConst8 {
                        break
                }
-               if v_1.AuxInt != f2i(-1) {
+               d := v_1.AuxInt
+               if !(d != 0) {
                        break
                }
-               v.reset(OpNeg32F)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(c) / int8(d))
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpDiv64(v *Value, config *Config) bool {
-       b := v.Block
-       _ = b
-       // match: (Div64 <t> x (Const64 [c]))
-       // cond: c > 0 && smagic64ok(c) && smagic64m(c) > 0
-       // result: (Sub64 <t>     (Rsh64x64 <t>       (Hmul64 <t>         (Const64 <t> [smagic64m(c)])         x)       (Const64 <t> [smagic64s(c)]))     (Rsh64x64 <t>       x       (Const64 <t> [63])))
+       // match: (Div8  <t> n (Const8  [c]))
+       // cond: c < 0 && c != -1<<7
+       // result: (Neg8  (Div8  <t> n (Const8  <t> [-c])))
        for {
                t := v.Type
-               x := v.Args[0]
+               n := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst8 {
                        break
                }
                c := v_1.AuxInt
-               if !(c > 0 && smagic64ok(c) && smagic64m(c) > 0) {
+               if !(c < 0 && c != -1<<7) {
                        break
                }
-               v.reset(OpSub64)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpRsh64x64, t)
-               v1 := b.NewValue0(v.Pos, OpHmul64, t)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = smagic64m(c)
-               v1.AddArg(v2)
-               v1.AddArg(x)
+               v.reset(OpNeg8)
+               v0 := b.NewValue0(v.Pos, OpDiv8, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpConst8, t)
+               v1.AuxInt = -c
                v0.AddArg(v1)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = smagic64s(c)
-               v0.AddArg(v3)
                v.AddArg(v0)
-               v4 := b.NewValue0(v.Pos, OpRsh64x64, t)
-               v4.AddArg(x)
-               v5 := b.NewValue0(v.Pos, OpConst64, t)
-               v5.AuxInt = 63
-               v4.AddArg(v5)
-               v.AddArg(v4)
                return true
        }
-       // match: (Div64 <t> x (Const64 [c]))
-       // cond: c > 0 && smagic64ok(c) && smagic64m(c) < 0
-       // result: (Sub64 <t>     (Rsh64x64 <t>       (Add64 <t>         (Hmul64 <t>           (Const64 <t> [smagic64m(c)])           x)         x)       (Const64 <t> [smagic64s(c)]))     (Rsh64x64 <t>       x       (Const64 <t> [63])))
+       // match: (Div8  <t> x (Const8  [-1<<7 ]))
+       // cond:
+       // result: (Rsh8Ux64  (And8  <t> x (Neg8  <t> x)) (Const64 <config.fe.TypeUInt64()> [7 ]))
        for {
                t := v.Type
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst8 {
                        break
                }
-               c := v_1.AuxInt
-               if !(c > 0 && smagic64ok(c) && smagic64m(c) < 0) {
+               if v_1.AuxInt != -1<<7 {
                        break
                }
-               v.reset(OpSub64)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpRsh64x64, t)
-               v1 := b.NewValue0(v.Pos, OpAdd64, t)
-               v2 := b.NewValue0(v.Pos, OpHmul64, t)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = smagic64m(c)
-               v2.AddArg(v3)
-               v2.AddArg(x)
-               v1.AddArg(v2)
+               v.reset(OpRsh8Ux64)
+               v0 := b.NewValue0(v.Pos, OpAnd8, t)
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpNeg8, t)
                v1.AddArg(x)
                v0.AddArg(v1)
-               v4 := b.NewValue0(v.Pos, OpConst64, t)
-               v4.AuxInt = smagic64s(c)
-               v0.AddArg(v4)
                v.AddArg(v0)
-               v5 := b.NewValue0(v.Pos, OpRsh64x64, t)
-               v5.AddArg(x)
-               v6 := b.NewValue0(v.Pos, OpConst64, t)
-               v6.AuxInt = 63
-               v5.AddArg(v6)
-               v.AddArg(v5)
+               v2 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v2.AuxInt = 7
+               v.AddArg(v2)
                return true
        }
-       // match: (Div64 <t> x (Const64 [c]))
-       // cond: c < 0 && smagic64ok(c) && smagic64m(c) > 0
-       // result: (Neg64 <t>     (Sub64 <t>       (Rsh64x64 <t>         (Hmul64 <t>           (Const64 <t> [smagic64m(c)])           x)         (Const64 <t> [smagic64s(c)]))       (Rsh64x64 <t>         x         (Const64 <t> [63]))))
+       // match: (Div8  <t> n (Const8  [c]))
+       // cond: isPowerOfTwo(c)
+       // result: (Rsh8x64     (Add8  <t> n (Rsh8Ux64  <t> (Rsh8x64  <t> n (Const64 <config.fe.TypeUInt64()> [ 7])) (Const64 <config.fe.TypeUInt64()> [ 8-log2(c)])))     (Const64 <config.fe.TypeUInt64()> [log2(c)]))
        for {
                t := v.Type
-               x := v.Args[0]
+               n := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst8 {
                        break
                }
                c := v_1.AuxInt
-               if !(c < 0 && smagic64ok(c) && smagic64m(c) > 0) {
+               if !(isPowerOfTwo(c)) {
                        break
                }
-               v.reset(OpNeg64)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpSub64, t)
-               v1 := b.NewValue0(v.Pos, OpRsh64x64, t)
-               v2 := b.NewValue0(v.Pos, OpHmul64, t)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = smagic64m(c)
+               v.reset(OpRsh8x64)
+               v0 := b.NewValue0(v.Pos, OpAdd8, t)
+               v0.AddArg(n)
+               v1 := b.NewValue0(v.Pos, OpRsh8Ux64, t)
+               v2 := b.NewValue0(v.Pos, OpRsh8x64, t)
+               v2.AddArg(n)
+               v3 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v3.AuxInt = 7
                v2.AddArg(v3)
-               v2.AddArg(x)
                v1.AddArg(v2)
-               v4 := b.NewValue0(v.Pos, OpConst64, t)
-               v4.AuxInt = smagic64s(c)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 8 - log2(c)
                v1.AddArg(v4)
                v0.AddArg(v1)
-               v5 := b.NewValue0(v.Pos, OpRsh64x64, t)
-               v5.AddArg(x)
-               v6 := b.NewValue0(v.Pos, OpConst64, t)
-               v6.AuxInt = 63
-               v5.AddArg(v6)
-               v0.AddArg(v5)
                v.AddArg(v0)
+               v5 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v5.AuxInt = log2(c)
+               v.AddArg(v5)
                return true
        }
-       // match: (Div64 <t> x (Const64 [c]))
-       // cond: c < 0 && smagic64ok(c) && smagic64m(c) < 0
-       // result: (Neg64 <t>     (Sub64 <t>       (Rsh64x64 <t>         (Add64 <t>           (Hmul64 <t>             (Const64 <t> [smagic64m(c)])             x)           x)         (Const64 <t> [smagic64s(c)]))       (Rsh64x64 <t>         x         (Const64 <t> [63]))))
+       // match: (Div8 <t> x (Const8 [c]))
+       // cond: smagicOK(8,c)
+       // result: (Sub8 <t>     (Rsh32x64 <t>       (Mul32 <config.fe.TypeUInt32()>         (Const32 <config.fe.TypeUInt32()> [int64(smagic(8,c).m)])         (SignExt8to32 x))       (Const64 <config.fe.TypeUInt64()> [8+smagic(8,c).s]))     (Rsh32x64 <t>       (SignExt8to32 x)       (Const64 <config.fe.TypeUInt64()> [31])))
        for {
                t := v.Type
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst8 {
                        break
                }
                c := v_1.AuxInt
-               if !(c < 0 && smagic64ok(c) && smagic64m(c) < 0) {
+               if !(smagicOK(8, c)) {
                        break
                }
-               v.reset(OpNeg64)
+               v.reset(OpSub8)
                v.Type = t
-               v0 := b.NewValue0(v.Pos, OpSub64, t)
-               v1 := b.NewValue0(v.Pos, OpRsh64x64, t)
-               v2 := b.NewValue0(v.Pos, OpAdd64, t)
-               v3 := b.NewValue0(v.Pos, OpHmul64, t)
-               v4 := b.NewValue0(v.Pos, OpConst64, t)
-               v4.AuxInt = smagic64m(c)
-               v3.AddArg(v4)
-               v3.AddArg(x)
-               v2.AddArg(v3)
-               v2.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpRsh32x64, t)
+               v1 := b.NewValue0(v.Pos, OpMul32, config.fe.TypeUInt32())
+               v2 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v2.AuxInt = int64(smagic(8, c).m)
                v1.AddArg(v2)
-               v5 := b.NewValue0(v.Pos, OpConst64, t)
-               v5.AuxInt = smagic64s(c)
-               v1.AddArg(v5)
+               v3 := b.NewValue0(v.Pos, OpSignExt8to32, config.fe.TypeInt32())
+               v3.AddArg(x)
+               v1.AddArg(v3)
                v0.AddArg(v1)
-               v6 := b.NewValue0(v.Pos, OpRsh64x64, t)
-               v6.AddArg(x)
-               v7 := b.NewValue0(v.Pos, OpConst64, t)
-               v7.AuxInt = 63
-               v6.AddArg(v7)
-               v0.AddArg(v6)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 8 + smagic(8, c).s
+               v0.AddArg(v4)
                v.AddArg(v0)
+               v5 := b.NewValue0(v.Pos, OpRsh32x64, t)
+               v6 := b.NewValue0(v.Pos, OpSignExt8to32, config.fe.TypeInt32())
+               v6.AddArg(x)
+               v5.AddArg(v6)
+               v7 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v7.AuxInt = 31
+               v5.AddArg(v7)
+               v.AddArg(v5)
                return true
        }
        return false
 }
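
At 8 bits the signed magic multiply fits in a plain 32-bit Mul32 after sign extension, so no high-multiply op is needed. The rule can be checked exhaustively; this sketch recomputes s and m the way magic.go defines them (s = floor(log2 c), m = ceil(2^(8+s)/c)) and compares against Go's / for every int8 dividend:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// Exhaustive check of the widened Div8 rule:
	// x/c == (m*sext32(x))>>(8+s) - sext32(x)>>31 for every int8 x.
	ok := true
	for c := int32(3); c < 128; c++ {
		if c&(c-1) == 0 {
			continue // powers of two take the shift-based rule instead
		}
		s := uint(bits.Len32(uint32(c))) - 1 // floor(log2(c))
		m := (int32(1)<<(8+s) + c - 1) / c   // ceil(2^(8+s)/c)
		for x := int32(-128); x < 128; x++ {
			if (m*x)>>(8+s)-x>>31 != x/c {
				ok = false
				fmt.Println("mismatch", c, x)
			}
		}
	}
	fmt.Println("all divisors verified:", ok)
}
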
-func rewriteValuegeneric_OpDiv64F(v *Value, config *Config) bool {
+func rewriteValuegeneric_OpDiv8u(v *Value, config *Config) bool {
        b := v.Block
        _ = b
-       // match: (Div64F x (Const64F [f2i(1)]))
-       // cond:
-       // result: x
+       // match: (Div8u  (Const8  [c])  (Const8  [d]))
+       // cond: d != 0
+       // result: (Const8  [int64(int8(uint8(c)/uint8(d)))])
        for {
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst64F {
-                       break
-               }
-               if v_1.AuxInt != f2i(1) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (Div64F x (Const64F [f2i(-1)]))
-       // cond:
-       // result: (Neg32F x)
-       for {
-               x := v.Args[0]
+               c := v_0.AuxInt
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64F {
+               if v_1.Op != OpConst8 {
                        break
                }
-               if v_1.AuxInt != f2i(-1) {
+               d := v_1.AuxInt
+               if !(d != 0) {
                        break
                }
-               v.reset(OpNeg32F)
-               v.AddArg(x)
+               v.reset(OpConst8)
+               v.AuxInt = int64(int8(uint8(c) / uint8(d)))
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpDiv64u(v *Value, config *Config) bool {
-       b := v.Block
-       _ = b
-       // match: (Div64u <t> n (Const64 [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (Rsh64Ux64 n (Const64 <t> [log2(c)]))
+       // match: (Div8u  n (Const8  [c]))
+       // cond: isPowerOfTwo(c&0xff)
+       // result: (Rsh8Ux64 n  (Const64 <config.fe.TypeUInt64()> [log2(c&0xff)]))
        for {
-               t := v.Type
                n := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst8 {
                        break
                }
                c := v_1.AuxInt
-               if !(isPowerOfTwo(c)) {
+               if !(isPowerOfTwo(c & 0xff)) {
                        break
                }
-               v.reset(OpRsh64Ux64)
+               v.reset(OpRsh8Ux64)
                v.AddArg(n)
-               v0 := b.NewValue0(v.Pos, OpConst64, t)
-               v0.AuxInt = log2(c)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (Div64u <t> x (Const64 [c]))
-       // cond: umagic64ok(c) && !umagic64a(c)
-       // result: (Rsh64Ux64     (Hmul64u <t>       (Const64 <t> [umagic64m(c)])       x)     (Const64 <t> [umagic64s(c)]))
-       for {
-               t := v.Type
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(umagic64ok(c) && !umagic64a(c)) {
-                       break
-               }
-               v.reset(OpRsh64Ux64)
-               v0 := b.NewValue0(v.Pos, OpHmul64u, t)
-               v1 := b.NewValue0(v.Pos, OpConst64, t)
-               v1.AuxInt = umagic64m(c)
-               v0.AddArg(v1)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v0.AuxInt = log2(c & 0xff)
                v.AddArg(v0)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = umagic64s(c)
-               v.AddArg(v2)
                return true
        }
-       // match: (Div64u <t> x (Const64 [c]))
-       // cond: umagic64ok(c) && umagic64a(c)
-       // result: (Rsh64Ux64     (Avg64u <t>       (Hmul64u <t>         x         (Const64 <t> [umagic64m(c)]))       x)     (Const64 <t> [umagic64s(c)-1]))
+       // match: (Div8u x (Const8 [c]))
+       // cond: umagicOK(8, c)
+       // result: (Trunc32to8     (Rsh32Ux64 <config.fe.TypeUInt32()>       (Mul32 <config.fe.TypeUInt32()>         (Const32 <config.fe.TypeUInt32()> [int64(1<<8+umagic(8,c).m)])         (ZeroExt8to32 x))       (Const64 <config.fe.TypeUInt64()> [8+umagic(8,c).s])))
        for {
-               t := v.Type
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpConst64 {
+               if v_1.Op != OpConst8 {
                        break
                }
                c := v_1.AuxInt
-               if !(umagic64ok(c) && umagic64a(c)) {
+               if !(umagicOK(8, c)) {
                        break
                }
-               v.reset(OpRsh64Ux64)
-               v0 := b.NewValue0(v.Pos, OpAvg64u, t)
-               v1 := b.NewValue0(v.Pos, OpHmul64u, t)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpConst64, t)
-               v2.AuxInt = umagic64m(c)
+               v.reset(OpTrunc32to8)
+               v0 := b.NewValue0(v.Pos, OpRsh32Ux64, config.fe.TypeUInt32())
+               v1 := b.NewValue0(v.Pos, OpMul32, config.fe.TypeUInt32())
+               v2 := b.NewValue0(v.Pos, OpConst32, config.fe.TypeUInt32())
+               v2.AuxInt = int64(1<<8 + umagic(8, c).m)
                v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpZeroExt8to32, config.fe.TypeUInt32())
+               v3.AddArg(x)
+               v1.AddArg(v3)
                v0.AddArg(v1)
-               v0.AddArg(x)
+               v4 := b.NewValue0(v.Pos, OpConst64, config.fe.TypeUInt64())
+               v4.AuxInt = 8 + umagic(8, c).s
+               v0.AddArg(v4)
                v.AddArg(v0)
-               v3 := b.NewValue0(v.Pos, OpConst64, t)
-               v3.AuxInt = umagic64s(c) - 1
-               v.AddArg(v3)
                return true
        }
        return false
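
The unsigned 8-bit case widens the same way: the 9-bit multiplier 2^8 + umagic(8,c).m fits comfortably in a 32-bit multiply. An exhaustive sketch over every uint8 dividend (s = bitlen(c), M = ceil(2^(8+s)/c)):

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// Exhaustive check of the widened Div8u rule:
	// x/c == (M*zext32(x)) >> (8+s) for every uint8 x, with M = 2^8 + m.
	ok := true
	for c := uint32(3); c < 256; c++ {
		if c&(c-1) == 0 {
			continue // powers of two become plain shifts
		}
		s := uint(bits.Len32(c))            // bit length of c
		M := (uint32(1)<<(8+s) + c - 1) / c // ceil(2^(8+s)/c), a 9-bit value
		for x := uint32(0); x < 256; x++ {
			if M*x>>(8+s) != x/c {
				ok = false
				fmt.Println("mismatch", c, x)
			}
		}
	}
	fmt.Println("all divisors verified:", ok)
}
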
@@ -5158,6 +6283,57 @@ func rewriteValuegeneric_OpMod16(v *Value, config *Config) bool {
                v.AuxInt = int64(int16(c % d))
                return true
        }
+       // match: (Mod16 <t> n (Const16 [c]))
+       // cond: c < 0 && c != -1<<15
+       // result: (Mod16 <t> n (Const16 <t> [-c]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c < 0 && c != -1<<15) {
+                       break
+               }
+               v.reset(OpMod16)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = -c
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mod16  <t> x (Const16 [c]))
+       // cond: x.Op != OpConst16 && (c > 0 || c == -1<<15)
+       // result: (Sub16 x (Mul16 <t> (Div16  <t> x (Const16 <t> [c])) (Const16 <t> [c])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(x.Op != OpConst16 && (c > 0 || c == -1<<15)) {
+                       break
+               }
+               v.reset(OpSub16)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpMul16, t)
+               v1 := b.NewValue0(v.Pos, OpDiv16, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpConst16, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpConst16, t)
+               v3.AuxInt = c
+               v0.AddArg(v3)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
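
Note the Mod rules never compute a remainder directly: once the divide is strength-reduced, the remainder falls out of the Euclidean identity for Go's truncated division, which is all the Sub/Mul/Div expansion above relies on:

package main

import "fmt"

func main() {
	// x % c == x - (x/c)*c holds for Go's truncated division,
	// including negative x.
	x, c := int16(-12345), int16(7)
	fmt.Println(x%c == x-(x/c)*c) // true
}
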
 func rewriteValuegeneric_OpMod16u(v *Value, config *Config) bool {
@@ -5167,21 +6343,71 @@ func rewriteValuegeneric_OpMod16u(v *Value, config *Config) bool {
        // cond: d != 0
        // result: (Const16 [int64(uint16(c) % uint16(d))])
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpConst16 {
-                       break
-               }
-               c := v_0.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               c := v_0.AuxInt
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               d := v_1.AuxInt
+               if !(d != 0) {
+                       break
+               }
+               v.reset(OpConst16)
+               v.AuxInt = int64(uint16(c) % uint16(d))
+               return true
+       }
+       // match: (Mod16u <t> n (Const16 [c]))
+       // cond: isPowerOfTwo(c&0xffff)
+       // result: (And16 n (Const16 <t> [(c&0xffff)-1]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst16 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c & 0xffff)) {
+                       break
+               }
+               v.reset(OpAnd16)
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst16, t)
+               v0.AuxInt = (c & 0xffff) - 1
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mod16u <t> x (Const16 [c]))
+       // cond: x.Op != OpConst16 && c > 0 && umagicOK(16,c)
+       // result: (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
+       for {
+               t := v.Type
+               x := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpConst16 {
                        break
                }
-               d := v_1.AuxInt
-               if !(d != 0) {
+               c := v_1.AuxInt
+               if !(x.Op != OpConst16 && c > 0 && umagicOK(16, c)) {
                        break
                }
-               v.reset(OpConst16)
-               v.AuxInt = int64(uint16(c) % uint16(d))
+               v.reset(OpSub16)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpMul16, t)
+               v1 := b.NewValue0(v.Pos, OpDiv16u, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpConst16, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpConst16, t)
+               v3.AuxInt = c
+               v0.AddArg(v3)
+               v.AddArg(v0)
                return true
        }
        return false
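
For unsigned operands a power-of-two modulus is just the low bits, which is exactly the And mask the rule above emits:

package main

import "fmt"

func main() {
	// Unsigned mod by 1<<k keeps the low k bits: n % 8 == n & 7.
	for _, n := range []uint16{0, 7, 8, 9, 1<<16 - 1} {
		fmt.Println(n, n%8 == n&7) // all true
	}
}
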
@@ -5210,6 +6436,57 @@ func rewriteValuegeneric_OpMod32(v *Value, config *Config) bool {
                v.AuxInt = int64(int32(c % d))
                return true
        }
+       // match: (Mod32 <t> n (Const32 [c]))
+       // cond: c < 0 && c != -1<<31
+       // result: (Mod32 <t> n (Const32 <t> [-c]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c < 0 && c != -1<<31) {
+                       break
+               }
+               v.reset(OpMod32)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = -c
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mod32  <t> x (Const32 [c]))
+       // cond: x.Op != OpConst32 && (c > 0 || c == -1<<31)
+       // result: (Sub32 x (Mul32 <t> (Div32  <t> x (Const32 <t> [c])) (Const32 <t> [c])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(x.Op != OpConst32 && (c > 0 || c == -1<<31)) {
+                       break
+               }
+               v.reset(OpSub32)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpMul32, t)
+               v1 := b.NewValue0(v.Pos, OpDiv32, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpConst32, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpConst32, t)
+               v3.AuxInt = c
+               v0.AddArg(v3)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
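
The first new Mod32 rule strips the divisor's sign before the subtraction-based lowering. That is sound because a truncated remainder ignores the divisor's sign:

package main

import "fmt"

func main() {
	// x % -c == x % c under truncated division, so the rewriter only has
	// to handle positive divisors (plus the -1<<31 corner case).
	for _, x := range []int32{-100, -7, -1, 0, 1, 7, 100} {
		fmt.Println(x, x%-3 == x%3) // all true
	}
}
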
 func rewriteValuegeneric_OpMod32u(v *Value, config *Config) bool {
@@ -5236,6 +6513,56 @@ func rewriteValuegeneric_OpMod32u(v *Value, config *Config) bool {
                v.AuxInt = int64(uint32(c) % uint32(d))
                return true
        }
+       // match: (Mod32u <t> n (Const32 [c]))
+       // cond: isPowerOfTwo(c&0xffffffff)
+       // result: (And32 n (Const32 <t> [(c&0xffffffff)-1]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c & 0xffffffff)) {
+                       break
+               }
+               v.reset(OpAnd32)
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst32, t)
+               v0.AuxInt = (c & 0xffffffff) - 1
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mod32u <t> x (Const32 [c]))
+       // cond: x.Op != OpConst32 && c > 0 && umagicOK(32,c)
+       // result: (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst32 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(x.Op != OpConst32 && c > 0 && umagicOK(32, c)) {
+                       break
+               }
+               v.reset(OpSub32)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpMul32, t)
+               v1 := b.NewValue0(v.Pos, OpDiv32u, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpConst32, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpConst32, t)
+               v3.AuxInt = c
+               v0.AddArg(v3)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpMod64(v *Value, config *Config) bool {
@@ -5262,8 +6589,30 @@ func rewriteValuegeneric_OpMod64(v *Value, config *Config) bool {
                v.AuxInt = c % d
                return true
        }
+       // match: (Mod64 <t> n (Const64 [c]))
+       // cond: c < 0 && c != -1<<63
+       // result: (Mod64 <t> n (Const64 <t> [-c]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst64 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c < 0 && c != -1<<63) {
+                       break
+               }
+               v.reset(OpMod64)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst64, t)
+               v0.AuxInt = -c
+               v.AddArg(v0)
+               return true
+       }
        // match: (Mod64  <t> x (Const64 [c]))
-       // cond: x.Op != OpConst64 && smagic64ok(c)
+       // cond: x.Op != OpConst64 && (c > 0 || c == -1<<63)
        // result: (Sub64 x (Mul64 <t> (Div64  <t> x (Const64 <t> [c])) (Const64 <t> [c])))
        for {
                t := v.Type
@@ -5273,7 +6622,7 @@ func rewriteValuegeneric_OpMod64(v *Value, config *Config) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(x.Op != OpConst64 && smagic64ok(c)) {
+               if !(x.Op != OpConst64 && (c > 0 || c == -1<<63)) {
                        break
                }
                v.reset(OpSub64)
@@ -5339,7 +6688,7 @@ func rewriteValuegeneric_OpMod64u(v *Value, config *Config) bool {
                return true
        }
        // match: (Mod64u <t> x (Const64 [c]))
-       // cond: x.Op != OpConst64 && umagic64ok(c)
+       // cond: x.Op != OpConst64 && c > 0 && umagicOK(64,c)
        // result: (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
        for {
                t := v.Type
@@ -5349,7 +6698,7 @@ func rewriteValuegeneric_OpMod64u(v *Value, config *Config) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(x.Op != OpConst64 && umagic64ok(c)) {
+               if !(x.Op != OpConst64 && c > 0 && umagicOK(64, c)) {
                        break
                }
                v.reset(OpSub64)
@@ -5393,6 +6742,57 @@ func rewriteValuegeneric_OpMod8(v *Value, config *Config) bool {
                v.AuxInt = int64(int8(c % d))
                return true
        }
+       // match: (Mod8  <t> n (Const8  [c]))
+       // cond: c < 0 && c != -1<<7
+       // result: (Mod8  <t> n (Const8  <t> [-c]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(c < 0 && c != -1<<7) {
+                       break
+               }
+               v.reset(OpMod8)
+               v.Type = t
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = -c
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mod8   <t> x (Const8  [c]))
+       // cond: x.Op != OpConst8  && (c > 0 || c == -1<<7)
+       // result: (Sub8  x (Mul8  <t> (Div8   <t> x (Const8  <t> [c])) (Const8  <t> [c])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(x.Op != OpConst8 && (c > 0 || c == -1<<7)) {
+                       break
+               }
+               v.reset(OpSub8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpMul8, t)
+               v1 := b.NewValue0(v.Pos, OpDiv8, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpConst8, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpConst8, t)
+               v3.AuxInt = c
+               v0.AddArg(v3)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpMod8u(v *Value, config *Config) bool {
@@ -5419,6 +6819,56 @@ func rewriteValuegeneric_OpMod8u(v *Value, config *Config) bool {
                v.AuxInt = int64(uint8(c) % uint8(d))
                return true
        }
+       // match: (Mod8u  <t> n (Const8  [c]))
+       // cond: isPowerOfTwo(c&0xff)
+       // result: (And8 n (Const8 <t> [(c&0xff)-1]))
+       for {
+               t := v.Type
+               n := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(isPowerOfTwo(c & 0xff)) {
+                       break
+               }
+               v.reset(OpAnd8)
+               v.AddArg(n)
+               v0 := b.NewValue0(v.Pos, OpConst8, t)
+               v0.AuxInt = (c & 0xff) - 1
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Mod8u  <t> x (Const8  [c]))
+       // cond: x.Op != OpConst8  && c > 0 && umagicOK(8 ,c)
+       // result: (Sub8  x (Mul8  <t> (Div8u  <t> x (Const8  <t> [c])) (Const8  <t> [c])))
+       for {
+               t := v.Type
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpConst8 {
+                       break
+               }
+               c := v_1.AuxInt
+               if !(x.Op != OpConst8 && c > 0 && umagicOK(8, c)) {
+                       break
+               }
+               v.reset(OpSub8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpMul8, t)
+               v1 := b.NewValue0(v.Pos, OpDiv8u, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpConst8, t)
+               v2.AuxInt = c
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpConst8, t)
+               v3.AuxInt = c
+               v0.AddArg(v3)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValuegeneric_OpMul16(v *Value, config *Config) bool {
@@ -5442,6 +6892,23 @@ func rewriteValuegeneric_OpMul16(v *Value, config *Config) bool {
                v.AuxInt = int64(int16(c * d))
                return true
        }
+       // match: (Mul16 (Const16 [1]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst16 {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Mul16 (Const16 [-1]) x)
        // cond:
        // result: (Neg16 x)
@@ -5562,6 +7029,23 @@ func rewriteValuegeneric_OpMul32(v *Value, config *Config) bool {
                v.AuxInt = int64(int32(c * d))
                return true
        }
+       // match: (Mul32 (Const32 [1]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst32 {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Mul32 (Const32 [-1]) x)
        // cond:
        // result: (Neg32 x)
@@ -5809,6 +7293,23 @@ func rewriteValuegeneric_OpMul64(v *Value, config *Config) bool {
                v.AuxInt = c * d
                return true
        }
+       // match: (Mul64 (Const64 [1]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst64 {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Mul64 (Const64 [-1]) x)
        // cond:
        // result: (Neg64 x)
@@ -6056,6 +7557,23 @@ func rewriteValuegeneric_OpMul8(v *Value, config *Config) bool {
                v.AuxInt = int64(int8(c * d))
                return true
        }
+       // match: (Mul8  (Const8  [1]) x)
+       // cond:
+       // result: x
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpConst8 {
+                       break
+               }
+               if v_0.AuxInt != 1 {
+                       break
+               }
+               x := v.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
        // match: (Mul8  (Const8  [-1]) x)
        // cond:
        // result: (Neg8  x)
index 1f4b7bea079b7e6d1e8c58ec0fbb03c4819ac1dc..cf17dda6844d8c3bda93e87e90f1e30a5710b4fb 100644 (file)
@@ -292,6 +292,25 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Type = obj.TYPE_REG
                p.From.Reg = v.Args[1].Reg()
 
+       case ssa.Op386AVGLU:
+               // compute (x+y)/2 unsigned.
+               // Do a 32-bit add, the overflow goes into the carry.
+               // Shift right once and pull the carry back into the 31st bit.
+               r := v.Reg()
+               if r != v.Args[0].Reg() {
+                       v.Fatalf("input[0] and output not in same register %s", v.LongString())
+               }
+               p := gc.Prog(x86.AADDL)
+               p.From.Type = obj.TYPE_REG
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = r
+               p.From.Reg = v.Args[1].Reg()
+               p = gc.Prog(x86.ARCRL)
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = 1
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = r
+
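
The generic Avg32u op, which the 32-bit unsigned magic rules need on 386, lowers here to ADDL+RCRL: the add's carry is the 33rd bit of x+y, and rotating right through carry reinserts it as bit 31, so the average never overflows. A Go analog of the value the pair computes (avg32u is a hypothetical name):

package main

import "fmt"

// avg32u computes (x+y)/2 without losing the carry, the same value the
// ADDL/RCRL pair produces: widen to 64 bits, add, shift right once.
func avg32u(x, y uint32) uint32 {
	return uint32((uint64(x) + uint64(y)) >> 1)
}

func main() {
	m := ^uint32(0)                // 0xffffffff
	fmt.Println(avg32u(m, m) == m) // true; a plain 32-bit add would overflow
}
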
        case ssa.Op386ADDLconst:
                r := v.Reg()
                a := v.Args[0].Reg()