cmd/compile: implement CMOV on amd64
author     Giovanni Bajo <rasky@develer.com>  Mon, 5 Mar 2018 19:59:40 +0000 (20:59 +0100)
committer  Giovanni Bajo <rasky@develer.com>  Mon, 12 Mar 2018 18:01:33 +0000 (18:01 +0000)
This builds upon the branchelim pass, activating it for amd64 and
lowering CondSelect. Special care is taken with FPU instructions,
where NaN (unordered) comparisons require dedicated handling.

Benchmark results on Xeon E5630 (Westmere EP):

name                      old time/op    new time/op    delta
BinaryTree17-16              4.99s ± 9%     4.66s ± 2%     ~     (p=0.095 n=5+5)
Fannkuch11-16                4.93s ± 3%     5.04s ± 2%     ~     (p=0.548 n=5+5)
FmtFprintfEmpty-16          58.8ns ± 7%    61.4ns ±14%     ~     (p=0.579 n=5+5)
FmtFprintfString-16          114ns ± 2%     114ns ± 4%     ~     (p=0.603 n=5+5)
FmtFprintfInt-16             181ns ± 4%     125ns ± 3%  -30.90%  (p=0.008 n=5+5)
FmtFprintfIntInt-16          263ns ± 2%     217ns ± 2%  -17.34%  (p=0.008 n=5+5)
FmtFprintfPrefixedInt-16     230ns ± 1%     212ns ± 1%   -7.99%  (p=0.008 n=5+5)
FmtFprintfFloat-16           411ns ± 3%     344ns ± 5%  -16.43%  (p=0.008 n=5+5)
FmtManyArgs-16               828ns ± 4%     790ns ± 2%   -4.59%  (p=0.032 n=5+5)
GobDecode-16                10.9ms ± 4%    10.8ms ± 5%     ~     (p=0.548 n=5+5)
GobEncode-16                9.52ms ± 5%    9.46ms ± 2%     ~     (p=1.000 n=5+5)
Gzip-16                      334ms ± 2%     337ms ± 2%     ~     (p=0.548 n=5+5)
Gunzip-16                   64.4ms ± 1%    65.0ms ± 1%   +1.00%  (p=0.008 n=5+5)
HTTPClientServer-16          156µs ± 3%     155µs ± 3%     ~     (p=0.690 n=5+5)
JSONEncode-16               21.0ms ± 1%    21.8ms ± 0%   +3.76%  (p=0.016 n=5+4)
JSONDecode-16               95.1ms ± 0%    95.7ms ± 1%     ~     (p=0.151 n=5+5)
Mandelbrot200-16            6.38ms ± 1%    6.42ms ± 1%     ~     (p=0.095 n=5+5)
GoParse-16                  5.47ms ± 2%    5.36ms ± 1%   -1.95%  (p=0.016 n=5+5)
RegexpMatchEasy0_32-16       111ns ± 1%     111ns ± 1%     ~     (p=0.635 n=5+4)
RegexpMatchEasy0_1K-16       408ns ± 1%     411ns ± 2%     ~     (p=0.087 n=5+5)
RegexpMatchEasy1_32-16       103ns ± 1%     104ns ± 1%     ~     (p=0.484 n=5+5)
RegexpMatchEasy1_1K-16       659ns ± 2%     652ns ± 1%     ~     (p=0.571 n=5+5)
RegexpMatchMedium_32-16      176ns ± 2%     174ns ± 1%     ~     (p=0.476 n=5+5)
RegexpMatchMedium_1K-16     58.6µs ± 4%    57.7µs ± 4%     ~     (p=0.548 n=5+5)
RegexpMatchHard_32-16       3.07µs ± 3%    3.04µs ± 4%     ~     (p=0.421 n=5+5)
RegexpMatchHard_1K-16       89.2µs ± 1%    87.9µs ± 2%   -1.52%  (p=0.032 n=5+5)
Revcomp-16                   575ms ± 0%     587ms ± 2%   +2.12%  (p=0.032 n=4+5)
Template-16                  110ms ± 1%     107ms ± 3%   -3.00%  (p=0.032 n=5+5)
TimeParse-16                 463ns ± 0%     462ns ± 0%     ~     (p=0.810 n=5+4)
TimeFormat-16                538ns ± 0%     535ns ± 0%   -0.63%  (p=0.024 n=5+5)

name                      old speed      new speed      delta
GobDecode-16              70.7MB/s ± 4%  71.4MB/s ± 5%     ~     (p=0.452 n=5+5)
GobEncode-16              80.7MB/s ± 5%  81.2MB/s ± 2%     ~     (p=1.000 n=5+5)
Gzip-16                   58.2MB/s ± 2%  57.7MB/s ± 2%     ~     (p=0.452 n=5+5)
Gunzip-16                  302MB/s ± 1%   299MB/s ± 1%   -0.99%  (p=0.008 n=5+5)
JSONEncode-16             92.4MB/s ± 1%  89.1MB/s ± 0%   -3.63%  (p=0.016 n=5+4)
JSONDecode-16             20.4MB/s ± 0%  20.3MB/s ± 1%     ~     (p=0.135 n=5+5)
GoParse-16                10.6MB/s ± 2%  10.8MB/s ± 1%   +2.00%  (p=0.016 n=5+5)
RegexpMatchEasy0_32-16     286MB/s ± 1%   285MB/s ± 3%     ~     (p=1.000 n=5+5)
RegexpMatchEasy0_1K-16    2.51GB/s ± 1%  2.49GB/s ± 2%     ~     (p=0.095 n=5+5)
RegexpMatchEasy1_32-16     309MB/s ± 1%   307MB/s ± 1%     ~     (p=0.548 n=5+5)
RegexpMatchEasy1_1K-16    1.55GB/s ± 2%  1.57GB/s ± 1%     ~     (p=0.690 n=5+5)
RegexpMatchMedium_32-16   5.68MB/s ± 2%  5.73MB/s ± 1%     ~     (p=0.579 n=5+5)
RegexpMatchMedium_1K-16   17.5MB/s ± 4%  17.8MB/s ± 4%     ~     (p=0.500 n=5+5)
RegexpMatchHard_32-16     10.4MB/s ± 3%  10.5MB/s ± 4%     ~     (p=0.460 n=5+5)
RegexpMatchHard_1K-16     11.5MB/s ± 1%  11.7MB/s ± 2%   +1.57%  (p=0.032 n=5+5)
Revcomp-16                 442MB/s ± 0%   433MB/s ± 2%   -2.05%  (p=0.032 n=4+5)
Template-16               17.7MB/s ± 1%  18.2MB/s ± 3%   +3.12%  (p=0.032 n=5+5)

Change-Id: Ic7cb7374d07da031e771bdcbfdd832fd1b17159c
Reviewed-on: https://go-review.googlesource.com/98695
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/branchelim.go
src/cmd/compile/internal/ssa/branchelim_test.go
src/cmd/compile/internal/ssa/export_test.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/condmove.go [new file with mode: 0644]

src/cmd/compile/internal/amd64/ssa.go
index 92d3ec22fc2b346317d5aabf1f91cd914429f683..5becdd018e2ff2edc5e799b4944586f345abe873 100644 (file)
@@ -398,7 +398,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
 
-       case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
+       case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
+               ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
+               ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
+               ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
+               ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
+               ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
+               ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
+               ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
+               ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
+               ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
+               ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
+               ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
                r := v.Reg()
                if r != v.Args[0].Reg() {
                        v.Fatalf("input[0] and output not in same register %s", v.LongString())
@@ -409,6 +420,71 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
 
+       case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
+               r := v.Reg()
+               if r != v.Args[0].Reg() {
+                       v.Fatalf("input[0] and output not in same register %s", v.LongString())
+               }
+               // Flag condition: ^ZERO || PARITY
+               // Generate:
+               //   CMOV*NE  SRC,DST
+               //   CMOV*PS  SRC,DST
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = v.Args[1].Reg()
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = r
+               var q *obj.Prog
+               if v.Op == ssa.OpAMD64CMOVQNEF {
+                       q = s.Prog(x86.ACMOVQPS)
+               } else if v.Op == ssa.OpAMD64CMOVLNEF {
+                       q = s.Prog(x86.ACMOVLPS)
+               } else {
+                       q = s.Prog(x86.ACMOVWPS)
+               }
+               q.From.Type = obj.TYPE_REG
+               q.From.Reg = v.Args[1].Reg()
+               q.To.Type = obj.TYPE_REG
+               q.To.Reg = r
+
+       case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
+               r := v.Reg()
+               if r != v.Args[0].Reg() {
+                       v.Fatalf("input[0] and output not in same register %s", v.LongString())
+               }
+
+               // Flag condition: ZERO && !PARITY
+               // Generate:
+               //   MOV      SRC,AX
+               //   CMOV*NE  DST,AX
+               //   CMOV*PC  AX,DST
+               //
+               // TODO(rasky): we could generate:
+               //   CMOV*NE  DST,SRC
+               //   CMOV*PC  SRC,DST
+               // But this requires a way for regalloc to know that SRC might be
+               // clobbered by this instruction.
+               if v.Args[1].Reg() != x86.REG_AX {
+                       opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
+               }
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = x86.REG_AX
+               var q *obj.Prog
+               if v.Op == ssa.OpAMD64CMOVQEQF {
+                       q = s.Prog(x86.ACMOVQPC)
+               } else if v.Op == ssa.OpAMD64CMOVLEQF {
+                       q = s.Prog(x86.ACMOVLPC)
+               } else {
+                       q = s.Prog(x86.ACMOVWPC)
+               }
+               q.From.Type = obj.TYPE_REG
+               q.From.Reg = x86.REG_AX
+               q.To.Type = obj.TYPE_REG
+               q.To.Reg = r
+
        case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
                r := v.Reg()
                if r != v.Args[0].Reg() {
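
The paired sequences above exist because amd64 reports unordered
floating-point comparisons (NaN operands) by setting ZF, PF and CF
all to 1 after UCOMISS/UCOMISD, so a bare CMOV*EQ would wrongly fire
on NaN. A minimal sketch of the Go-level semantics these sequences
preserve (illustrative, not part of the patch):

    package main

    import (
            "fmt"
            "math"
    )

    func main() {
            nan := math.NaN()
            // Unordered operands must compare unequal: equality is
            // ZERO && !PARITY, inequality is !ZERO || PARITY.
            fmt.Println(nan == nan) // false
            fmt.Println(nan != nan) // true
    }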
src/cmd/compile/internal/ssa/branchelim.go
index 54508985b369452155391e2684f4d9316f9dd4e6..75a6b8238c9444dd6333c7dbf5d5d379e8faff9e 100644 (file)
@@ -19,7 +19,10 @@ package ssa
 // rewrite Phis in the postdominator as CondSelects.
 func branchelim(f *Func) {
        // FIXME: add support for lowering CondSelects on more architectures
-       if f.Config.arch != "arm64" {
+       switch f.Config.arch {
+       case "arm64", "amd64":
+               // implemented
+       default:
                return
        }
 
@@ -32,10 +35,22 @@ func branchelim(f *Func) {
        }
 }
 
-func canCondSelect(v *Value) bool {
+func canCondSelect(v *Value, arch string) bool {
        // For now, stick to simple scalars that fit in registers
-       sz := v.Type.Size()
-       return sz <= v.Block.Func.Config.RegSize && (v.Type.IsInteger() || v.Type.IsPtrShaped())
+       switch {
+       case v.Type.Size() > v.Block.Func.Config.RegSize:
+               return false
+       case v.Type.IsPtrShaped():
+               return true
+       case v.Type.IsInteger():
+               if arch == "amd64" && v.Type.Size() < 2 {
+                       // amd64 doesn't support CMOV with byte registers
+                       return false
+               }
+               return true
+       default:
+               return false
+       }
 }
 
 func elimIf(f *Func, dom *Block) bool {
@@ -68,7 +83,7 @@ func elimIf(f *Func, dom *Block) bool {
        for _, v := range post.Values {
                if v.Op == OpPhi {
                        hasphis = true
-                       if !canCondSelect(v) {
+                       if !canCondSelect(v, f.Config.arch) {
                                return false
                        }
                }
@@ -169,7 +184,7 @@ func elimIfElse(f *Func, b *Block) bool {
        for _, v := range post.Values {
                if v.Op == OpPhi {
                        hasphis = true
-                       if !canCondSelect(v) {
+                       if !canCondSelect(v, f.Config.arch) {
                                return false
                        }
                }
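
As a hypothetical illustration of the size check in canCondSelect:
on amd64 an 8-bit select keeps its branch, since CMOV has no byte
form, while a 32-bit select is eligible on both arm64 and amd64:

    package main

    // sel8 keeps its branch on amd64: canCondSelect rejects 1-byte
    // integers because there is no 8-bit CMOV.
    func sel8(c bool, a, b int8) int8 {
            if c {
                    return a
            }
            return b
    }

    // sel32 can have its phi rewritten into a CondSelect.
    func sel32(c bool, a, b int32) int32 {
            if c {
                    return a
            }
            return b
    }

    func main() { println(sel8(true, 1, 2), sel32(false, 1, 2)) }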
src/cmd/compile/internal/ssa/branchelim_test.go
index 979ba1d961270e9665bd996ae638bf00bb150435..30bb133f8ed7a5c27d1c1460048cd0361e6fe0a0 100644 (file)
@@ -11,128 +11,162 @@ import (
 
 // Test that a trivial 'if' is eliminated
 func TestBranchElimIf(t *testing.T) {
-       c := testConfig(t)
-       c.config.arch = "arm64" // FIXME
-       boolType := types.New(types.TBOOL)
-       intType := types.New(types.TINT32)
-       fun := c.Fun("entry",
-               Bloc("entry",
-                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
-                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
-                       Valu("const1", OpConst32, intType, 1, nil),
-                       Valu("const2", OpConst32, intType, 2, nil),
-                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
-                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
-                       If("cond", "b2", "b3")),
-               Bloc("b2",
-                       Goto("b3")),
-               Bloc("b3",
-                       Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
-                       Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
-                       Exit("retstore")))
+       var testData = []struct {
+               arch    string
+               intType string
+               ok      bool
+       }{
+               {"arm64", "int32", true},
+               {"amd64", "int32", true},
+               {"amd64", "int8", false},
+       }
 
-       CheckFunc(fun.f)
-       branchelim(fun.f)
-       CheckFunc(fun.f)
-       Deadcode(fun.f)
-       CheckFunc(fun.f)
+       for _, data := range testData {
+               t.Run(data.arch+"/"+data.intType, func(t *testing.T) {
+                       c := testConfigArch(t, data.arch)
+                       boolType := c.config.Types.Bool
+                       var intType *types.Type
+                       switch data.intType {
+                       case "int32":
+                               intType = c.config.Types.Int32
+                       case "int8":
+                               intType = c.config.Types.Int8
+                       default:
+                               t.Fatal("invalid integer type:", data.intType)
+                       }
+                       fun := c.Fun("entry",
+                               Bloc("entry",
+                                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
+                                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
+                                       Valu("const1", OpConst32, intType, 1, nil),
+                                       Valu("const2", OpConst32, intType, 2, nil),
+                                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
+                                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
+                                       If("cond", "b2", "b3")),
+                               Bloc("b2",
+                                       Goto("b3")),
+                               Bloc("b3",
+                                       Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
+                                       Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
+                                       Exit("retstore")))
 
-       if len(fun.f.Blocks) != 1 {
-               t.Errorf("expected 1 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
-       }
-       if fun.values["phi"].Op != OpCondSelect {
-               t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
-       }
-       if fun.values["phi"].Args[2] != fun.values["cond"] {
-               t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
-       }
-       if fun.blocks["entry"].Kind != BlockExit {
-               t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
+                       CheckFunc(fun.f)
+                       branchelim(fun.f)
+                       CheckFunc(fun.f)
+                       Deadcode(fun.f)
+                       CheckFunc(fun.f)
+
+                       if data.ok {
+
+                               if len(fun.f.Blocks) != 1 {
+                                       t.Fatalf("expected 1 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
+                               }
+                               if fun.values["phi"].Op != OpCondSelect {
+                                       t.Fatalf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
+                               }
+                               if fun.values["phi"].Args[2] != fun.values["cond"] {
+                                       t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
+                               }
+                               if fun.blocks["entry"].Kind != BlockExit {
+                                       t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
+                               }
+                       } else {
+                               if len(fun.f.Blocks) != 3 {
+                                       t.Fatalf("expected 3 blocks after branchelim and deadcode; found %d", len(fun.f.Blocks))
+                               }
+                       }
+               })
        }
 }
 
 // Test that a trivial if/else is eliminated
 func TestBranchElimIfElse(t *testing.T) {
-       c := testConfig(t)
-       c.config.arch = "arm64" // FIXME
-       boolType := types.New(types.TBOOL)
-       intType := types.New(types.TINT32)
-       fun := c.Fun("entry",
-               Bloc("entry",
-                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
-                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
-                       Valu("const1", OpConst32, intType, 1, nil),
-                       Valu("const2", OpConst32, intType, 2, nil),
-                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
-                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
-                       If("cond", "b2", "b3")),
-               Bloc("b2",
-                       Goto("b4")),
-               Bloc("b3",
-                       Goto("b4")),
-               Bloc("b4",
-                       Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
-                       Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
-                       Exit("retstore")))
+       for _, arch := range []string{"arm64", "amd64"} {
+               t.Run(arch, func(t *testing.T) {
+                       c := testConfigArch(t, arch)
+                       boolType := c.config.Types.Bool
+                       intType := c.config.Types.Int32
+                       fun := c.Fun("entry",
+                               Bloc("entry",
+                                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
+                                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
+                                       Valu("const1", OpConst32, intType, 1, nil),
+                                       Valu("const2", OpConst32, intType, 2, nil),
+                                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
+                                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
+                                       If("cond", "b2", "b3")),
+                               Bloc("b2",
+                                       Goto("b4")),
+                               Bloc("b3",
+                                       Goto("b4")),
+                               Bloc("b4",
+                                       Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
+                                       Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
+                                       Exit("retstore")))
 
-       CheckFunc(fun.f)
-       branchelim(fun.f)
-       CheckFunc(fun.f)
-       Deadcode(fun.f)
-       CheckFunc(fun.f)
+                       CheckFunc(fun.f)
+                       branchelim(fun.f)
+                       CheckFunc(fun.f)
+                       Deadcode(fun.f)
+                       CheckFunc(fun.f)
 
-       if len(fun.f.Blocks) != 1 {
-               t.Errorf("expected 1 block after branchelim; found %d", len(fun.f.Blocks))
-       }
-       if fun.values["phi"].Op != OpCondSelect {
-               t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
-       }
-       if fun.values["phi"].Args[2] != fun.values["cond"] {
-               t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
-       }
-       if fun.blocks["entry"].Kind != BlockExit {
-               t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
+                       if len(fun.f.Blocks) != 1 {
+                               t.Fatalf("expected 1 block after branchelim; found %d", len(fun.f.Blocks))
+                       }
+                       if fun.values["phi"].Op != OpCondSelect {
+                               t.Fatalf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
+                       }
+                       if fun.values["phi"].Args[2] != fun.values["cond"] {
+                               t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
+                       }
+                       if fun.blocks["entry"].Kind != BlockExit {
+                               t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
+                       }
+               })
        }
 }
 
 // Test that an if/else CFG that loops back
 // into itself does *not* get eliminated.
 func TestNoBranchElimLoop(t *testing.T) {
-       c := testConfig(t)
-       c.config.arch = "arm64" // FIXME
-       boolType := types.New(types.TBOOL)
-       intType := types.New(types.TINT32)
+       for _, arch := range []string{"arm64", "amd64"} {
+               t.Run(arch, func(t *testing.T) {
+                       c := testConfigArch(t, arch)
+                       boolType := c.config.Types.Bool
+                       intType := c.config.Types.Int32
 
-       // The control flow here is totally bogus,
-       // but a dead cycle seems like the only plausible
-       // way to arrive at a diamond CFG that is also a loop.
-       fun := c.Fun("entry",
-               Bloc("entry",
-                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
-                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
-                       Valu("const2", OpConst32, intType, 2, nil),
-                       Valu("const3", OpConst32, intType, 3, nil),
-                       Goto("b5")),
-               Bloc("b2",
-                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
-                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
-                       Valu("phi", OpPhi, intType, 0, nil, "const2", "const3"),
-                       If("cond", "b3", "b4")),
-               Bloc("b3",
-                       Goto("b2")),
-               Bloc("b4",
-                       Goto("b2")),
-               Bloc("b5",
-                       Exit("start")))
+                       // The control flow here is totally bogus,
+                       // but a dead cycle seems like the only plausible
+                       // way to arrive at a diamond CFG that is also a loop.
+                       fun := c.Fun("entry",
+                               Bloc("entry",
+                                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
+                                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
+                                       Valu("const2", OpConst32, intType, 2, nil),
+                                       Valu("const3", OpConst32, intType, 3, nil),
+                                       Goto("b5")),
+                               Bloc("b2",
+                                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
+                                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
+                                       Valu("phi", OpPhi, intType, 0, nil, "const2", "const3"),
+                                       If("cond", "b3", "b4")),
+                               Bloc("b3",
+                                       Goto("b2")),
+                               Bloc("b4",
+                                       Goto("b2")),
+                               Bloc("b5",
+                                       Exit("start")))
 
-       CheckFunc(fun.f)
-       branchelim(fun.f)
-       CheckFunc(fun.f)
+                       CheckFunc(fun.f)
+                       branchelim(fun.f)
+                       CheckFunc(fun.f)
 
-       if len(fun.f.Blocks) != 5 {
-               t.Errorf("expected 5 blocks after branchelim; found %d", len(fun.f.Blocks))
-       }
-       if fun.values["phi"].Op != OpPhi {
-               t.Errorf("expected phi op to remain Phi; found op %s", fun.values["phi"].Op)
+                       if len(fun.f.Blocks) != 5 {
+                               t.Errorf("expected 5 blocks after branchelim; found %d", len(fun.f.Blocks))
+                       }
+                       if fun.values["phi"].Op != OpPhi {
+                               t.Errorf("expected phi op to remain Phi; found op %s", fun.values["phi"].Op)
+                       }
+               })
        }
 }
src/cmd/compile/internal/ssa/export_test.go
index 1fe0bbe6ae3976f313b9f611bb6d1defac70c3e8..8d3bd74fa515d858fd0d9540fb8123d0f852add5 100644 (file)
@@ -7,6 +7,7 @@ package ssa
 import (
        "cmd/compile/internal/types"
        "cmd/internal/obj"
+       "cmd/internal/obj/arm64"
        "cmd/internal/obj/s390x"
        "cmd/internal/obj/x86"
        "cmd/internal/src"
@@ -22,6 +23,7 @@ var Copyelim = copyelim
 var testCtxts = map[string]*obj.Link{
        "amd64": obj.Linknew(&x86.Linkamd64),
        "s390x": obj.Linknew(&s390x.Links390x),
+       "arm64": obj.Linknew(&arm64.Linkarm64),
 }
 
 func testConfig(tb testing.TB) *Conf      { return testConfigArch(tb, "amd64") }
src/cmd/compile/internal/ssa/gen/AMD64.rules
index 647a5d9cd134fcfba9ac83d60f06979adf585aff..ffac45bf66cf0328cdb7dce84aef863a1a6e97cb 100644 (file)
 (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
 (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
 
+// Lowering conditional moves
+// If the condition is a SETxx, we can just run a CMOV from the comparison that was
+// setting the flags.
+// Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
+(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t))
+    -> (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
+(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t)
+    -> (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
+(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
+    -> (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
+
+// If the condition does not set the flags, we need to generate a comparison.
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
+    -> (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
+    -> (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
+    -> (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
+
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
+    -> (CMOVQNE y x (CMPQconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
+    -> (CMOVLNE y x (CMPQconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
+    -> (CMOVWNE y x (CMPQconst [0] check))
+
+// Absorb InvertFlags
+(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
+    -> (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
+    -> (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
+    -> (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+
+// Absorb constants generated during lower
+(CMOVQ(EQ|LE|GE|CC|LS) _ x (FlagEQ)) -> x
+(CMOVQ(NE|LT|GT|CS|HI) y _ (FlagEQ)) -> y
+(CMOVQ(NE|GT|GE|HI|CC) _ x (FlagGT_UGT)) -> x
+(CMOVQ(EQ|LE|LT|LS|CS) y _ (FlagGT_UGT)) -> y
+(CMOVQ(NE|GT|GE|LS|CS) _ x (FlagGT_ULT)) -> x
+(CMOVQ(EQ|LE|LT|HI|CC) y _ (FlagGT_ULT)) -> y
+(CMOVQ(NE|LT|LE|CS|LS) _ x (FlagLT_ULT)) -> x
+(CMOVQ(EQ|GT|GE|HI|CC) y _ (FlagLT_ULT)) -> y
+(CMOVQ(NE|LT|LE|HI|CC) _ x (FlagLT_UGT)) -> x
+(CMOVQ(EQ|GT|GE|CS|LS) y _ (FlagLT_UGT)) -> y
+
 // Miscellaneous
 (Convert <t> x mem) && config.PtrSize == 8 -> (MOVQconvert <t> x mem)
 (Convert <t> x mem) && config.PtrSize == 4 -> (MOVLconvert <t> x mem)
 (CMPLconst x [0]) -> (TESTL x x)
 (CMPWconst x [0]) -> (TESTW x x)
 (CMPBconst x [0]) -> (TESTB x x)
+(TESTQconst [-1] x) -> (TESTQ x x)
+(TESTLconst [-1] x) -> (TESTL x x)
+(TESTWconst [-1] x) -> (TESTW x x)
+(TESTBconst [-1] x) -> (TESTB x x)
 
 // Combining byte loads into larger (unaligned) loads.
 // There are many ways these combinations could occur.  This is
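
To see the second group of rules in action, consider a condition that
is an ordinary bool value rather than a SETxx result (a sketch, not
taken from this CL): the 1-byte condition is zero-extended and the
flags are materialized with CMPQconst [0] before the CMOVQNE:

    package main

    var flag bool // 1-byte condition: MOVBQZX, then CMPQconst [0]

    // pick's condition does not set the flags itself, so lowering
    // emits a compare against zero before the conditional move.
    func pick(a, b int64) int64 {
            if flag {
                    return a
            }
            return b
    }

    func main() { println(pick(1, 2)) }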
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index c5c2e8ceacb9a1b9e30c4ec05ff623eb2be3b3fe..9577890f9a433565f4260aa4ff83ad01acef9b4e 100644 (file)
@@ -132,6 +132,7 @@ func init() {
                gpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
                gp21load  = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
                gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
+               gp21pax   = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
 
                gpstore         = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
                gpstoreconst    = regInfo{inputs: []regMask{gpspsb, 0}}
@@ -340,10 +341,57 @@ func init() {
                {name: "BSRQ", argLength: 1, reg: gp11flags, asm: "BSRQ", typ: "(UInt64,Flags)"}, // # of high-order zeroes in 64-bit arg
                {name: "BSRL", argLength: 1, reg: gp11flags, asm: "BSRL", typ: "(UInt32,Flags)"}, // # of high-order zeroes in 32-bit arg
 
-               // Note ASM for ops moves whole register
-               //
-               {name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
-               {name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
+               // CMOV instructions: 64, 32 and 16-bit sizes.
+               // if arg2 encodes a true result, return arg1, else arg0
+               {name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true},
+               {name: "CMOVQNE", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
+               {name: "CMOVQLT", argLength: 3, reg: gp21, asm: "CMOVQLT", resultInArg0: true},
+               {name: "CMOVQGT", argLength: 3, reg: gp21, asm: "CMOVQGT", resultInArg0: true},
+               {name: "CMOVQLE", argLength: 3, reg: gp21, asm: "CMOVQLE", resultInArg0: true},
+               {name: "CMOVQGE", argLength: 3, reg: gp21, asm: "CMOVQGE", resultInArg0: true},
+               {name: "CMOVQLS", argLength: 3, reg: gp21, asm: "CMOVQLS", resultInArg0: true},
+               {name: "CMOVQHI", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
+               {name: "CMOVQCC", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
+               {name: "CMOVQCS", argLength: 3, reg: gp21, asm: "CMOVQCS", resultInArg0: true},
+
+               {name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true},
+               {name: "CMOVLNE", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
+               {name: "CMOVLLT", argLength: 3, reg: gp21, asm: "CMOVLLT", resultInArg0: true},
+               {name: "CMOVLGT", argLength: 3, reg: gp21, asm: "CMOVLGT", resultInArg0: true},
+               {name: "CMOVLLE", argLength: 3, reg: gp21, asm: "CMOVLLE", resultInArg0: true},
+               {name: "CMOVLGE", argLength: 3, reg: gp21, asm: "CMOVLGE", resultInArg0: true},
+               {name: "CMOVLLS", argLength: 3, reg: gp21, asm: "CMOVLLS", resultInArg0: true},
+               {name: "CMOVLHI", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
+               {name: "CMOVLCC", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
+               {name: "CMOVLCS", argLength: 3, reg: gp21, asm: "CMOVLCS", resultInArg0: true},
+
+               {name: "CMOVWEQ", argLength: 3, reg: gp21, asm: "CMOVWEQ", resultInArg0: true},
+               {name: "CMOVWNE", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
+               {name: "CMOVWLT", argLength: 3, reg: gp21, asm: "CMOVWLT", resultInArg0: true},
+               {name: "CMOVWGT", argLength: 3, reg: gp21, asm: "CMOVWGT", resultInArg0: true},
+               {name: "CMOVWLE", argLength: 3, reg: gp21, asm: "CMOVWLE", resultInArg0: true},
+               {name: "CMOVWGE", argLength: 3, reg: gp21, asm: "CMOVWGE", resultInArg0: true},
+               {name: "CMOVWLS", argLength: 3, reg: gp21, asm: "CMOVWLS", resultInArg0: true},
+               {name: "CMOVWHI", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
+               {name: "CMOVWCC", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
+               {name: "CMOVWCS", argLength: 3, reg: gp21, asm: "CMOVWCS", resultInArg0: true},
+
+               // CMOV with floating point instructions. We need separate pseudo-ops to handle
+               // InvertFlags correctly, and to generate special code that handles NaN (unordered flag).
+               // NOTE: the fact that CMOV*EQF here is marked to generate CMOV*NE is not a bug. See
+               // code generation in amd64/ssa.go.
+               {name: "CMOVQEQF", argLength: 3, reg: gp21pax, asm: "CMOVQNE", resultInArg0: true},
+               {name: "CMOVQNEF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
+               {name: "CMOVQGTF", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
+               {name: "CMOVQGEF", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
+               {name: "CMOVLEQF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
+               {name: "CMOVLNEF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
+               {name: "CMOVLGTF", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
+               {name: "CMOVLGEF", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
+               {name: "CMOVWEQF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
+               {name: "CMOVWNEF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
+               {name: "CMOVWGTF", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
+               {name: "CMOVWGEF", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
 
                {name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
                {name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
@@ -578,7 +626,6 @@ func init() {
                {name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
                //arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
                {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
-
                // LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
                // It saves all GP registers if necessary, but may clobber others.
                {name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), ax}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},
src/cmd/compile/internal/ssa/opGen.go
index 48ec74b3d2b618d9eb74b6ce14dae886d5aeff78..2d73208623df6fc6d73a9f033451b42cdb269228 100644 (file)
@@ -560,7 +560,47 @@ const (
        OpAMD64BSRQ
        OpAMD64BSRL
        OpAMD64CMOVQEQ
+       OpAMD64CMOVQNE
+       OpAMD64CMOVQLT
+       OpAMD64CMOVQGT
+       OpAMD64CMOVQLE
+       OpAMD64CMOVQGE
+       OpAMD64CMOVQLS
+       OpAMD64CMOVQHI
+       OpAMD64CMOVQCC
+       OpAMD64CMOVQCS
        OpAMD64CMOVLEQ
+       OpAMD64CMOVLNE
+       OpAMD64CMOVLLT
+       OpAMD64CMOVLGT
+       OpAMD64CMOVLLE
+       OpAMD64CMOVLGE
+       OpAMD64CMOVLLS
+       OpAMD64CMOVLHI
+       OpAMD64CMOVLCC
+       OpAMD64CMOVLCS
+       OpAMD64CMOVWEQ
+       OpAMD64CMOVWNE
+       OpAMD64CMOVWLT
+       OpAMD64CMOVWGT
+       OpAMD64CMOVWLE
+       OpAMD64CMOVWGE
+       OpAMD64CMOVWLS
+       OpAMD64CMOVWHI
+       OpAMD64CMOVWCC
+       OpAMD64CMOVWCS
+       OpAMD64CMOVQEQF
+       OpAMD64CMOVQNEF
+       OpAMD64CMOVQGTF
+       OpAMD64CMOVQGEF
+       OpAMD64CMOVLEQF
+       OpAMD64CMOVLNEF
+       OpAMD64CMOVLGTF
+       OpAMD64CMOVLGEF
+       OpAMD64CMOVWEQF
+       OpAMD64CMOVWNEF
+       OpAMD64CMOVWGTF
+       OpAMD64CMOVWGEF
        OpAMD64BSWAPQ
        OpAMD64BSWAPL
        OpAMD64POPCNTQ
@@ -6803,6 +6843,141 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "CMOVQNE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQLT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQLT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQGT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQGT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQLE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQLE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQGE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQGE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQLS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQLS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQHI",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQCC",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQCS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQCS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
        {
                name:         "CMOVLEQ",
                argLen:       3,
@@ -6818,6 +6993,472 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "CMOVLNE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLLT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLLT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLGT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLGT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLLE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLLE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLGE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLGE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLLS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLLS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLHI",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLCC",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLCS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLCS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWEQ",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWEQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWNE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWLT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWLT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWGT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWGT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWLE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWLE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWGE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWGE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWLS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWLS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWHI",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWCC",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWCS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWCS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQEQF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 1, // AX
+                       outputs: []outputInfo{
+                               {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQNEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQGTF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQGEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLEQF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLNEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLGTF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLGEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWEQF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWNEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWGTF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWGEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
        {
                name:         "BSWAPQ",
                argLen:       1,
index c500c757ef7bbba367fa279b9919b2c94c1f4a32..eb2489ac7717ba9f4b031d077b25023fc5cdd01e 100644 (file)
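A note on the opcode-table entries above: the integer in each inputInfo/outputInfo pair is a bitmask over the amd64 general-purpose registers in allocation order (bit 0 = AX, bit 1 = CX, ..., bit 15 = R15). 65519 (0xFFEF) is every GP register except SP (bit 4); 65518 (0xFFEE) additionally drops AX, which the CMOVQEQF entry reserves as scratch ("clobbers: 1, // AX"). Floating-point equality needs extra work because UCOMIS* reports unordered (NaN) operands through the parity flag, which is also why the EQF/NEF pseudo-ops carry asm ACMOV*NE rather than a plain EQ form. A minimal decoding sketch, assuming nothing beyond the bit order that the mask comments themselves spell out:

	package main

	import "fmt"

	// regNames lists the amd64 GP registers in the allocation order
	// used by the mask comments in the opcode table above.
	var regNames = []string{
		"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
		"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
	}

	// decode expands a register bitmask into the register list printed
	// in the generated comments.
	func decode(mask uint16) []string {
		var regs []string
		for i, name := range regNames {
			if mask&(1<<uint(i)) != 0 {
				regs = append(regs, name)
			}
		}
		return regs
	}

	func main() {
		fmt.Println(decode(65519)) // all GP registers except SP
		fmt.Println(decode(65518)) // also without AX (CMOVQEQF scratch)
	}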
@@ -55,8 +55,66 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64BSFQ_0(v)
        case OpAMD64BTQconst:
                return rewriteValueAMD64_OpAMD64BTQconst_0(v)
+       case OpAMD64CMOVLCC:
+               return rewriteValueAMD64_OpAMD64CMOVLCC_0(v)
+       case OpAMD64CMOVLCS:
+               return rewriteValueAMD64_OpAMD64CMOVLCS_0(v)
+       case OpAMD64CMOVLEQ:
+               return rewriteValueAMD64_OpAMD64CMOVLEQ_0(v)
+       case OpAMD64CMOVLGE:
+               return rewriteValueAMD64_OpAMD64CMOVLGE_0(v)
+       case OpAMD64CMOVLGT:
+               return rewriteValueAMD64_OpAMD64CMOVLGT_0(v)
+       case OpAMD64CMOVLHI:
+               return rewriteValueAMD64_OpAMD64CMOVLHI_0(v)
+       case OpAMD64CMOVLLE:
+               return rewriteValueAMD64_OpAMD64CMOVLLE_0(v)
+       case OpAMD64CMOVLLS:
+               return rewriteValueAMD64_OpAMD64CMOVLLS_0(v)
+       case OpAMD64CMOVLLT:
+               return rewriteValueAMD64_OpAMD64CMOVLLT_0(v)
+       case OpAMD64CMOVLNE:
+               return rewriteValueAMD64_OpAMD64CMOVLNE_0(v)
+       case OpAMD64CMOVQCC:
+               return rewriteValueAMD64_OpAMD64CMOVQCC_0(v)
+       case OpAMD64CMOVQCS:
+               return rewriteValueAMD64_OpAMD64CMOVQCS_0(v)
        case OpAMD64CMOVQEQ:
                return rewriteValueAMD64_OpAMD64CMOVQEQ_0(v)
+       case OpAMD64CMOVQGE:
+               return rewriteValueAMD64_OpAMD64CMOVQGE_0(v)
+       case OpAMD64CMOVQGT:
+               return rewriteValueAMD64_OpAMD64CMOVQGT_0(v)
+       case OpAMD64CMOVQHI:
+               return rewriteValueAMD64_OpAMD64CMOVQHI_0(v)
+       case OpAMD64CMOVQLE:
+               return rewriteValueAMD64_OpAMD64CMOVQLE_0(v)
+       case OpAMD64CMOVQLS:
+               return rewriteValueAMD64_OpAMD64CMOVQLS_0(v)
+       case OpAMD64CMOVQLT:
+               return rewriteValueAMD64_OpAMD64CMOVQLT_0(v)
+       case OpAMD64CMOVQNE:
+               return rewriteValueAMD64_OpAMD64CMOVQNE_0(v)
+       case OpAMD64CMOVWCC:
+               return rewriteValueAMD64_OpAMD64CMOVWCC_0(v)
+       case OpAMD64CMOVWCS:
+               return rewriteValueAMD64_OpAMD64CMOVWCS_0(v)
+       case OpAMD64CMOVWEQ:
+               return rewriteValueAMD64_OpAMD64CMOVWEQ_0(v)
+       case OpAMD64CMOVWGE:
+               return rewriteValueAMD64_OpAMD64CMOVWGE_0(v)
+       case OpAMD64CMOVWGT:
+               return rewriteValueAMD64_OpAMD64CMOVWGT_0(v)
+       case OpAMD64CMOVWHI:
+               return rewriteValueAMD64_OpAMD64CMOVWHI_0(v)
+       case OpAMD64CMOVWLE:
+               return rewriteValueAMD64_OpAMD64CMOVWLE_0(v)
+       case OpAMD64CMOVWLS:
+               return rewriteValueAMD64_OpAMD64CMOVWLS_0(v)
+       case OpAMD64CMOVWLT:
+               return rewriteValueAMD64_OpAMD64CMOVWLT_0(v)
+       case OpAMD64CMOVWNE:
+               return rewriteValueAMD64_OpAMD64CMOVWNE_0(v)
        case OpAMD64CMPB:
                return rewriteValueAMD64_OpAMD64CMPB_0(v)
        case OpAMD64CMPBconst:
@@ -391,12 +449,20 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64SUBSSmem_0(v)
        case OpAMD64TESTB:
                return rewriteValueAMD64_OpAMD64TESTB_0(v)
+       case OpAMD64TESTBconst:
+               return rewriteValueAMD64_OpAMD64TESTBconst_0(v)
        case OpAMD64TESTL:
                return rewriteValueAMD64_OpAMD64TESTL_0(v)
+       case OpAMD64TESTLconst:
+               return rewriteValueAMD64_OpAMD64TESTLconst_0(v)
        case OpAMD64TESTQ:
                return rewriteValueAMD64_OpAMD64TESTQ_0(v)
+       case OpAMD64TESTQconst:
+               return rewriteValueAMD64_OpAMD64TESTQconst_0(v)
        case OpAMD64TESTW:
                return rewriteValueAMD64_OpAMD64TESTW_0(v)
+       case OpAMD64TESTWconst:
+               return rewriteValueAMD64_OpAMD64TESTWconst_0(v)
        case OpAMD64XADDLlock:
                return rewriteValueAMD64_OpAMD64XADDLlock_0(v)
        case OpAMD64XADDQlock:
@@ -493,6 +559,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpCom64_0(v)
        case OpCom8:
                return rewriteValueAMD64_OpCom8_0(v)
+       case OpCondSelect:
+               return rewriteValueAMD64_OpCondSelect_0(v) || rewriteValueAMD64_OpCondSelect_10(v) || rewriteValueAMD64_OpCondSelect_20(v) || rewriteValueAMD64_OpCondSelect_30(v) || rewriteValueAMD64_OpCondSelect_40(v)
        case OpConst16:
                return rewriteValueAMD64_OpConst16_0(v)
        case OpConst32:
@@ -3285,1097 +3353,1419 @@ func rewriteValueAMD64_OpAMD64BTQconst_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
-       // match: (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _))))
-       // cond: c != 0
-       // result: x
+func rewriteValueAMD64_OpAMD64CMOVLCC_0(v *Value) bool {
+       // match: (CMOVLCC x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLLS x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
+               y := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpSelect1 {
-                       break
-               }
-               v_2_0 := v_2.Args[0]
-               if v_2_0.Op != OpAMD64BSFQ {
-                       break
-               }
-               v_2_0_0 := v_2_0.Args[0]
-               if v_2_0_0.Op != OpAMD64ORQconst {
-                       break
-               }
-               c := v_2_0_0.AuxInt
-               if !(c != 0) {
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLLS)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
        return false
 }
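All of the InvertFlags rewrites in this hunk follow one duality: InvertFlags marks a flags value whose producing comparison had its operands swapped, and testing a condition on swapped-operand flags is the same as testing the mirrored condition on the original flags (a >= b exactly when b <= a, and so on; equality and inequality are symmetric, so EQ and NE map to themselves). A small illustrative sketch of the pairing these generated functions implement (the strings here are just the op-name suffixes, not compiler API):

	package main

	import "fmt"

	// inverted pairs the condition-code suffixes swapped by the
	// InvertFlags rewrites: testing cond on flags from CMP(b, a)
	// equals testing inverted[cond] on flags from CMP(a, b).
	var inverted = map[string]string{
		"CC": "LS", "LS": "CC", // unsigned >= <-> unsigned <=
		"CS": "HI", "HI": "CS", // unsigned <  <-> unsigned >
		"GE": "LE", "LE": "GE", // signed >=   <-> signed <=
		"GT": "LT", "LT": "GT", // signed >    <-> signed <
		"EQ": "EQ", "NE": "NE", // symmetric, unchanged
	}

	func main() {
		// Mirrors the rule above:
		// (CMOVLCC x y (InvertFlags cond)) -> (CMOVLLS x y cond)
		fmt.Println("CMOVL" + inverted["CC"]) // CMOVLLS
	}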
-func rewriteValueAMD64_OpAMD64CMPB_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPB x (MOVLconst [c]))
+func rewriteValueAMD64_OpAMD64CMOVLCS_0(v *Value) bool {
+       // match: (CMOVLCS x y (InvertFlags cond))
        // cond:
-       // result: (CMPBconst x [int64(int8(c))])
+       // result: (CMOVLHI x y cond)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64CMPBconst)
-               v.AuxInt = int64(int8(c))
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLHI)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPB (MOVLconst [c]) x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLEQ_0(v *Value) bool {
+       // match: (CMOVLEQ x y (InvertFlags cond))
        // cond:
-       // result: (InvertFlags (CMPBconst x [int64(int8(c))]))
+       // result: (CMOVLEQ x y cond)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v0.AuxInt = int64(int8(c))
-               v0.AddArg(x)
-               v.AddArg(v0)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLEQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPBmem {sym} [off] ptr x mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLGE_0(v *Value) bool {
+       // match: (CMOVLGE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLLE x y cond)
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVBload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64CMPBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLLE)
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPB x l:(MOVBload {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPBmem {sym} [off] ptr x mem))
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLGT_0(v *Value) bool {
+       // match: (CMOVLGT x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLLT x y cond)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVBload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBmem, types.TypeFlags)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(x)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLLT)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPBconst_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)==int8(y)
-       // result: (FlagEQ)
+func rewriteValueAMD64_OpAMD64CMOVLHI_0(v *Value) bool {
+       // match: (CMOVLHI x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLCS x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) == int8(y)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagEQ)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLCS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)<int8(y) && uint8(x)<uint8(y)
-       // result: (FlagLT_ULT)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLLE_0(v *Value) bool {
+       // match: (CMOVLLE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLGE x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) < int8(y) && uint8(x) < uint8(y)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLGE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)<int8(y) && uint8(x)>uint8(y)
-       // result: (FlagLT_UGT)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLLS_0(v *Value) bool {
+       // match: (CMOVLLS x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLCC x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) < int8(y) && uint8(x) > uint8(y)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagLT_UGT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLCC)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)>int8(y) && uint8(x)<uint8(y)
-       // result: (FlagGT_ULT)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLLT_0(v *Value) bool {
+       // match: (CMOVLLT x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLGT x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) > int8(y) && uint8(x) < uint8(y)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagGT_ULT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLGT)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)>int8(y) && uint8(x)>uint8(y)
-       // result: (FlagGT_UGT)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLNE_0(v *Value) bool {
+       // match: (CMOVLNE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLNE x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) > int8(y) && uint8(x) > uint8(y)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagGT_UGT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLNE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPBconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= int8(m) && int8(m) < int8(n)
-       // result: (FlagLT_ULT)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVQCC_0(v *Value) bool {
+       // match: (CMOVQCC x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQLS x y cond)
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               m := v_0.AuxInt
-               if !(0 <= int8(m) && int8(m) < int8(n)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQLS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPBconst (ANDL x y) [0])
+       // match: (CMOVQCC _ x (FlagEQ))
        // cond:
-       // result: (TESTB x y)
+       // result: x
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64TESTB)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (CMPBconst (ANDLconst [c] x) [0])
+       // match: (CMOVQCC _ x (FlagGT_UGT))
        // cond:
-       // result: (TESTBconst [int64(int8(c))] x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64TESTBconst)
-               v.AuxInt = int64(int8(c))
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPBconst x [0])
+       // match: (CMOVQCC y _ (FlagGT_ULT))
        // cond:
-       // result: (TESTB x x)
+       // result: y
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64TESTB)
-               v.AddArg(x)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPBconst l:(MOVBload {sym} [off] ptr mem) [c])
-       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMOVQCC y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               c := v.AuxInt
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVBload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(c, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPBmem_0(v *Value) bool {
-       // match: (CMPBmem {sym} [off] ptr (MOVLconst [c]) mem)
-       // cond: validValAndOff(int64(int8(c)),off)
-       // result: (CMPBconstmem {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
+       // match: (CMOVQCC _ x (FlagLT_UGT))
+       // cond:
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(int64(int8(c)), off)) {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64CMPBconstmem)
-               v.AuxInt = makeValAndOff(int64(int8(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
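The constant-flag rules above (repeated for each CMOV variant below) fold the conditional move away entirely: FlagEQ, FlagLT_ULT, FlagLT_UGT, FlagGT_ULT and FlagGT_UGT are pseudo-flags recording a comparison outcome known at compile time, carrying the signed and unsigned orderings separately. Each rule then returns the second value operand when the op's condition holds under that outcome, and the first when it fails. A sketch for CMOVQCC (carry clear, i.e. unsigned >=), assuming that operand convention; the flag names are strings purely for illustration:

	package main

	import "fmt"

	// foldCMOVQCC mirrors the five constant-flag rules for CMOVQCC:
	// once the comparison outcome is known, the move picks a side.
	func foldCMOVQCC(y, x int64, flags string) int64 {
		switch flags {
		case "FlagEQ", "FlagGT_UGT", "FlagLT_UGT":
			return x // unsigned >= holds
		case "FlagGT_ULT", "FlagLT_ULT":
			return y // unsigned >= fails
		}
		panic("flags not constant; the rewrite does not fire")
	}

	func main() {
		fmt.Println(foldCMOVQCC(1, 2, "FlagEQ")) // 2
	}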
-func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPL x (MOVLconst [c]))
+func rewriteValueAMD64_OpAMD64CMOVQCS_0(v *Value) bool {
+       // match: (CMOVQCS x y (InvertFlags cond))
        // cond:
-       // result: (CMPLconst x [c])
+       // result: (CMOVQHI x y cond)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64CMPLconst)
-               v.AuxInt = c
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQHI)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPL (MOVLconst [c]) x)
+       // match: (CMOVQCS y _ (FlagEQ))
        // cond:
-       // result: (InvertFlags (CMPLconst x [c]))
+       // result: y
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPLmem {sym} [off] ptr x mem)
+       // match: (CMOVQCS y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQCS _ x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
                x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64CMPLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (CMPL x l:(MOVLload {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPLmem {sym} [off] ptr x mem))
+       // match: (CMOVQCS _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQCS y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLmem, types.TypeFlags)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(x)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)==int32(y)
-       // result: (FlagEQ)
+func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
+       // match: (CMOVQEQ x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQEQ x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int32(x) == int32(y)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagEQ)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQEQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)<int32(y) && uint32(x)<uint32(y)
-       // result: (FlagLT_ULT)
+       // match: (CMOVQEQ _ x (FlagEQ))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) < int32(y) && uint32(x) < uint32(y)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQEQ y _ (FlagGT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)<int32(y) && uint32(x)>uint32(y)
-       // result: (FlagLT_UGT)
+       // match: (CMOVQEQ y _ (FlagGT_ULT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) < int32(y) && uint32(x) > uint32(y)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQEQ y _ (FlagLT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_UGT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)>int32(y) && uint32(x)<uint32(y)
-       // result: (FlagGT_ULT)
+       // match: (CMOVQEQ y _ (FlagLT_UGT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int32(x) > int32(y) && uint32(x) < uint32(y)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagGT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)>int32(y) && uint32(x)>uint32(y)
-       // result: (FlagGT_UGT)
+       // match: (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _))))
+       // cond: c != 0
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               x := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpSelect1 {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) > int32(y) && uint32(x) > uint32(y)) {
+               v_2_0 := v_2.Args[0]
+               if v_2_0.Op != OpAMD64BSFQ {
                        break
                }
-               v.reset(OpAMD64FlagGT_UGT)
-               return true
-       }
-       // match: (CMPLconst (SHRLconst _ [c]) [n])
-       // cond: 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)
-       // result: (FlagLT_ULT)
-       for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRLconst {
+               v_2_0_0 := v_2_0.Args[0]
+               if v_2_0_0.Op != OpAMD64ORQconst {
                        break
                }
-               c := v_0.AuxInt
-               if !(0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)) {
+               c := v_2_0_0.AuxInt
+               if !(c != 0) {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPLconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= int32(m) && int32(m) < int32(n)
-       // result: (FlagLT_ULT)
+       return false
+}
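The final CMOVQEQ rule above is the pre-existing BSFQ special case, regrouped with the new InvertFlags and constant-flag rewrites: BSFQ sets the zero flag only when its input is zero, and OR-ing in a nonzero constant c rules that out, so the EQ arm is dead and the conditional move reduces to its first argument. This shape can arise when a trailing-zero count is taken of a value that has already been OR-ed with a known nonzero constant.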
+func rewriteValueAMD64_OpAMD64CMOVQGE_0(v *Value) bool {
+       // match: (CMOVQGE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQLE x y cond)
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               m := v_0.AuxInt
-               if !(0 <= int32(m) && int32(m) < int32(n)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQLE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPLconst (ANDL x y) [0])
+       // match: (CMOVQGE _ x (FlagEQ))
        // cond:
-       // result: (TESTL x y)
+       // result: x
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64TESTL)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (CMPLconst (ANDLconst [c] x) [0])
+       // match: (CMOVQGE _ x (FlagGT_UGT))
        // cond:
-       // result: (TESTLconst [c] x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64TESTLconst)
-               v.AuxInt = c
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPLconst x [0])
+       // match: (CMOVQGE _ x (FlagGT_ULT))
        // cond:
-       // result: (TESTL x x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64TESTL)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPLconst_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPLconst l:(MOVLload {sym} [off] ptr mem) [c])
-       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMOVQGE y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               c := v.AuxInt
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(c, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPLmem_0(v *Value) bool {
-       // match: (CMPLmem {sym} [off] ptr (MOVLconst [c]) mem)
-       // cond: validValAndOff(c,off)
-       // result: (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMOVQGE y _ (FlagLT_UGT))
+       // cond:
+       // result: y
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(c, off)) {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64CMPLconstmem)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPQ_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (CMPQconst x [c])
+func rewriteValueAMD64_OpAMD64CMOVQGT_0(v *Value) bool {
+       // match: (CMOVQGT x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQLT x y cond)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64CMPQconst)
-               v.AuxInt = c
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQLT)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (InvertFlags (CMPQconst x [c]))
+       // match: (CMOVQGT y _ (FlagEQ))
+       // cond:
+       // result: y
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               c := v_0.AuxInt
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQGT _ x (FlagGT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
                x := v.Args[1]
-               if !(is32Bit(c)) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPQmem {sym} [off] ptr x mem)
+       // match: (CMOVQGT _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
+               _ = v.Args[2]
                x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64CMPQmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (CMPQ x l:(MOVQload {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPQmem {sym} [off] ptr x mem))
+       // match: (CMOVQGT y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQGT y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQmem, types.TypeFlags)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(x)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPQconst_0(v *Value) bool {
-       // match: (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32])
+func rewriteValueAMD64_OpAMD64CMOVQHI_0(v *Value) bool {
+       // match: (CMOVQHI x y (InvertFlags cond))
        // cond:
-       // result: (FlagLT_ULT)
+       // result: (CMOVQCS x y cond)
        for {
-               if v.AuxInt != 32 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               if v_0_0.AuxInt != -16 {
-                       break
-               }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 15 {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQCS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32])
+       // match: (CMOVQHI y _ (FlagEQ))
        // cond:
-       // result: (FlagLT_ULT)
+       // result: y
        for {
-               if v.AuxInt != 32 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               if v_0_0.AuxInt != -8 {
-                       break
-               }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 7 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x==y
-       // result: (FlagEQ)
+       // match: (CMOVQHI _ x (FlagGT_UGT))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(x == y) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagEQ)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x<y && uint64(x)<uint64(y)
-       // result: (FlagLT_ULT)
+       // match: (CMOVQHI y _ (FlagGT_ULT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(x < y && uint64(x) < uint64(y)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x<y && uint64(x)>uint64(y)
-       // result: (FlagLT_UGT)
+       // match: (CMOVQHI y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(x < y && uint64(x) > uint64(y)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_UGT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x>y && uint64(x)<uint64(y)
-       // result: (FlagGT_ULT)
+       // match: (CMOVQHI _ x (FlagLT_UGT))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(x > y && uint64(x) < uint64(y)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagGT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x>y && uint64(x)>uint64(y)
-       // result: (FlagGT_UGT)
+       return false
+}
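
The flag-constant foldings above all share one shape: when the third argument of a CMOV is a known flag value, the select is decided at compile time and the CMOV collapses to a plain copy of one operand. A minimal sketch of the semantics (plain Go, hypothetical names, not part of the generated file):

    package main

    import "fmt"

    // cmovqhi models (CMOVQHI y x flags): the second operand is selected
    // when the flags encode "unsigned higher" (UGT), the first otherwise.
    func cmovqhi(y, x uint64, higher bool) uint64 {
            if higher {
                    return x
            }
            return y
    }

    func main() {
            // FlagEQ: not higher, so the first operand survives,
            // mirroring (CMOVQHI y _ (FlagEQ)) -> y.
            fmt.Println(cmovqhi(1, 2, false)) // 1
            // FlagGT_UGT: higher, mirroring (CMOVQHI _ x (FlagGT_UGT)) -> x.
            fmt.Println(cmovqhi(1, 2, true)) // 2
    }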
+func rewriteValueAMD64_OpAMD64CMOVQLE_0(v *Value) bool {
+       // match: (CMOVQLE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQGE x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(x > y && uint64(x) > uint64(y)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagGT_UGT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQGE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPQconst (MOVBQZX _) [c])
-       // cond: 0xFF < c
-       // result: (FlagLT_ULT)
+       // match: (CMOVQLE _ x (FlagEQ))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQZX {
-                       break
-               }
-               if !(0xFF < c) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQconst (MOVWQZX _) [c])
-       // cond: 0xFFFF < c
-       // result: (FlagLT_ULT)
+       // match: (CMOVQLE y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVWQZX {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               if !(0xFFFF < c) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQLE y _ (FlagGT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (MOVLQZX _) [c])
-       // cond: 0xFFFFFFFF < c
-       // result: (FlagLT_ULT)
+       // match: (CMOVQLE _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLQZX {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               if !(0xFFFFFFFF < c) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQLE _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPQconst_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPQconst (SHRQconst _ [c]) [n])
-       // cond: 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)
-       // result: (FlagLT_ULT)
+func rewriteValueAMD64_OpAMD64CMOVQLS_0(v *Value) bool {
+       // match: (CMOVQLS x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQCC x y cond)
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               c := v_0.AuxInt
-               if !(0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)) {
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQCC)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVQLS _ x (FlagEQ))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQconst (ANDQconst _ [m]) [n])
-       // cond: 0 <= m && m < n
-       // result: (FlagLT_ULT)
+       // match: (CMOVQLS y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDQconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               m := v_0.AuxInt
-               if !(0 <= m && m < n) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQLS _ x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= m && m < n
-       // result: (FlagLT_ULT)
+       // match: (CMOVQLS _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               m := v_0.AuxInt
-               if !(0 <= m && m < n) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQLS y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (ANDQ x y) [0])
+       return false
+}
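
For CMOVQLS (unsigned "below or same") only the unsigned half of each flag matters: EQ and ULT select the second operand, UGT keeps the first, whatever the signed ordering was. A small editorial model of the foldings above (cmovqls is hypothetical, not a compiler function):

    package main

    import "fmt"

    // cmovqls models (CMOVQLS y x flags) by recomputing the unsigned
    // "below or same" test the flags would have recorded for a <= b.
    func cmovqls(y, x, a, b uint64) uint64 {
            if a <= b {
                    return x // ULT or EQ: the condition holds
            }
            return y // UGT: the condition fails
    }

    func main() {
            fmt.Println(cmovqls(10, 20, 3, 3)) // 20: FlagEQ selects the second operand
            fmt.Println(cmovqls(10, 20, 9, 3)) // 10: FlagGT_UGT keeps the first
    }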
+func rewriteValueAMD64_OpAMD64CMOVQLT_0(v *Value) bool {
+       // match: (CMOVQLT x y (InvertFlags cond))
        // cond:
-       // result: (TESTQ x y)
+       // result: (CMOVQGT x y cond)
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDQ {
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQGT)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVQLT y _ (FlagEQ))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64TESTQ)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (ANDQconst [c] x) [0])
+       // match: (CMOVQLT y _ (FlagGT_UGT))
        // cond:
-       // result: (TESTQconst [c] x)
+       // result: y
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDQconst {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQLT y _ (FlagGT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64TESTQconst)
-               v.AuxInt = c
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQLT _ x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPQconst x [0])
+       // match: (CMOVQLT _ x (FlagLT_UGT))
        // cond:
-       // result: (TESTQ x x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
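
The InvertFlags rewrites for the signed conditions rely on the usual duality: swapping the operands of the comparison turns < into > and <= into >=. A quick editorial check of the identities these rules lean on:

    package main

    import "fmt"

    func main() {
            for _, p := range [][2]int64{{1, 2}, {2, 1}, {3, 3}} {
                    a, b := p[0], p[1]
                    // LT with swapped operands is GT:
                    // (CMOVQLT x y (InvertFlags c)) -> (CMOVQGT x y c)
                    fmt.Println((a < b) == (b > a)) // true
                    // LE with swapped operands is GE:
                    // (CMOVQLE x y (InvertFlags c)) -> (CMOVQGE x y c)
                    fmt.Println((a <= b) == (b >= a)) // true
            }
    }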
+func rewriteValueAMD64_OpAMD64CMOVQNE_0(v *Value) bool {
+       // match: (CMOVQNE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQNE x y cond)
+       for {
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64TESTQ)
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
+                       break
+               }
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQNE)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVQNE y _ (FlagEQ))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQNE _ x (FlagGT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPQconst l:(MOVQload {sym} [off] ptr mem) [c])
-       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMOVQNE _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQNE _ x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(c, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQNE _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
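
Unlike the ordered conditions, CMOVQNE (and CMOVQEQ) keep their own opcode under InvertFlags: equality is symmetric, so the rewrite only strips the InvertFlags wrapper. The identity, spelled out (editorial):

    package main

    import "fmt"

    func main() {
            a, b := int64(3), int64(7)
            // Equality does not care about operand order, so swapping the
            // CMP operands (InvertFlags) leaves EQ and NE tests unchanged.
            fmt.Println((a != b) == (b != a)) // true
    }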
-func rewriteValueAMD64_OpAMD64CMPQmem_0(v *Value) bool {
-       // match: (CMPQmem {sym} [off] ptr (MOVQconst [c]) mem)
-       // cond: validValAndOff(c,off)
-       // result: (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+func rewriteValueAMD64_OpAMD64CMOVWCC_0(v *Value) bool {
+       // match: (CMOVWCC x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWLS x y cond)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(c, off)) {
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWLS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWCS_0(v *Value) bool {
+       // match: (CMOVWCS x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWHI x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64CMPQconstmem)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWHI)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64CMOVWEQ_0(v *Value) bool {
+       // match: (CMOVWEQ x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWEQ x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
+                       break
+               }
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWEQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWGE_0(v *Value) bool {
+       // match: (CMOVWGE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWLE x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
+                       break
+               }
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWLE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWGT_0(v *Value) bool {
+       // match: (CMOVWGT x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWLT x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
+                       break
+               }
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWLT)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWHI_0(v *Value) bool {
+       // match: (CMOVWHI x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWCS x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
+                       break
+               }
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWCS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWLE_0(v *Value) bool {
+       // match: (CMOVWLE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWGE x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
+                       break
+               }
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWGE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWLS_0(v *Value) bool {
+       // match: (CMOVWLS x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWCC x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
+                       break
+               }
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWCC)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWLT_0(v *Value) bool {
+       // match: (CMOVWLT x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWGT x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
+                       break
+               }
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWGT)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWNE_0(v *Value) bool {
+       // match: (CMOVWNE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWNE x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
+                       break
+               }
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWNE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       return false
+}
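
Taken together, the CMOVW rules above spell out the full inversion table applied when the comparison operands are swapped. A compact editorial summary (the map is illustrative, not a compiler data structure):

    package main

    import "fmt"

    // invert records, for each amd64 condition suffix, the suffix testing
    // the same relation once the comparison operands are swapped.
    var invert = map[string]string{
            "EQ": "EQ", "NE": "NE", // symmetric
            "LT": "GT", "GT": "LT", // signed strict
            "LE": "GE", "GE": "LE", // signed non-strict
            "CS": "HI", "HI": "CS", // unsigned: below <-> above
            "LS": "CC", "CC": "LS", // unsigned: below-or-same <-> above-or-equal
    }

    func main() {
            // e.g. (CMOVWLS x y (InvertFlags c)) -> (CMOVWCC x y c)
            fmt.Println(invert["LS"]) // CC
    }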
+func rewriteValueAMD64_OpAMD64CMPB_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (CMPW x (MOVLconst [c]))
+       // match: (CMPB x (MOVLconst [c]))
        // cond:
-       // result: (CMPWconst x [int64(int16(c))])
+       // result: (CMPBconst x [int64(int8(c))])
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -4384,14 +4774,14 @@ func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64CMPWconst)
-               v.AuxInt = int64(int16(c))
+               v.reset(OpAMD64CMPBconst)
+               v.AuxInt = int64(int8(c))
                v.AddArg(x)
                return true
        }
-       // match: (CMPW (MOVLconst [c]) x)
+       // match: (CMPB (MOVLconst [c]) x)
        // cond:
-       // result: (InvertFlags (CMPWconst x [int64(int16(c))]))
+       // result: (InvertFlags (CMPBconst x [int64(int8(c))]))
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
@@ -4401,19 +4791,19 @@ func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
                c := v_0.AuxInt
                x := v.Args[1]
                v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v0.AuxInt = int64(int16(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v0.AuxInt = int64(int8(c))
                v0.AddArg(x)
                v.AddArg(v0)
                return true
        }
-       // match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
+       // match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPWmem {sym} [off] ptr x mem)
+       // result: (CMPBmem {sym} [off] ptr x mem)
        for {
                _ = v.Args[1]
                l := v.Args[0]
-               if l.Op != OpAMD64MOVWload {
+               if l.Op != OpAMD64MOVBload {
                        break
                }
                off := l.AuxInt
@@ -4425,7 +4815,7 @@ func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64CMPWmem)
+               v.reset(OpAMD64CMPBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -4433,14 +4823,14 @@ func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (CMPW x l:(MOVWload {sym} [off] ptr mem))
+       // match: (CMPB x l:(MOVBload {sym} [off] ptr mem))
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPWmem {sym} [off] ptr x mem))
+       // result: (InvertFlags (CMPBmem {sym} [off] ptr x mem))
        for {
                _ = v.Args[1]
                x := v.Args[0]
                l := v.Args[1]
-               if l.Op != OpAMD64MOVWload {
+               if l.Op != OpAMD64MOVBload {
                        break
                }
                off := l.AuxInt
@@ -4452,7 +4842,7 @@ func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
                        break
                }
                v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWmem, types.TypeFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBmem, types.TypeFlags)
                v0.AuxInt = off
                v0.Aux = sym
                v0.AddArg(ptr)
@@ -4463,11 +4853,11 @@ func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
        }
        return false
 }
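
CMPB inspects only the low byte, so the rules funnel the constant through int8 before storing it as AuxInt; anything outside the byte range wraps. A one-line editorial demonstration of that truncation:

    package main

    import "fmt"

    func main() {
            c := int64(0x1ff)
            // int64(int8(c)) keeps only the low byte, sign-extended:
            // 0x1ff -> 0xff -> -1.
            fmt.Println(int64(int8(c))) // -1
    }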
-func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64CMPBconst_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)==int16(y)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)==int8(y)
        // result: (FlagEQ)
        for {
                y := v.AuxInt
@@ -4476,14 +4866,14 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
                        break
                }
                x := v_0.AuxInt
-               if !(int16(x) == int16(y)) {
+               if !(int8(x) == int8(y)) {
                        break
                }
                v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)<int16(y) && uint16(x)<uint16(y)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)<int8(y) && uint8(x)<uint8(y)
        // result: (FlagLT_ULT)
        for {
                y := v.AuxInt
@@ -4492,14 +4882,14 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
                        break
                }
                x := v_0.AuxInt
-               if !(int16(x) < int16(y) && uint16(x) < uint16(y)) {
+               if !(int8(x) < int8(y) && uint8(x) < uint8(y)) {
                        break
                }
                v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)<int16(y) && uint16(x)>uint16(y)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)<int8(y) && uint8(x)>uint8(y)
        // result: (FlagLT_UGT)
        for {
                y := v.AuxInt
@@ -4508,14 +4898,14 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
                        break
                }
                x := v_0.AuxInt
-               if !(int16(x) < int16(y) && uint16(x) > uint16(y)) {
+               if !(int8(x) < int8(y) && uint8(x) > uint8(y)) {
                        break
                }
                v.reset(OpAMD64FlagLT_UGT)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)>int16(y) && uint16(x)<uint16(y)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)>int8(y) && uint8(x)<uint8(y)
        // result: (FlagGT_ULT)
        for {
                y := v.AuxInt
@@ -4524,14 +4914,14 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
                        break
                }
                x := v_0.AuxInt
-               if !(int16(x) > int16(y) && uint16(x) < uint16(y)) {
+               if !(int8(x) > int8(y) && uint8(x) < uint8(y)) {
                        break
                }
                v.reset(OpAMD64FlagGT_ULT)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)>int16(y) && uint16(x)>uint16(y)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)>int8(y) && uint8(x)>uint8(y)
        // result: (FlagGT_UGT)
        for {
                y := v.AuxInt
@@ -4540,14 +4930,14 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
                        break
                }
                x := v_0.AuxInt
-               if !(int16(x) > int16(y) && uint16(x) > uint16(y)) {
+               if !(int8(x) > int8(y) && uint8(x) > uint8(y)) {
                        break
                }
                v.reset(OpAMD64FlagGT_UGT)
                return true
        }
-       // match: (CMPWconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= int16(m) && int16(m) < int16(n)
+       // match: (CMPBconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= int8(m) && int8(m) < int8(n)
        // result: (FlagLT_ULT)
        for {
                n := v.AuxInt
@@ -4556,15 +4946,15 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
                        break
                }
                m := v_0.AuxInt
-               if !(0 <= int16(m) && int16(m) < int16(n)) {
+               if !(0 <= int8(m) && int8(m) < int8(n)) {
                        break
                }
                v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (CMPWconst (ANDL x y) [0])
+       // match: (CMPBconst (ANDL x y) [0])
        // cond:
-       // result: (TESTW x y)
+       // result: (TESTB x y)
        for {
                if v.AuxInt != 0 {
                        break
@@ -4576,14 +4966,14 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
                _ = v_0.Args[1]
                x := v_0.Args[0]
                y := v_0.Args[1]
-               v.reset(OpAMD64TESTW)
+               v.reset(OpAMD64TESTB)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (CMPWconst (ANDLconst [c] x) [0])
+       // match: (CMPBconst (ANDLconst [c] x) [0])
        // cond:
-       // result: (TESTWconst [int64(int16(c))] x)
+       // result: (TESTBconst [int64(int8(c))] x)
        for {
                if v.AuxInt != 0 {
                        break
@@ -4594,31 +4984,31 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
                }
                c := v_0.AuxInt
                x := v_0.Args[0]
-               v.reset(OpAMD64TESTWconst)
-               v.AuxInt = int64(int16(c))
+               v.reset(OpAMD64TESTBconst)
+               v.AuxInt = int64(int8(c))
                v.AddArg(x)
                return true
        }
-       // match: (CMPWconst x [0])
+       // match: (CMPBconst x [0])
        // cond:
-       // result: (TESTW x x)
+       // result: (TESTB x x)
        for {
                if v.AuxInt != 0 {
                        break
                }
                x := v.Args[0]
-               v.reset(OpAMD64TESTW)
+               v.reset(OpAMD64TESTB)
                v.AddArg(x)
                v.AddArg(x)
                return true
        }
-       // match: (CMPWconst l:(MOVWload {sym} [off] ptr mem) [c])
+       // match: (CMPBconst l:(MOVBload {sym} [off] ptr mem) [c])
        // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                c := v.AuxInt
                l := v.Args[0]
-               if l.Op != OpAMD64MOVWload {
+               if l.Op != OpAMD64MOVBload {
                        break
                }
                off := l.AuxInt
@@ -4630,7 +5020,7 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
                        break
                }
                b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
                v.reset(OpCopy)
                v.AddArg(v0)
                v0.AuxInt = makeValAndOff(c, off)
@@ -4641,10 +5031,10 @@ func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
        }
        return false
 }
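
The MOVLconst foldings at the top of CMPBconst enumerate every combination of the signed and unsigned orderings of the two truncated bytes. An editorial sketch of that decision (flagFor is hypothetical, mirroring the conditions above):

    package main

    import "fmt"

    // flagFor mirrors the (CMPBconst (MOVLconst [x]) [y]) foldings: the
    // signed and unsigned orderings of the low bytes select the flag.
    func flagFor(x, y int64) string {
            sx, sy := int8(x), int8(y)
            ux, uy := uint8(x), uint8(y)
            switch {
            case sx == sy:
                    return "FlagEQ"
            case sx < sy && ux < uy:
                    return "FlagLT_ULT"
            case sx < sy && ux > uy:
                    return "FlagLT_UGT"
            case sx > sy && ux < uy:
                    return "FlagGT_ULT"
            default:
                    return "FlagGT_UGT"
            }
    }

    func main() {
            // -1 < 1 signed, but 0xff > 0x01 unsigned.
            fmt.Println(flagFor(-1, 1)) // FlagLT_UGT
    }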
-func rewriteValueAMD64_OpAMD64CMPWmem_0(v *Value) bool {
-       // match: (CMPWmem {sym} [off] ptr (MOVLconst [c]) mem)
-       // cond: validValAndOff(int64(int16(c)),off)
-       // result: (CMPWconstmem {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
+func rewriteValueAMD64_OpAMD64CMPBmem_0(v *Value) bool {
+       // match: (CMPBmem {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(int64(int8(c)),off)
+       // result: (CMPBconstmem {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -4656,11 +5046,11 @@ func rewriteValueAMD64_OpAMD64CMPWmem_0(v *Value) bool {
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               if !(validValAndOff(int64(int16(c)), off)) {
+               if !(validValAndOff(int64(int8(c)), off)) {
                        break
                }
-               v.reset(OpAMD64CMPWconstmem)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.reset(OpAMD64CMPBconstmem)
+               v.AuxInt = makeValAndOff(int64(int8(c)), off)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
@@ -4668,2035 +5058,3090 @@ func rewriteValueAMD64_OpAMD64CMPWmem_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPXCHGLlock_0(v *Value) bool {
-       // match: (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
-       // cond: is32Bit(off1+off2)
-       // result: (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)
+func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPL x (MOVLconst [c]))
+       // cond:
+       // result: (CMPLconst x [c])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               old := v.Args[1]
-               new_ := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(off1 + off2)) {
+               c := v_1.AuxInt
+               v.reset(OpAMD64CMPLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPL (MOVLconst [c]) x)
+       // cond:
+       // result: (InvertFlags (CMPLconst x [c]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64CMPXCHGLlock)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(old)
-               v.AddArg(new_)
-               v.AddArg(mem)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v *Value) bool {
-       // match: (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
-       // cond: is32Bit(off1+off2)
-       // result: (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
+       // match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPLmem {sym} [off] ptr x mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               old := v.Args[1]
-               new_ := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(off1 + off2)) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64CMPXCHGQlock)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64CMPLmem)
+               v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(old)
-               v.AddArg(new_)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64LEAL_0(v *Value) bool {
-       // match: (LEAL [c] {s} (ADDLconst [d] x))
-       // cond: is32Bit(c+d)
-       // result: (LEAL [c+d] {s} x)
+       // match: (CMPL x l:(MOVLload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPLmem {sym} [off] ptr x mem))
        for {
-               c := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(is32Bit(c + d)) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64LEAL)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLmem, types.TypeFlags)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(x)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
        return false
 }
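
The two MOVLload rules fold a single-use load into the compare, replacing a load-plus-CMPL pair with one CMPLmem; when the load feeds the right-hand operand, the result additionally passes through InvertFlags to compensate for the swap. Roughly, at the source level (editorial sketch):

    package main

    import "fmt"

    // cmp shows the pre-merge shape: a discrete load feeding a compare.
    // After the rewrite the compiler emits a single CMPL with a memory
    // operand (CMPLmem) instead; the Go source itself is unchanged.
    func cmp(p *int32, x int32) bool {
            tmp := *p      // MOVLload
            return tmp < x // CMPL
    }

    func main() {
            v := int32(4)
            fmt.Println(cmp(&v, 5)) // true
    }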
-func rewriteValueAMD64_OpAMD64LEAQ_0(v *Value) bool {
-       // match: (LEAQ [c] {s} (ADDQconst [d] x))
-       // cond: is32Bit(c+d)
-       // result: (LEAQ [c+d] {s} x)
+func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)==int32(y)
+       // result: (FlagEQ)
        for {
-               c := v.AuxInt
-               s := v.Aux
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(is32Bit(c + d)) {
+               x := v_0.AuxInt
+               if !(int32(x) == int32(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (LEAQ [c] {s} (ADDQ x y))
-       // cond: x.Op != OpSB && y.Op != OpSB
-       // result: (LEAQ1 [c] {s} x y)
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)<int32(y) && uint32(x)<uint32(y)
+       // result: (FlagLT_ULT)
        for {
-               c := v.AuxInt
-               s := v.Aux
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(x.Op != OpSB && y.Op != OpSB) {
+               x := v_0.AuxInt
+               if !(int32(x) < int32(y) && uint32(x) < uint32(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)<int32(y) && uint32(x)>uint32(y)
+       // result: (FlagLT_UGT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x := v_0.AuxInt
+               if !(int32(x) < int32(y) && uint32(x) > uint32(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
+               v.reset(OpAMD64FlagLT_UGT)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)>int32(y) && uint32(x)<uint32(y)
+       // result: (FlagGT_ULT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x := v_0.AuxInt
+               if !(int32(x) > int32(y) && uint32(x) < uint32(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagGT_ULT)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)>int32(y) && uint32(x)>uint32(y)
+       // result: (FlagGT_UGT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ2 {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x := v_0.AuxInt
+               if !(int32(x) > int32(y) && uint32(x) > uint32(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagGT_UGT)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPLconst (SHRLconst _ [c]) [n])
+       // cond: 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)
+       // result: (FlagLT_ULT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               n := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               if v_0.Op != OpAMD64SHRLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               c := v_0.AuxInt
+               if !(0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)) {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPLconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= int32(m) && int32(m) < int32(n)
+       // result: (FlagLT_ULT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               n := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               m := v_0.AuxInt
+               if !(0 <= int32(m) && int32(m) < int32(n)) {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64LEAQ1_0(v *Value) bool {
-       // match: (LEAQ1 [c] {s} (ADDQconst [d] x) y)
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ1 [c+d] {s} x y)
+       // match: (CMPLconst (ANDL x y) [0])
+       // cond:
+       // result: (TESTL x y)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v.AuxInt != 0 {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c + d
-               v.Aux = s
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64TESTL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} y (ADDQconst [d] x))
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ1 [c+d] {s} x y)
+       // match: (CMPLconst (ANDLconst [c] x) [0])
+       // cond:
+       // result: (TESTLconst [c] x)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v.AuxInt != 0 {
                        break
                }
-               d := v_1.AuxInt
-               x := v_1.Args[0]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c + d
-               v.Aux = s
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64TESTLconst)
+               v.AuxInt = c
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (SHLQconst [1] y))
+       // match: (CMPLconst x [0])
        // cond:
-       // result: (LEAQ2 [c] {s} x y)
+       // result: (TESTL x x)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_1.AuxInt != 1 {
+               if v.AuxInt != 0 {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = c
-               v.Aux = s
+               x := v.Args[0]
+               v.reset(OpAMD64TESTL)
+               v.AddArg(x)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [1] y) x)
-       // cond:
-       // result: (LEAQ2 [c] {s} x y)
+       return false
+}
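
The SHRLconst rule is a range argument: after a logical right shift by c (0 < c <= 32), a 32-bit value is strictly below 1<<(32-c); if the constant n is at least that bound, the compare is decided statically, and since the shifted value is non-negative the signed ordering agrees. A quick editorial check:

    package main

    import "fmt"

    func main() {
            x, c := uint32(0xffffffff), uint(8)
            bound := uint64(1) << (32 - c)
            // Even the largest 32-bit value shifted right by c stays below
            // 1<<(32-c), so CMPLconst against any n >= bound is always ULT.
            fmt.Println(uint64(x>>c) < bound) // true
    }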
+func rewriteValueAMD64_OpAMD64CMPLconst_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPLconst l:(MOVLload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               if v_0.AuxInt != 1 {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (SHLQconst [2] y))
-       // cond:
-       // result: (LEAQ4 [c] {s} x y)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPLmem_0(v *Value) bool {
+       // match: (CMPLmem {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(c,off)
+       // result: (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               if v_1.AuxInt != 2 {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(c, off)) {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64CMPLconstmem)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [2] y) x)
-       // cond:
-       // result: (LEAQ4 [c] {s} x y)
+       return false
+}
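
CMPLmem with a constant operand becomes CMPLconstmem, which has to carry two numbers, the immediate and the address offset, in the single AuxInt field; validValAndOff checks that both fit. A hedged sketch of the packing idea (the real encoding lives in cmd/compile/internal/ssa; this is only the concept, not the exact layout):

    package main

    import "fmt"

    // pack sketches the ValAndOff idea used by makeValAndOff: a value and
    // an offset, each known to fit in 32 bits, ride in one int64 AuxInt.
    func pack(val, off int64) int64 { return val<<32 | int64(uint32(off)) }

    func main() {
            p := pack(7, 16)
            fmt.Println(p>>32, int64(int32(uint32(p)))) // 7 16
    }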
+func rewriteValueAMD64_OpAMD64CMPQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (CMPQconst x [c])
        for {
-               c := v.AuxInt
-               s := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               if v_0.AuxInt != 2 {
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ4)
+               v.reset(OpAMD64CMPQconst)
                v.AuxInt = c
-               v.Aux = s
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (SHLQconst [3] y))
-       // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // match: (CMPQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (InvertFlags (CMPQconst x [c]))
        for {
-               c := v.AuxInt
-               s := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               if v_1.AuxInt != 3 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [3] y) x)
-       // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPQmem {sym} [off] ptr x mem)
        for {
-               c := v.AuxInt
-               s := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               if v_0.AuxInt != 3 {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c
-               v.Aux = s
+               v.reset(OpAMD64CMPQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(x)
-               v.AddArg(y)
+               v.AddArg(mem)
                return true
        }
-       // match: (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (LEAQ1 [off1] {sym1} y (LEAQ [off2] {sym2} x))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPQ x l:(MOVQload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPQmem {sym} [off] ptr x mem))
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
                _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               x := v_1.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQmem, types.TypeFlags)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(x)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
        return false
 }
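
CMPQ is not commutative, so the rules above come in mirrored pairs: when the constant or the load sits in the left operand, the rewrite swaps the operands and wraps the result in InvertFlags, and downstream flag consumers (SETcc, branches, the new CMOVs) test the inverted condition. A small sketch, assuming amd64, of two sources that end up with the same fused compare:

    package p

    // In lt1 the load is CMPQ's first operand and fuses directly into
    // CMPQmem; in lt2 it is the second operand, so the rules emit
    // InvertFlags(CMPQmem ...) and the consumer tests the flipped condition.
    func lt1(p *int64, x int64) bool { return *p < x }
    func lt2(p *int64, x int64) bool { return x < *p }
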
-func rewriteValueAMD64_OpAMD64LEAQ2_0(v *Value) bool {
-       // match: (LEAQ2 [c] {s} (ADDQconst [d] x) y)
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ2 [c+d] {s} x y)
+func rewriteValueAMD64_OpAMD64CMPQconst_0(v *Value) bool {
+       // match: (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32])
+       // cond:
+       // result: (FlagLT_ULT)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
+               if v.AuxInt != 32 {
+                       break
+               }
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64NEGQ {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (LEAQ2 [c] {s} x (ADDQconst [d] y))
-       // cond: is32Bit(c+2*d) && y.Op != OpSB
-       // result: (LEAQ2 [c+2*d] {s} x y)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_0_0.AuxInt != -16 {
                        break
                }
-               d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+2*d) && y.Op != OpSB) {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = c + 2*d
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               if v_0_0_0.AuxInt != 15 {
+                       break
+               }
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (LEAQ2 [c] {s} x (SHLQconst [1] y))
+       // match: (CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32])
        // cond:
-       // result: (LEAQ4 [c] {s} x y)
+       // result: (FlagLT_ULT)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if v.AuxInt != 32 {
                        break
                }
-               if v_1.AuxInt != 1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGQ {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (LEAQ2 [c] {s} x (SHLQconst [2] y))
-       // cond:
-       // result: (LEAQ8 [c] {s} x y)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if v_1.AuxInt != 2 {
+               if v_0_0.AuxInt != -8 {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 7 {
+                       break
+               }
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x==y
+       // result: (FlagEQ)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               x := v_0.AuxInt
+               if !(x == y) {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagEQ)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64LEAQ4_0(v *Value) bool {
-       // match: (LEAQ4 [c] {s} (ADDQconst [d] x) y)
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ4 [c+d] {s} x y)
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x<y && uint64(x)<uint64(y)
+       // result: (FlagLT_ULT)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               x := v_0.AuxInt
+               if !(x < y && uint64(x) < uint64(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (LEAQ4 [c] {s} x (ADDQconst [d] y))
-       // cond: is32Bit(c+4*d) && y.Op != OpSB
-       // result: (LEAQ4 [c+4*d] {s} x y)
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x<y && uint64(x)>uint64(y)
+       // result: (FlagLT_UGT)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+4*d) && y.Op != OpSB) {
+               x := v_0.AuxInt
+               if !(x < y && uint64(x) > uint64(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = c + 4*d
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_UGT)
                return true
        }
-       // match: (LEAQ4 [c] {s} x (SHLQconst [1] y))
-       // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x>y && uint64(x)<uint64(y)
+       // result: (FlagGT_ULT)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               x := v_0.AuxInt
+               if !(x > y && uint64(x) < uint64(y)) {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagGT_ULT)
                return true
        }
-       // match: (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x>y && uint64(x)>uint64(y)
+       // result: (FlagGT_UGT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               x := v_0.AuxInt
+               if !(x > y && uint64(x) > uint64(y)) {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagGT_UGT)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64LEAQ8_0(v *Value) bool {
-       // match: (LEAQ8 [c] {s} (ADDQconst [d] x) y)
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ8 [c+d] {s} x y)
+       // match: (CMPQconst (MOVBQZX _) [c])
+       // cond: 0xFF < c
+       // result: (FlagLT_ULT)
        for {
                c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64MOVBQZX {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               if !(0xFF < c) {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c + d
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (LEAQ8 [c] {s} x (ADDQconst [d] y))
-       // cond: is32Bit(c+8*d) && y.Op != OpSB
-       // result: (LEAQ8 [c+8*d] {s} x y)
+       // match: (CMPQconst (MOVWQZX _) [c])
+       // cond: 0xFFFF < c
+       // result: (FlagLT_ULT)
        for {
                c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVWQZX {
                        break
                }
-               d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+8*d) && y.Op != OpSB) {
+               if !(0xFFFF < c) {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c + 8*d
-               v.Aux = s
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMPQconst (MOVLQZX _) [c])
+       // cond: 0xFFFFFFFF < c
+       // result: (FlagLT_ULT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64MOVLQZX {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               if !(0xFFFFFFFF < c) {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
        return false
 }
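
When both operands are known constants, no instruction is needed at all: CMPQconst of a MOVQconst folds straight to one of the pseudo-flag values FlagEQ, FlagLT_ULT, FlagLT_UGT, FlagGT_ULT, FlagGT_UGT, which record the signed and unsigned orderings at once. The trailing MOVBQZX/MOVWQZX/MOVLQZX rules apply the same idea as a range argument: a zero-extended byte, word, or long is always unsigned-less-than any constant above 0xFF, 0xFFFF, or 0xFFFFFFFF respectively. A sketch of the case analysis (names illustrative, not compiler API):

    package p

    // flagsForConstCompare mirrors the five constant-folding rules above:
    // the compiler picks a concrete flags value from the signed and
    // unsigned orderings of the two constants.
    func flagsForConstCompare(x, y int64) string {
        switch {
        case x == y:
            return "FlagEQ"
        case x < y && uint64(x) < uint64(y):
            return "FlagLT_ULT"
        case x < y && uint64(x) > uint64(y):
            return "FlagLT_UGT"
        case x > y && uint64(x) < uint64(y):
            return "FlagGT_ULT"
        default: // x > y && uint64(x) > uint64(y)
            return "FlagGT_UGT"
        }
    }
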
-func rewriteValueAMD64_OpAMD64MOVBQSX_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64CMPQconst_10(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MOVBQSX x:(MOVBload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (CMPQconst (SHRQconst _ [c]) [n])
+       // cond: 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)
+       // result: (FlagLT_ULT)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVBload {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               c := v_0.AuxInt
+               if !(0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (MOVBQSX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (CMPQconst (ANDQconst _ [m]) [n])
+       // cond: 0 <= m && m < n
+       // result: (FlagLT_ULT)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWload {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               m := v_0.AuxInt
+               if !(0 <= m && m < n) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (MOVBQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (CMPQconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= m && m < n
+       // result: (FlagLT_ULT)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               m := v_0.AuxInt
+               if !(0 <= m && m < n) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (MOVBQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (CMPQconst (ANDQ x y) [0])
+       // cond:
+       // result: (TESTQ x y)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
+               if v.AuxInt != 0 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64TESTQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQSX (ANDLconst [c] x))
-       // cond: c & 0x80 == 0
-       // result: (ANDLconst [c & 0x7f] x)
+       // match: (CMPQconst (ANDQconst [c] x) [0])
+       // cond:
+       // result: (TESTQconst [c] x)
        for {
+               if v.AuxInt != 0 {
+                       break
+               }
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               if v_0.Op != OpAMD64ANDQconst {
                        break
                }
                c := v_0.AuxInt
                x := v_0.Args[0]
-               if !(c&0x80 == 0) {
-                       break
-               }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0x7f
+               v.reset(OpAMD64TESTQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (MOVBQSX (MOVBQSX x))
+       // match: (CMPQconst x [0])
        // cond:
-       // result: (MOVBQSX x)
+       // result: (TESTQ x x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQSX {
+               if v.AuxInt != 0 {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQSX)
+               x := v.Args[0]
+               v.reset(OpAMD64TESTQ)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBQSXload_0(v *Value) bool {
-       // match: (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVBQSX x)
+       // match: (CMPQconst l:(MOVQload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBstore {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[2]
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64MOVBQSX)
-               v.AddArg(x)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVBQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       return false
+}
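
Comparing against zero needs no immediate: the rules above replace a CMPQconst with AuxInt 0 by TESTQ, which sets the same flags with a shorter encoding (TESTQ x x for a bare zero test, TESTQ x y when the operand is an ANDQ). The SHRQconst and ANDQconst/ANDLconst rules at the top of this function are range arguments: a value shifted right by c has at most 64-c significant bits, and a value masked with m is at most m, so either can be proven unsigned-less-than n with no compare at run time. For example:

    package p

    // Both functions drop the explicit compare on amd64: x&y == 0
    // becomes TESTQ x, y and x == 0 becomes TESTQ x, x.
    func maskIsZero(x, y uint64) bool { return x&y == 0 }
    func isZero(x int64) bool         { return x == 0 }
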
+func rewriteValueAMD64_OpAMD64CMPQmem_0(v *Value) bool {
+       // match: (CMPQmem {sym} [off] ptr (MOVQconst [c]) mem)
+       // cond: validValAndOff(c,off)
+       // result: (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(c, off)) {
                        break
                }
-               v.reset(OpAMD64MOVBQSXload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
+               v.reset(OpAMD64CMPQconstmem)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
        return false
 }
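
This single CMPQmem rule covers the case where the compare was fused with its load before the other operand was recognized as a constant; folding the constant in as well leaves one compare of memory against an immediate. A hedged example of code that can end up in this shape:

    package p

    // In effect a single CMPQ $5, (p): no separate register load of *p
    // and no constant materialization remain after the rewrites.
    func eq5(p *int64) bool { return *p == 5 }
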
-func rewriteValueAMD64_OpAMD64MOVBQZX_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MOVBQZX x:(MOVBload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       // match: (CMPW x (MOVLconst [c]))
+       // cond:
+       // result: (CMPWconst x [int64(int16(c))])
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpAMD64MOVBload {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               c := v_1.AuxInt
+               v.reset(OpAMD64CMPWconst)
+               v.AuxInt = int64(int16(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPW (MOVLconst [c]) x)
+       // cond:
+       // result: (InvertFlags (CMPWconst x [int64(int16(c))]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
-               v.reset(OpCopy)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v0.AuxInt = int64(int16(c))
+               v0.AddArg(x)
                v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
                return true
        }
-       // match: (MOVBQZX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       // match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPWmem {sym} [off] ptr x mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWload {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64CMPWmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVBQZX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       // match: (CMPW x l:(MOVWload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPWmem {sym} [off] ptr x mem))
        for {
+               _ = v.Args[1]
                x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWmem, types.TypeFlags)
                v0.AuxInt = off
                v0.Aux = sym
                v0.AddArg(ptr)
+               v0.AddArg(x)
                v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBQZX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       return false
+}
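
For 16-bit compares only the low 16 bits of the constant matter, so rather than requiring the MOVLconst to be in range, these rules normalize it with int64(int16(c)) while lowering to CMPWconst; the mirrored-operand and load-fusion cases follow the same InvertFlags pattern as CMPQ above. A minimal example of a compare these rules lower, assuming amd64:

    package p

    // The constant reaches the rewriter as a 32-bit MOVLconst pattern;
    // int64(int16(c)) reduces it to its canonical 16-bit-significant
    // form before it is stored in CMPWconst's AuxInt.
    func isNeg1(x int16) bool { return x == -1 }
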
+func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)==int16(y)
+       // result: (FlagEQ)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := v_0.AuxInt
+               if !(int16(x) == int16(y)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)<int16(y) && uint16(x)<uint16(y)
+       // result: (FlagLT_ULT)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVBloadidx1 {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := v_0.AuxInt
+               if !(int16(x) < int16(y) && uint16(x) < uint16(y)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (MOVBQZX (ANDLconst [c] x))
-       // cond:
-       // result: (ANDLconst [c & 0xff] x)
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)<int16(y) && uint16(x)>uint16(y)
+       // result: (FlagLT_UGT)
        for {
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0xff
-               v.AddArg(x)
+               x := v_0.AuxInt
+               if !(int16(x) < int16(y) && uint16(x) > uint16(y)) {
+                       break
+               }
+               v.reset(OpAMD64FlagLT_UGT)
                return true
        }
-       // match: (MOVBQZX (MOVBQZX x))
-       // cond:
-       // result: (MOVBQZX x)
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)>int16(y) && uint16(x)<uint16(y)
+       // result: (FlagGT_ULT)
        for {
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQZX {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQZX)
-               v.AddArg(x)
+               x := v_0.AuxInt
+               if !(int16(x) > int16(y) && uint16(x) < uint16(y)) {
+                       break
+               }
+               v.reset(OpAMD64FlagGT_ULT)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBload_0(v *Value) bool {
-       // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVBQZX x)
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)>int16(y) && uint16(x)>uint16(y)
+       // result: (FlagGT_UGT)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBstore {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[2]
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               x := v_0.AuxInt
+               if !(int16(x) > int16(y) && uint16(x) > uint16(y)) {
                        break
                }
-               v.reset(OpAMD64MOVBQZX)
-               v.AddArg(x)
+               v.reset(OpAMD64FlagGT_UGT)
                return true
        }
-       // match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       // match: (CMPWconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= int16(m) && int16(m) < int16(n)
+       // result: (FlagLT_ULT)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               n := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               m := v_0.AuxInt
+               if !(0 <= int16(m) && int16(m) < int16(n)) {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (CMPWconst (ANDL x y) [0])
+       // cond:
+       // result: (TESTW x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v.AuxInt != 0 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64TESTW)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (CMPWconst (ANDLconst [c] x) [0])
+       // cond:
+       // result: (TESTWconst [int64(int16(c))] x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
+               if v.AuxInt != 0 {
+                       break
+               }
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64TESTWconst)
+               v.AuxInt = int64(int16(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPWconst x [0])
+       // cond:
+       // result: (TESTW x x)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               x := v.Args[0]
+               v.reset(OpAMD64TESTW)
+               v.AddArg(x)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVBloadidx1 [off] {sym} ptr idx mem)
+       // match: (CMPWconst l:(MOVWload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPWmem_0(v *Value) bool {
+       // match: (CMPWmem {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(int64(int16(c)),off)
+       // result: (CMPWconstmem {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(int64(int16(c)), off)) {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
+               v.reset(OpAMD64CMPWconstmem)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPXCHGLlock_0(v *Value) bool {
+       // match: (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       // result: (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[3]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               mem := v.Args[1]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVBload)
+               v.reset(OpAMD64CMPXCHGLlock)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBloadidx1_0(v *Value) bool {
-       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+func rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v *Value) bool {
+       // match: (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
+       // cond: is32Bit(off1+off2)
+       // result: (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
        for {
-               c := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               d := v_0.AuxInt
+               off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = c + d
+               v.reset(OpAMD64CMPXCHGQlock)
+               v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
+               v.AddArg(old)
+               v.AddArg(new_)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64LEAL_0(v *Value) bool {
+       // match: (LEAL [c] {s} (ADDLconst [d] x))
        // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (LEAL [c+d] {s} x)
        for {
                c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               idx := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               d := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
+               d := v_0.AuxInt
+               x := v_0.Args[0]
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
+               v.reset(OpAMD64LEAL)
                v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.Aux = s
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       return false
+}
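
The LEAL rule above and the LEAQ rules that follow keep Go's address arithmetic in a single instruction where possible: constant adds fold into the displacement (guarded by is32Bit), an ADDQ of two non-SB values becomes LEAQ1, and nested LEAQs merge offsets and symbols via mergeSym. A small sketch, assuming amd64, of address arithmetic these rules compact:

    package p

    // Indexing a fixed-size array compiles to one LEAQ with scale 8;
    // the mask keeps the index provably in bounds so no check remains.
    func addr(a *[16]int64, i int) *int64 {
        return &a[i&15]
    }
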
+func rewriteValueAMD64_OpAMD64LEAQ_0(v *Value) bool {
+       // match: (LEAQ [c] {s} (ADDQconst [d] x))
        // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (LEAQ [c+d] {s} x)
        for {
                c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
+               d := v_0.AuxInt
+               x := v_0.Args[0]
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
+               v.reset(OpAMD64LEAQ)
                v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.Aux = s
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (LEAQ [c] {s} (ADDQ x y))
+       // cond: x.Op != OpSB && y.Op != OpSB
+       // result: (LEAQ1 [c] {s} x y)
        for {
                c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               s := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               d := v_0.AuxInt
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(x.Op != OpSB && y.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBstore_0(v *Value) bool {
-       // match: (MOVBstore [off] {sym} ptr y:(SETL x) mem)
-       // cond: y.Uses == 1
-       // result: (SETLmem [off] {sym} ptr x mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETL {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpAMD64LEAQ)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETLE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETLEmem [off] {sym} ptr x mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETLE {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETLEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETG x) mem)
-       // cond: y.Uses == 1
-       // result: (SETGmem [off] {sym} ptr x mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETG {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ2 {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETGmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETGE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETGEmem [off] {sym} ptr x mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETGE {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ4 {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETGEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETEQ x) mem)
-       // cond: y.Uses == 1
-       // result: (SETEQmem [off] {sym} ptr x mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETEQ {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ8 {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETEQmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETNE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETNEmem [off] {sym} ptr x mem)
+       return false
+}
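
The five folds above all merge an inner LEAQ's displacement and symbol into the outer instruction; the only legality condition is that the combined offset still fits the signed 32-bit displacement field of an x86-64 addressing mode. A minimal sketch of that guard, assuming is32Bit has its usual round-trip definition (fitsDisp32 is a hypothetical name, not the generated file's helper):

        // A merged displacement is representable iff it survives a round
        // trip through int32, which is what the is32Bit condition checks.
        func fitsDisp32(off1, off2 int64) bool {
                sum := off1 + off2
                return sum == int64(int32(sum))
        }
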
+func rewriteValueAMD64_OpAMD64LEAQ1_0(v *Value) bool {
+       // match: (LEAQ1 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ1 [c+d] {s} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETNE {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64SETNEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETB x) mem)
-       // cond: y.Uses == 1
-       // result: (SETBmem [off] {sym} ptr x mem)
+       // match: (LEAQ1 [c] {s} y (ADDQconst [d] x))
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ1 [c+d] {s} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETB {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               d := v_1.AuxInt
+               x := v_1.Args[0]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c + d
+               v.Aux = s
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETBE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETBEmem [off] {sym} ptr x mem)
+       // match: (LEAQ1 [c] {s} x (SHLQconst [1] y))
+       // cond:
+       // result: (LEAQ2 [c] {s} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETBE {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               if v_1.AuxInt != 1 {
                        break
                }
-               v.reset(OpAMD64SETBEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETA x) mem)
-       // cond: y.Uses == 1
-       // result: (SETAmem [off] {sym} ptr x mem)
+       // match: (LEAQ1 [c] {s} (SHLQconst [1] y) x)
+       // cond:
+       // result: (LEAQ2 [c] {s} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETA {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               if v_0.AuxInt != 1 {
                        break
                }
-               v.reset(OpAMD64SETAmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETAE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETAEmem [off] {sym} ptr x mem)
+       // match: (LEAQ1 [c] {s} x (SHLQconst [2] y))
+       // cond:
+       // result: (LEAQ4 [c] {s} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETAE {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               if v_1.AuxInt != 2 {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBstore [off] {sym} ptr (MOVBQSX x) mem)
+       // match: (LEAQ1 [c] {s} (SHLQconst [2] y) x)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (LEAQ4 [c] {s} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBQSX {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVBQZX x) mem)
+       // match: (LEAQ1 [c] {s} x (SHLQconst [3] y))
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (LEAQ8 [c] {s} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBQZX {
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               if v_1.AuxInt != 3 {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
+       // match: (LEAQ1 [c] {s} (SHLQconst [3] y) x)
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
+               if v_0.AuxInt != 3 {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconst)
-               v.AuxInt = makeValAndOff(int64(int8(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64LEAQ {
                        break
                }
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBstore)
+               v.reset(OpAMD64LEAQ1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (LEAQ1 [off1] {sym1} y (LEAQ [off2] {sym2} x))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               x := v_1.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreidx1)
+               v.reset(OpAMD64LEAQ1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVBstoreidx1 [off] {sym} ptr idx val mem)
+       return false
+}
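
LEAQ1 is the unscaled two-operand form, computing x + y + c, so a left shift by 1, 2, or 3 on either operand can be absorbed into the hardware scale factor instead of needing a separate SHLQ. The identity being exploited, written as plain Go arithmetic (function names are illustrative only, not compiler API):

        // LEAQ1 [c] {s} x (SHLQconst [k] y) computes x + (y<<k) + c, which
        // is the scaled form x + (1<<k)*y + c, i.e. LEAQ2/LEAQ4/LEAQ8.
        func leaq1OfShift(c, x, y int64, k uint) int64 { return x + y<<k + c }
        func leaqScaled(c, x, y, scale int64) int64    { return x + scale*y + c }

For every input, leaq1OfShift(c, x, y, 3) == leaqScaled(c, x, y, 8), matching the last pair of rules above.
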
+func rewriteValueAMD64_OpAMD64LEAQ2_0(v *Value) bool {
+       // match: (LEAQ2 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ2 [c+d] {s} x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
-       // cond: x0.Uses == 1 && clobber(x0)
-       // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
+       // match: (LEAQ2 [c] {s} x (ADDQconst [d] y))
+       // cond: is32Bit(c+2*d) && y.Op != OpSB
+       // result: (LEAQ2 [c+2*d] {s} x y)
        for {
-               i := v.AuxInt
+               c := v.AuxInt
                s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpAMD64MOVBstore {
-                       break
-               }
-               if x0.AuxInt != i-1 {
-                       break
-               }
-               if x0.Aux != s {
-                       break
-               }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
-                       break
-               }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRWconst {
-                       break
-               }
-               if x0_1.AuxInt != 8 {
-                       break
-               }
-               if w != x0_1.Args[0] {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := x0.Args[2]
-               if !(x0.Uses == 1 && clobber(x0)) {
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is32Bit(c+2*d) && y.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i - 1
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c + 2*d
                v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
-               v0.AuxInt = 8
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
+       // match: (LEAQ2 [c] {s} x (SHLQconst [1] y))
+       // cond:
+       // result: (LEAQ4 [c] {s} x y)
        for {
-               i := v.AuxInt
+               c := v.AuxInt
                s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               w := v.Args[1]
-               x2 := v.Args[2]
-               if x2.Op != OpAMD64MOVBstore {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x2.AuxInt != i-1 {
+               if v_1.AuxInt != 1 {
                        break
                }
-               if x2.Aux != s {
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ2 [c] {s} x (SHLQconst [2] y))
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               _ = x2.Args[2]
-               if p != x2.Args[0] {
+               if v_1.AuxInt != 2 {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpAMD64SHRLconst {
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               if x2_1.AuxInt != 8 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               if w != x2_1.Args[0] {
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
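
The two ADDQconst folds for LEAQ2 are deliberately asymmetric: a constant added to the base moves into the displacement unchanged, while a constant added to the index is multiplied by the scale first, hence c+2*d. The x.Op != OpSB and y.Op != OpSB guards presumably keep the static-base pseudo-register out of operand positions the addressing mode cannot encode. A sketch of the arithmetic only:

        // Base:  (x+d) + 2*y + c == x + 2*y + (c+d)    -> LEAQ2 [c+d]
        // Index: x + 2*(y+d) + c == x + 2*y + (c+2*d)  -> LEAQ2 [c+2*d]
        func leaq2Addr(c, x, y int64) int64 { return x + 2*y + c }
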
+func rewriteValueAMD64_OpAMD64LEAQ4_0(v *Value) bool {
+       // match: (LEAQ4 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ4 [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x1 := x2.Args[2]
-               if x1.Op != OpAMD64MOVBstore {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               if x1.AuxInt != i-2 {
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ4 [c] {s} x (ADDQconst [d] y))
+       // cond: is32Bit(c+4*d) && y.Op != OpSB
+       // result: (LEAQ4 [c+4*d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if x1.Aux != s {
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is32Bit(c+4*d) && y.Op != OpSB) {
                        break
                }
-               _ = x1.Args[2]
-               if p != x1.Args[0] {
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c + 4*d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ4 [c] {s} x (SHLQconst [1] y))
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpAMD64SHRLconst {
+               if v_1.AuxInt != 1 {
                        break
                }
-               if x1_1.AuxInt != 16 {
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               if w != x1_1.Args[0] {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               x0 := x1.Args[2]
-               if x0.Op != OpAMD64MOVBstore {
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
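
Each SHLQconst-by-1 fold doubles the scale, so repeated rewrites climb the chain LEAQ1 → LEAQ2 → LEAQ4 → LEAQ8. For LEAQ4 the doubling step rests on the identity below (a sketch, not compiler API):

        // LEAQ4 [c] {s} x (SHLQconst [1] y) computes x + 4*(y<<1) + c,
        // which equals x + 8*y + c, i.e. LEAQ8 [c] {s} x y.
        func leaq4OfShl1(c, x, y int64) int64 { return x + 4*(y<<1) + c }
        func leaq8Addr(c, x, y int64) int64   { return x + 8*y + c }

x86-64 addressing stops at scale 8, which is why the LEAQ8 function below carries no SHLQconst rule of its own.
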
+func rewriteValueAMD64_OpAMD64LEAQ8_0(v *Value) bool {
+       // match: (LEAQ8 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ8 [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if x0.AuxInt != i-3 {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               if x0.Aux != s {
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ8 [c] {s} x (ADDQconst [d] y))
+       // cond: is32Bit(c+8*d) && y.Op != OpSB
+       // result: (LEAQ8 [c+8*d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is32Bit(c+8*d) && y.Op != OpSB) {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRLconst {
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c + 8*d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               if x0_1.AuxInt != 24 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               if w != x0_1.Args[0] {
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
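
All of the scaled-index folds above preserve the computed address by the same distributive step. A quick self-contained property check (illustrative only; not part of the compiler's test suite):

        package main

        // Verifies x + s*(y+d) + c == x + s*y + (c + s*d) for every scale
        // the hardware offers, mirroring the ADDQconst-in-index folds.
        func main() {
                for _, s := range []int64{1, 2, 4, 8} {
                        for _, d := range []int64{-3, 0, 7} {
                                c, x, y := int64(40), int64(1000), int64(5)
                                if x+s*(y+d)+c != x+s*y+(c+s*d) {
                                        panic("scaled-index fold would change the address")
                                }
                        }
                }
        }
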
+func rewriteValueAMD64_OpAMD64MOVBQSX_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBQSX x:(MOVBload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBload {
                        break
                }
-               mem := x0.Args[2]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 3
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
-               v0.AddArg(w)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(mem)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
-       // result: (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
+       // match: (MOVBQSX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               w := v.Args[1]
-               x6 := v.Args[2]
-               if x6.Op != OpAMD64MOVBstore {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               if x6.AuxInt != i-1 {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               if x6.Aux != s {
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQSX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQSX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQSX (ANDLconst [c] x))
+       // cond: c & 0x80 == 0
+       // result: (ANDLconst [c & 0x7f] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c&0x80 == 0) {
+                       break
+               }
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0x7f
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBQSX (MOVBQSX x))
+       // cond:
+       // result: (MOVBQSX x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVBQSX {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQSX)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
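
The load rules above fuse the sign extension into the load itself (MOVBQSXload); the ANDLconst rule instead exploits the mask: once c&0x80 == 0 clears the sign bit of the low byte, sign extension and zero extension agree, so the whole operation stays a 32-bit AND. A small standalone demo of that key case (not compiler code):

        package main

        // With bit 7 of the constant clear, sign-extending the masked low
        // byte equals applying the tighter mask c & 0x7f, for every byte.
        func main() {
                const c = 0x5b // any constant with c&0x80 == 0
                for x := uint32(0); x < 256; x++ {
                        masked := x & c
                        if int64(int8(masked)) != int64(masked&0x7f) {
                                panic("fold would be wrong")
                        }
                }
        }
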
+func rewriteValueAMD64_OpAMD64MOVBQSXload_0(v *Value) bool {
+       // match: (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVBQSX x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[2]
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBQSX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBQSXload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
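
The first MOVBQSXload rule is byte-level store-to-load forwarding: a load that reads back the byte just written to the same address is replaced by a sign extension of the stored value, eliminating the memory access. In source terms the idea is roughly this (a sketch of the effect, not the compiler's own representation):

        // The rewrite treats the load in the return statement as redundant:
        // it must observe exactly the value x that was just stored.
        func storeThenLoadSX(p *int8, x int8) int64 {
                *p = x
                return int64(*p) // becomes, in effect, int64(x): MOVBQSX x
        }
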
+func rewriteValueAMD64_OpAMD64MOVBQZX_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBQZX x:(MOVBload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBloadidx1 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX (ANDLconst [c] x))
+       // cond:
+       // result: (ANDLconst [c & 0xff] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0xff
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBQZX (MOVBQZX x))
+       // cond:
+       // result: (MOVBQZX x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVBQZX {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQZX)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
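
The MOVBQZX rules are the unsigned mirror image: byte loads on amd64 already zero the upper bits, so a MOVBQZX of a narrower load collapses into the load itself, and zero-extending a masked value is just the tighter mask c & 0xff. The underlying identity (illustrative check only):

        package main

        // Zero-extending the low byte is the same as masking with 0xff.
        func main() {
                for _, x := range []uint32{0, 0x7f, 0x80, 0xdeadbeef, 0xffffffff} {
                        if uint64(uint8(x)) != uint64(x)&0xff {
                                panic("zero extension is not the 0xff mask")
                        }
                }
        }
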
+func rewriteValueAMD64_OpAMD64MOVBload_0(v *Value) bool {
+       // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVBQZX x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBstore {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[2]
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBQZX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off] {sym} (ADDQ ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVBloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
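
The MOVBload folds push address arithmetic into the load's addressing mode: ADDQconst/ADDLconst and LEAQ/LEAL displacements merge into the offset exactly as in the LEAQ rules earlier, and a plain ADDQ of two registers becomes the indexed form MOVBloadidx1. The address math being preserved (sketch only; helper names hypothetical):

        // load at (base + off2) + off1  ==  load at base + (off1 + off2)
        // load at (ptr + idx)           ==  indexed load [0](ptr)(idx*1)
        func plainAddr(base, off1, off2 int64) int64  { return (base + off2) + off1 }
        func foldedAddr(base, off1, off2 int64) int64 { return base + (off1 + off2) }
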
+func rewriteValueAMD64_OpAMD64MOVBloadidx1_0(v *Value) bool {
+       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
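
Because MOVBloadidx1 uses scale 1, its address is a plain commutative sum, so ptr and idx are interchangeable; that is why the four rules above match ADDQconst under either operand and in either argument order, all normalizing to the same form. In arithmetic terms (sketch only):

        // (ptr+d) + idx + c == ptr + (idx+d) + c == ptr + idx + (c+d)
        func idx1Addr(c, ptr, idx int64) int64 { return ptr + idx + c }
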
+func rewriteValueAMD64_OpAMD64MOVBstore_0(v *Value) bool {
+       // match: (MOVBstore [off] {sym} ptr y:(SETL x) mem)
+       // cond: y.Uses == 1
+       // result: (SETLmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETL {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETLE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETLEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETLE {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETLEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETG x) mem)
+       // cond: y.Uses == 1
+       // result: (SETGmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETG {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETGmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETGE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETGEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETGE {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETGEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETEQ x) mem)
+       // cond: y.Uses == 1
+       // result: (SETEQmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETEQ {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETEQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETNE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETNEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETNE {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETNEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETB x) mem)
+       // cond: y.Uses == 1
+       // result: (SETBmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETB {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETBE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETBEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETBE {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETBEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETA x) mem)
+       // cond: y.Uses == 1
+       // result: (SETAmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETA {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETAmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETAE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETAEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETAE {
+                       break
+               }
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
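
The ten rules above fuse a single-use SETcc with the byte store of its result into the corresponding SETccmem op, so the condition flag is written straight to memory instead of being materialized in a register first. Illustrative Go source that produces this shape (a sketch, not taken from the patch):

        // The comparison result is stored directly; with these rules the
        // compiler can emit one SETL-to-memory instead of SETL plus MOVB.
        func storeLess(dst *bool, a, b int64) {
                *dst = a < b
        }
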
+func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBstore [off] {sym} ptr (MOVBQSX x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBQSX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVBQZX x) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBQZX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstoreconst)
+               v.AuxInt = makeValAndOff(int64(int8(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} (ADDQ ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVBstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
+       // cond: x0.Uses == 1 && clobber(x0)
+       // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != i-1 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpAMD64SHRWconst {
+                       break
+               }
+               if x0_1.AuxInt != 8 {
+                       break
+               }
+               if w != x0_1.Args[0] {
+                       break
+               }
+               mem := x0.Args[2]
+               if !(x0.Uses == 1 && clobber(x0)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
+               v0.AuxInt = 8
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x2 := v.Args[2]
+               if x2.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x2.AuxInt != i-1 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[2]
+               if p != x2.Args[0] {
+                       break
+               }
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x2_1.AuxInt != 8 {
+                       break
+               }
+               if w != x2_1.Args[0] {
+                       break
+               }
+               x1 := x2.Args[2]
+               if x1.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x1.AuxInt != i-2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[2]
+               if p != x1.Args[0] {
+                       break
+               }
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x1_1.AuxInt != 16 {
+                       break
+               }
+               if w != x1_1.Args[0] {
+                       break
+               }
+               x0 := x1.Args[2]
+               if x0.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x0.AuxInt != i-3 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
+                       break
+               }
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x0_1.AuxInt != 24 {
+                       break
+               }
+               if w != x0_1.Args[0] {
+                       break
+               }
+               mem := x0.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x6 := v.Args[2]
+               if x6.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x6.AuxInt != i-1 {
+                       break
+               }
+               if x6.Aux != s {
                        break
                }
                _ = x6.Args[2]
@@ -37147,122 +38592,664 @@ func rewriteValueAMD64_OpAMD64SBBLcarrymask_0(v *Value) bool {
                v.AuxInt = -1
                return true
        }
-       // match: (SBBLcarrymask (FlagLT_UGT))
+       // match: (SBBLcarrymask (FlagLT_UGT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SBBLcarrymask (FlagGT_ULT))
+       // cond:
+       // result: (MOVLconst [-1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (SBBLcarrymask (FlagGT_UGT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64SBBQcarrymask_0(v *Value) bool {
+       // match: (SBBQcarrymask (FlagEQ))
+       // cond:
+       // result: (MOVQconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagEQ {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SBBQcarrymask (FlagLT_ULT))
+       // cond:
+       // result: (MOVQconst [-1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (SBBQcarrymask (FlagLT_UGT))
+       // cond:
+       // result: (MOVQconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SBBQcarrymask (FlagGT_ULT))
+       // cond:
+       // result: (MOVQconst [-1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (SBBQcarrymask (FlagGT_UGT))
+       // cond:
+       // result: (MOVQconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
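
SBBcarrymask is a subtract-with-borrow of a register from itself: it yields all ones when the carry flag is set and zero otherwise, i.e. a mask form of unsigned-less-than. The SBBL and SBBQ rules above simply evaluate that mask once the flags are known constants. A hedged model of what the op computes:

        // carrymask mirrors what SBB(L|Q)carrymask produces from a known
        // carry bit: 0 or -1 (all ones).
        func carrymask(carry bool) int64 {
                if carry {
                        return -1
                }
                return 0
        }
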
+func rewriteValueAMD64_OpAMD64SETA_0(v *Value) bool {
+       // match: (SETA (InvertFlags x))
+       // cond:
+       // result: (SETB x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64InvertFlags {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETB)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SETA (FlagEQ))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagEQ {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SETA (FlagLT_ULT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SETA (FlagLT_UGT))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (SETA (FlagGT_ULT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SETA (FlagGT_UGT))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       return false
+}
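
The flag constants encode the outcome of a comparison for both the signed and the unsigned ordering at once, which is why each SETcc folds against all five of them. A minimal model of the table the SETA rules implement (names mirror the SSA ops; this is a sketch, not compiler code):

        package flagsketch

        // eq/ult: equal, and unsigned-less-than. The signed order is
        // omitted because SETA only consults the unsigned result.
        type flags struct{ eq, ult bool }

        var (
                flagEQ     = flags{eq: true}
                flagLT_ULT = flags{ult: true}
                flagLT_UGT = flags{}
                flagGT_ULT = flags{ult: true}
                flagGT_UGT = flags{}
        )

        // seta is "above": strictly greater in the unsigned order.
        func seta(f flags) int64 {
                if !f.eq && !f.ult {
                        return 1
                }
                return 0
        }
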
+func rewriteValueAMD64_OpAMD64SETAE_0(v *Value) bool {
+       // match: (SETAE (InvertFlags x))
+       // cond:
+       // result: (SETBE x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64InvertFlags {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETBE)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SETAE (FlagEQ))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagEQ {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (SETAE (FlagLT_ULT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SETAE (FlagLT_UGT))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (SETAE (FlagGT_ULT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SETAE (FlagGT_UGT))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       return false
+}
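
SETAE differs from SETA only in admitting equality; extending the sketch after SETA above:

        // setae is "above or equal": not unsigned-less-than.
        func setae(f flags) int64 {
                if !f.ult {
                        return 1
                }
                return 0
        }
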
+func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SETAEmem [off] {sym} ptr (InvertFlags x) mem)
+       // cond:
+       // result: (SETBEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64InvertFlags {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64SETBEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SETAEmem [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SETAEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off] {sym} ptr x:(FlagEQ) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagEQ {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
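
The memory forms fold the same table, but the result lands in a byte store: once earlier rewrites have reduced a comparison to a flag constant, SETAEmem collapses into a MOVBstore of a constant 0 or 1. Hedged source-level intuition (a constant comparison may already fold in the frontend; the SSA path matters when the flags become constant only after other rewrites):

        // If x >= y is decided during SSA rewriting, the stored SETAE
        // result becomes a plain constant byte store.
        func storeGE(dst *bool, x, y uint32) {
                *dst = x >= y
        }
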
+func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SETAmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (MOVLconst [0])
+       // result: (SETBmem [off] {sym} ptr x mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_UGT {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64SETBmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBLcarrymask (FlagGT_ULT))
-       // cond:
-       // result: (MOVLconst [-1])
+       // match: (SETAmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SETAmem [off1+off2] {sym} base val mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_ULT {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = -1
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64SETAmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBLcarrymask (FlagGT_UGT))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETAmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SETAmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_UGT {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64SETAmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SBBQcarrymask_0(v *Value) bool {
-       // match: (SBBQcarrymask (FlagEQ))
+       // match: (SETAmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
-       // result: (MOVQconst [0])
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagEQ {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBQcarrymask (FlagLT_ULT))
+       // match: (SETAmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVQconst [-1])
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_ULT {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = -1
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBQcarrymask (FlagLT_UGT))
+       // match: (SETAmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
-       // result: (MOVQconst [0])
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_UGT {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBQcarrymask (FlagGT_ULT))
+       // match: (SETAmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVQconst [-1])
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_ULT {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = -1
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBQcarrymask (FlagGT_UGT))
+       // match: (SETAmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVQconst [0])
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_UGT {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SETA_0(v *Value) bool {
-       // match: (SETA (InvertFlags x))
+func rewriteValueAMD64_OpAMD64SETB_0(v *Value) bool {
+       // match: (SETB (InvertFlags x))
        // cond:
-       // result: (SETB x)
+       // result: (SETA x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETB)
+               v.reset(OpAMD64SETA)
                v.AddArg(x)
                return true
        }
-       // match: (SETA (FlagEQ))
+       // match: (SETB (FlagEQ))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -37274,71 +39261,71 @@ func rewriteValueAMD64_OpAMD64SETA_0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (SETA (FlagLT_ULT))
+       // match: (SETB (FlagLT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETA (FlagLT_UGT))
+       // match: (SETB (FlagLT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETA (FlagGT_ULT))
+       // match: (SETB (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETA (FlagGT_UGT))
+       // match: (SETB (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
        return false
 }
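
SETB is the dual of SETA under InvertFlags (a CMP with swapped operands), which is what the first rule of each of these functions expresses. In the model sketched after SETA above:

        // setb is "below": unsigned-less-than. Swapping the comparison
        // operands turns "above" into "below", hence the InvertFlags rule.
        func setb(f flags) int64 {
                if f.ult {
                        return 1
                }
                return 0
        }
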
-func rewriteValueAMD64_OpAMD64SETAE_0(v *Value) bool {
-       // match: (SETAE (InvertFlags x))
+func rewriteValueAMD64_OpAMD64SETBE_0(v *Value) bool {
+       // match: (SETBE (InvertFlags x))
        // cond:
-       // result: (SETBE x)
+       // result: (SETAE x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETBE)
+               v.reset(OpAMD64SETAE)
                v.AddArg(x)
                return true
        }
-       // match: (SETAE (FlagEQ))
+       // match: (SETBE (FlagEQ))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -37350,62 +39337,62 @@ func rewriteValueAMD64_OpAMD64SETAE_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (SETAE (FlagLT_ULT))
+       // match: (SETBE (FlagLT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETAE (FlagLT_UGT))
+       // match: (SETBE (FlagLT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETAE (FlagGT_ULT))
+       // match: (SETBE (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETAE (FlagGT_UGT))
+       // match: (SETBE (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETBEmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETAEmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETBEmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETBEmem [off] {sym} ptr x mem)
+       // result: (SETAEmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37417,7 +39404,7 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETBEmem)
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -37425,9 +39412,9 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETBEmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETAEmem [off1+off2] {sym} base val mem)
+       // result: (SETBEmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -37443,7 +39430,7 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64SETBEmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -37451,9 +39438,9 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (SETBEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETAEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETBEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -37470,7 +39457,7 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64SETBEmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -37478,7 +39465,7 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETBEmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
@@ -37501,9 +39488,9 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETBEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37519,14 +39506,14 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETBEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37542,14 +39529,14 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETBEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37565,14 +39552,14 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETBEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37588,19 +39575,19 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETAmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETBmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETBmem [off] {sym} ptr x mem)
+       // result: (SETAmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37612,7 +39599,7 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETBmem)
+               v.reset(OpAMD64SETAmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -37620,9 +39607,9 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETBmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETAmem [off1+off2] {sym} base val mem)
+       // result: (SETBmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -37638,7 +39625,7 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETAmem)
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -37646,9 +39633,9 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (SETBmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETAmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETBmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -37665,7 +39652,7 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETAmem)
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -37673,7 +39660,7 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETBmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -37696,9 +39683,9 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETBmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37714,14 +39701,14 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETBmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37737,14 +39724,14 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETBmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37760,14 +39747,14 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETBmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37783,104 +39770,254 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
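// Illustrative annotation (not part of the generated file): FlagEQ,
// FlagLT_ULT, FlagLT_UGT, FlagGT_ULT and FlagGT_UGT are abstract flag
// states for comparisons whose outcome is already known; the LT/GT half
// is the signed result, the ULT/UGT half the unsigned one. SETBmem
// materializes "unsigned below" (carry set), so the rules above store 1
// exactly for the *_ULT states and 0 otherwise. A minimal sketch of
// source that folds this way (hypothetical name, for illustration only):
//
//	func storeBelow(p *bool) {
//		x, y := uint64(3), uint64(5) // constants SSA can propagate
//		*p = x < y                   // folds through FlagLT_ULT to a stored 1
//	}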
-func rewriteValueAMD64_OpAMD64SETB_0(v *Value) bool {
-       // match: (SETB (InvertFlags x))
-       // cond:
-       // result: (SETA x)
+func rewriteValueAMD64_OpAMD64SETEQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (SETEQ (TESTL (SHLL (MOVLconst [1]) x) y))
+       // cond: !config.nacl
+       // result: (SETAE (BTL x y))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64InvertFlags {
+               if v_0.Op != OpAMD64TESTL {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETA)
-               v.AddArg(x)
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_0.Args[1]
+               y := v_0.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETB (FlagEQ))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETEQ (TESTL y (SHLL (MOVLconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETAE (BTL x y))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagEQ {
+               if v_0.Op != OpAMD64TESTL {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0_1.Args[1]
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETB (FlagLT_ULT))
-       // cond:
-       // result: (MOVLconst [1])
+       // match: (SETEQ (TESTQ (SHLQ (MOVQconst [1]) x) y))
+       // cond: !config.nacl
+       // result: (SETAE (BTQ x y))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_ULT {
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_0.Args[1]
+               y := v_0.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (TESTQ y (SHLQ (MOVQconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETAE (BTQ x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_0_1.Args[1]
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (TESTLconst [c] x))
+       // cond: isUint32PowerOfTwo(c) && !config.nacl
+       // result: (SETAE (BTLconst [log2uint32(c)] x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isUint32PowerOfTwo(c) && !config.nacl) {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
+               v0.AuxInt = log2uint32(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETB (FlagLT_UGT))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETEQ (TESTQconst [c] x))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAE (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_UGT {
+               if v_0.Op != OpAMD64TESTQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETB (FlagGT_ULT))
-       // cond:
-       // result: (MOVLconst [1])
+       // match: (SETEQ (TESTQ (MOVQconst [c]) x))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAE (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_ULT {
+               if v_0.Op != OpAMD64TESTQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETB (FlagGT_UGT))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETEQ (TESTQ x (MOVQconst [c])))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAE (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_UGT {
+               if v_0.Op != OpAMD64TESTQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETBE_0(v *Value) bool {
-       // match: (SETBE (InvertFlags x))
+       // match: (SETEQ (InvertFlags x))
        // cond:
-       // result: (SETAE x)
+       // result: (SETEQ x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETAE)
+               v.reset(OpAMD64SETEQ)
                v.AddArg(x)
                return true
        }
-       // match: (SETBE (FlagEQ))
+       // match: (SETEQ (FlagEQ))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -37892,19 +40029,22 @@ func rewriteValueAMD64_OpAMD64SETBE_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (SETBE (FlagLT_ULT))
+       return false
+}
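+// Illustrative annotation (not part of the generated file): the SETEQ
+// rules above turn "mask a single bit, then set on equal" into a bit-test
+// instruction. BT copies the selected bit into CF, and SETAE (CF==0)
+// reports that the bit was clear; the rewrite is skipped under NaCl
+// (!config.nacl). Source patterns that match (names hypothetical):
+//
+//	func bitClear(x uint64, k uint) bool {
+//		return x&(1<<k) == 0 // SETEQ (TESTQ (SHLQ (MOVQconst [1]) k) x) → SETAE (BTQ k x)
+//	}
+//
+//	func bit5Clear(x uint64) bool {
+//		return x&(1<<5) == 0 // SETEQ (TESTQconst [32] x) → SETAE (BTQconst [5] x)
+//	}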
+func rewriteValueAMD64_OpAMD64SETEQ_10(v *Value) bool {
+       // match: (SETEQ (FlagLT_ULT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETBE (FlagLT_UGT))
+       // match: (SETEQ (FlagLT_UGT))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -37916,19 +40056,19 @@ func rewriteValueAMD64_OpAMD64SETBE_0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (SETBE (FlagGT_ULT))
+       // match: (SETEQ (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETBE (FlagGT_UGT))
+       // match: (SETEQ (FlagGT_UGT))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -37942,207 +40082,308 @@ func rewriteValueAMD64_OpAMD64SETBE_0(v *Value) bool {
        }
        return false
 }
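// Illustrative annotation (not part of the generated file): InvertFlags
// stands for the same comparison with its operands swapped. Equality is
// symmetric, so SETEQ simply drops the inversion, while the Flag* folds
// in SETEQ_10 collapse a statically known comparison to a constant; only
// FlagEQ yields 1. For example (hypothetical name):
//
//	func alwaysFalse() bool {
//		x, y := 1, 2  // propagated constants produce FlagLT_ULT
//		return x == y // SETEQ folds to MOVLconst [0]
//	}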
-func rewriteValueAMD64_OpAMD64SETBEmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETBEmem [off] {sym} ptr (InvertFlags x) mem)
-       // cond:
-       // result: (SETAEmem [off] {sym} ptr x mem)
+       config := b.Func.Config
+       _ = config
+       // match: (SETEQmem [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
+       // cond: !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTL x y) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64InvertFlags {
+               if v_1.Op != OpAMD64TESTL {
                        break
                }
-               x := v_1.Args[0]
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_0.Args[1]
+               y := v_1.Args[1]
                mem := v.Args[2]
+               if !(!config.nacl) {
+                       break
+               }
                v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off1] {sym} (ADDQconst [off2] base) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SETBEmem [off1+off2] {sym} base val mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTL y (SHLL (MOVLconst [1]) x)) mem)
+       // cond: !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTL x y) mem)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTL {
                        break
                }
-               off2 := v_0.AuxInt
-               base := v_0.Args[0]
-               val := v.Args[1]
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_1_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_1.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64SETBEmem)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off
                v.Aux = sym
-               v.AddArg(base)
-               v.AddArg(val)
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETBEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
+       // cond: !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTQ x y) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_0.Args[1]
+               y := v_1.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64SETBEmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off] {sym} ptr x:(FlagEQ) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTQ y (SHLQ (MOVQconst [1]) x)) mem)
+       // cond: !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTQ x y) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagEQ {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_1_1_0.AuxInt != 1 {
                        break
                }
+               x := v_1_1.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTLconst [c] x) mem)
+       // cond: isUint32PowerOfTwo(c) && !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_ULT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTLconst {
                        break
                }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
+               v0.AuxInt = log2uint32(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTQconst [c] x) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_UGT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQconst {
                        break
                }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_ULT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64MOVQconst {
                        break
                }
+               c := v_1_0.AuxInt
+               x := v_1.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTQ x (MOVQconst [c])) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_UGT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
                        break
                }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SETBmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETEQmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETAmem [off] {sym} ptr x mem)
+       // result: (SETEQmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -38154,7 +40395,7 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETAmem)
+               v.reset(OpAMD64SETEQmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -38162,9 +40403,9 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETEQmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETBmem [off1+off2] {sym} base val mem)
+       // result: (SETEQmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -38180,7 +40421,7 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
+               v.reset(OpAMD64SETEQmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -38188,9 +40429,14 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SETEQmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETBmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETEQmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -38207,7 +40453,7 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
+               v.reset(OpAMD64SETEQmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -38215,9 +40461,9 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETEQmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -38233,14 +40479,14 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETEQmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -38256,12 +40502,12 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETEQmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -38284,30 +40530,7 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off] {sym} ptr x:(FlagGT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_ULT {
-                       break
-               }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (SETBmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETEQmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -38316,7 +40539,7 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                _ = v.Args[2]
                ptr := v.Args[0]
                x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_UGT {
+               if x.Op != OpAMD64FlagGT_ULT {
                        break
                }
                mem := v.Args[2]
@@ -38327,252 +40550,125 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
                v0.AuxInt = 0
                v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETEQ_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (SETEQ (TESTL (SHLL (MOVLconst [1]) x) y))
-       // cond: !config.nacl
-       // result: (SETAE (BTL x y))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_0.Args[1]
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_0.Args[1]
-               y := v_0.Args[1]
-               if !(!config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (SETEQ (TESTL y (SHLL (MOVLconst [1]) x)))
-       // cond: !config.nacl
-       // result: (SETAE (BTL x y))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_0.Args[1]
-               y := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_1.Args[1]
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_0_1_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_1.Args[1]
-               if !(!config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SETEQ (TESTQ (SHLQ (MOVQconst [1]) x) y))
-       // cond: !config.nacl
-       // result: (SETAE (BTQ x y))
+       // match: (SETEQmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_0_0.Args[1]
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_0.Args[1]
-               y := v_0.Args[1]
-               if !(!config.nacl) {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
                v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SETEQ (TESTQ y (SHLQ (MOVQconst [1]) x)))
-       // cond: !config.nacl
-       // result: (SETAE (BTQ x y))
+       return false
+}
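+// Illustrative annotation (not part of the generated file): the SETEQmem
+// rules mirror the SETEQ ones when the boolean is stored instead of kept
+// in a register, so the same bit-test rewrite and flag folds apply to the
+// store form (names hypothetical):
+//
+//	func storeBitClear(dst *bool, x uint64, k uint) {
+//		*dst = x&(1<<k) == 0 // SETAEmem [off] {sym} ptr (BTQ k x) mem
+//	}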
+func rewriteValueAMD64_OpAMD64SETG_0(v *Value) bool {
+       // match: (SETG (InvertFlags x))
+       // cond:
+       // result: (SETL x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               y := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_0_1.Args[1]
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               if v_0_1_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_1.Args[1]
-               if !(!config.nacl) {
+               if v_0.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETL)
+               v.AddArg(x)
                return true
        }
-       // match: (SETEQ (TESTLconst [c] x))
-       // cond: isUint32PowerOfTwo(c) && !config.nacl
-       // result: (SETAE (BTLconst [log2uint32(c)] x))
+       // match: (SETG (FlagEQ))
+       // cond:
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+               if v_0.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-               v0.AuxInt = log2uint32(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (SETEQ (TESTQconst [c] x))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAE (BTQconst [log2(c)] x))
+       // match: (SETG (FlagLT_ULT))
+       // cond:
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (SETEQ (TESTQ (MOVQconst [c]) x))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAE (BTQconst [log2(c)] x))
+       // match: (SETG (FlagLT_UGT))
+       // cond:
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_0_0.AuxInt
-               x := v_0.Args[1]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               if v_0.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (SETEQ (TESTQ x (MOVQconst [c])))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAE (BTQconst [log2(c)] x))
+       // match: (SETG (FlagGT_ULT))
+       // cond:
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               c := v_0_1.AuxInt
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (SETG (FlagGT_UGT))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (SETEQ (InvertFlags x))
+       return false
+}
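+// Illustrative annotation (not part of the generated file): for ordered
+// comparisons, swapping the operands (InvertFlags) flips the condition,
+// hence SETG (InvertFlags x) → SETL x. A sketch of a comparison the
+// compiler may canonicalize through InvertFlags (hypothetical name; the
+// exact canonicalization is an assumption here):
+//
+//	func greater(a, b int64) bool {
+//		return b < a // equivalent to a > b with operands swapped
+//	}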
+func rewriteValueAMD64_OpAMD64SETGE_0(v *Value) bool {
+       // match: (SETGE (InvertFlags x))
        // cond:
-       // result: (SETEQ x)
+       // result: (SETLE x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETEQ)
+               v.reset(OpAMD64SETLE)
                v.AddArg(x)
                return true
        }
-       // match: (SETEQ (FlagEQ))
+       // match: (SETGE (FlagEQ))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -38584,10 +40680,7 @@ func rewriteValueAMD64_OpAMD64SETEQ_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETEQ_10(v *Value) bool {
-       // match: (SETEQ (FlagLT_ULT))
+       // match: (SETGE (FlagLT_ULT))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -38599,7 +40692,7 @@ func rewriteValueAMD64_OpAMD64SETEQ_10(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (SETEQ (FlagLT_UGT))
+       // match: (SETGE (FlagLT_UGT))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -38611,334 +40704,233 @@ func rewriteValueAMD64_OpAMD64SETEQ_10(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (SETEQ (FlagGT_ULT))
+       // match: (SETGE (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETEQ (FlagGT_UGT))
+       // match: (SETGE (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
        return false
 }
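// Illustrative annotation (not part of the generated file): SETGE is a
// signed test, so the folds above read only the signed half of the flag
// state: FlagEQ and both FlagGT_* fold to 1, both FlagLT_* fold to 0,
// regardless of the unsigned ULT/UGT half. For example:
//
//	func geConst() bool {
//		a, b := int64(7), int64(7)
//		return a >= b // FlagEQ → MOVLconst [1]
//	}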
-func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (SETEQmem [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
-       // cond: !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTL x y) mem)
+       // match: (SETGEmem [off] {sym} ptr (InvertFlags x) mem)
+       // cond:
+       // result: (SETLEmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_1_0_0.AuxInt != 1 {
+               if v_1.Op != OpAMD64InvertFlags {
                        break
                }
-               x := v_1_0.Args[1]
-               y := v_1.Args[1]
+               x := v_1.Args[0]
                mem := v.Args[2]
-               if !(!config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64SETLEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTL y (SHLL (MOVLconst [1]) x)) mem)
-       // cond: !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTL x y) mem)
+       // match: (SETGEmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SETGEmem [off1+off2] {sym} base val mem)
        for {
-               off := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_1.Args[1]
-               y := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1_1.Args[1]
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_1_1_0.AuxInt != 1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_1_1.Args[1]
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
                mem := v.Args[2]
-               if !(!config.nacl) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
-               v.AuxInt = off
+               v.reset(OpAMD64SETGEmem)
+               v.AuxInt = off1 + off2
                v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(base)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
-       // cond: !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTQ x y) mem)
+       // match: (SETGEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SETGEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               if v_1_0_0.AuxInt != 1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               x := v_1_0.Args[1]
-               y := v_1.Args[1]
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
                mem := v.Args[2]
-               if !(!config.nacl) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.reset(OpAMD64SETGEmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTQ y (SHLQ (MOVQconst [1]) x)) mem)
-       // cond: !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTQ x y) mem)
+       // match: (SETGEmem [off] {sym} ptr x:(FlagEQ) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               y := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_1_1.Args[1]
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               if v_1_1_0.AuxInt != 1 {
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagEQ {
                        break
                }
-               x := v_1_1.Args[1]
                mem := v.Args[2]
-               if !(!config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTLconst [c] x) mem)
-       // cond: isUint32PowerOfTwo(c) && !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
+       // match: (SETGEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTLconst {
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               c := v_1.AuxInt
-               x := v_1.Args[0]
                mem := v.Args[2]
-               if !(isUint32PowerOfTwo(c) && !config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-               v0.AuxInt = log2uint32(c)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTQconst [c] x) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SETGEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQconst {
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               c := v_1.AuxInt
-               x := v_1.Args[0]
                mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SETGEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64MOVQconst {
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               c := v_1_0.AuxInt
-               x := v_1.Args[1]
                mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTQ x (MOVQconst [c])) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SETGEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64MOVQconst {
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               c := v_1_1.AuxInt
                mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (InvertFlags x) mem)
+       return false
+}
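
Every FlagXX_YY value above is a statically known comparison result: the first suffix gives the signed ordering, the second the unsigned one. When SETGEmem sees such a constant, the conditional store collapses into an unconditional MOVBstore of 0 or 1. A minimal sketch of the truth table these rules encode (the helper is illustrative only, not part of the generated file):

// signed >= outcome for each statically known flag value
func setgeOutcome(op Op) int64 {
	switch op {
	case OpAMD64FlagEQ, OpAMD64FlagGT_ULT, OpAMD64FlagGT_UGT:
		return 1 // equal or signed-greater, so >= holds
	case OpAMD64FlagLT_ULT, OpAMD64FlagLT_UGT:
		return 0 // signed-less, so >= fails
	default:
		panic("not a flag constant")
	}
}

The SETG, SETL and SETLE variants further down differ only in which rows of this table map to 1.
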
+func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SETGmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETEQmem [off] {sym} ptr x mem)
+       // result: (SETLmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -38950,7 +40942,7 @@ func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETEQmem)
+               v.reset(OpAMD64SETLmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -38958,9 +40950,9 @@ func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETGmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETEQmem [off1+off2] {sym} base val mem)
+       // result: (SETGmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -38976,7 +40968,7 @@ func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETEQmem)
+               v.reset(OpAMD64SETGmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -38984,14 +40976,9 @@ func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SETEQmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (SETGmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETEQmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETGmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -39008,7 +40995,7 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETEQmem)
+               v.reset(OpAMD64SETGmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -39016,9 +41003,9 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETGmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39034,12 +41021,12 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETGmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -39062,7 +41049,7 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETGmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -39085,9 +41072,9 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETGmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39103,14 +41090,14 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETGmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39126,28 +41113,28 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
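
The InvertFlags rules capture the case where the comparison's operands were generated in swapped order: rather than recomputing anything, the condition itself is mirrored (G becomes L, GE becomes LE, and vice versa), in both the register and the store forms. As an illustrative table (not compiler source):

// condition mirroring under InvertFlags (swapped comparison operands)
var mirrored = map[Op]Op{
	OpAMD64SETL:  OpAMD64SETG,
	OpAMD64SETG:  OpAMD64SETL,
	OpAMD64SETLE: OpAMD64SETGE,
	OpAMD64SETGE: OpAMD64SETLE,
	OpAMD64SETEQ: OpAMD64SETEQ, // equality is symmetric
	OpAMD64SETNE: OpAMD64SETNE,
}

The unsigned conditions pair up the same way (B with A, BE with AE) in their own rewrite functions.
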
-func rewriteValueAMD64_OpAMD64SETG_0(v *Value) bool {
-       // match: (SETG (InvertFlags x))
+func rewriteValueAMD64_OpAMD64SETL_0(v *Value) bool {
+       // match: (SETL (InvertFlags x))
        // cond:
-       // result: (SETL x)
+       // result: (SETG x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETL)
+               v.reset(OpAMD64SETG)
                v.AddArg(x)
                return true
        }
-       // match: (SETG (FlagEQ))
+       // match: (SETL (FlagEQ))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -39159,71 +41146,71 @@ func rewriteValueAMD64_OpAMD64SETG_0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (SETG (FlagLT_ULT))
+       // match: (SETL (FlagLT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETG (FlagLT_UGT))
+       // match: (SETL (FlagLT_UGT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETG (FlagGT_ULT))
+       // match: (SETL (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETG (FlagGT_UGT))
+       // match: (SETL (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SETGE_0(v *Value) bool {
-       // match: (SETGE (InvertFlags x))
+func rewriteValueAMD64_OpAMD64SETLE_0(v *Value) bool {
+       // match: (SETLE (InvertFlags x))
        // cond:
-       // result: (SETLE x)
+       // result: (SETGE x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETLE)
+               v.reset(OpAMD64SETGE)
                v.AddArg(x)
                return true
        }
-       // match: (SETGE (FlagEQ))
+       // match: (SETLE (FlagEQ))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -39235,62 +41222,62 @@ func rewriteValueAMD64_OpAMD64SETGE_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (SETGE (FlagLT_ULT))
+       // match: (SETLE (FlagLT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETGE (FlagLT_UGT))
+       // match: (SETLE (FlagLT_UGT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETGE (FlagGT_ULT))
+       // match: (SETLE (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETGE (FlagGT_UGT))
+       // match: (SETLE (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETLEmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETGEmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETLEmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETLEmem [off] {sym} ptr x mem)
+       // result: (SETGEmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39302,7 +41289,7 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETLEmem)
+               v.reset(OpAMD64SETGEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -39310,9 +41297,9 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETLEmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETGEmem [off1+off2] {sym} base val mem)
+       // result: (SETLEmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -39328,7 +41315,7 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETGEmem)
+               v.reset(OpAMD64SETLEmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -39336,9 +41323,9 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (SETLEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETGEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETLEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -39355,7 +41342,7 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETGEmem)
+               v.reset(OpAMD64SETLEmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -39363,7 +41350,7 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETLEmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
@@ -39386,9 +41373,9 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETLEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39404,14 +41391,14 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETLEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39427,14 +41414,14 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETLEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39450,14 +41437,14 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETLEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39473,19 +41460,19 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
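
The ADDQconst and LEAQ rules repeated in every SETccmem function fold constant address arithmetic into the store's displacement: the two offsets merge when their sum still fits in a signed 32-bit immediate, and the LEAQ form additionally requires that the two symbols be mergeable. A sketch of the guard, assuming is32Bit is the usual fits-in-int32 test:

// merged displacement must still encode as an x86-64 disp32
func canFoldOffset(off1, off2 int64) bool {
	sum := off1 + off2
	return sum == int64(int32(sum))
}
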
-func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETGmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETLmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETLmem [off] {sym} ptr x mem)
+       // result: (SETGmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39497,7 +41484,7 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETLmem)
+               v.reset(OpAMD64SETGmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -39505,9 +41492,9 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETLmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETGmem [off1+off2] {sym} base val mem)
+       // result: (SETLmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -39523,7 +41510,7 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETGmem)
+               v.reset(OpAMD64SETLmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -39531,9 +41518,9 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (SETLmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETGmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETLmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -39550,7 +41537,7 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETGmem)
+               v.reset(OpAMD64SETLmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -39558,7 +41545,7 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETLmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -39581,9 +41568,9 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETLmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39599,14 +41586,14 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETLmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39622,14 +41609,14 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETLmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39645,14 +41632,14 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETLmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39668,116 +41655,269 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64SETNE_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (SETNE (TESTL (SHLL (MOVLconst [1]) x) y))
+       // cond: !config.nacl
+       // result: (SETB (BTL x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_0.Args[1]
+               y := v_0.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETNE (TESTL y (SHLL (MOVLconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETB (BTL x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTL {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0_1.Args[1]
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETNE (TESTQ (SHLQ (MOVQconst [1]) x) y))
+       // cond: !config.nacl
+       // result: (SETB (BTQ x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_0.Args[1]
+               y := v_0.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
                v.AddArg(v0)
-               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETL_0(v *Value) bool {
-       // match: (SETL (InvertFlags x))
-       // cond:
-       // result: (SETG x)
+       // match: (SETNE (TESTQ y (SHLQ (MOVQconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETB (BTQ x y))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64InvertFlags {
+               if v_0.Op != OpAMD64TESTQ {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SETL (FlagEQ))
-       // cond:
-       // result: (MOVLconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagEQ {
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               _ = v_0_1.Args[1]
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETL (FlagLT_ULT))
-       // cond:
-       // result: (MOVLconst [1])
+       // match: (SETNE (TESTLconst [c] x))
+       // cond: isUint32PowerOfTwo(c) && !config.nacl
+       // result: (SETB (BTLconst [log2uint32(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_ULT {
+               if v_0.Op != OpAMD64TESTLconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
+               v0.AuxInt = log2uint32(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETL (FlagLT_UGT))
-       // cond:
-       // result: (MOVLconst [1])
+       // match: (SETNE (TESTQconst [c] x))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETB (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_UGT {
+               if v_0.Op != OpAMD64TESTQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETL (FlagGT_ULT))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETNE (TESTQ (MOVQconst [c]) x))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETB (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_ULT {
+               if v_0.Op != OpAMD64TESTQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETL (FlagGT_UGT))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETNE (TESTQ x (MOVQconst [c])))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETB (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_UGT {
+               if v_0.Op != OpAMD64TESTQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETLE_0(v *Value) bool {
-       // match: (SETLE (InvertFlags x))
+       // match: (SETNE (InvertFlags x))
        // cond:
-       // result: (SETGE x)
+       // result: (SETNE x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETGE)
+               v.reset(OpAMD64SETNE)
                v.AddArg(x)
                return true
        }
-       // match: (SETLE (FlagEQ))
+       // match: (SETNE (FlagEQ))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETLE (FlagLT_ULT))
+       return false
+}
+func rewriteValueAMD64_OpAMD64SETNE_10(v *Value) bool {
+       // match: (SETNE (FlagLT_ULT))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -39789,7 +41929,7 @@ func rewriteValueAMD64_OpAMD64SETLE_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (SETLE (FlagLT_UGT))
+       // match: (SETNE (FlagLT_UGT))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -39801,233 +41941,334 @@ func rewriteValueAMD64_OpAMD64SETLE_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (SETLE (FlagGT_ULT))
+       // match: (SETNE (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETLE (FlagGT_UGT))
+       // match: (SETNE (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
        return false
 }
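
Taken together, the SETNE bit-test rules above turn the shift-and-test idiom into a single BT instruction: BT copies the selected bit into the carry flag, which SETB then materializes, replacing a MOV/SHL/TEST sequence. At the source level the pattern they target looks like this (hypothetical function; the masked shift count lets the compiler emit the bare SHLQ these rules match):

// y & (1 << x) != 0 lowers to BTQ x, y; SETB
func bitSet(y uint64, x uint) bool {
	return y&(1<<(x&63)) != 0
}
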
-func rewriteValueAMD64_OpAMD64SETLEmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETNEmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETLEmem [off] {sym} ptr (InvertFlags x) mem)
-       // cond:
-       // result: (SETGEmem [off] {sym} ptr x mem)
+       config := b.Func.Config
+       _ = config
+       // match: (SETNEmem [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
+       // cond: !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTL x y) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64InvertFlags {
+               if v_1.Op != OpAMD64TESTL {
                        break
                }
-               x := v_1.Args[0]
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_0.Args[1]
+               y := v_1.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64SETGEmem)
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off1] {sym} (ADDQconst [off2] base) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SETLEmem [off1+off2] {sym} base val mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTL y (SHLL (MOVLconst [1]) x)) mem)
+       // cond: !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTL x y) mem)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTL {
                        break
                }
-               off2 := v_0.AuxInt
-               base := v_0.Args[0]
-               val := v.Args[1]
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_1_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_1.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64SETLEmem)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64SETBmem)
+               v.AuxInt = off
                v.Aux = sym
-               v.AddArg(base)
-               v.AddArg(val)
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETLEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
+       // cond: !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTQ x y) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_0.Args[1]
+               y := v_1.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64SETLEmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
+               v.reset(OpAMD64SETBmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off] {sym} ptr x:(FlagEQ) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTQ y (SHLQ (MOVQconst [1]) x)) mem)
+       // cond: !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTQ x y) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagEQ {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64MOVQconst {
                        break
                }
+               if v_1_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_1.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTLconst [c] x) mem)
+       // cond: isUint32PowerOfTwo(c) && !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_ULT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTLconst {
                        break
                }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
+               v0.AuxInt = log2uint32(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTQconst [c] x) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_UGT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQconst {
                        break
                }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_ULT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
                        break
                }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               x := v_1.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTQ x (MOVQconst [c])) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_UGT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64MOVQconst {
                        break
                }
+               c := v_1_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SETLmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETNEmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETGmem [off] {sym} ptr x mem)
+       // result: (SETNEmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -40039,7 +42280,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETGmem)
+               v.reset(OpAMD64SETNEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -40047,9 +42288,9 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETNEmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETLmem [off1+off2] {sym} base val mem)
+       // result: (SETNEmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -40065,7 +42306,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETLmem)
+               v.reset(OpAMD64SETNEmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -40073,9 +42314,14 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       return false
+}
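
Testing against a constant power of two reduces the same way: isUint64PowerOfTwo (and its 32-bit counterpart) admits exactly the constants with a single bit set, and log2/log2uint32 recover that bit's index for the BTQconst/BTLconst immediate. For example (hypothetical values; whether the L or Q form fires depends on operand width):

// y & 0x1000 != 0 becomes a bit test of bit 12,
// since 0x1000 == 1<<12
func pageBit(y uint64) bool {
	return y&0x1000 != 0
}
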
+func rewriteValueAMD64_OpAMD64SETNEmem_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SETNEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETLmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETNEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -40092,7 +42338,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETLmem)
+               v.reset(OpAMD64SETNEmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -40100,7 +42346,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETNEmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -40123,7 +42369,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETNEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
@@ -40146,7 +42392,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETNEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
@@ -40169,9 +42415,9 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETNEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -40187,14 +42433,14 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETNEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -40210,820 +42456,592 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
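
The SHLL rules that follow rely on the hardware's count semantics: a 32-bit shift reads only the low five bits of the count register, so constant counts are masked with c&31, and an ADDQconst addend that is a multiple of 32 can be dropped from the count (likewise under NEGQ). The identity behind the ADDQconst rule, as a standalone check:

// adding a multiple of 32 never changes the masked shift count
func sameMaskedCount(c, k int64) bool {
	return (c+32*k)&31 == c&31 // always true
}
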
-func rewriteValueAMD64_OpAMD64SETNE_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
        b := v.Block
        _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (SETNE (TESTL (SHLL (MOVLconst [1]) x) y))
-       // cond: !config.nacl
-       // result: (SETB (BTL x y))
+       // match: (SHLL x (MOVQconst [c]))
+       // cond:
+       // result: (SHLLconst [c&31] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_0.Args[1]
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_0.Args[1]
-               y := v_0.Args[1]
-               if !(!config.nacl) {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               c := v_1.AuxInt
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = c & 31
+               v.AddArg(x)
                return true
        }
-       // match: (SETNE (TESTL y (SHLL (MOVLconst [1]) x)))
-       // cond: !config.nacl
-       // result: (SETB (BTL x y))
+       // match: (SHLL x (MOVLconst [c]))
+       // cond:
+       // result: (SHLLconst [c&31] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_0.Args[1]
-               y := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_1.Args[1]
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_0_1_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_1.Args[1]
-               if !(!config.nacl) {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               c := v_1.AuxInt
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = c & 31
+               v.AddArg(x)
                return true
        }
-       // match: (SETNE (TESTQ (SHLQ (MOVQconst [1]) x) y))
-       // cond: !config.nacl
-       // result: (SETB (BTQ x y))
+       // match: (SHLL x (ADDQconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SHLL x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLQ {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
                        break
                }
-               _ = v_0_0.Args[1]
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64MOVQconst {
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHLL x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHLL x (NEGQ <t> y))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               if v_0_0_0.AuxInt != 1 {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_0_0.Args[1]
-               y := v_0.Args[1]
-               if !(!config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SETNE (TESTQ y (SHLQ (MOVQconst [1]) x)))
-       // cond: !config.nacl
-       // result: (SETB (BTQ x y))
+       // match: (SHLL x (ANDQconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHLL x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQconst {
                        break
                }
-               _ = v_0.Args[1]
-               y := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHLQ {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
                        break
                }
-               _ = v_0_1.Args[1]
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64MOVQconst {
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHLL x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHLL x (NEGQ <t> y))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               if v_0_1_0.AuxInt != 1 {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
                        break
                }
-               x := v_0_1.Args[1]
-               if !(!config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SETNE (TESTLconst [c] x))
-       // cond: isUint32PowerOfTwo(c) && !config.nacl
-       // result: (SETB (BTLconst [log2uint32(c)] x))
+       // match: (SHLL x (ADDLconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SHLL x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTLconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-               v0.AuxInt = log2uint32(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SETNE (TESTQconst [c] x))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETB (BTQconst [log2(c)] x))
+       // match: (SHLL x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHLL x (NEGL <t> y))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SETNE (TESTQ (MOVQconst [c]) x))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETB (BTQconst [log2(c)] x))
+       // match: (SHLL x (ANDLconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHLL x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64MOVQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDLconst {
                        break
                }
-               c := v_0_0.AuxInt
-               x := v_0.Args[1]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SETNE (TESTQ x (MOVQconst [c])))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETB (BTQconst [log2(c)] x))
+       // match: (SHLL x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHLL x (NEGL <t> y))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64MOVQconst {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
                        break
                }
-               c := v_0_1.AuxInt
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SETNE (InvertFlags x))
+       return false
+}
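All of the new SHLL rules above lean on one hardware fact: amd64's 32-bit shifts read only the low 5 bits of the count register, so the count is implicitly reduced mod 32. That is why a constant count is stored as c&31, why an ADDQconst with c&31 == 0 on the count can be dropped, and why an ANDQconst with c&31 == 31 there is a no-op. A short sketch of the identities, with the masking written out explicitly since Go shifts do not mask on their own:

        // Identities the SHLL count rules exploit (counts taken mod 32):
        func shiftIdentities(x uint32, y uint) bool {
                a := x << (y & 31)        // canonical masked shift
                b := x << ((y + 32) & 31) // +32 vanishes: (y+32)&31 == y&31
                c := x << (y & 31 & 31)   // the extra &31 is redundant
                return a == b && b == c   // always true
        }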
+func rewriteValueAMD64_OpAMD64SHLLconst_0(v *Value) bool {
+       // match: (SHLLconst x [0])
        // cond:
-       // result: (SETNE x)
+       // result: x
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64InvertFlags {
+               if v.AuxInt != 0 {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETNE)
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (SETNE (FlagEQ))
-       // cond:
-       // result: (MOVLconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagEQ {
-                       break
-               }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
-               return true
-       }
        return false
 }
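SHLLconst picks up the degenerate case: a shift by a constant 0 is the identity. The generated body expresses "result: x" by resetting v to an OpCopy of x while keeping x's type; a later copy-elimination pass forwards all uses to x. At the source level the rule simply means:

        // (SHLLconst x [0]) => x: shifting by zero changes nothing, so the
        // value becomes a copy of its operand and the copy is later removed.
        func shlZero(x uint32) uint32 {
                return x << 0 // folds away entirely
        }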
-func rewriteValueAMD64_OpAMD64SETNE_10(v *Value) bool {
-       // match: (SETNE (FlagLT_ULT))
+func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SHLQ x (MOVQconst [c]))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (SHLQconst [c&63] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_ULT {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               c := v_1.AuxInt
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
                return true
        }
-       // match: (SETNE (FlagLT_UGT))
+       // match: (SHLQ x (MOVLconst [c]))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (SHLQconst [c&63] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_UGT {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               c := v_1.AuxInt
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
                return true
        }
-       // match: (SETNE (FlagGT_ULT))
-       // cond:
-       // result: (MOVLconst [1])
+       // match: (SHLQ x (ADDQconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHLQ x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_ULT {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (SETNE (FlagGT_UGT))
-       // cond:
-       // result: (MOVLconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_UGT {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETNEmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (SETNEmem [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
-       // cond: !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTL x y) mem)
+       // match: (SHLQ x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHLQ x (NEGQ <t> y))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTL {
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               _ = v_1.Args[1]
+               t := v_1.Type
                v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_1_0_0.AuxInt != 1 {
+               if v_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_1_0.Args[1]
-               y := v_1.Args[1]
-               mem := v.Args[2]
-               if !(!config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
-               v.AddArg(mem)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTL y (SHLL (MOVLconst [1]) x)) mem)
-       // cond: !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTL x y) mem)
+       // match: (SHLQ x (ANDQconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SHLQ x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTL {
+               if v_1.Op != OpAMD64ANDQconst {
                        break
                }
-               _ = v_1.Args[1]
+               c := v_1.AuxInt
                y := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1_1.Args[1]
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_1_1_0.AuxInt != 1 {
-                       break
-               }
-               x := v_1_1.Args[1]
-               mem := v.Args[2]
-               if !(!config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (SETNEmem [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
-       // cond: !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTQ x y) mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               if v_1_0_0.AuxInt != 1 {
-                       break
-               }
-               x := v_1_0.Args[1]
-               y := v_1.Args[1]
-               mem := v.Args[2]
-               if !(!config.nacl) {
+               if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTQ y (SHLQ (MOVQconst [1]) x)) mem)
-       // cond: !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTQ x y) mem)
+       // match: (SHLQ x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHLQ x (NEGQ <t> y))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               y := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_1_1.Args[1]
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               if v_1_1_0.AuxInt != 1 {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
                        break
                }
-               x := v_1_1.Args[1]
-               mem := v.Args[2]
-               if !(!config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
-               v.AddArg(mem)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTLconst [c] x) mem)
-       // cond: isUint32PowerOfTwo(c) && !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
+       // match: (SHLQ x (ADDLconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHLQ x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTLconst {
+               if v_1.Op != OpAMD64ADDLconst {
                        break
                }
                c := v_1.AuxInt
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-               v0.AuxInt = log2uint32(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTQconst [c] x) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SHLQ x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHLQ x (NEGL <t> y))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQconst {
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               c := v_1.AuxInt
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
                v.AddArg(v0)
-               v.AddArg(mem)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SHLQ x (ANDLconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SHLQ x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64ANDLconst {
                        break
                }
-               c := v_1_0.AuxInt
-               x := v_1.Args[1]
-               mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTQ x (MOVQconst [c])) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SHLQ x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHLQ x (NEGL <t> y))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64MOVQconst {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
                        break
                }
-               c := v_1_1.AuxInt
-               mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
                v.AddArg(v0)
-               v.AddArg(mem)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (InvertFlags x) mem)
+       return false
+}
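SHLQ repeats the same count simplifications with the 64-bit mask: the hardware reads the low 6 bits of the count for 64-bit shifts, so the constants change from c&31 to c&63 and the ADDQconst/ANDQconst guards move to multiples of 64 and to 63. Sketch of the 64-bit identity:

        // 64-bit analogue: the count is taken mod 64, so adding 64 to it
        // is invisible.
        func shlq(x uint64, y uint) bool {
                return x<<(y&63) == x<<((y+64)&63) // always true
        }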
+func rewriteValueAMD64_OpAMD64SHLQconst_0(v *Value) bool {
+       // match: (SHLQconst x [0])
        // cond:
-       // result: (SETNEmem [off] {sym} ptr x mem)
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64InvertFlags {
+               if v.AuxInt != 0 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64SETNEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (SETNEmem [off1] {sym} (ADDQconst [off2] base) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SETNEmem [off1+off2] {sym} base val mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
+       // match: (SHRB x (MOVQconst [c]))
+       // cond: c&31 < 8
+       // result: (SHRBconst [c&31] x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               c := v_1.AuxInt
+               if !(c&31 < 8) {
                        break
                }
-               v.reset(OpAMD64SETNEmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpAMD64SHRBconst)
+               v.AuxInt = c & 31
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETNEmem_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SETNEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETNEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (SHRB x (MOVLconst [c]))
+       // cond: c&31 < 8
+       // result: (SHRBconst [c&31] x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               c := v_1.AuxInt
+               if !(c&31 < 8) {
                        break
                }
-               v.reset(OpAMD64SETNEmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpAMD64SHRBconst)
+               v.AuxInt = c & 31
+               v.AddArg(x)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr x:(FlagEQ) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // match: (SHRB _ (MOVQconst [c]))
+       // cond: c&31 >= 8
+       // result: (MOVLconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagEQ {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (SETNEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_ULT {
+               c := v_1.AuxInt
+               if !(c&31 >= 8) {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SHRB _ (MOVLconst [c]))
+       // cond: c&31 >= 8
+       // result: (MOVLconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_UGT {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (SETNEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_ULT {
+               c := v_1.AuxInt
+               if !(c&31 >= 8) {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       return false
+}
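SHRB is the first width where "shift count in range" matters: the rules only form SHRBconst when c&31 < 8, because an 8-bit value logically shifted right by 8 or more has no bits left and folds straight to MOVLconst [0]. The count is still reduced mod 32 first, matching the 5-bit hardware count used for sub-32-bit shifts. A sketch of the case split:

        // Why c&31 >= 8 folds (SHRB _ count) to 0: every bit of the byte
        // is shifted out.
        func shrb(x uint8, c uint) uint8 {
                if c&31 >= 8 {
                        return 0 // matches the MOVLconst [0] result
                }
                return x >> (c & 31) // matches SHRBconst [c&31]
        }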
+func rewriteValueAMD64_OpAMD64SHRBconst_0(v *Value) bool {
+       // match: (SHRBconst x [0])
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_UGT {
+               if v.AuxInt != 0 {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
-               v.AddArg(v0)
-               v.AddArg(mem)
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SHRL_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SHLL x (MOVQconst [c]))
+       // match: (SHRL x (MOVQconst [c]))
        // cond:
-       // result: (SHLLconst [c&31] x)
+       // result: (SHRLconst [c&31] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41032,14 +43050,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHLLconst)
+               v.reset(OpAMD64SHRLconst)
                v.AuxInt = c & 31
                v.AddArg(x)
                return true
        }
-       // match: (SHLL x (MOVLconst [c]))
+       // match: (SHRL x (MOVLconst [c]))
        // cond:
-       // result: (SHLLconst [c&31] x)
+       // result: (SHRLconst [c&31] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41048,14 +43066,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHLLconst)
+               v.reset(OpAMD64SHRLconst)
                v.AuxInt = c & 31
                v.AddArg(x)
                return true
        }
-       // match: (SHLL x (ADDQconst [c] y))
+       // match: (SHRL x (ADDQconst [c] y))
        // cond: c & 31 == 0
-       // result: (SHLL x y)
+       // result: (SHRL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41068,14 +43086,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLL x (NEGQ <t> (ADDQconst [c] y)))
+       // match: (SHRL x (NEGQ <t> (ADDQconst [c] y)))
        // cond: c & 31 == 0
-       // result: (SHLL x (NEGQ <t> y))
+       // result: (SHRL x (NEGQ <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41093,16 +43111,16 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLL x (ANDQconst [c] y))
+       // match: (SHRL x (ANDQconst [c] y))
        // cond: c & 31 == 31
-       // result: (SHLL x y)
+       // result: (SHRL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41115,14 +43133,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLL x (NEGQ <t> (ANDQconst [c] y)))
+       // match: (SHRL x (NEGQ <t> (ANDQconst [c] y)))
        // cond: c & 31 == 31
-       // result: (SHLL x (NEGQ <t> y))
+       // result: (SHRL x (NEGQ <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41140,16 +43158,16 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLL x (ADDLconst [c] y))
+       // match: (SHRL x (ADDLconst [c] y))
        // cond: c & 31 == 0
-       // result: (SHLL x y)
+       // result: (SHRL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41162,14 +43180,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLL x (NEGL <t> (ADDLconst [c] y)))
+       // match: (SHRL x (NEGL <t> (ADDLconst [c] y)))
        // cond: c & 31 == 0
-       // result: (SHLL x (NEGL <t> y))
+       // result: (SHRL x (NEGL <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41187,16 +43205,16 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLL x (ANDLconst [c] y))
+       // match: (SHRL x (ANDLconst [c] y))
        // cond: c & 31 == 31
-       // result: (SHLL x y)
+       // result: (SHRL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41209,14 +43227,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLL x (NEGL <t> (ANDLconst [c] y)))
+       // match: (SHRL x (NEGL <t> (ANDLconst [c] y)))
        // cond: c & 31 == 31
-       // result: (SHLL x (NEGL <t> y))
+       // result: (SHRL x (NEGL <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41234,7 +43252,7 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
                v0.AddArg(y)
@@ -41243,8 +43261,8 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHLLconst_0(v *Value) bool {
-       // match: (SHLLconst x [0])
+func rewriteValueAMD64_OpAMD64SHRLconst_0(v *Value) bool {
+       // match: (SHRLconst x [0])
        // cond:
        // result: x
        for {
@@ -41259,12 +43277,12 @@ func rewriteValueAMD64_OpAMD64SHLLconst_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SHRQ_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SHLQ x (MOVQconst [c]))
+       // match: (SHRQ x (MOVQconst [c]))
        // cond:
-       // result: (SHLQconst [c&63] x)
+       // result: (SHRQconst [c&63] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41273,14 +43291,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHLQconst)
+               v.reset(OpAMD64SHRQconst)
                v.AuxInt = c & 63
                v.AddArg(x)
                return true
        }
-       // match: (SHLQ x (MOVLconst [c]))
+       // match: (SHRQ x (MOVLconst [c]))
        // cond:
-       // result: (SHLQconst [c&63] x)
+       // result: (SHRQconst [c&63] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41289,14 +43307,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHLQconst)
+               v.reset(OpAMD64SHRQconst)
                v.AuxInt = c & 63
                v.AddArg(x)
                return true
        }
-       // match: (SHLQ x (ADDQconst [c] y))
+       // match: (SHRQ x (ADDQconst [c] y))
        // cond: c & 63 == 0
-       // result: (SHLQ x y)
+       // result: (SHRQ x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41309,14 +43327,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLQ x (NEGQ <t> (ADDQconst [c] y)))
+       // match: (SHRQ x (NEGQ <t> (ADDQconst [c] y)))
        // cond: c & 63 == 0
-       // result: (SHLQ x (NEGQ <t> y))
+       // result: (SHRQ x (NEGQ <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41334,16 +43352,16 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLQ x (ANDQconst [c] y))
+       // match: (SHRQ x (ANDQconst [c] y))
        // cond: c & 63 == 63
-       // result: (SHLQ x y)
+       // result: (SHRQ x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41356,14 +43374,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLQ x (NEGQ <t> (ANDQconst [c] y)))
+       // match: (SHRQ x (NEGQ <t> (ANDQconst [c] y)))
        // cond: c & 63 == 63
-       // result: (SHLQ x (NEGQ <t> y))
+       // result: (SHRQ x (NEGQ <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41381,16 +43399,16 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLQ x (ADDLconst [c] y))
+       // match: (SHRQ x (ADDLconst [c] y))
        // cond: c & 63 == 0
-       // result: (SHLQ x y)
+       // result: (SHRQ x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41403,14 +43421,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLQ x (NEGL <t> (ADDLconst [c] y)))
+       // match: (SHRQ x (NEGL <t> (ADDLconst [c] y)))
        // cond: c & 63 == 0
-       // result: (SHLQ x (NEGL <t> y))
+       // result: (SHRQ x (NEGL <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41428,16 +43446,16 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLQ x (ANDLconst [c] y))
+       // match: (SHRQ x (ANDLconst [c] y))
        // cond: c & 63 == 63
-       // result: (SHLQ x y)
+       // result: (SHRQ x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41450,14 +43468,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLQ x (NEGL <t> (ANDLconst [c] y)))
+       // match: (SHRQ x (NEGL <t> (ANDLconst [c] y)))
        // cond: c & 63 == 63
-       // result: (SHLQ x (NEGL <t> y))
+       // result: (SHRQ x (NEGL <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41475,7 +43493,7 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
                v0.AddArg(y)
@@ -41484,8 +43502,8 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHLQconst_0(v *Value) bool {
-       // match: (SHLQconst x [0])
+func rewriteValueAMD64_OpAMD64SHRQconst_0(v *Value) bool {
+       // match: (SHRQconst x [0])
        // cond:
        // result: x
        for {
@@ -41500,10 +43518,10 @@ func rewriteValueAMD64_OpAMD64SHLQconst_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
-       // match: (SHRB x (MOVQconst [c]))
-       // cond: c&31 < 8
-       // result: (SHRBconst [c&31] x)
+func rewriteValueAMD64_OpAMD64SHRW_0(v *Value) bool {
+       // match: (SHRW x (MOVQconst [c]))
+       // cond: c&31 < 16
+       // result: (SHRWconst [c&31] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41512,17 +43530,17 @@ func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(c&31 < 8) {
+               if !(c&31 < 16) {
                        break
                }
-               v.reset(OpAMD64SHRBconst)
+               v.reset(OpAMD64SHRWconst)
                v.AuxInt = c & 31
                v.AddArg(x)
                return true
        }
-       // match: (SHRB x (MOVLconst [c]))
-       // cond: c&31 < 8
-       // result: (SHRBconst [c&31] x)
+       // match: (SHRW x (MOVLconst [c]))
+       // cond: c&31 < 16
+       // result: (SHRWconst [c&31] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41531,16 +43549,16 @@ func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(c&31 < 8) {
+               if !(c&31 < 16) {
                        break
                }
-               v.reset(OpAMD64SHRBconst)
+               v.reset(OpAMD64SHRWconst)
                v.AuxInt = c & 31
                v.AddArg(x)
                return true
        }
-       // match: (SHRB _ (MOVQconst [c]))
-       // cond: c&31 >= 8
+       // match: (SHRW _ (MOVQconst [c]))
+       // cond: c&31 >= 16
        // result: (MOVLconst [0])
        for {
                _ = v.Args[1]
@@ -41549,15 +43567,15 @@ func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(c&31 >= 8) {
+               if !(c&31 >= 16) {
                        break
                }
                v.reset(OpAMD64MOVLconst)
                v.AuxInt = 0
                return true
        }
-       // match: (SHRB _ (MOVLconst [c]))
-       // cond: c&31 >= 8
+       // match: (SHRW _ (MOVLconst [c]))
+       // cond: c&31 >= 16
        // result: (MOVLconst [0])
        for {
                _ = v.Args[1]
@@ -41566,7 +43584,7 @@ func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(c&31 >= 8) {
+               if !(c&31 >= 16) {
                        break
                }
                v.reset(OpAMD64MOVLconst)
@@ -41575,8 +43593,8 @@ func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
        }
        return false
 }
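The SHRW rename changes only the width threshold relative to SHRB: a 16-bit value survives masked counts below 16 (SHRWconst) and is cleared to zero for c&31 >= 16, exactly as in the byte sketch above with 8 replaced by 16:

        // 16-bit variant of the byte rule.
        func shrw(x uint16, c uint) uint16 {
                if c&31 >= 16 {
                        return 0
                }
                return x >> (c & 31)
        }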
-func rewriteValueAMD64_OpAMD64SHRBconst_0(v *Value) bool {
-       // match: (SHRBconst x [0])
+func rewriteValueAMD64_OpAMD64SHRWconst_0(v *Value) bool {
+       // match: (SHRWconst x [0])
        // cond:
        // result: x
        for {
@@ -41591,269 +43609,702 @@ func rewriteValueAMD64_OpAMD64SHRBconst_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRL_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SUBL_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SHRL x (MOVQconst [c]))
+       // match: (SUBL x (MOVLconst [c]))
        // cond:
-       // result: (SHRLconst [c&31] x)
+       // result: (SUBLconst x [c])
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHRLconst)
-               v.AuxInt = c & 31
+               v.reset(OpAMD64SUBLconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRL x (MOVLconst [c]))
+       // match: (SUBL (MOVLconst [c]) x)
        // cond:
-       // result: (SHRLconst [c&31] x)
+       // result: (NEGL (SUBLconst <v.Type> x [c]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64NEGL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SUBLconst, v.Type)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SUBL x x)
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SUBL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (SUBLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SUBLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
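// Illustrative sketch, not part of this diff, with hypothetical names: the
// (SUBL ...) rules above canonicalize 32-bit subtraction. amd64 has no
// reverse-subtract instruction, so a constant left operand is rewritten as
// the negation of x - c, and x - x collapses to 0 (if it survives the
// generic passes). The final rule folds a single-use load into the op.
func sublSketch(x int32) (int32, int32, int32) {
	a := x - 7 // (SUBLconst [7] x)
	b := 7 - x // (NEGL (SUBLconst [7] x))
	c := x - x // (MOVLconst [0])
	return a, b, c
}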
+func rewriteValueAMD64_OpAMD64SUBLconst_0(v *Value) bool {
+       // match: (SUBLconst [c] x)
+       // cond: int32(c) == 0
+       // result: x
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBLconst [c] x)
+       // cond:
+       // result: (ADDLconst [int64(int32(-c))] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               v.reset(OpAMD64ADDLconst)
+               v.AuxInt = int64(int32(-c))
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAMD64SUBLmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (SUBLmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBLmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64SUBLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SUBLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64SUBLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
+       // cond:
+       // result: (SUBL x (MOVLf2i y))
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVSSstore {
+                       break
+               }
+               if v_2.AuxInt != off {
+                       break
+               }
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64SUBL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
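// Illustrative sketch, not part of this diff: the last (SUBLmem ...) rule is
// store-to-load forwarding. When the memory operand was just written by a
// MOVSSstore to the same ptr/off/sym, the value is still live in an XMM
// register, so the rule reads it with a cross-register-file MOVLf2i and a
// plain SUBL instead of going back through memory. A hypothetical source
// pattern that could reach it, assuming the load first folds into SUBLmem:
import "math"

func subFwdSketch(x int32, p *float32, f float32) int32 {
	*p = f // MOVSSstore; f stays live in an XMM register
	return x - int32(math.Float32bits(*p))
}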
+func rewriteValueAMD64_OpAMD64SUBQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SUBQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (SUBQconst x [c])
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHRLconst)
-               v.AuxInt = c & 31
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64SUBQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRL x (ADDQconst [c] y))
-       // cond: c & 31 == 0
-       // result: (SHRL x y)
+       // match: (SUBQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (NEGQ (SUBQconst <v.Type> x [c]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SUBQconst, v.Type)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SUBQ x x)
+       // cond:
+       // result: (MOVQconst [0])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SUBQ x l:(MOVQload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (SUBQmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SUBQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
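// Illustrative sketch, not part of this diff (hypothetical name): the
// (SUBQ x l:(MOVQload ...)) rule merges a single-use load into the
// subtraction, so the second operand is read straight from memory, as in
// SUBQ (ptr), reg. The SUBSD/SUBSS rules further down do the same for
// floating point.
func subqLoadSketch(x int64, p *int64) int64 {
	return x - *p // => (SUBQmem x [0] {} p mem)
}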
+func rewriteValueAMD64_OpAMD64SUBQconst_0(v *Value) bool {
+       // match: (SUBQconst [0] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBQconst [c] x)
+       // cond: c != -(1<<31)
+       // result: (ADDQconst [-c] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c != -(1 << 31)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = -c
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBQconst (MOVQconst [d]) [c])
+       // cond:
+       // result: (MOVQconst [d-c])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = d - c
+               return true
+       }
+       // match: (SUBQconst (SUBQconst x [d]) [c])
+       // cond: is32Bit(-c-d)
+       // result: (ADDQconst [-c-d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SUBQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(is32Bit(-c - d)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = -c - d
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
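// Illustrative sketch, not part of this diff: (SUBQconst [c] x) is normalized
// to (ADDQconst [-c] x) so later passes only have to reason about one
// constant-add form. The guard c != -(1<<31) exists because the immediate is
// a sign-extended 32-bit value, and negating -1<<31 does not fit back into
// that range.
func subqConstSketch(x int64) int64 {
	return x - 5 // (SUBQconst [5] x) => (ADDQconst [-5] x)
}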
+func rewriteValueAMD64_OpAMD64SUBQmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (SUBQmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBQmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&31 == 0) {
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
+               v.reset(OpAMD64SUBQmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SUBQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64SUBQmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
+       // cond:
+       // result: (SUBQ x (MOVQf2i y))
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVSDstore {
+                       break
+               }
+               if v_2.AuxInt != off {
+                       break
+               }
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64SUBQ)
                v.AddArg(x)
-               v.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SHRL x (NEGQ <t> (ADDQconst [c] y)))
-       // cond: c & 31 == 0
-       // result: (SHRL x (NEGQ <t> y))
+       return false
+}
+func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool {
+       // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (SUBSDmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVSDload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SUBSDmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64SUBSDmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (SUBSDmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBSDmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGQ {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&31 == 0) {
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.reset(OpAMD64SUBSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (SHRL x (ANDQconst [c] y))
-       // cond: c & 31 == 31
-       // result: (SHRL x y)
+       // match: (SUBSDmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SUBSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDQconst {
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&31 == 31) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64SUBSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (SHRL x (NEGQ <t> (ANDQconst [c] y)))
-       // cond: c & 31 == 31
-       // result: (SHRL x (NEGQ <t> y))
+       // match: (SUBSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _))
+       // cond:
+       // result: (SUBSD x (MOVQi2f y))
        for {
-               _ = v.Args[1]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGQ {
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVQstore {
                        break
                }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ANDQconst {
+               if v_2.AuxInt != off {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&31 == 31) {
+               if v_2.Aux != sym {
                        break
                }
-               v.reset(OpAMD64SHRL)
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64SUBSD)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQi2f, typ.Float64)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHRL x (ADDLconst [c] y))
-       // cond: c & 31 == 0
-       // result: (SHRL x y)
+       return false
+}
+func rewriteValueAMD64_OpAMD64SUBSS_0(v *Value) bool {
+       // match: (SUBSS x l:(MOVSSload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (SUBSSmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDLconst {
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVSSload {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&31 == 0) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
+               v.reset(OpAMD64SUBSSmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
-               v.AddArg(y)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (SHRL x (NEGL <t> (ADDLconst [c] y)))
-       // cond: c & 31 == 0
-       // result: (SHRL x (NEGL <t> y))
+       return false
+}
+func rewriteValueAMD64_OpAMD64SUBSSmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (SUBSSmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBSSmem [off1+off2] {sym} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGL {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ADDLconst {
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&31 == 0) {
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.reset(OpAMD64SUBSSmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (SHRL x (ANDLconst [c] y))
-       // cond: c & 31 == 31
-       // result: (SHRL x y)
+       // match: (SUBSSmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SUBSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDLconst {
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&31 == 31) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64SUBSSmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (SHRL x (NEGL <t> (ANDLconst [c] y)))
-       // cond: c & 31 == 31
-       // result: (SHRL x (NEGL <t> y))
+       // match: (SUBSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _))
+       // cond:
+       // result: (SUBSS x (MOVLi2f y))
        for {
-               _ = v.Args[1]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGL {
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVLstore {
                        break
                }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ANDLconst {
+               if v_2.AuxInt != off {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&31 == 31) {
+               if v_2.Aux != sym {
                        break
                }
-               v.reset(OpAMD64SHRL)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SHRLconst_0(v *Value) bool {
-       // match: (SHRLconst x [0])
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 0 {
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               y := v_2.Args[1]
+               v.reset(OpAMD64SUBSS)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLi2f, typ.Float32)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRQ_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64TESTB_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SHRQ x (MOVQconst [c]))
+       // match: (TESTB (MOVLconst [c]) x)
        // cond:
-       // result: (SHRQconst [c&63] x)
+       // result: (TESTBconst [c] x)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64SHRQconst)
-               v.AuxInt = c & 63
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64TESTBconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRQ x (MOVLconst [c]))
+       // match: (TESTB x (MOVLconst [c]))
        // cond:
-       // result: (SHRQconst [c&63] x)
+       // result: (TESTBconst [c] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41862,366 +44313,380 @@ func rewriteValueAMD64_OpAMD64SHRQ_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHRQconst)
-               v.AuxInt = c & 63
+               v.reset(OpAMD64TESTBconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRQ x (ADDQconst [c] y))
-       // cond: c & 63 == 0
-       // result: (SHRQ x y)
+       // match: (TESTB l:(MOVBload {sym} [off] ptr mem) l2)
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVBload {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&63 == 0) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               l2 := v.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg(x)
-               v.AddArg(y)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (SHRQ x (NEGQ <t> (ADDQconst [c] y)))
-       // cond: c & 63 == 0
-       // result: (SHRQ x (NEGQ <t> y))
+       // match: (TESTB l2 l:(MOVBload {sym} [off] ptr mem))
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGQ {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ADDQconst {
+               l2 := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVBload {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&63 == 0) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
-               v0.AddArg(y)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
+               v.reset(OpCopy)
                v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (SHRQ x (ANDQconst [c] y))
-       // cond: c & 63 == 63
-       // result: (SHRQ x y)
+       return false
+}
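// Illustrative sketch, not part of this diff (hypothetical name): the
// (TESTB l:(MOVBload ...) l2) rules fire when one load feeds both TESTB
// operands, i.e. a value is loaded only to be tested against itself. The
// load and test are replaced by a compare-with-zero directly against memory,
// emitted in the load's own block (the @l.Block result). The TESTW/TESTL/
// TESTQ rules below do the same at other widths.
func testbSketch(p *byte) bool {
	return *p == 0 // (TESTB l l) => CMPBconstmem $0, (p), then SETEQ
}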
+func rewriteValueAMD64_OpAMD64TESTBconst_0(v *Value) bool {
+       // match: (TESTBconst [-1] x)
+       // cond:
+       // result: (TESTB x x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&63 == 63) {
+               if v.AuxInt != -1 {
                        break
                }
-               v.reset(OpAMD64SHRQ)
+               x := v.Args[0]
+               v.reset(OpAMD64TESTB)
+               v.AddArg(x)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (SHRQ x (NEGQ <t> (ANDQconst [c] y)))
-       // cond: c & 63 == 63
-       // result: (SHRQ x (NEGQ <t> y))
+       return false
+}
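// Illustrative sketch, not part of this diff: a TEST against an all-ones
// immediate sets flags exactly as testing the value against itself, so
// (TESTBconst [-1] x) is normalized to (TESTB x x), which also drops the
// immediate byte from the encoding. The TESTL/TESTQ/TESTW variants below
// apply the same identity. In Go terms (earlier passes usually fold the
// mask away first):
func testAllOnes(x int8) bool {
	return x&-1 != 0 // same flags as x != 0
}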
+func rewriteValueAMD64_OpAMD64TESTL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (TESTL (MOVLconst [c]) x)
+       // cond:
+       // result: (TESTLconst [c] x)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGQ {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&63 == 63) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64SHRQ)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64TESTLconst)
+               v.AuxInt = c
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
-               v0.AddArg(y)
-               v.AddArg(v0)
                return true
        }
-       // match: (SHRQ x (ADDLconst [c] y))
-       // cond: c & 63 == 0
-       // result: (SHRQ x y)
+       // match: (TESTL x (MOVLconst [c]))
+       // cond:
+       // result: (TESTLconst [c] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDLconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&63 == 0) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
+               v.reset(OpAMD64TESTLconst)
+               v.AuxInt = c
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (SHRQ x (NEGL <t> (ADDLconst [c] y)))
-       // cond: c & 63 == 0
-       // result: (SHRQ x (NEGL <t> y))
+       // match: (TESTL l:(MOVLload {sym} [off] ptr mem) l2)
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGL {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ADDLconst {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&63 == 0) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               l2 := v.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
-               v0.AddArg(y)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
+               v.reset(OpCopy)
                v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (SHRQ x (ANDLconst [c] y))
-       // cond: c & 63 == 63
-       // result: (SHRQ x y)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&63 == 63) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (SHRQ x (NEGL <t> (ANDLconst [c] y)))
-       // cond: c & 63 == 63
-       // result: (SHRQ x (NEGL <t> y))
+       // match: (TESTL l2 l:(MOVLload {sym} [off] ptr mem))
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGL {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ANDLconst {
+               l2 := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&63 == 63) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
-               v0.AddArg(y)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
+               v.reset(OpCopy)
                v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRQconst_0(v *Value) bool {
-       // match: (SHRQconst x [0])
+func rewriteValueAMD64_OpAMD64TESTLconst_0(v *Value) bool {
+       // match: (TESTLconst [-1] x)
        // cond:
-       // result: x
+       // result: (TESTL x x)
        for {
-               if v.AuxInt != 0 {
+               if v.AuxInt != -1 {
                        break
                }
                x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpAMD64TESTL)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRW_0(v *Value) bool {
-       // match: (SHRW x (MOVQconst [c]))
-       // cond: c&31 < 16
-       // result: (SHRWconst [c&31] x)
+func rewriteValueAMD64_OpAMD64TESTQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (TESTQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (TESTQconst [c] x)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c&31 < 16) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpAMD64SHRWconst)
-               v.AuxInt = c & 31
+               v.reset(OpAMD64TESTQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRW x (MOVLconst [c]))
-       // cond: c&31 < 16
-       // result: (SHRWconst [c&31] x)
+       // match: (TESTQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (TESTQconst [c] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c&31 < 16) {
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpAMD64SHRWconst)
-               v.AuxInt = c & 31
+               v.reset(OpAMD64TESTQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRW _ (MOVQconst [c]))
-       // cond: c&31 >= 16
-       // result: (MOVLconst [0])
+       // match: (TESTQ l:(MOVQload {sym} [off] ptr mem) l2)
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               c := v_1.AuxInt
-               if !(c&31 >= 16) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               l2 := v.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (SHRW _ (MOVLconst [c]))
-       // cond: c&31 >= 16
-       // result: (MOVLconst [0])
+       // match: (TESTQ l2 l:(MOVQload {sym} [off] ptr mem))
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               l2 := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               c := v_1.AuxInt
-               if !(c&31 >= 16) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRWconst_0(v *Value) bool {
-       // match: (SHRWconst x [0])
+func rewriteValueAMD64_OpAMD64TESTQconst_0(v *Value) bool {
+       // match: (TESTQconst [-1] x)
        // cond:
-       // result: x
+       // result: (TESTQ x x)
        for {
-               if v.AuxInt != 0 {
+               if v.AuxInt != -1 {
                        break
                }
                x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpAMD64TESTQ)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SUBL_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64TESTW_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SUBL x (MOVLconst [c]))
+       // match: (TESTW (MOVLconst [c]) x)
        // cond:
-       // result: (SUBLconst x [c])
+       // result: (TESTWconst [c] x)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64SUBLconst)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64TESTWconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SUBL (MOVLconst [c]) x)
+       // match: (TESTW x (MOVLconst [c]))
        // cond:
-       // result: (NEGL (SUBLconst <v.Type> x [c]))
+       // result: (TESTWconst [c] x)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64NEGL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SUBLconst, v.Type)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
+               c := v_1.AuxInt
+               v.reset(OpAMD64TESTWconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (SUBL x x)
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (TESTW l:(MOVWload {sym} [off] ptr mem) l2)
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               l2 := v.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+                       break
+               }
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (SUBL x l:(MOVLload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (SUBLmem x [off] {sym} ptr mem)
+       // match: (TESTW l2 l:(MOVWload {sym} [off] ptr mem))
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
+               l2 := v.Args[0]
                l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               if l.Op != OpAMD64MOVWload {
                        break
                }
                off := l.AuxInt
@@ -42229,54 +44694,70 @@ func rewriteValueAMD64_OpAMD64SUBL_0(v *Value) bool {
                _ = l.Args[1]
                ptr := l.Args[0]
                mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SUBLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SUBLconst_0(v *Value) bool {
-       // match: (SUBLconst [c] x)
-       // cond: int32(c) == 0
-       // result: x
+func rewriteValueAMD64_OpAMD64TESTWconst_0(v *Value) bool {
+       // match: (TESTWconst [-1] x)
+       // cond:
+       // result: (TESTW x x)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(int32(c) == 0) {
+               if v.AuxInt != -1 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               x := v.Args[0]
+               v.reset(OpAMD64TESTW)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
-       // match: (SUBLconst [c] x)
-       // cond:
-       // result: (ADDLconst [int64(int32(-c))] x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64XADDLlock_0(v *Value) bool {
+       // match: (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XADDLlock [off1+off2] {sym} val ptr mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               v.reset(OpAMD64ADDLconst)
-               v.AuxInt = int64(int32(-c))
-               v.AddArg(x)
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XADDLlock)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
+       return false
 }
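// Illustrative sketch, not part of this diff, with hypothetical names:
// XADDLlock backs 32-bit atomic add, and the rule above only folds a
// constant address offset into the instruction's displacement; the
// XADDQlock rule below is the 64-bit twin.
import "sync/atomic"

type counters struct {
	_ uint32
	n uint32 // offset 4: &c.n is (ADDQconst [4] c)
}

func bumpSketch(c *counters) uint32 {
	return atomic.AddUint32(&c.n, 1) // offset folds into XADDLlock's AuxInt
}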
-func rewriteValueAMD64_OpAMD64SUBLmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (SUBLmem [off1] {sym} val (ADDQconst [off2] base) mem)
+func rewriteValueAMD64_OpAMD64XADDQlock_0(v *Value) bool {
+       // match: (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (SUBLmem [off1+off2] {sym} val base mem)
+       // result: (XADDQlock [off1+off2] {sym} val ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -42287,22 +44768,51 @@ func rewriteValueAMD64_OpAMD64SUBLmem_0(v *Value) bool {
                        break
                }
                off2 := v_1.AuxInt
-               base := v_1.Args[0]
+               ptr := v_1.Args[0]
                mem := v.Args[2]
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SUBLmem)
+               v.reset(OpAMD64XADDQlock)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(val)
-               v.AddArg(base)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (SUBLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SUBLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64XCHGL_0(v *Value) bool {
+       // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XCHGL [off1+off2] {sym} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XCHGL)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+       // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -42314,299 +44824,325 @@ func rewriteValueAMD64_OpAMD64SUBLmem_0(v *Value) bool {
                }
                off2 := v_1.AuxInt
                sym2 := v_1.Aux
-               base := v_1.Args[0]
+               ptr := v_1.Args[0]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64SUBLmem)
+               v.reset(OpAMD64XCHGL)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(val)
-               v.AddArg(base)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (SUBLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
-       // cond:
-       // result: (SUBL x (MOVLf2i y))
+       return false
+}
+func rewriteValueAMD64_OpAMD64XCHGQ_0(v *Value) bool {
+       // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XCHGQ [off1+off2] {sym} val ptr mem)
        for {
-               off := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSSstore {
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if v_2.AuxInt != off {
+               off2 := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               if v_2.Aux != sym {
+               v.reset(OpAMD64XCHGQ)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+       // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64SUBL)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.reset(OpAMD64XCHGQ)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
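// Illustrative sketch, not part of this diff, with hypothetical names:
// XCHGL/XCHGQ back atomic swaps. The LEAQ rules fold a symbolic address into
// the instruction but deliberately skip an SB-based pointer (ptr.Op != OpSB),
// so a global's address is left in a register via LEAQ rather than merged
// into this read-modify-write memory operand.
import "sync/atomic"

var gSketch uint64

func swapSketch(v uint64) uint64 {
	return atomic.SwapUint64(&gSketch, v) // XCHGQ; &gSketch stays a LEAQ
}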
-func rewriteValueAMD64_OpAMD64SUBQ_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SUBQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (SUBQconst x [c])
+func rewriteValueAMD64_OpAMD64XORL_0(v *Value) bool {
+       // match: (XORL x (MOVLconst [c]))
+       // cond:
+       // result: (XORLconst [c] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64SUBQconst)
+               v.reset(OpAMD64XORLconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SUBQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (NEGQ (SUBQconst <v.Type> x [c]))
+       // match: (XORL (MOVLconst [c]) x)
+       // cond:
+       // result: (XORLconst [c] x)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SUBQconst, v.Type)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64XORLconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (SUBQ x x)
-       // cond:
-       // result: (MOVQconst [0])
+       // match: (XORL (SHLLconst x [c]) (SHRLconst x [d]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (SUBQ x l:(MOVQload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (SUBQmem x [off] {sym} ptr mem)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRLconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64SUBQmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SUBQconst_0(v *Value) bool {
-       // match: (SUBQconst [0] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 0 {
+               if !(d == 32-c) {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SUBQconst [c] x)
-       // cond: c != -(1<<31)
-       // result: (ADDQconst [-c] x)
+       // match: (XORL (SHRLconst x [d]) (SHLLconst x [c]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c != -(1 << 31)) {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = -c
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SUBQconst (MOVQconst [d]) [c])
-       // cond:
-       // result: (MOVQconst [d-c])
+       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
-               c := v.AuxInt
+               t := v.Type
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = d - c
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (SUBQconst (SUBQconst x [d]) [c])
-       // cond: is32Bit(-c-d)
-       // result: (ADDQconst [-c-d] x)
+       // match: (XORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
-               c := v.AuxInt
+               t := v.Type
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SUBQconst {
+               if v_0.Op != OpAMD64SHRWconst {
                        break
                }
                d := v_0.AuxInt
                x := v_0.Args[0]
-               if !(is32Bit(-c - d)) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = -c - d
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SUBQmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (SUBQmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SUBQmem [off1+off2] {sym} val base mem)
+       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: d==8-c && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64SHRBconst {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64SUBQmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (SUBQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SUBQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (XORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: d==8-c && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64SUBQmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (SUBQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
+       // match: (XORL x x)
        // cond:
-       // result: (SUBQ x (MOVQf2i y))
+       // result: (MOVLconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSDstore {
-                       break
-               }
-               if v_2.AuxInt != off {
+               if x != v.Args[1] {
                        break
                }
-               if v_2.Aux != sym {
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (XORL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (XORLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64SUBQ)
+               v.reset(OpAMD64XORLmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
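The ROLLconst/ROLWconst/ROLBconst rules above recognize the classic shift-pair rotate idiom at each width. A hypothetical source-level trigger, which compiles to a single ROLL once the first rule fires:

        // rol7 matches (XORL (SHLLconst x [7]) (SHRLconst x [25])), with d == 32-c.
        func rol7(x uint32) uint32 {
                return x<<7 ^ x>>25
        }

The word and byte variants also check t.Size(), so the narrower rotate is only used when the result is genuinely 16 or 8 bits wide.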
-func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool {
-       // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem))
+func rewriteValueAMD64_OpAMD64XORL_10(v *Value) bool {
+       // match: (XORL l:(MOVLload [off] {sym} ptr mem) x)
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (SUBSDmem x [off] {sym} ptr mem)
+       // result: (XORLmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVSDload {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
                off := l.AuxInt
@@ -42614,10 +45150,11 @@ func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool {
                _ = l.Args[1]
                ptr := l.Args[0]
                mem := l.Args[1]
+               x := v.Args[1]
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SUBSDmem)
+               v.reset(OpAMD64XORLmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(x)
@@ -42627,544 +45164,421 @@ func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool {
        }
        return false
 }
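XORL_0 and XORL_10 together cover both operand orders of the load-merge rule: an XOR whose other operand is a mergeable MOVLload (the canMergeLoad check: single use, schedulable at the XOR) collapses into XORLmem, and clobber marks the load dead for the deadcode pass. A hypothetical function where it applies:

        // The load of *p folds into the XOR's memory operand,
        // emitting one XORL with a memory source instead of MOVL+XORL.
        func xorMem(x uint32, p *uint32) uint32 {
                return x ^ *p
        }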
-func rewriteValueAMD64_OpAMD64SUBSDmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (SUBSDmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SUBSDmem [off1+off2] {sym} val base mem)
+func rewriteValueAMD64_OpAMD64XORLconst_0(v *Value) bool {
+       // match: (XORLconst [1] (SETNE x))
+       // cond:
+       // result: (SETEQ x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v.AuxInt != 1 {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETNE {
                        break
                }
-               v.reset(OpAMD64SUBSDmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETEQ)
+               v.AddArg(x)
                return true
        }
-       // match: (SUBSDmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SUBSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (XORLconst [1] (SETEQ x))
+       // cond:
+       // result: (SETNE x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               if v.AuxInt != 1 {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETEQ {
                        break
                }
-               v.reset(OpAMD64SUBSDmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETNE)
+               v.AddArg(x)
                return true
        }
-       // match: (SUBSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _))
+       // match: (XORLconst [1] (SETL x))
        // cond:
-       // result: (SUBSD x (MOVQi2f y))
+       // result: (SETGE x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVQstore {
+               if v.AuxInt != 1 {
                        break
                }
-               if v_2.AuxInt != off {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETL {
                        break
                }
-               if v_2.Aux != sym {
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETGE)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETGE x))
+       // cond:
+       // result: (SETL x)
+       for {
+               if v.AuxInt != 1 {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETGE {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64SUBSD)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETL)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQi2f, typ.Float64)
-               v0.AddArg(y)
-               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SUBSS_0(v *Value) bool {
-       // match: (SUBSS x l:(MOVSSload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (SUBSSmem x [off] {sym} ptr mem)
+       // match: (XORLconst [1] (SETLE x))
+       // cond:
+       // result: (SETG x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVSSload {
+               if v.AuxInt != 1 {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETLE {
                        break
                }
-               v.reset(OpAMD64SUBSSmem)
-               v.AuxInt = off
-               v.Aux = sym
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETG)
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SUBSSmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (SUBSSmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SUBSSmem [off1+off2] {sym} val base mem)
+       // match: (XORLconst [1] (SETG x))
+       // cond:
+       // result: (SETLE x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v.AuxInt != 1 {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETG {
                        break
                }
-               v.reset(OpAMD64SUBSSmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETLE)
+               v.AddArg(x)
                return true
        }
-       // match: (SUBSSmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SUBSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (XORLconst [1] (SETB x))
+       // cond:
+       // result: (SETAE x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               if v.AuxInt != 1 {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETB {
                        break
                }
-               v.reset(OpAMD64SUBSSmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETAE)
+               v.AddArg(x)
                return true
        }
-       // match: (SUBSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _))
+       // match: (XORLconst [1] (SETAE x))
        // cond:
-       // result: (SUBSS x (MOVLi2f y))
+       // result: (SETB x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVLstore {
+               if v.AuxInt != 1 {
                        break
                }
-               if v_2.AuxInt != off {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETAE {
                        break
                }
-               if v_2.Aux != sym {
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETB)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETBE x))
+       // cond:
+       // result: (SETA x)
+       for {
+               if v.AuxInt != 1 {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETBE {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64SUBSS)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETA)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLi2f, typ.Float32)
-               v0.AddArg(y)
-               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64TESTB_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (TESTB (MOVLconst [c]) x)
+       // match: (XORLconst [1] (SETA x))
        // cond:
-       // result: (TESTBconst [c] x)
+       // result: (SETBE x)
        for {
-               _ = v.Args[1]
+               if v.AuxInt != 1 {
+                       break
+               }
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64SETA {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64TESTBconst)
-               v.AuxInt = c
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETBE)
                v.AddArg(x)
                return true
        }
-       // match: (TESTB x (MOVLconst [c]))
+       return false
+}
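XORLconst_0 is a table of condition-code inversions: XOR-ing a SETcc result with 1 flips the condition (SETNE/SETEQ, SETL/SETGE, SETB/SETAE, and so on), so boolean negation folds into the flag test itself. A hypothetical trigger:

        // !b lowers to XORLconst [1]; when b is itself a SETL result,
        // the rule rewrites SETL+XOR into a single SETGE.
        func not(b bool) bool {
                return !b
        }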
+func rewriteValueAMD64_OpAMD64XORLconst_10(v *Value) bool {
+       // match: (XORLconst [c] (XORLconst [d] x))
        // cond:
-       // result: (TESTBconst [c] x)
+       // result: (XORLconst [c ^ d] x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64XORLconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64TESTBconst)
-               v.AuxInt = c
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64XORLconst)
+               v.AuxInt = c ^ d
                v.AddArg(x)
                return true
        }
-       // match: (TESTB l:(MOVBload {sym} [off] ptr mem) l2)
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORLconst [c] x)
+       // cond: int32(c)==0
+       // result: x
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVBload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               l2 := v.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(int32(c) == 0) {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (TESTB l2 l:(MOVBload {sym} [off] ptr mem))
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORLconst [c] (MOVLconst [d]))
+       // cond:
+       // result: (MOVLconst [c^d])
        for {
-               _ = v.Args[1]
-               l2 := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVBload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = c ^ d
                return true
        }
        return false
 }
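XORLconst_10 finishes the constant story: nested XORLconst ops combine into one, an XORLconst whose constant has all-zero low 32 bits is dropped, and an XOR of a known MOVLconst folds away entirely. For example:

        // (x ^ 0x0F) ^ 0xF0 becomes XORLconst [0x0F^0xF0] x,
        // i.e. a single XORL $255 at runtime.
        func doubleMask(x uint32) uint32 {
                return (x ^ 0x0F) ^ 0xF0
        }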
-func rewriteValueAMD64_OpAMD64TESTL_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64XORLmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (TESTL (MOVLconst [c]) x)
-       // cond:
-       // result: (TESTLconst [c] x)
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (XORLmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XORLmem [off1+off2] {sym} val base mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64TESTLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XORLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (TESTL x (MOVLconst [c]))
-       // cond:
-       // result: (TESTLconst [c] x)
+       // match: (XORLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (XORLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64TESTLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64XORLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (TESTL l:(MOVLload {sym} [off] ptr mem) l2)
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
+       // cond:
+       // result: (XORL x (MOVLf2i y))
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVSSstore {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               l2 := v.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               if v_2.AuxInt != off {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (TESTL l2 l:(MOVLload {sym} [off] ptr mem))
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(0,off)] ptr mem)
-       for {
-               _ = v.Args[1]
-               l2 := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               if v_2.Aux != sym {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
-               v.reset(OpCopy)
+               y := v_2.Args[1]
+               v.reset(OpAMD64XORL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
+               v0.AddArg(y)
                v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
                return true
        }
        return false
 }
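The last XORLmem rule is a store-to-load forwarding special case: when the memory operand was just written by a MOVSSstore to the same [off] {sym} ptr, the stored float is forwarded through a register move (MOVLf2i) instead of a store/load round trip. The pattern typically arises from bit-reinterpreting code such as this hypothetical example:

        package main

        import "math"

        // The store/load pair behind Float32bits can be forwarded
        // straight into the XOR via MOVLf2i, with no memory traffic for f.
        func xorBits(x uint32, f float32) uint32 {
                return x ^ math.Float32bits(f)
        }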
-func rewriteValueAMD64_OpAMD64TESTQ_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (TESTQ (MOVQconst [c]) x)
+func rewriteValueAMD64_OpAMD64XORQ_0(v *Value) bool {
+       // match: (XORQ x (MOVQconst [c]))
        // cond: is32Bit(c)
-       // result: (TESTQconst [c] x)
+       // result: (XORQconst [c] x)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpAMD64TESTQconst)
+               v.reset(OpAMD64XORQconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (TESTQ x (MOVQconst [c]))
+       // match: (XORQ (MOVQconst [c]) x)
        // cond: is32Bit(c)
-       // result: (TESTQconst [c] x)
+       // result: (XORQconst [c] x)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpAMD64TESTQconst)
+               v.reset(OpAMD64XORQconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (TESTQ l:(MOVQload {sym} [off] ptr mem) l2)
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORQ (SHLQconst x [c]) (SHRQconst x [d]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               l2 := v.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (TESTQ l2 l:(MOVQload {sym} [off] ptr mem))
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(0,off)] ptr mem)
-       for {
-               _ = v.Args[1]
-               l2 := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               if !(d == 64-c) {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64TESTW_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (TESTW (MOVLconst [c]) x)
-       // cond:
-       // result: (TESTWconst [c] x)
+       // match: (XORQ (SHRQconst x [d]) (SHLQconst x [c]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64TESTWconst)
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLQconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (TESTW x (MOVLconst [c]))
+       // match: (XORQ x x)
        // cond:
-       // result: (TESTWconst [c] x)
+       // result: (MOVQconst [0])
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if x != v.Args[1] {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64TESTWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (TESTW l:(MOVWload {sym} [off] ptr mem) l2)
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORQ x l:(MOVQload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (XORQmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVWload {
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
                off := l.AuxInt
@@ -43172,28 +45586,24 @@ func rewriteValueAMD64_OpAMD64TESTW_0(v *Value) bool {
                _ = l.Args[1]
                ptr := l.Args[0]
                mem := l.Args[1]
-               l2 := v.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64XORQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (TESTW l2 l:(MOVWload {sym} [off] ptr mem))
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORQ l:(MOVQload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (XORQmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
-               l2 := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVWload {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
                off := l.AuxInt
@@ -43201,83 +45611,74 @@ func rewriteValueAMD64_OpAMD64TESTW_0(v *Value) bool {
                _ = l.Args[1]
                ptr := l.Args[0]
                mem := l.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64XORQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
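XORQ mirrors XORL, but its constant rules keep the is32Bit guard: x86-64 has no 64-bit XOR immediate, only a sign-extended 32-bit one, so wider MOVQconst operands must stay in a register. The rotate pair uses d == 64-c:

        // rotl13 matches (XORQ (SHLQconst x [13]) (SHRQconst x [51])),
        // which the rule rewrites to a single ROLQconst.
        func rotl13(x uint64) uint64 {
                return x<<13 ^ x>>51
        }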
-func rewriteValueAMD64_OpAMD64XADDLlock_0(v *Value) bool {
-       // match: (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (XADDLlock [off1+off2] {sym} val ptr mem)
+func rewriteValueAMD64_OpAMD64XORQconst_0(v *Value) bool {
+       // match: (XORQconst [c] (XORQconst [d] x))
+       // cond:
+       // result: (XORQconst [c ^ d] x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64XORQconst {
                        break
                }
-               v.reset(OpAMD64XADDLlock)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64XORQconst)
+               v.AuxInt = c ^ d
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64XADDQlock_0(v *Value) bool {
-       // match: (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (XADDQlock [off1+off2] {sym} val ptr mem)
+       // match: (XORQconst [0] x)
+       // cond:
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v.AuxInt != 0 {
                        break
                }
-               off2 := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORQconst [c] (MOVQconst [d]))
+       // cond:
+       // result: (MOVQconst [c^d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64XADDQlock)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = c ^ d
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64XCHGL_0(v *Value) bool {
-       // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem)
+func rewriteValueAMD64_OpAMD64XORQmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (XORQmem [off1] {sym} val (ADDQconst [off2] base) mem)
        // cond: is32Bit(off1+off2)
-       // result: (XCHGL [off1+off2] {sym} val ptr mem)
+       // result: (XORQmem [off1+off2] {sym} val base mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -43288,22 +45689,22 @@ func rewriteValueAMD64_OpAMD64XCHGL_0(v *Value) bool {
                        break
                }
                off2 := v_1.AuxInt
-               ptr := v_1.Args[0]
+               base := v_1.Args[0]
                mem := v.Args[2]
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64XCHGL)
+               v.reset(OpAMD64XORQmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(val)
-               v.AddArg(ptr)
+               v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
-       // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       // match: (XORQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (XORQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -43315,1629 +45716,1806 @@ func rewriteValueAMD64_OpAMD64XCHGL_0(v *Value) bool {
                }
                off2 := v_1.AuxInt
                sym2 := v_1.Aux
-               ptr := v_1.Args[0]
+               base := v_1.Args[0]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64XCHGL)
+               v.reset(OpAMD64XORQmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(val)
-               v.AddArg(ptr)
+               v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64XCHGQ_0(v *Value) bool {
-       // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (XCHGQ [off1+off2] {sym} val ptr mem)
+       // match: (XORQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
+       // cond:
+       // result: (XORQ x (MOVQf2i y))
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVSDstore {
                        break
                }
-               off2 := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if v_2.AuxInt != off {
                        break
                }
-               v.reset(OpAMD64XCHGQ)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64XORQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
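From here the file turns from peephole rules over machine ops to lowering rules for generic SSA ops: Add8/Add16/Add32 all become ADDL and Add64 becomes ADDQ, since the backend prefers full 32-bit ALU operations over partial-register 8- and 16-bit forms and leaves the upper bits undefined until an extension makes them observable. For instance:

        // Both sums lower to the same ADDL; only any later
        // truncation or sign-extension differs by width.
        func add(a, b int16, c, d int32) (int16, int32) {
                return a + b, c + d
        }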
+func rewriteValueAMD64_OpAdd16_0(v *Value) bool {
+       // match: (Add16 x y)
+       // cond:
+       // result: (ADDL x y)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ADDL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAdd32_0(v *Value) bool {
+       // match: (Add32 x y)
+       // cond:
+       // result: (ADDL x y)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ADDL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAdd32F_0(v *Value) bool {
+       // match: (Add32F x y)
+       // cond:
+       // result: (ADDSS x y)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ADDSS)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
-       // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+}
+func rewriteValueAMD64_OpAdd64_0(v *Value) bool {
+       // match: (Add64 x y)
+       // cond:
+       // result: (ADDQ x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
-                       break
-               }
-               v.reset(OpAMD64XCHGQ)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ADDQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORL_0(v *Value) bool {
-       // match: (XORL x (MOVLconst [c]))
+func rewriteValueAMD64_OpAdd64F_0(v *Value) bool {
+       // match: (Add64F x y)
        // cond:
-       // result: (XORLconst [c] x)
+       // result: (ADDSD x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = c
+               y := v.Args[1]
+               v.reset(OpAMD64ADDSD)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL (MOVLconst [c]) x)
+}
+func rewriteValueAMD64_OpAdd8_0(v *Value) bool {
+       // match: (Add8 x y)
        // cond:
-       // result: (XORLconst [c] x)
+       // result: (ADDL x y)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ADDL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL (SHLLconst x [c]) (SHRLconst x [d]))
-       // cond: d==32-c
-       // result: (ROLLconst x [c])
+}
+func rewriteValueAMD64_OpAddPtr_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (AddPtr x y)
+       // cond: config.PtrSize == 8
+       // result: (ADDQ x y)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRLconst {
-                       break
-               }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 32-c) {
+               x := v.Args[0]
+               y := v.Args[1]
+               if !(config.PtrSize == 8) {
                        break
                }
-               v.reset(OpAMD64ROLLconst)
-               v.AuxInt = c
+               v.reset(OpAMD64ADDQ)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL (SHRLconst x [d]) (SHLLconst x [c]))
-       // cond: d==32-c
-       // result: (ROLLconst x [c])
+       // match: (AddPtr x y)
+       // cond: config.PtrSize == 4
+       // result: (ADDL x y)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRLconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 32-c) {
+               x := v.Args[0]
+               y := v.Args[1]
+               if !(config.PtrSize == 4) {
                        break
                }
-               v.reset(OpAMD64ROLLconst)
-               v.AuxInt = c
+               v.reset(OpAMD64ADDL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
-       // cond: d==16-c && c < 16 && t.Size() == 2
-       // result: (ROLWconst x [c])
+       return false
+}
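AddPtr (and Addr just below) dispatches on config.PtrSize rather than assuming 8, presumably to serve the amd64p32 port, where pointers are 4 bytes. In AMD64.rules form these read roughly (paraphrased):

        (AddPtr x y) && config.PtrSize == 8 -> (ADDQ x y)
        (AddPtr x y) && config.PtrSize == 4 -> (ADDL x y)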
+func rewriteValueAMD64_OpAddr_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (Addr {sym} base)
+       // cond: config.PtrSize == 8
+       // result: (LEAQ {sym} base)
        for {
-               t := v.Type
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRWconst {
-                       break
-               }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
+               sym := v.Aux
+               base := v.Args[0]
+               if !(config.PtrSize == 8) {
                        break
                }
-               if !(d == 16-c && c < 16 && t.Size() == 2) {
+               v.reset(OpAMD64LEAQ)
+               v.Aux = sym
+               v.AddArg(base)
+               return true
+       }
+       // match: (Addr {sym} base)
+       // cond: config.PtrSize == 4
+       // result: (LEAL {sym} base)
+       for {
+               sym := v.Aux
+               base := v.Args[0]
+               if !(config.PtrSize == 4) {
                        break
                }
-               v.reset(OpAMD64ROLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64LEAL)
+               v.Aux = sym
+               v.AddArg(base)
                return true
        }
-       // match: (XORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
-       // cond: d==16-c && c < 16 && t.Size() == 2
-       // result: (ROLWconst x [c])
+       return false
+}
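
The AddPtr and Addr rules above are the first in this hunk to consult config.PtrSize: with 8-byte pointers they lower to the 64-bit ADDQ/LEAQ, while the 4-byte case keeps amd64p32 working with ADDL/LEAL. As a sketch of the kind of source that reaches the AddPtr rule (assuming the usual generic lowering of the element-address computation into an AddPtr value):

    package demo

    // &a[i] is computed as a.ptr + 8*i; the pointer addition is an
    // (AddPtr ...) value, lowered by the rule above to ADDQ on amd64.
    func elem(a []int64, i int) *int64 {
            return &a[i]
    }
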
+func rewriteValueAMD64_OpAnd16_0(v *Value) bool {
+       // match: (And16 x y)
+       // cond:
+       // result: (ANDL x y)
        for {
-               t := v.Type
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRWconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 16-c && c < 16 && t.Size() == 2) {
-                       break
-               }
-               v.reset(OpAMD64ROLWconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ANDL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
-       // cond: d==8-c && c < 8 && t.Size() == 1
-       // result: (ROLBconst x [c])
+}
+func rewriteValueAMD64_OpAnd32_0(v *Value) bool {
+       // match: (And32 x y)
+       // cond:
+       // result: (ANDL x y)
        for {
-               t := v.Type
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRBconst {
-                       break
-               }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 8-c && c < 8 && t.Size() == 1) {
-                       break
-               }
-               v.reset(OpAMD64ROLBconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ANDL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
-       // cond: d==8-c && c < 8 && t.Size() == 1
-       // result: (ROLBconst x [c])
+}
+func rewriteValueAMD64_OpAnd64_0(v *Value) bool {
+       // match: (And64 x y)
+       // cond:
+       // result: (ANDQ x y)
        for {
-               t := v.Type
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRBconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 8-c && c < 8 && t.Size() == 1) {
-                       break
-               }
-               v.reset(OpAMD64ROLBconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ANDQ)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL x x)
+}
+func rewriteValueAMD64_OpAnd8_0(v *Value) bool {
+       // match: (And8 x y)
        // cond:
-       // result: (MOVLconst [0])
+       // result: (ANDL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               if x != v.Args[1] {
-                       break
-               }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               y := v.Args[1]
+               v.reset(OpAMD64ANDL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL x l:(MOVLload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (XORLmem x [off] {sym} ptr mem)
+}
+func rewriteValueAMD64_OpAndB_0(v *Value) bool {
+       // match: (AndB x y)
+       // cond:
+       // result: (ANDL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64XORLmem)
-               v.AuxInt = off
-               v.Aux = sym
+               y := v.Args[1]
+               v.reset(OpAMD64ANDL)
                v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+}
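
And8, And16, And32 and AndB all funnel into the same 32-bit ANDL; only And64 needs ANDQ. Sub-word values are held in registers with don't-care upper bits, so one 32-bit AND covers every narrow width (and avoids the REX.W prefix ANDQ would need). A minimal illustration:

    package demo

    // All three lower to ANDL on amd64; only the uint64 version uses ANDQ.
    func and8(a, b uint8) uint8    { return a & b }
    func and32(a, b uint32) uint32 { return a & b }
    func and64(a, b uint64) uint64 { return a & b }
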
+func rewriteValueAMD64_OpAtomicAdd32_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (AtomicAdd32 ptr val mem)
+       // cond:
+       // result: (AddTupleFirst32 val (XADDLlock val ptr mem))
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64AddTupleFirst32)
+               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicAdd64_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (AtomicAdd64 ptr val mem)
+       // cond:
+       // result: (AddTupleFirst64 val (XADDQlock val ptr mem))
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64AddTupleFirst64)
+               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+}
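
XADDLlock/XADDQlock are LOCK XADD, which returns the old memory value, while Go's atomic add reports the new one; AddTupleFirst32/64 re-applies val to the first tuple element to bridge the two. Through the public API:

    package demo

    import "sync/atomic"

    var n int64

    // Lowered via (AddTupleFirst64 8 (XADDQlock 8 &n mem)): LOCK XADDQ
    // leaves the previous value of n in the register, and the +8 is
    // added back so the caller sees the post-increment value.
    func bump() int64 {
            return atomic.AddInt64(&n, 8)
    }
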
+func rewriteValueAMD64_OpAtomicAnd8_0(v *Value) bool {
+       // match: (AtomicAnd8 ptr val mem)
+       // cond:
+       // result: (ANDBlock ptr val mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64ANDBlock)
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORL_10(v *Value) bool {
-       // match: (XORL l:(MOVLload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (XORLmem x [off] {sym} ptr mem)
+func rewriteValueAMD64_OpAtomicCompareAndSwap32_0(v *Value) bool {
+       // match: (AtomicCompareAndSwap32 ptr old new_ mem)
+       // cond:
+       // result: (CMPXCHGLlock ptr old new_ mem)
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64XORLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64CMPXCHGLlock)
                v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
                v.AddArg(mem)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORLconst_0(v *Value) bool {
-       // match: (XORLconst [1] (SETNE x))
+func rewriteValueAMD64_OpAtomicCompareAndSwap64_0(v *Value) bool {
+       // match: (AtomicCompareAndSwap64 ptr old new_ mem)
        // cond:
-       // result: (SETEQ x)
+       // result: (CMPXCHGQlock ptr old new_ mem)
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETNE {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETEQ)
-               v.AddArg(x)
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64CMPXCHGQlock)
+               v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
+               v.AddArg(mem)
                return true
        }
-       // match: (XORLconst [1] (SETEQ x))
+}
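
The compare-and-swap lowerings are one-to-one: CMPXCHGLlock/CMPXCHGQlock are LOCK CMPXCHG, and the success bool comes from the ZF it leaves behind. The matching source-level form:

    package demo

    import "sync/atomic"

    var state int64

    // (AtomicCompareAndSwap64 &state old newv mem) -> (CMPXCHGQlock ...).
    // LOCK CMPXCHG sets ZF exactly when the swap happened, which
    // materializes the boolean result.
    func claim(old, newv int64) bool {
            return atomic.CompareAndSwapInt64(&state, old, newv)
    }
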
+func rewriteValueAMD64_OpAtomicExchange32_0(v *Value) bool {
+       // match: (AtomicExchange32 ptr val mem)
        // cond:
-       // result: (SETNE x)
+       // result: (XCHGL val ptr mem)
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETEQ {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64XCHGL)
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicExchange64_0(v *Value) bool {
+       // match: (AtomicExchange64 ptr val mem)
+       // cond:
+       // result: (XCHGQ val ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64XCHGQ)
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicLoad32_0(v *Value) bool {
+       // match: (AtomicLoad32 ptr mem)
+       // cond:
+       // result: (MOVLatomicload ptr mem)
+       for {
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVLatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicLoad64_0(v *Value) bool {
+       // match: (AtomicLoad64 ptr mem)
+       // cond:
+       // result: (MOVQatomicload ptr mem)
+       for {
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicLoadPtr_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (AtomicLoadPtr ptr mem)
+       // cond: config.PtrSize == 8
+       // result: (MOVQatomicload ptr mem)
+       for {
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               if !(config.PtrSize == 8) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETNE)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (XORLconst [1] (SETL x))
-       // cond:
-       // result: (SETGE x)
+       // match: (AtomicLoadPtr ptr mem)
+       // cond: config.PtrSize == 4
+       // result: (MOVLatomicload ptr mem)
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETL {
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               if !(config.PtrSize == 4) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETGE)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVLatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (XORLconst [1] (SETGE x))
+       return false
+}
+func rewriteValueAMD64_OpAtomicOr8_0(v *Value) bool {
+       // match: (AtomicOr8 ptr val mem)
        // cond:
-       // result: (SETL x)
+       // result: (ORBlock ptr val mem)
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETGE {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETL)
-               v.AddArg(x)
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64ORBlock)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (XORLconst [1] (SETLE x))
+}
+func rewriteValueAMD64_OpAtomicStore32_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (AtomicStore32 ptr val mem)
        // cond:
-       // result: (SETG x)
+       // result: (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETLE {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETG)
-               v.AddArg(x)
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
-       // match: (XORLconst [1] (SETG x))
+}
+func rewriteValueAMD64_OpAtomicStore64_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (AtomicStore64 ptr val mem)
        // cond:
-       // result: (SETLE x)
+       // result: (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETG {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETLE)
-               v.AddArg(x)
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
-       // match: (XORLconst [1] (SETB x))
-       // cond:
-       // result: (SETAE x)
+}
+func rewriteValueAMD64_OpAtomicStorePtrNoWB_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // cond: config.PtrSize == 8
+       // result: (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETB {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(config.PtrSize == 8) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETAE)
-               v.AddArg(x)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
-       // match: (XORLconst [1] (SETAE x))
-       // cond:
-       // result: (SETB x)
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // cond: config.PtrSize == 4
+       // result: (Select1 (XCHGL <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETAE {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(config.PtrSize == 4) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETB)
-               v.AddArg(x)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.BytePtr, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
-       // match: (XORLconst [1] (SETBE x))
+       return false
+}
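
Atomic stores lower to XCHGL/XCHGQ rather than a plain MOV: XCHG with a memory operand carries an implicit LOCK, which is what makes the store sequentially consistent. The exchange yields an (old value, memory) tuple, and the Select1 keeps only the memory half; the exchanged-out old value is dead. For example:

    package demo

    import "sync/atomic"

    var flag uint32

    // (AtomicStore32 &flag 1 mem) -> (Select1 (XCHGL 1 &flag mem)).
    func publish() {
            atomic.StoreUint32(&flag, 1)
    }
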
+func rewriteValueAMD64_OpAvg64u_0(v *Value) bool {
+       // match: (Avg64u x y)
        // cond:
-       // result: (SETA x)
+       // result: (AVGQU x y)
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETBE {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETA)
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64AVGQU)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORLconst [1] (SETA x))
+}
+func rewriteValueAMD64_OpBitLen32_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (BitLen32 x)
        // cond:
-       // result: (SETBE x)
+       // result: (BitLen64 (MOVLQZX <typ.UInt64> x))
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETA {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETBE)
-               v.AddArg(x)
+               x := v.Args[0]
+               v.reset(OpBitLen64)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORLconst_10(v *Value) bool {
-       // match: (XORLconst [c] (XORLconst [d] x))
+func rewriteValueAMD64_OpBitLen64_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (BitLen64 <t> x)
        // cond:
-       // result: (XORLconst [c ^ d] x)
+       // result: (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64XORLconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = c ^ d
-               v.AddArg(x)
+               t := v.Type
+               x := v.Args[0]
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t)
+               v1 := b.NewValue0(v.Pos, OpSelect0, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
+               v3.AuxInt = -1
+               v0.AddArg(v3)
+               v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v5 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v5.AddArg(x)
+               v4.AddArg(v5)
+               v0.AddArg(v4)
+               v.AddArg(v0)
                return true
        }
-       // match: (XORLconst [c] x)
-       // cond: int32(c)==0
-       // result: x
+}
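
The BitLen64 lowering shows why an op like CMOVQEQ is useful in the first place: BSR leaves its destination undefined and sets ZF when the input is zero, so the CMOVQEQ above substitutes -1 for the undefined Select0 result, and the trailing ADDQconst [1] converts a bit index into a length. Worked through math/bits:

    package main

    import (
            "fmt"
            "math/bits"
    )

    func main() {
            // BSRQ(8) = 3 (index of the highest set bit); 3+1 = 4.
            fmt.Println(bits.Len64(8)) // 4
            // BSRQ(0) is undefined but sets ZF, so CMOVQEQ picks -1; -1+1 = 0.
            fmt.Println(bits.Len64(0)) // 0
    }
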
+func rewriteValueAMD64_OpBswap32_0(v *Value) bool {
+       // match: (Bswap32 x)
+       // cond:
+       // result: (BSWAPL x)
        for {
-               c := v.AuxInt
                x := v.Args[0]
-               if !(int32(c) == 0) {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpAMD64BSWAPL)
                v.AddArg(x)
                return true
        }
-       // match: (XORLconst [c] (MOVLconst [d]))
+}
+func rewriteValueAMD64_OpBswap64_0(v *Value) bool {
+       // match: (Bswap64 x)
        // cond:
-       // result: (MOVLconst [c^d])
+       // result: (BSWAPQ x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = c ^ d
+               x := v.Args[0]
+               v.reset(OpAMD64BSWAPQ)
+               v.AddArg(x)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORLmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (XORLmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (XORLmem [off1+off2] {sym} val base mem)
+func rewriteValueAMD64_OpCeil_0(v *Value) bool {
+       // match: (Ceil x)
+       // cond:
+       // result: (ROUNDSD [2] x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64XORLmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v.Args[0]
+               v.reset(OpAMD64ROUNDSD)
+               v.AuxInt = 2
+               v.AddArg(x)
                return true
        }
-       // match: (XORLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (XORLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+}
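
Ceil lowers to SSE4.1 ROUNDSD with immediate 2; the low two bits of the immediate select the rounding mode (0 = nearest, 1 = toward -Inf, 2 = toward +Inf, 3 = truncate), so the Floor and Trunc rules elsewhere in this file use [1] and [3] against the same instruction. A quick check of the mode:

    package main

    import (
            "fmt"
            "math"
    )

    func main() {
            // math.Ceil is intrinsified to ROUNDSD $2 (round toward +Inf).
            fmt.Println(math.Ceil(1.1))  // 2
            fmt.Println(math.Ceil(-1.1)) // -1
    }
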
+func rewriteValueAMD64_OpClosureCall_0(v *Value) bool {
+       // match: (ClosureCall [argwid] entry closure mem)
+       // cond:
+       // result: (CALLclosure [argwid] entry closure mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               argwid := v.AuxInt
                _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
+               entry := v.Args[0]
+               closure := v.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64XORLmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
+               v.reset(OpAMD64CALLclosure)
+               v.AuxInt = argwid
+               v.AddArg(entry)
+               v.AddArg(closure)
                v.AddArg(mem)
                return true
        }
-       // match: (XORLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
+}
+func rewriteValueAMD64_OpCom16_0(v *Value) bool {
+       // match: (Com16 x)
        // cond:
-       // result: (XORL x (MOVLf2i y))
+       // result: (NOTL x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
                x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSSstore {
-                       break
-               }
-               if v_2.AuxInt != off {
-                       break
-               }
-               if v_2.Aux != sym {
-                       break
-               }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
-                       break
-               }
-               y := v_2.Args[1]
-               v.reset(OpAMD64XORL)
+               v.reset(OpAMD64NOTL)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
-               v0.AddArg(y)
-               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORQ_0(v *Value) bool {
-       // match: (XORQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (XORQconst [c] x)
+func rewriteValueAMD64_OpCom32_0(v *Value) bool {
+       // match: (Com32 x)
+       // cond:
+       // result: (NOTL x)
        for {
-               _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64XORQconst)
-               v.AuxInt = c
+               v.reset(OpAMD64NOTL)
                v.AddArg(x)
                return true
        }
-       // match: (XORQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (XORQconst [c] x)
+}
+func rewriteValueAMD64_OpCom64_0(v *Value) bool {
+       // match: (Com64 x)
+       // cond:
+       // result: (NOTQ x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64XORQconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               v.reset(OpAMD64NOTQ)
                v.AddArg(x)
                return true
        }
-       // match: (XORQ (SHLQconst x [c]) (SHRQconst x [d]))
-       // cond: d==64-c
-       // result: (ROLQconst x [c])
+}
+func rewriteValueAMD64_OpCom8_0(v *Value) bool {
+       // match: (Com8 x)
+       // cond:
+       // result: (NOTL x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 64-c) {
-                       break
-               }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               v.reset(OpAMD64NOTL)
                v.AddArg(x)
                return true
        }
-       // match: (XORQ (SHRQconst x [d]) (SHLQconst x [c]))
-       // cond: d==64-c
-       // result: (ROLQconst x [c])
+}
+func rewriteValueAMD64_OpCondSelect_0(v *Value) bool {
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQEQ y x cond)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQ {
                        break
                }
-               if !(d == 64-c) {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = c
+               v.reset(OpAMD64CMOVQEQ)
+               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQ x x)
-       // cond:
-       // result: (MOVQconst [0])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
-                       break
-               }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (XORQ x l:(MOVQload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (XORQmem x [off] {sym} ptr mem)
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNE y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNE {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               v.reset(OpAMD64XORQmem)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpAMD64CMOVQNE)
+               v.AddArg(y)
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQ l:(MOVQload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (XORQmem x [off] {sym} ptr mem)
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLT y x cond)
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETL {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               v.reset(OpAMD64XORQmem)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpAMD64CMOVQLT)
+               v.AddArg(y)
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64XORQconst_0(v *Value) bool {
-       // match: (XORQconst [c] (XORQconst [d] x))
-       // cond:
-       // result: (XORQconst [c ^ d] x)
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGT y x cond)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64XORQconst {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETG {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64XORQconst)
-               v.AuxInt = c ^ d
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGT)
+               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQconst [0] x)
-       // cond:
-       // result: x
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLE y x cond)
        for {
-               if v.AuxInt != 0 {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETLE {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQLE)
+               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQconst [c] (MOVQconst [d]))
-       // cond:
-       // result: (MOVQconst [c^d])
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGE y x cond)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGE {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = c ^ d
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGE)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64XORQmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (XORQmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (XORQmem [off1+off2] {sym} val base mem)
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQHI y x cond)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
+               t := v.Type
                _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETA {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               v.reset(OpAMD64XORQmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64CMOVQHI)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (XORQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQCS y x cond)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               t := v.Type
                _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETB {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               v.reset(OpAMD64XORQmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64CMOVQCS)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
-       // cond:
-       // result: (XORQ x (MOVQf2i y))
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQCC y x cond)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               t := v.Type
                _ = v.Args[2]
                x := v.Args[0]
-               ptr := v.Args[1]
+               y := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSDstore {
+               if v_2.Op != OpAMD64SETAE {
                        break
                }
-               if v_2.AuxInt != off {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               if v_2.Aux != sym {
+               v.reset(OpAMD64CMOVQCC)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLS y x cond)
+       for {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETBE {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64XORQ)
+               v.reset(OpAMD64CMOVQLS)
+               v.AddArg(y)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(cond)
                return true
        }
        return false
 }
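
These CondSelect rules are the heart of the CL: the branchelim pass emits (CondSelect x y bool) for small diamond/triangle control flow, and when the bool is still a SETcc of some flags value, the pair collapses into a single CMOVQcc. Note the operand swap in (CondSelect x y (SETcc f)) -> (CMOVQcc y x f): CMOVcc writes its destination only when cc holds, so the condition-false value y must come first. A function that should now compile to CMOVQGT instead of a branch, as a sketch:

    package demo

    // branchelim should turn the triangle into
    // (CondSelect a b (SETG flags)), lowered here to CMOVQGT.
    func max(a, b int64) int64 {
            r := b
            if a > b {
                    r = a
            }
            return r
    }
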
-func rewriteValueAMD64_OpAdd16_0(v *Value) bool {
-       // match: (Add16 x y)
-       // cond:
-       // result: (ADDL x y)
+func rewriteValueAMD64_OpCondSelect_10(v *Value) bool {
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQEQF y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQEQF)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAdd32_0(v *Value) bool {
-       // match: (Add32 x y)
-       // cond:
-       // result: (ADDL x y)
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNEF y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQNEF)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAdd32F_0(v *Value) bool {
-       // match: (Add32F x y)
-       // cond:
-       // result: (ADDSS x y)
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGTF y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDSS)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGTF)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAdd64_0(v *Value) bool {
-       // match: (Add64 x y)
-       // cond:
-       // result: (ADDQ x y)
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGEF y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDQ)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGEF)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAdd64F_0(v *Value) bool {
-       // match: (Add64F x y)
-       // cond:
-       // result: (ADDSD x y)
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLEQ y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDSD)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQ {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLEQ)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAdd8_0(v *Value) bool {
-       // match: (Add8 x y)
-       // cond:
-       // result: (ADDL x y)
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLNE y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNE)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAddPtr_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (AddPtr x y)
-       // cond: config.PtrSize == 8
-       // result: (ADDQ x y)
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLT y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               if !(config.PtrSize == 8) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETL {
                        break
                }
-               v.reset(OpAMD64ADDQ)
-               v.AddArg(x)
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLLT)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (AddPtr x y)
-       // cond: config.PtrSize == 4
-       // result: (ADDL x y)
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGT y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               if !(config.PtrSize == 4) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETG {
                        break
                }
-               v.reset(OpAMD64ADDL)
-               v.AddArg(x)
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGT)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAddr_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (Addr {sym} base)
-       // cond: config.PtrSize == 8
-       // result: (LEAQ {sym} base)
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLE y x cond)
        for {
-               sym := v.Aux
-               base := v.Args[0]
-               if !(config.PtrSize == 8) {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETLE {
                        break
                }
-               v.reset(OpAMD64LEAQ)
-               v.Aux = sym
-               v.AddArg(base)
-               return true
-       }
-       // match: (Addr {sym} base)
-       // cond: config.PtrSize == 4
-       // result: (LEAL {sym} base)
-       for {
-               sym := v.Aux
-               base := v.Args[0]
-               if !(config.PtrSize == 4) {
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
                        break
                }
-               v.reset(OpAMD64LEAL)
-               v.Aux = sym
-               v.AddArg(base)
+               v.reset(OpAMD64CMOVLLE)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAnd16_0(v *Value) bool {
-       // match: (And16 x y)
-       // cond:
-       // result: (ANDL x y)
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGE y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ANDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGE)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
+       return false
 }
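
The SETEQF/SETNEF/SETGF/SETGEF inputs get the dedicated CMOVQEQF-style pseudo-ops instead of plain CMOVQEQ because floating-point compares can be unordered: UCOMISD sets ZF, PF and CF all to 1 when either operand is NaN, so float equality is really ZF==1 && PF==0 and cannot be expressed as a single CMOVcc. The pseudo-ops leave room for the backend to add the parity check; this is, presumably, the NaN special-casing the commit message calls out. A case the lowering has to get right (names are illustrative):

    package main

    import (
            "fmt"
            "math"
    )

    // The float compare produces SETEQF flags; the integer select becomes
    // a CMOVQEQF, which must NOT pick a when x and y compare unordered.
    func pick(x, y float64, a, b int64) int64 {
            r := b
            if x == y {
                    r = a
            }
            return r
    }

    func main() {
            fmt.Println(pick(1, 1, 10, 20))                   // 10
            fmt.Println(pick(math.NaN(), math.NaN(), 10, 20)) // 20: NaN != NaN
    }
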
-func rewriteValueAMD64_OpAnd32_0(v *Value) bool {
-       // match: (And32 x y)
-       // cond:
-       // result: (ANDL x y)
+func rewriteValueAMD64_OpCondSelect_20(v *Value) bool {
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLHI y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ANDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETA {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLHI)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAnd64_0(v *Value) bool {
-       // match: (And64 x y)
-       // cond:
-       // result: (ANDQ x y)
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLCS y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ANDQ)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETB {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLCS)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAnd8_0(v *Value) bool {
-       // match: (And8 x y)
-       // cond:
-       // result: (ANDL x y)
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLCC y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ANDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETAE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLCC)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndB_0(v *Value) bool {
-       // match: (AndB x y)
-       // cond:
-       // result: (ANDL x y)
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLS y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ANDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETBE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLLS)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicAdd32_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (AtomicAdd32 ptr val mem)
-       // cond:
-       // result: (AddTupleFirst32 val (XADDLlock val ptr mem))
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64AddTupleFirst32)
-               v.AddArg(val)
-               v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicAdd64_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (AtomicAdd64 ptr val mem)
-       // cond:
-       // result: (AddTupleFirst64 val (XADDQlock val ptr mem))
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLEQF y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64AddTupleFirst64)
-               v.AddArg(val)
-               v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLEQF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicAnd8_0(v *Value) bool {
-       // match: (AtomicAnd8 ptr val mem)
-       // cond:
-       // result: (ANDBlock ptr val mem)
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLNEF y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64ANDBlock)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicCompareAndSwap32_0(v *Value) bool {
-       // match: (AtomicCompareAndSwap32 ptr old new_ mem)
-       // cond:
-       // result: (CMPXCHGLlock ptr old new_ mem)
-       for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               old := v.Args[1]
-               new_ := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64CMPXCHGLlock)
-               v.AddArg(ptr)
-               v.AddArg(old)
-               v.AddArg(new_)
-               v.AddArg(mem)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNEF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicCompareAndSwap64_0(v *Value) bool {
-       // match: (AtomicCompareAndSwap64 ptr old new_ mem)
-       // cond:
-       // result: (CMPXCHGQlock ptr old new_ mem)
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGTF y x cond)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               old := v.Args[1]
-               new_ := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64CMPXCHGQlock)
-               v.AddArg(ptr)
-               v.AddArg(old)
-               v.AddArg(new_)
-               v.AddArg(mem)
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGTF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicExchange32_0(v *Value) bool {
-       // match: (AtomicExchange32 ptr val mem)
-       // cond:
-       // result: (XCHGL val ptr mem)
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGEF y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64XCHGL)
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGEF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicExchange64_0(v *Value) bool {
-       // match: (AtomicExchange64 ptr val mem)
-       // cond:
-       // result: (XCHGQ val ptr mem)
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWEQ y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64XCHGQ)
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQ {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWEQ)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicLoad32_0(v *Value) bool {
-       // match: (AtomicLoad32 ptr mem)
-       // cond:
-       // result: (MOVLatomicload ptr mem)
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWNE y x cond)
        for {
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               mem := v.Args[1]
-               v.reset(OpAMD64MOVLatomicload)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNE)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAtomicLoad64_0(v *Value) bool {
-       // match: (AtomicLoad64 ptr mem)
-       // cond:
-       // result: (MOVQatomicload ptr mem)
+func rewriteValueAMD64_OpCondSelect_30(v *Value) bool {
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLT y x cond)
        for {
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               mem := v.Args[1]
-               v.reset(OpAMD64MOVQatomicload)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETL {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLT)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicLoadPtr_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (AtomicLoadPtr ptr mem)
-       // cond: config.PtrSize == 8
-       // result: (MOVQatomicload ptr mem)
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGT y x cond)
        for {
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               mem := v.Args[1]
-               if !(config.PtrSize == 8) {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETG {
                        break
                }
-               v.reset(OpAMD64MOVQatomicload)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGT)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (AtomicLoadPtr ptr mem)
-       // cond: config.PtrSize == 4
-       // result: (MOVLatomicload ptr mem)
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLE y x cond)
        for {
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               mem := v.Args[1]
-               if !(config.PtrSize == 4) {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETLE {
                        break
                }
-               v.reset(OpAMD64MOVLatomicload)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLE)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAtomicOr8_0(v *Value) bool {
-       // match: (AtomicOr8 ptr val mem)
-       // cond:
-       // result: (ORBlock ptr val mem)
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGE y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64ORBlock)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGE)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicStore32_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (AtomicStore32 ptr val mem)
-       // cond:
-       // result: (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWHI y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETA {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWHI)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicStore64_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (AtomicStore64 ptr val mem)
-       // cond:
-       // result: (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWCS y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETB {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWCS)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicStorePtrNoWB_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (AtomicStorePtrNoWB ptr val mem)
-       // cond: config.PtrSize == 8
-       // result: (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWCC y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(config.PtrSize == 8) {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETAE {
                        break
                }
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWCC)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (AtomicStorePtrNoWB ptr val mem)
-       // cond: config.PtrSize == 4
-       // result: (Select1 (XCHGL <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLS y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(config.PtrSize == 4) {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETBE {
                        break
                }
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.BytePtr, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLS)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAvg64u_0(v *Value) bool {
-       // match: (Avg64u x y)
-       // cond:
-       // result: (AVGQU x y)
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWEQF y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64AVGQU)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWEQF)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpBitLen32_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (BitLen32 x)
-       // cond:
-       // result: (BitLen64 (MOVLQZX <typ.UInt64> x))
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWNEF y x cond)
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpBitLen64)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNEF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpBitLen64_0(v *Value) bool {
+func rewriteValueAMD64_OpCondSelect_40(v *Value) bool {
        b := v.Block
        _ = b
        typ := &b.Func.Config.Types
        _ = typ
-       // match: (BitLen64 <t> x)
-       // cond:
-       // result: (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGTF y x cond)
        for {
                t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = 1
-               v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t)
-               v1 := b.NewValue0(v.Pos, OpSelect0, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v2.AddArg(x)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
-               v3.AuxInt = -1
-               v0.AddArg(v3)
-               v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v5 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v5.AddArg(x)
-               v4.AddArg(v5)
-               v0.AddArg(v4)
-               v.AddArg(v0)
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGTF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpBswap32_0(v *Value) bool {
-       // match: (Bswap32 x)
-       // cond:
-       // result: (BSWAPL x)
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGEF y x cond)
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64BSWAPL)
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGEF)
+               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpBswap64_0(v *Value) bool {
-       // match: (Bswap64 x)
-       // cond:
-       // result: (BSWAPQ x)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 1
+       // result: (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64BSWAPQ)
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 1) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
                v.AddArg(x)
+               v.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpCeil_0(v *Value) bool {
-       // match: (Ceil x)
-       // cond:
-       // result: (ROUNDSD [2] x)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 2
+       // result: (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64ROUNDSD)
-               v.AuxInt = 2
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
                v.AddArg(x)
+               v.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpClosureCall_0(v *Value) bool {
-       // match: (ClosureCall [argwid] entry closure mem)
-       // cond:
-       // result: (CALLclosure [argwid] entry closure mem)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 4
+       // result: (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
        for {
-               argwid := v.AuxInt
+               t := v.Type
                _ = v.Args[2]
-               entry := v.Args[0]
-               closure := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64CALLclosure)
-               v.AuxInt = argwid
-               v.AddArg(entry)
-               v.AddArg(closure)
-               v.AddArg(mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCom16_0(v *Value) bool {
-       // match: (Com16 x)
-       // cond:
-       // result: (NOTL x)
-       for {
                x := v.Args[0]
-               v.reset(OpAMD64NOTL)
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 4) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
                v.AddArg(x)
+               v.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpCom32_0(v *Value) bool {
-       // match: (Com32 x)
-       // cond:
-       // result: (NOTL x)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64NOTL)
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQNE)
+               v.AddArg(y)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = 0
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpCom64_0(v *Value) bool {
-       // match: (Com64 x)
-       // cond:
-       // result: (NOTQ x)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
+       // result: (CMOVLNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64NOTQ)
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNE)
+               v.AddArg(y)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = 0
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpCom8_0(v *Value) bool {
-       // match: (Com8 x)
-       // cond:
-       // result: (NOTL x)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
+       // result: (CMOVWNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64NOTL)
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNE)
+               v.AddArg(y)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = 0
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
 func rewriteValueAMD64_OpConst16_0(v *Value) bool {
        // match: (Const16 [val])
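A note on the generated rules above: every "// match / cond / result" triplet compiles to one for-block that tests the condition-producing argument for a specific SETcc op and the result type against a width predicate, breaking to the next rule on failure; the _30/_40 suffixes come from the rule generator, which splits an op's rules into chunks and has the dispatcher try each chunk in turn. Note the argument swap in every result: CondSelect x y cond yields x when the condition holds, while the CMOV ops select their second argument on a true condition, so the rules emit (CMOVxx y x cond). Below is a stripped-down sketch of one such block, with a stub Value type standing in for the compiler's *ssa.Value; it is illustrative only, not the generator's output.

	package main

	import "fmt"

	type Op int

	const (
		OpCondSelect Op = iota
		OpSETEQ
		OpCMOVQEQ
	)

	type Value struct {
		Op   Op
		Args []*Value
	}

	// One rule: (CondSelect x y (SETEQ cond)) -> (CMOVQEQ y x cond)
	func rewriteCondSelectEQ(v *Value) bool {
		if v.Op != OpCondSelect || len(v.Args) != 3 {
			return false
		}
		x, y, setcc := v.Args[0], v.Args[1], v.Args[2]
		if setcc.Op != OpSETEQ {
			return false // pattern failed; the caller tries the next rule
		}
		cond := setcc.Args[0]
		v.Op = OpCMOVQEQ
		// Swap x and y: CMOV picks its second argument when the
		// condition holds, matching CondSelect's choice of x.
		v.Args = []*Value{y, x, cond}
		return true
	}

	func main() {
		v := &Value{Op: OpCondSelect, Args: []*Value{
			{}, {}, {Op: OpSETEQ, Args: []*Value{{}}},
		}}
		fmt.Println(rewriteCondSelectEQ(v), v.Op == OpCMOVQEQ) // true true
	}

The last six rules handle a check that is a materialized boolean rather than flags: 1-, 2- and 4-byte checks are zero-extended to 64 bits (MOVBQZX/MOVWQZX/MOVLQZX) and requeued as a fresh CondSelect, and an 8-byte check is compared against zero so the select becomes a plain CMOVQNE/CMOVLNE/CMOVWNE. A hypothetical source-level shape that should take this path, assuming branchelim accepts it (not something the commit's tests cover):

	// The condition reaching CondSelect here is a stored bool, a 1-byte
	// value rather than flags, so the rules zero-extend it and compare
	// it with CMPQconst [0] before the conditional move.
	func selectStored(b bool, x, y int64) int64 {
		r := y
		if b {
			r = x
		}
		return r
	}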
diff --git a/test/codegen/condmove.go b/test/codegen/condmove.go
new file mode 100644
index 0000000..1f51505
--- /dev/null
+++ b/test/codegen/condmove.go
@@ -0,0 +1,178 @@
+// asmcheck
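+//
+// The "amd64:" and "arm64:" comments in this file are asmcheck
+// assertions: each quoted string is a regexp that must match the
+// assembly the compiler generates for the surrounding function, and a
+// leading minus (as in -"CMOV") requires that it not match.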
+
+package codegen
+
+func cmovint(c int) int {
+       x := c + 4
+       if x < 0 {
+               x = 182
+       }
+       // amd64:"CMOVQLT"
+       // arm64:"CSEL\tLT"
+       return x
+}
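+
+// An if that only assigns a value, as above, is roughly the shape the
+// branchelim pass turns into a CondSelect, which the lowering then
+// maps to the CMOV/CSEL asserted in the comments instead of a branch.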
+
+func cmovchan(x, y chan int) chan int {
+       if x != y {
+               x = y
+       }
+       // amd64:"CMOVQNE"
+       // arm64:"CSEL\tNE"
+       return x
+}
+
+func cmovuintptr(x, y uintptr) uintptr {
+       if x < y {
+               x = -y
+       }
+       // amd64:"CMOVQCS"
+       // arm64:"CSEL\tLO"
+       return x
+}
+
+func cmov32bit(x, y uint32) uint32 {
+       if x < y {
+               x = -y
+       }
+       // amd64:"CMOVLCS"
+       // arm64:"CSEL\tLO"
+       return x
+}
+
+func cmov16bit(x, y uint16) uint16 {
+       if x < y {
+               x = -y
+       }
+       // amd64:"CMOVWCS"
+       // arm64:"CSEL\tLO"
+       return x
+}
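+
+// The opcode suffix tracks the operand width: 64-bit operands (and
+// pointers) use CMOVQxx, 32-bit CMOVLxx, and 16-bit CMOVWxx, matching
+// the is64BitInt/is32BitInt/is16BitInt guards in the lowering rules.
+// There is no byte-sized CMOV on x86, so 8-bit selects keep their
+// branches.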
+
+// Floating-point comparison. For EQ/NE, we must
+// generate special code to handle NaNs.
+func cmovfloateq(x, y float64) int {
+       a := 128
+       if x == y {
+               a = 256
+       }
+       // amd64:"CMOVQNE","CMOVQPC"
+       // arm64:"CSEL\tEQ"
+       return a
+}
+
+func cmovfloatne(x, y float64) int {
+       a := 128
+       if x != y {
+               a = 256
+       }
+       // amd64:"CMOVQNE","CMOVQPS"
+       // arm64:"CSEL\tNE"
+       return a
+}
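+
+// Why the extra parity move: UCOMISD signals an unordered comparison
+// (either operand NaN) by setting PF, and ZF alone cannot separate
+// "equal" from "unordered". The compiler therefore expands the
+// floating-point CMOV pseudo-ops into an integer CMOV paired with a
+// parity-conditional one (CMOVQPC for ==, CMOVQPS for !=), preserving
+// Go's rule that NaN compares unequal to everything, itself included:
+//
+//	cmovfloateq(math.NaN(), math.NaN()) // 128: NaN == NaN is false
+//	cmovfloatne(math.NaN(), math.NaN()) // 256: NaN != NaN is true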
+
+//go:noinline
+func frexp(f float64) (frac float64, exp int) {
+       return 1.0, 4
+}
+
+//go:noinline
+func ldexp(frac float64, exp int) float64 {
+       return 1.0
+}
+
+// Generate a CMOV with a floating-point comparison and an integer move.
+func cmovfloatint2(x, y float64) float64 {
+       yfr, yexp := 4.0, 5
+
+       r := x
+       for r >= y {
+               rfr, rexp := frexp(r)
+               if rfr < yfr {
+                       rexp = rexp - 1
+               }
+               // amd64:"CMOVQHI"
+               // arm64:"CSEL\tGT"
+               r = r - ldexp(y, (rexp-yexp))
+       }
+       return r
+}
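+
+// Also note the condition above: rfr < yfr is a floating-point
+// compare, which is emitted with swapped operands as an unsigned
+// "above" test so that an unordered (NaN) result, which sets CF, ZF
+// and PF together, falls on the false side; hence CMOVQHI rather than
+// CMOVQCS.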
+
+func cmovloaded(x [4]int, y int) int {
+       if x[2] != 0 {
+               y = x[2]
+       } else {
+               y = y >> 2
+       }
+       // amd64:"CMOVQNE"
+       // arm64:"CSEL\tNE"
+       return y
+}
+
+func cmovuintptr2(x, y uintptr) uintptr {
+       a := x * 2
+       if a == 0 {
+               a = 256
+       }
+       // amd64:"CMOVQEQ"
+       // arm64:"CSEL\tEQ"
+       return a
+}
+
+// Floating-point moves are not handled by the CMOV/CSEL lowering
+// (the rules only cover integer and pointer types), so this stays a
+// branch.
+func cmovfloatmove(x, y int) float64 {
+       a := 1.0
+       if x <= y {
+               a = 2.0
+       }
+       // amd64:-"CMOV"
+       // arm64:-"CSEL"
+       return a
+}
+
+// On amd64, the following patterns trigger comparison inversion.
+// Test that we correctly invert the CMOV condition.
+var gsink int64
+var gusink uint64
+
+func cmovinvert1(x, y int64) int64 {
+       if x < gsink {
+               y = -y
+       }
+       // amd64:"CMOVQGT"
+       return y
+}
+func cmovinvert2(x, y int64) int64 {
+       if x <= gsink {
+               y = -y
+       }
+       // amd64:"CMOVQGE"
+       return y
+}
+func cmovinvert3(x, y int64) int64 {
+       if x == gsink {
+               y = -y
+       }
+       // amd64:"CMOVQEQ"
+       return y
+}
+func cmovinvert4(x, y int64) int64 {
+       if x != gsink {
+               y = -y
+       }
+       // amd64:"CMOVQNE"
+       return y
+}
+func cmovinvert5(x, y uint64) uint64 {
+       if x > gusink {
+               y = -y
+       }
+       // amd64:"CMOVQCS"
+       return y
+}
+func cmovinvert6(x, y uint64) uint64 {
+       if x >= gusink {
+               y = -y
+       }
+       // amd64:"CMOVQLS"
+       return y
+}
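+
+// The swap happens because the compare against the in-memory global is
+// canonicalized with reversed operands (the load ends up on a fixed
+// side of the CMPQ), so the CMOV condition must be replaced by its
+// mirror: LT<->GT, LE<->GE, HI<->CS, CC<->LS, while the symmetric EQ
+// and NE stay as they are. The six functions above assert exactly
+// those pairs.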