cmd/compile: implement CMOV on amd64
author     Giovanni Bajo <rasky@develer.com>
           Mon, 5 Mar 2018 19:59:40 +0000 (20:59 +0100)
committer  Giovanni Bajo <rasky@develer.com>
           Thu, 15 Mar 2018 16:41:59 +0000 (16:41 +0000)
This builds on the branchelim pass, activating it for amd64 and
lowering CondSelect. Special care is taken with FPU instructions
to handle NaN correctly.
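
An illustrative sketch (editor's example, not part of the commit message):
the kind of two-armed assignment whose Phi branchelim rewrites into a
CondSelect, which this CL then lowers to a CMOV on amd64.

package main

// With this CL, the compiler may emit CMPQ+CMOVQGT here instead of a
// conditional jump, subject to branchelim's heuristics.
func max64(a, b int64) int64 {
        r := b
        if a > b {
                r = a
        }
        return r
}

func main() { println(max64(3, 4)) }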

Benchmark results on Xeon E5630 (Westmere EP):

name                      old time/op    new time/op    delta
BinaryTree17-16              4.99s ± 9%     4.66s ± 2%     ~     (p=0.095 n=5+5)
Fannkuch11-16                4.93s ± 3%     5.04s ± 2%     ~     (p=0.548 n=5+5)
FmtFprintfEmpty-16          58.8ns ± 7%    61.4ns ±14%     ~     (p=0.579 n=5+5)
FmtFprintfString-16          114ns ± 2%     114ns ± 4%     ~     (p=0.603 n=5+5)
FmtFprintfInt-16             181ns ± 4%     125ns ± 3%  -30.90%  (p=0.008 n=5+5)
FmtFprintfIntInt-16          263ns ± 2%     217ns ± 2%  -17.34%  (p=0.008 n=5+5)
FmtFprintfPrefixedInt-16     230ns ± 1%     212ns ± 1%   -7.99%  (p=0.008 n=5+5)
FmtFprintfFloat-16           411ns ± 3%     344ns ± 5%  -16.43%  (p=0.008 n=5+5)
FmtManyArgs-16               828ns ± 4%     790ns ± 2%   -4.59%  (p=0.032 n=5+5)
GobDecode-16                10.9ms ± 4%    10.8ms ± 5%     ~     (p=0.548 n=5+5)
GobEncode-16                9.52ms ± 5%    9.46ms ± 2%     ~     (p=1.000 n=5+5)
Gzip-16                      334ms ± 2%     337ms ± 2%     ~     (p=0.548 n=5+5)
Gunzip-16                   64.4ms ± 1%    65.0ms ± 1%   +1.00%  (p=0.008 n=5+5)
HTTPClientServer-16          156µs ± 3%     155µs ± 3%     ~     (p=0.690 n=5+5)
JSONEncode-16               21.0ms ± 1%    21.8ms ± 0%   +3.76%  (p=0.016 n=5+4)
JSONDecode-16               95.1ms ± 0%    95.7ms ± 1%     ~     (p=0.151 n=5+5)
Mandelbrot200-16            6.38ms ± 1%    6.42ms ± 1%     ~     (p=0.095 n=5+5)
GoParse-16                  5.47ms ± 2%    5.36ms ± 1%   -1.95%  (p=0.016 n=5+5)
RegexpMatchEasy0_32-16       111ns ± 1%     111ns ± 1%     ~     (p=0.635 n=5+4)
RegexpMatchEasy0_1K-16       408ns ± 1%     411ns ± 2%     ~     (p=0.087 n=5+5)
RegexpMatchEasy1_32-16       103ns ± 1%     104ns ± 1%     ~     (p=0.484 n=5+5)
RegexpMatchEasy1_1K-16       659ns ± 2%     652ns ± 1%     ~     (p=0.571 n=5+5)
RegexpMatchMedium_32-16      176ns ± 2%     174ns ± 1%     ~     (p=0.476 n=5+5)
RegexpMatchMedium_1K-16     58.6µs ± 4%    57.7µs ± 4%     ~     (p=0.548 n=5+5)
RegexpMatchHard_32-16       3.07µs ± 3%    3.04µs ± 4%     ~     (p=0.421 n=5+5)
RegexpMatchHard_1K-16       89.2µs ± 1%    87.9µs ± 2%   -1.52%  (p=0.032 n=5+5)
Revcomp-16                   575ms ± 0%     587ms ± 2%   +2.12%  (p=0.032 n=4+5)
Template-16                  110ms ± 1%     107ms ± 3%   -3.00%  (p=0.032 n=5+5)
TimeParse-16                 463ns ± 0%     462ns ± 0%     ~     (p=0.810 n=5+4)
TimeFormat-16                538ns ± 0%     535ns ± 0%   -0.63%  (p=0.024 n=5+5)

name                      old speed      new speed      delta
GobDecode-16              70.7MB/s ± 4%  71.4MB/s ± 5%     ~     (p=0.452 n=5+5)
GobEncode-16              80.7MB/s ± 5%  81.2MB/s ± 2%     ~     (p=1.000 n=5+5)
Gzip-16                   58.2MB/s ± 2%  57.7MB/s ± 2%     ~     (p=0.452 n=5+5)
Gunzip-16                  302MB/s ± 1%   299MB/s ± 1%   -0.99%  (p=0.008 n=5+5)
JSONEncode-16             92.4MB/s ± 1%  89.1MB/s ± 0%   -3.63%  (p=0.016 n=5+4)
JSONDecode-16             20.4MB/s ± 0%  20.3MB/s ± 1%     ~     (p=0.135 n=5+5)
GoParse-16                10.6MB/s ± 2%  10.8MB/s ± 1%   +2.00%  (p=0.016 n=5+5)
RegexpMatchEasy0_32-16     286MB/s ± 1%   285MB/s ± 3%     ~     (p=1.000 n=5+5)
RegexpMatchEasy0_1K-16    2.51GB/s ± 1%  2.49GB/s ± 2%     ~     (p=0.095 n=5+5)
RegexpMatchEasy1_32-16     309MB/s ± 1%   307MB/s ± 1%     ~     (p=0.548 n=5+5)
RegexpMatchEasy1_1K-16    1.55GB/s ± 2%  1.57GB/s ± 1%     ~     (p=0.690 n=5+5)
RegexpMatchMedium_32-16   5.68MB/s ± 2%  5.73MB/s ± 1%     ~     (p=0.579 n=5+5)
RegexpMatchMedium_1K-16   17.5MB/s ± 4%  17.8MB/s ± 4%     ~     (p=0.500 n=5+5)
RegexpMatchHard_32-16     10.4MB/s ± 3%  10.5MB/s ± 4%     ~     (p=0.460 n=5+5)
RegexpMatchHard_1K-16     11.5MB/s ± 1%  11.7MB/s ± 2%   +1.57%  (p=0.032 n=5+5)
Revcomp-16                 442MB/s ± 0%   433MB/s ± 2%   -2.05%  (p=0.032 n=4+5)
Template-16               17.7MB/s ± 1%  18.2MB/s ± 3%   +3.12%  (p=0.032 n=5+5)

Change-Id: I6972e8f35f2b31f9a42ac473a6bf419a18022558
Reviewed-on: https://go-review.googlesource.com/100935
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/branchelim.go
src/cmd/compile/internal/ssa/branchelim_test.go
src/cmd/compile/internal/ssa/export_test.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/condmove.go [new file with mode: 0644]

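For orientation (a sketch; the file's exact contents are not reproduced in
this excerpt): Go's codegen tests assert on the emitted assembly through
magic comments, so test/codegen/condmove.go plausibly contains cases along
these lines:

package codegen

func cmovint(c int) int {
        x := c + 4
        if x < 0 {
                x = 182
        }
        // amd64:"CMOVQLT"
        // arm64:"CSEL\tLT"
        return x
}
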
index 43797e2ff3bee4432113aa9b0043d762a03c200d..6b8fe875a4211ffae600723282162f3c7211f345 100644 (file)
@@ -398,7 +398,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
 
-       case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
+       case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
+               ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
+               ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
+               ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
+               ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
+               ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
+               ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
+               ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
+               ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
+               ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
+               ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
+               ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
                r := v.Reg()
                if r != v.Args[0].Reg() {
                        v.Fatalf("input[0] and output not in same register %s", v.LongString())
@@ -409,6 +420,71 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
 
+       case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
+               r := v.Reg()
+               if r != v.Args[0].Reg() {
+                       v.Fatalf("input[0] and output not in same register %s", v.LongString())
+               }
+               // Flag condition: !ZERO || PARITY
+               // Generate:
+               //   CMOV*NE  SRC,DST
+               //   CMOV*PS  SRC,DST
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = v.Args[1].Reg()
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = r
+               var q *obj.Prog
+               if v.Op == ssa.OpAMD64CMOVQNEF {
+                       q = s.Prog(x86.ACMOVQPS)
+               } else if v.Op == ssa.OpAMD64CMOVLNEF {
+                       q = s.Prog(x86.ACMOVLPS)
+               } else {
+                       q = s.Prog(x86.ACMOVWPS)
+               }
+               q.From.Type = obj.TYPE_REG
+               q.From.Reg = v.Args[1].Reg()
+               q.To.Type = obj.TYPE_REG
+               q.To.Reg = r
+
+       case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
+               r := v.Reg()
+               if r != v.Args[0].Reg() {
+                       v.Fatalf("input[0] and output not in same register %s", v.LongString())
+               }
+
+               // Flag condition: ZERO && !PARITY
+               // Generate:
+               //   MOV      SRC,AX
+               //   CMOV*NE  DST,AX
+               //   CMOV*PC  AX,DST
+               //
+               // TODO(rasky): we could generate:
+               //   CMOV*NE  DST,SRC
+               //   CMOV*PC  SRC,DST
+               // But this requires a way for regalloc to know that SRC might be
+               // clobbered by this instruction.
+               if v.Args[1].Reg() != x86.REG_AX {
+                       opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
+               }
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = x86.REG_AX
+               var q *obj.Prog
+               if v.Op == ssa.OpAMD64CMOVQEQF {
+                       q = s.Prog(x86.ACMOVQPC)
+               } else if v.Op == ssa.OpAMD64CMOVLEQF {
+                       q = s.Prog(x86.ACMOVLPC)
+               } else {
+                       q = s.Prog(x86.ACMOVWPC)
+               }
+               q.From.Type = obj.TYPE_REG
+               q.From.Reg = x86.REG_AX
+               q.To.Type = obj.TYPE_REG
+               q.To.Reg = r
+
        case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
                r := v.Reg()
                p := s.Prog(v.Op.Asm())
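
Context for the two floating-point cases above (editorial sketch, not part
of the diff): after a UCOMISD-style compare, an unordered result (NaN) sets
the parity flag, so ordered-equal is ZF==1 && PF==0, a condition no single
CMOV encodes; hence the AX-assisted three-instruction sequence. Source that
exercises it might look like:

// Hypothetical example: a == b must be false when either operand is NaN.
// Per the lowering above, for an int64 result this becomes:
//   MOVQ src, AX; CMOVQNE dst, AX; CMOVQPC AX, dst
func selectEq(a, b float64, x, y int64) int64 {
        r := y
        if a == b {
                r = x
        }
        return r
}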
index 54508985b369452155391e2684f4d9316f9dd4e6..75a6b8238c9444dd6333c7dbf5d5d379e8faff9e 100644 (file)
@@ -19,7 +19,10 @@ package ssa
 // rewrite Phis in the postdominator as CondSelects.
 func branchelim(f *Func) {
        // FIXME: add support for lowering CondSelects on more architectures
-       if f.Config.arch != "arm64" {
+       switch f.Config.arch {
+       case "arm64", "amd64":
+               // implemented
+       default:
                return
        }
 
@@ -32,10 +35,22 @@ func branchelim(f *Func) {
        }
 }
 
-func canCondSelect(v *Value) bool {
+func canCondSelect(v *Value, arch string) bool {
        // For now, stick to simple scalars that fit in registers
-       sz := v.Type.Size()
-       return sz <= v.Block.Func.Config.RegSize && (v.Type.IsInteger() || v.Type.IsPtrShaped())
+       switch {
+       case v.Type.Size() > v.Block.Func.Config.RegSize:
+               return false
+       case v.Type.IsPtrShaped():
+               return true
+       case v.Type.IsInteger():
+               if arch == "amd64" && v.Type.Size() < 2 {
+                       // amd64 doesn't support CMOV with byte registers
+                       return false
+               }
+               return true
+       default:
+               return false
+       }
 }
 
 func elimIf(f *Func, dom *Block) bool {
@@ -68,7 +83,7 @@ func elimIf(f *Func, dom *Block) bool {
        for _, v := range post.Values {
                if v.Op == OpPhi {
                        hasphis = true
-                       if !canCondSelect(v) {
+                       if !canCondSelect(v, f.Config.arch) {
                                return false
                        }
                }
@@ -169,7 +184,7 @@ func elimIfElse(f *Func, b *Block) bool {
        for _, v := range post.Values {
                if v.Op == OpPhi {
                        hasphis = true
-                       if !canCondSelect(v) {
+                       if !canCondSelect(v, f.Config.arch) {
                                return false
                        }
                }
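
A sketch of the new per-arch restriction (editor's example): an int8 select
is rejected on amd64 because CMOV has no byte-register form, while the same
shape over int32 qualifies.

func pick8(c bool, a, b int8) int8 {
        r := b
        if c { // stays a branch on amd64: canCondSelect returns false for size-1 ints
                r = a
        }
        return r
}

func pick32(c bool, a, b int32) int32 {
        r := b
        if c { // eligible: becomes CondSelect, then a CMOVL* on amd64
                r = a
        }
        return r
}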
index 979ba1d961270e9665bd996ae638bf00bb150435..30bb133f8ed7a5c27d1c1460048cd0361e6fe0a0 100644 (file)
@@ -11,128 +11,162 @@ import (
 
 // Test that a trivial 'if' is eliminated
 func TestBranchElimIf(t *testing.T) {
-       c := testConfig(t)
-       c.config.arch = "arm64" // FIXME
-       boolType := types.New(types.TBOOL)
-       intType := types.New(types.TINT32)
-       fun := c.Fun("entry",
-               Bloc("entry",
-                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
-                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
-                       Valu("const1", OpConst32, intType, 1, nil),
-                       Valu("const2", OpConst32, intType, 2, nil),
-                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
-                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
-                       If("cond", "b2", "b3")),
-               Bloc("b2",
-                       Goto("b3")),
-               Bloc("b3",
-                       Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
-                       Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
-                       Exit("retstore")))
+       var testData = []struct {
+               arch    string
+               intType string
+               ok      bool
+       }{
+               {"arm64", "int32", true},
+               {"amd64", "int32", true},
+               {"amd64", "int8", false},
+       }
 
-       CheckFunc(fun.f)
-       branchelim(fun.f)
-       CheckFunc(fun.f)
-       Deadcode(fun.f)
-       CheckFunc(fun.f)
+       for _, data := range testData {
+               t.Run(data.arch+"/"+data.intType, func(t *testing.T) {
+                       c := testConfigArch(t, data.arch)
+                       boolType := c.config.Types.Bool
+                       var intType *types.Type
+                       switch data.intType {
+                       case "int32":
+                               intType = c.config.Types.Int32
+                       case "int8":
+                               intType = c.config.Types.Int8
+                       default:
+                               t.Fatal("invalid integer type:", data.intType)
+                       }
+                       fun := c.Fun("entry",
+                               Bloc("entry",
+                                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
+                                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
+                                       Valu("const1", OpConst32, intType, 1, nil),
+                                       Valu("const2", OpConst32, intType, 2, nil),
+                                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
+                                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
+                                       If("cond", "b2", "b3")),
+                               Bloc("b2",
+                                       Goto("b3")),
+                               Bloc("b3",
+                                       Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
+                                       Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
+                                       Exit("retstore")))
 
-       if len(fun.f.Blocks) != 1 {
-               t.Errorf("expected 1 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
-       }
-       if fun.values["phi"].Op != OpCondSelect {
-               t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
-       }
-       if fun.values["phi"].Args[2] != fun.values["cond"] {
-               t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
-       }
-       if fun.blocks["entry"].Kind != BlockExit {
-               t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
+                       CheckFunc(fun.f)
+                       branchelim(fun.f)
+                       CheckFunc(fun.f)
+                       Deadcode(fun.f)
+                       CheckFunc(fun.f)
+
+                       if data.ok {
+
+                               if len(fun.f.Blocks) != 1 {
+                                       t.Fatalf("expected 1 block after branchelim and deadcode; found %d", len(fun.f.Blocks))
+                               }
+                               if fun.values["phi"].Op != OpCondSelect {
+                                       t.Fatalf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
+                               }
+                               if fun.values["phi"].Args[2] != fun.values["cond"] {
+                                       t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
+                               }
+                               if fun.blocks["entry"].Kind != BlockExit {
+                                       t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
+                               }
+                       } else {
+                               if len(fun.f.Blocks) != 3 {
+                                       t.Fatalf("expected 3 blocks after branchelim and deadcode; found %d", len(fun.f.Blocks))
+                               }
+                       }
+               })
        }
 }
 
 // Test that a trivial if/else is eliminated
 func TestBranchElimIfElse(t *testing.T) {
-       c := testConfig(t)
-       c.config.arch = "arm64" // FIXME
-       boolType := types.New(types.TBOOL)
-       intType := types.New(types.TINT32)
-       fun := c.Fun("entry",
-               Bloc("entry",
-                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
-                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
-                       Valu("const1", OpConst32, intType, 1, nil),
-                       Valu("const2", OpConst32, intType, 2, nil),
-                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
-                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
-                       If("cond", "b2", "b3")),
-               Bloc("b2",
-                       Goto("b4")),
-               Bloc("b3",
-                       Goto("b4")),
-               Bloc("b4",
-                       Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
-                       Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
-                       Exit("retstore")))
+       for _, arch := range []string{"arm64", "amd64"} {
+               t.Run(arch, func(t *testing.T) {
+                       c := testConfigArch(t, arch)
+                       boolType := c.config.Types.Bool
+                       intType := c.config.Types.Int32
+                       fun := c.Fun("entry",
+                               Bloc("entry",
+                                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
+                                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
+                                       Valu("const1", OpConst32, intType, 1, nil),
+                                       Valu("const2", OpConst32, intType, 2, nil),
+                                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
+                                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
+                                       If("cond", "b2", "b3")),
+                               Bloc("b2",
+                                       Goto("b4")),
+                               Bloc("b3",
+                                       Goto("b4")),
+                               Bloc("b4",
+                                       Valu("phi", OpPhi, intType, 0, nil, "const1", "const2"),
+                                       Valu("retstore", OpStore, types.TypeMem, 0, nil, "phi", "sb", "start"),
+                                       Exit("retstore")))
 
-       CheckFunc(fun.f)
-       branchelim(fun.f)
-       CheckFunc(fun.f)
-       Deadcode(fun.f)
-       CheckFunc(fun.f)
+                       CheckFunc(fun.f)
+                       branchelim(fun.f)
+                       CheckFunc(fun.f)
+                       Deadcode(fun.f)
+                       CheckFunc(fun.f)
 
-       if len(fun.f.Blocks) != 1 {
-               t.Errorf("expected 1 block after branchelim; found %d", len(fun.f.Blocks))
-       }
-       if fun.values["phi"].Op != OpCondSelect {
-               t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
-       }
-       if fun.values["phi"].Args[2] != fun.values["cond"] {
-               t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
-       }
-       if fun.blocks["entry"].Kind != BlockExit {
-               t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
+                       if len(fun.f.Blocks) != 1 {
+                               t.Fatalf("expected 1 block after branchelim; found %d", len(fun.f.Blocks))
+                       }
+                       if fun.values["phi"].Op != OpCondSelect {
+                               t.Fatalf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
+                       }
+                       if fun.values["phi"].Args[2] != fun.values["cond"] {
+                               t.Errorf("expected CondSelect condition to be %s; found %s", fun.values["cond"], fun.values["phi"].Args[2])
+                       }
+                       if fun.blocks["entry"].Kind != BlockExit {
+                               t.Errorf("expected entry to be BlockExit; found kind %s", fun.blocks["entry"].Kind.String())
+                       }
+               })
        }
 }
 
 // Test that an if/else CFG that loops back
 // into itself does *not* get eliminated.
 func TestNoBranchElimLoop(t *testing.T) {
-       c := testConfig(t)
-       c.config.arch = "arm64" // FIXME
-       boolType := types.New(types.TBOOL)
-       intType := types.New(types.TINT32)
+       for _, arch := range []string{"arm64", "amd64"} {
+               t.Run(arch, func(t *testing.T) {
+                       c := testConfigArch(t, arch)
+                       boolType := c.config.Types.Bool
+                       intType := c.config.Types.Int32
 
-       // The control flow here is totally bogus,
-       // but a dead cycle seems like the only plausible
-       // way to arrive at a diamond CFG that is also a loop.
-       fun := c.Fun("entry",
-               Bloc("entry",
-                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
-                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
-                       Valu("const2", OpConst32, intType, 2, nil),
-                       Valu("const3", OpConst32, intType, 3, nil),
-                       Goto("b5")),
-               Bloc("b2",
-                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
-                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
-                       Valu("phi", OpPhi, intType, 0, nil, "const2", "const3"),
-                       If("cond", "b3", "b4")),
-               Bloc("b3",
-                       Goto("b2")),
-               Bloc("b4",
-                       Goto("b2")),
-               Bloc("b5",
-                       Exit("start")))
+                       // The control flow here is totally bogus,
+                       // but a dead cycle seems like the only plausible
+                       // way to arrive at a diamond CFG that is also a loop.
+                       fun := c.Fun("entry",
+                               Bloc("entry",
+                                       Valu("start", OpInitMem, types.TypeMem, 0, nil),
+                                       Valu("sb", OpSB, types.TypeInvalid, 0, nil),
+                                       Valu("const2", OpConst32, intType, 2, nil),
+                                       Valu("const3", OpConst32, intType, 3, nil),
+                                       Goto("b5")),
+                               Bloc("b2",
+                                       Valu("addr", OpAddr, boolType.PtrTo(), 0, nil, "sb"),
+                                       Valu("cond", OpLoad, boolType, 0, nil, "addr", "start"),
+                                       Valu("phi", OpPhi, intType, 0, nil, "const2", "const3"),
+                                       If("cond", "b3", "b4")),
+                               Bloc("b3",
+                                       Goto("b2")),
+                               Bloc("b4",
+                                       Goto("b2")),
+                               Bloc("b5",
+                                       Exit("start")))
 
-       CheckFunc(fun.f)
-       branchelim(fun.f)
-       CheckFunc(fun.f)
+                       CheckFunc(fun.f)
+                       branchelim(fun.f)
+                       CheckFunc(fun.f)
 
-       if len(fun.f.Blocks) != 5 {
-               t.Errorf("expected 5 block after branchelim; found %d", len(fun.f.Blocks))
-       }
-       if fun.values["phi"].Op != OpPhi {
-               t.Errorf("expected phi op to be CondSelect; found op %s", fun.values["phi"].Op)
+                       if len(fun.f.Blocks) != 5 {
+                               t.Errorf("expected 5 blocks after branchelim; found %d", len(fun.f.Blocks))
+                       }
+                       if fun.values["phi"].Op != OpPhi {
+                               t.Errorf("expected phi op to be Phi; found op %s", fun.values["phi"].Op)
+                       }
+               })
        }
 }
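
These table-driven tests can be run directly, e.g. with
"go test -run BranchElim cmd/compile/internal/ssa"; the pattern matches all
three tests above.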
index 1fe0bbe6ae3976f313b9f611bb6d1defac70c3e8..8d3bd74fa515d858fd0d9540fb8123d0f852add5 100644 (file)
@@ -7,6 +7,7 @@ package ssa
 import (
        "cmd/compile/internal/types"
        "cmd/internal/obj"
+       "cmd/internal/obj/arm64"
        "cmd/internal/obj/s390x"
        "cmd/internal/obj/x86"
        "cmd/internal/src"
@@ -22,6 +23,7 @@ var Copyelim = copyelim
 var testCtxts = map[string]*obj.Link{
        "amd64": obj.Linknew(&x86.Linkamd64),
        "s390x": obj.Linknew(&s390x.Links390x),
+       "arm64": obj.Linknew(&arm64.Linkarm64),
 }
 
 func testConfig(tb testing.TB) *Conf      { return testConfigArch(tb, "amd64") }
index 6b750343725ed8d07c05f4e3a6157393ac20162a..9a6efb5a5d167e543d36a2911cd434ff0071f83f 100644 (file)
 (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
 (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
 
+// Lowering conditional moves
+// If the condition is a SETxx, we can just run a CMOV from the comparison that was
+// setting the flags.
+// Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
+(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t))
+    -> (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
+(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t)
+    -> (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
+(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
+    -> (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
+
+// If the condition does not set the flags, we need to generate a comparison.
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
+    -> (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
+    -> (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
+    -> (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
+
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
+    -> (CMOVQNE y x (CMPQconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
+    -> (CMOVLNE y x (CMPQconst [0] check))
+(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
+    -> (CMOVWNE y x (CMPQconst [0] check))
+
+// Absorb InvertFlags
+(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
+    -> (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
+    -> (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
+    -> (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+
+// Absorb constants generated during lower
+(CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) -> x
+(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) -> y
+(CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) -> x
+(CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) -> y
+(CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) -> x
+(CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) -> y
+(CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) -> x
+(CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) -> y
+(CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) -> x
+(CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) -> y
+
 // Miscellaneous
 (Convert <t> x mem) && config.PtrSize == 8 -> (MOVQconvert <t> x mem)
 (Convert <t> x mem) && config.PtrSize == 4 -> (MOVLconvert <t> x mem)
 (CMPLconst x [0]) -> (TESTL x x)
 (CMPWconst x [0]) -> (TESTW x x)
 (CMPBconst x [0]) -> (TESTB x x)
+(TESTQconst [-1] x) -> (TESTQ x x)
+(TESTLconst [-1] x) -> (TESTL x x)
+(TESTWconst [-1] x) -> (TESTW x x)
+(TESTBconst [-1] x) -> (TESTB x x)
 
 // Combining byte loads into larger (unaligned) loads.
 // There are many ways these combinations could occur.  This is
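
A sketch of the no-live-flags path above (editor's example): a bool loaded
from memory carries no flags, so the rules zero-extend it and re-test it
with CMPQconst [0] before the CMOV. The TEST*const [-1] folds are companion
cleanup: testing a register against all-ones sets the same flags as the
shorter TEST reg, reg form.

func selStored(p *bool, x, y int64) int64 {
        r := y
        if *p { // per the rules above: MOVBQZX, then CMPQ $0, then CMOVQNE
                r = x
        }
        return r
}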
index 65feb328e09b6b0ea85401e1c706e2336eb69fea..ecc9027e51b2a56d09a0a899c4fead4786bfd817 100644 (file)
@@ -132,6 +132,7 @@ func init() {
                gpload    = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
                gp21load  = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
                gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
+               gp21pax   = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
 
                gpstore         = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
                gpstoreconst    = regInfo{inputs: []regMask{gpspsb, 0}}
@@ -340,10 +341,57 @@ func init() {
                {name: "BSRQ", argLength: 1, reg: gp11flags, asm: "BSRQ", typ: "(UInt64,Flags)"}, // # of high-order zeroes in 64-bit arg
                {name: "BSRL", argLength: 1, reg: gp11flags, asm: "BSRL", typ: "(UInt32,Flags)"}, // # of high-order zeroes in 32-bit arg
 
-               // Note ASM for ops moves whole register
-               //
-               {name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
-               {name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true}, // if arg2 encodes "equal" return arg1 else arg0
+               // CMOV instructions: 64, 32 and 16-bit sizes.
+               // if arg2 encodes a true result, return arg1, else arg0
+               {name: "CMOVQEQ", argLength: 3, reg: gp21, asm: "CMOVQEQ", resultInArg0: true},
+               {name: "CMOVQNE", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
+               {name: "CMOVQLT", argLength: 3, reg: gp21, asm: "CMOVQLT", resultInArg0: true},
+               {name: "CMOVQGT", argLength: 3, reg: gp21, asm: "CMOVQGT", resultInArg0: true},
+               {name: "CMOVQLE", argLength: 3, reg: gp21, asm: "CMOVQLE", resultInArg0: true},
+               {name: "CMOVQGE", argLength: 3, reg: gp21, asm: "CMOVQGE", resultInArg0: true},
+               {name: "CMOVQLS", argLength: 3, reg: gp21, asm: "CMOVQLS", resultInArg0: true},
+               {name: "CMOVQHI", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
+               {name: "CMOVQCC", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
+               {name: "CMOVQCS", argLength: 3, reg: gp21, asm: "CMOVQCS", resultInArg0: true},
+
+               {name: "CMOVLEQ", argLength: 3, reg: gp21, asm: "CMOVLEQ", resultInArg0: true},
+               {name: "CMOVLNE", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
+               {name: "CMOVLLT", argLength: 3, reg: gp21, asm: "CMOVLLT", resultInArg0: true},
+               {name: "CMOVLGT", argLength: 3, reg: gp21, asm: "CMOVLGT", resultInArg0: true},
+               {name: "CMOVLLE", argLength: 3, reg: gp21, asm: "CMOVLLE", resultInArg0: true},
+               {name: "CMOVLGE", argLength: 3, reg: gp21, asm: "CMOVLGE", resultInArg0: true},
+               {name: "CMOVLLS", argLength: 3, reg: gp21, asm: "CMOVLLS", resultInArg0: true},
+               {name: "CMOVLHI", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
+               {name: "CMOVLCC", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
+               {name: "CMOVLCS", argLength: 3, reg: gp21, asm: "CMOVLCS", resultInArg0: true},
+
+               {name: "CMOVWEQ", argLength: 3, reg: gp21, asm: "CMOVWEQ", resultInArg0: true},
+               {name: "CMOVWNE", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
+               {name: "CMOVWLT", argLength: 3, reg: gp21, asm: "CMOVWLT", resultInArg0: true},
+               {name: "CMOVWGT", argLength: 3, reg: gp21, asm: "CMOVWGT", resultInArg0: true},
+               {name: "CMOVWLE", argLength: 3, reg: gp21, asm: "CMOVWLE", resultInArg0: true},
+               {name: "CMOVWGE", argLength: 3, reg: gp21, asm: "CMOVWGE", resultInArg0: true},
+               {name: "CMOVWLS", argLength: 3, reg: gp21, asm: "CMOVWLS", resultInArg0: true},
+               {name: "CMOVWHI", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
+               {name: "CMOVWCC", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
+               {name: "CMOVWCS", argLength: 3, reg: gp21, asm: "CMOVWCS", resultInArg0: true},
+
+               // CMOV with floating point instructions. We need separate pseudo-op to handle
+               // InvertFlags correctly, and to generate special code that handles NaN (unordered flag).
+               // NOTE: the fact that CMOV*EQF here is marked to generate CMOV*NE is not a bug. See
+               // code generation in amd64/ssa.go.
+               {name: "CMOVQEQF", argLength: 3, reg: gp21pax, asm: "CMOVQNE", resultInArg0: true},
+               {name: "CMOVQNEF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
+               {name: "CMOVQGTF", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
+               {name: "CMOVQGEF", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
+               {name: "CMOVLEQF", argLength: 3, reg: gp21pax, asm: "CMOVLNE", resultInArg0: true},
+               {name: "CMOVLNEF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
+               {name: "CMOVLGTF", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
+               {name: "CMOVLGEF", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
+               {name: "CMOVWEQF", argLength: 3, reg: gp21pax, asm: "CMOVWNE", resultInArg0: true},
+               {name: "CMOVWNEF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
+               {name: "CMOVWGTF", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
+               {name: "CMOVWGEF", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
 
                {name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
                {name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
@@ -578,7 +626,6 @@ func init() {
                {name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
                //arg0=ptr,arg1=mem, returns void.  Faults if ptr is nil.
                {name: "LoweredNilCheck", argLength: 2, reg: regInfo{inputs: []regMask{gpsp}}, clobberFlags: true, nilCheck: true, faultOnNilArg0: true},
-
                // LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
                // It saves all GP registers if necessary, but may clobber others.
                {name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), ax}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},
index 9445692c2f59d4537829e0843d2a55714b4cd027..1ab0e45b0a7ab29d2a72c94d2d9d20b8d5c1f745 100644 (file)
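opGen.go is generated from the gen/AMD64Ops.go tables above (by re-running
the generator programs in the gen directory), so the entries below are the
mechanical expansion of the declarations just shown.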
@@ -560,7 +560,47 @@ const (
        OpAMD64BSRQ
        OpAMD64BSRL
        OpAMD64CMOVQEQ
+       OpAMD64CMOVQNE
+       OpAMD64CMOVQLT
+       OpAMD64CMOVQGT
+       OpAMD64CMOVQLE
+       OpAMD64CMOVQGE
+       OpAMD64CMOVQLS
+       OpAMD64CMOVQHI
+       OpAMD64CMOVQCC
+       OpAMD64CMOVQCS
        OpAMD64CMOVLEQ
+       OpAMD64CMOVLNE
+       OpAMD64CMOVLLT
+       OpAMD64CMOVLGT
+       OpAMD64CMOVLLE
+       OpAMD64CMOVLGE
+       OpAMD64CMOVLLS
+       OpAMD64CMOVLHI
+       OpAMD64CMOVLCC
+       OpAMD64CMOVLCS
+       OpAMD64CMOVWEQ
+       OpAMD64CMOVWNE
+       OpAMD64CMOVWLT
+       OpAMD64CMOVWGT
+       OpAMD64CMOVWLE
+       OpAMD64CMOVWGE
+       OpAMD64CMOVWLS
+       OpAMD64CMOVWHI
+       OpAMD64CMOVWCC
+       OpAMD64CMOVWCS
+       OpAMD64CMOVQEQF
+       OpAMD64CMOVQNEF
+       OpAMD64CMOVQGTF
+       OpAMD64CMOVQGEF
+       OpAMD64CMOVLEQF
+       OpAMD64CMOVLNEF
+       OpAMD64CMOVLGTF
+       OpAMD64CMOVLGEF
+       OpAMD64CMOVWEQF
+       OpAMD64CMOVWNEF
+       OpAMD64CMOVWGTF
+       OpAMD64CMOVWGEF
        OpAMD64BSWAPQ
        OpAMD64BSWAPL
        OpAMD64POPCNTQ
@@ -6808,6 +6848,141 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "CMOVQNE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQLT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQLT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQGT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQGT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQLE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQLE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQGE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQGE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQLS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQLS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQHI",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQCC",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQCS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQCS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
        {
                name:         "CMOVLEQ",
                argLen:       3,
@@ -6823,6 +6998,472 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "CMOVLNE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLLT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLLT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLGT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLGT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLLE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLLE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLGE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLGE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLLS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLLS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLHI",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLCC",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLCS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLCS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWEQ",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWEQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWNE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWLT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWLT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWGT",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWGT,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWLE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWLE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWGE",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWGE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWLS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWLS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWHI",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWCC",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWCS",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWCS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQEQF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       clobbers: 1, // AX
+                       outputs: []outputInfo{
+                               {0, 65518}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQNEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQGTF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVQGEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVQCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLEQF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLNEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLGTF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVLGEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVLCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWEQF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWNEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWNE,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWGTF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWHI,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
+       {
+               name:         "CMOVWGEF",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.ACMOVWCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                               {1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
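[Note on the operator table above: the recurring 65519 input/output mask is 0xffef, i.e. all sixteen general-purpose registers with bit 4 (SP) cleared. The FPU-comparison variants (EQF, NEF, GTF, GEF per width) carry the NaN handling mentioned in the commit message: UCOMISS/UCOMISD set CF, ZF and PF like an unsigned compare and set all three for unordered (NaN) operands, so GTF and GEF can reuse the unsigned conditions HI and CC, which are false whenever an operand is NaN. Float equality needs ZF==1 && PF==0, which no single CMOV condition expresses; CMOVQEQF is therefore declared as CMOVQNE with an AX clobber (mask 65518 = 0xffee, the same set minus AX), leaving the backend a scratch register to fold in the parity test, while NEF (ZF==0 || PF==1) presumably composes from two plain CMOVs writing the same destination and reserves nothing. A semantic sketch of the GTF variant, assuming the flag-constant rewrites later in this diff (result = cond ? arg1 : arg0) and using a hypothetical helper rather than compiler code:

	// cmovGTF mirrors what CMOVQGTF x y must compute when the flags
	// come from comparing a with b: pick y only if a > b, and a > b
	// is false when either operand is NaN.
	func cmovGTF(x, y int64, a, b float64) int64 {
		if a > b { // unordered comparisons report false
			return y
		}
		return x
	}
]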
        {
                name:         "BSWAPQ",
                argLen:       1,
index 23339ed8aa4652c711410536524344637463723e..cf9bec4e7c842bfbf5d445315728488049b75c57 100644 (file)
@@ -53,8 +53,66 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ANDQmem_0(v)
        case OpAMD64BSFQ:
                return rewriteValueAMD64_OpAMD64BSFQ_0(v)
+       case OpAMD64CMOVLCC:
+               return rewriteValueAMD64_OpAMD64CMOVLCC_0(v)
+       case OpAMD64CMOVLCS:
+               return rewriteValueAMD64_OpAMD64CMOVLCS_0(v)
+       case OpAMD64CMOVLEQ:
+               return rewriteValueAMD64_OpAMD64CMOVLEQ_0(v)
+       case OpAMD64CMOVLGE:
+               return rewriteValueAMD64_OpAMD64CMOVLGE_0(v)
+       case OpAMD64CMOVLGT:
+               return rewriteValueAMD64_OpAMD64CMOVLGT_0(v)
+       case OpAMD64CMOVLHI:
+               return rewriteValueAMD64_OpAMD64CMOVLHI_0(v)
+       case OpAMD64CMOVLLE:
+               return rewriteValueAMD64_OpAMD64CMOVLLE_0(v)
+       case OpAMD64CMOVLLS:
+               return rewriteValueAMD64_OpAMD64CMOVLLS_0(v)
+       case OpAMD64CMOVLLT:
+               return rewriteValueAMD64_OpAMD64CMOVLLT_0(v)
+       case OpAMD64CMOVLNE:
+               return rewriteValueAMD64_OpAMD64CMOVLNE_0(v)
+       case OpAMD64CMOVQCC:
+               return rewriteValueAMD64_OpAMD64CMOVQCC_0(v)
+       case OpAMD64CMOVQCS:
+               return rewriteValueAMD64_OpAMD64CMOVQCS_0(v)
        case OpAMD64CMOVQEQ:
                return rewriteValueAMD64_OpAMD64CMOVQEQ_0(v)
+       case OpAMD64CMOVQGE:
+               return rewriteValueAMD64_OpAMD64CMOVQGE_0(v)
+       case OpAMD64CMOVQGT:
+               return rewriteValueAMD64_OpAMD64CMOVQGT_0(v)
+       case OpAMD64CMOVQHI:
+               return rewriteValueAMD64_OpAMD64CMOVQHI_0(v)
+       case OpAMD64CMOVQLE:
+               return rewriteValueAMD64_OpAMD64CMOVQLE_0(v)
+       case OpAMD64CMOVQLS:
+               return rewriteValueAMD64_OpAMD64CMOVQLS_0(v)
+       case OpAMD64CMOVQLT:
+               return rewriteValueAMD64_OpAMD64CMOVQLT_0(v)
+       case OpAMD64CMOVQNE:
+               return rewriteValueAMD64_OpAMD64CMOVQNE_0(v)
+       case OpAMD64CMOVWCC:
+               return rewriteValueAMD64_OpAMD64CMOVWCC_0(v)
+       case OpAMD64CMOVWCS:
+               return rewriteValueAMD64_OpAMD64CMOVWCS_0(v)
+       case OpAMD64CMOVWEQ:
+               return rewriteValueAMD64_OpAMD64CMOVWEQ_0(v)
+       case OpAMD64CMOVWGE:
+               return rewriteValueAMD64_OpAMD64CMOVWGE_0(v)
+       case OpAMD64CMOVWGT:
+               return rewriteValueAMD64_OpAMD64CMOVWGT_0(v)
+       case OpAMD64CMOVWHI:
+               return rewriteValueAMD64_OpAMD64CMOVWHI_0(v)
+       case OpAMD64CMOVWLE:
+               return rewriteValueAMD64_OpAMD64CMOVWLE_0(v)
+       case OpAMD64CMOVWLS:
+               return rewriteValueAMD64_OpAMD64CMOVWLS_0(v)
+       case OpAMD64CMOVWLT:
+               return rewriteValueAMD64_OpAMD64CMOVWLT_0(v)
+       case OpAMD64CMOVWNE:
+               return rewriteValueAMD64_OpAMD64CMOVWNE_0(v)
        case OpAMD64CMPB:
                return rewriteValueAMD64_OpAMD64CMPB_0(v)
        case OpAMD64CMPBconst:
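[Each newly lowered CMOV opcode gains a dispatch entry here, pointing at its own machine-generated matcher below. rewriteValueAMD64.go is generated from gen/AMD64.rules, and every // match / cond / result comment in the new functions mirrors one rules line; the CMOVLCC matcher further down, for instance, corresponds to a rule of roughly this shape:

	(CMOVLCC x y (InvertFlags cond)) -> (CMOVLLS x y cond)
]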
@@ -389,12 +447,20 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64SUBSSmem_0(v)
        case OpAMD64TESTB:
                return rewriteValueAMD64_OpAMD64TESTB_0(v)
+       case OpAMD64TESTBconst:
+               return rewriteValueAMD64_OpAMD64TESTBconst_0(v)
        case OpAMD64TESTL:
                return rewriteValueAMD64_OpAMD64TESTL_0(v)
+       case OpAMD64TESTLconst:
+               return rewriteValueAMD64_OpAMD64TESTLconst_0(v)
        case OpAMD64TESTQ:
                return rewriteValueAMD64_OpAMD64TESTQ_0(v)
+       case OpAMD64TESTQconst:
+               return rewriteValueAMD64_OpAMD64TESTQconst_0(v)
        case OpAMD64TESTW:
                return rewriteValueAMD64_OpAMD64TESTW_0(v)
+       case OpAMD64TESTWconst:
+               return rewriteValueAMD64_OpAMD64TESTWconst_0(v)
        case OpAMD64XADDLlock:
                return rewriteValueAMD64_OpAMD64XADDLlock_0(v)
        case OpAMD64XADDQlock:
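[The four TESTxconst entries back new rewrite functions for the constant TEST forms. They pair with the CMP-against-zero canonicalizations visible further down in this diff, e.g. the CMPBconst rule of the shape

	(CMPBconst (ANDLconst [c] x) [0]) -> (TESTBconst [int64(int8(c))] x)

so the TESTBconst values such rules produce presumably get simplification rules of their own to fire on.]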
@@ -491,6 +557,8 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpCom64_0(v)
        case OpCom8:
                return rewriteValueAMD64_OpCom8_0(v)
+       case OpCondSelect:
+               return rewriteValueAMD64_OpCondSelect_0(v) || rewriteValueAMD64_OpCondSelect_10(v) || rewriteValueAMD64_OpCondSelect_20(v) || rewriteValueAMD64_OpCondSelect_30(v) || rewriteValueAMD64_OpCondSelect_40(v)
        case OpConst16:
                return rewriteValueAMD64_OpConst16_0(v)
        case OpConst32:
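[rulegen emits matchers in batches (roughly ten rules per function) and chains the pieces with ||, so the CondSelect lowering spans _0 through _40; each piece returns true as soon as one of its rules fires. Roughly what the generic op computes, as a hypothetical helper rather than compiler code:

	// CondSelect x y cond evaluates to x when cond is true, else y.
	func condSelect(cond bool, x, y int64) int64 {
		if cond {
			return x
		}
		return y
	}
]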
@@ -3266,27 +3334,33 @@ func rewriteValueAMD64_OpAMD64BSFQ_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
-       // match: (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _))))
-       // cond: c != 0
-       // result: x
+func rewriteValueAMD64_OpAMD64CMOVLCC_0(v *Value) bool {
+       // match: (CMOVLCC x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLLS x y cond)
        for {
                _ = v.Args[2]
                x := v.Args[0]
+               y := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpSelect1 {
-                       break
-               }
-               v_2_0 := v_2.Args[0]
-               if v_2_0.Op != OpAMD64BSFQ {
-                       break
-               }
-               v_2_0_0 := v_2_0.Args[0]
-               if v_2_0_0.Op != OpAMD64ORQconst {
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               c := v_2_0_0.AuxInt
-               if !(c != 0) {
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLLS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVLCC _ x (FlagEQ))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpCopy)
@@ -3294,5042 +3368,6023 @@ func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPB_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPB x (MOVLconst [c]))
+       // match: (CMOVLCC _ x (FlagGT_UGT))
        // cond:
-       // result: (CMPBconst x [int64(int8(c))])
+       // result: x
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64CMPBconst)
-               v.AuxInt = int64(int8(c))
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPB (MOVLconst [c]) x)
+       // match: (CMOVLCC y _ (FlagGT_ULT))
        // cond:
-       // result: (InvertFlags (CMPBconst x [int64(int8(c))]))
+       // result: y
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v0.AuxInt = int64(int8(c))
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPBmem {sym} [off] ptr x mem)
+       // match: (CMOVLCC y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVBload {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVLCC _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
                x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64CMPBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (CMPB x l:(MOVBload {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPBmem {sym} [off] ptr x mem))
+       return false
+}
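[The integer CMOV matchers that follow all repeat the pattern established in this function: one InvertFlags rule that replaces the condition by its operand-swapped dual, and five constant-flag rules that fold the select outright, reading the signed (EQ/LT/GT) or unsigned (ULT/UGT) half of the known flag state as the condition requires. The duals, collected from the matchers in this file into an illustrative (non-compiler) Go map:

	// Condition duals applied by the InvertFlags rules.
	var inverted = map[string]string{
		"CC": "LS", "LS": "CC", // unsigned >= <-> unsigned <=
		"CS": "HI", "HI": "CS", // unsigned <  <-> unsigned >
		"LT": "GT", "GT": "LT", // signed  <   <-> signed  >
		"LE": "GE", "GE": "LE", // signed  <=  <-> signed  >=
		"EQ": "EQ", "NE": "NE", // equality is its own dual
	}
]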
+func rewriteValueAMD64_OpAMD64CMOVLCS_0(v *Value) bool {
+       // match: (CMOVLCS x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLHI x y cond)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVBload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBmem, types.TypeFlags)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(x)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLHI)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPBconst_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)==int8(y)
-       // result: (FlagEQ)
+       // match: (CMOVLCS y _ (FlagEQ))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) == int8(y)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64FlagEQ)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)<int8(y) && uint8(x)<uint8(y)
-       // result: (FlagLT_ULT)
+       // match: (CMOVLCS y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int8(x) < int8(y) && uint8(x) < uint8(y)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)<int8(y) && uint8(x)>uint8(y)
-       // result: (FlagLT_UGT)
+       // match: (CMOVLCS _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               x := v_0.AuxInt
-               if !(int8(x) < int8(y) && uint8(x) > uint8(y)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVLCS _ x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_UGT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)>int8(y) && uint8(x)<uint8(y)
-       // result: (FlagGT_ULT)
+       // match: (CMOVLCS y _ (FlagLT_UGT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               x := v_0.AuxInt
-               if !(int8(x) > int8(y) && uint8(x) < uint8(y)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLEQ_0(v *Value) bool {
+       // match: (CMOVLEQ x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLEQ x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagGT_ULT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLEQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPBconst (MOVLconst [x]) [y])
-       // cond: int8(x)>int8(y) && uint8(x)>uint8(y)
-       // result: (FlagGT_UGT)
+       // match: (CMOVLEQ _ x (FlagEQ))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               x := v_0.AuxInt
-               if !(int8(x) > int8(y) && uint8(x) > uint8(y)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVLEQ y _ (FlagGT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagGT_UGT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPBconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= int8(m) && int8(m) < int8(n)
-       // result: (FlagLT_ULT)
+       // match: (CMOVLEQ y _ (FlagGT_ULT))
+       // cond:
+       // result: y
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               m := v_0.AuxInt
-               if !(0 <= int8(m) && int8(m) < int8(n)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVLEQ y _ (FlagLT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPBconst (ANDL x y) [0])
+       // match: (CMOVLEQ y _ (FlagLT_UGT))
        // cond:
-       // result: (TESTB x y)
+       // result: y
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
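[EQ and NE map to themselves under InvertFlags because swapping the operands of a comparison preserves (in)equality, so the rewrite above only strips the InvertFlags wrapper; likewise only FlagEQ selects arg1 in CMOVLEQ, and every ordered flag constant folds to arg0. As a sketch:

	// InvertFlags models swapping the compare's operands, and for any
	// a, b we have (a == b) == (b == a), so
	//   (CMOVLEQ x y (InvertFlags c)) == (CMOVLEQ x y c)
]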
+func rewriteValueAMD64_OpAMD64CMOVLGE_0(v *Value) bool {
+       // match: (CMOVLGE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLLE x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64TESTB)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLLE)
                v.AddArg(x)
                v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPBconst (ANDLconst [c] x) [0])
+       // match: (CMOVLGE _ x (FlagEQ))
        // cond:
-       // result: (TESTBconst [int64(int8(c))] x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64TESTBconst)
-               v.AuxInt = int64(int8(c))
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPBconst x [0])
+       // match: (CMOVLGE _ x (FlagGT_UGT))
        // cond:
-       // result: (TESTB x x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64TESTB)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPBconst l:(MOVBload {sym} [off] ptr mem) [c])
-       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMOVLGE _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVBload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(c, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPBmem_0(v *Value) bool {
-       // match: (CMPBmem {sym} [off] ptr (MOVLconst [c]) mem)
-       // cond: validValAndOff(int64(int8(c)),off)
-       // result: (CMPBconstmem {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
+       // match: (CMOVLGE y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(int64(int8(c)), off)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVLGE y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64CMPBconstmem)
-               v.AuxInt = makeValAndOff(int64(int8(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPL x (MOVLconst [c]))
+func rewriteValueAMD64_OpAMD64CMOVLGT_0(v *Value) bool {
+       // match: (CMOVLGT x y (InvertFlags cond))
        // cond:
-       // result: (CMPLconst x [c])
+       // result: (CMOVLLT x y cond)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64CMPLconst)
-               v.AuxInt = c
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLLT)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPL (MOVLconst [c]) x)
+       // match: (CMOVLGT y _ (FlagEQ))
        // cond:
-       // result: (InvertFlags (CMPLconst x [c]))
+       // result: y
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPLmem {sym} [off] ptr x mem)
+       // match: (CMOVLGT _ x (FlagGT_UGT))
+       // cond:
+       // result: x
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVLGT _ x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
                x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64CMPLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (CMPL x l:(MOVLload {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPLmem {sym} [off] ptr x mem))
+       // match: (CMOVLGT y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVLGT y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLmem, types.TypeFlags)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(x)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)==int32(y)
-       // result: (FlagEQ)
+func rewriteValueAMD64_OpAMD64CMOVLHI_0(v *Value) bool {
+       // match: (CMOVLHI x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLCS x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               x := v_0.AuxInt
-               if !(int32(x) == int32(y)) {
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLCS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVLHI y _ (FlagEQ))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64FlagEQ)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)<int32(y) && uint32(x)<uint32(y)
-       // result: (FlagLT_ULT)
+       // match: (CMOVLHI _ x (FlagGT_UGT))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int32(x) < int32(y) && uint32(x) < uint32(y)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)<int32(y) && uint32(x)>uint32(y)
-       // result: (FlagLT_UGT)
+       // match: (CMOVLHI y _ (FlagGT_ULT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int32(x) < int32(y) && uint32(x) > uint32(y)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_UGT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)>int32(y) && uint32(x)<uint32(y)
-       // result: (FlagGT_ULT)
+       // match: (CMOVLHI y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int32(x) > int32(y) && uint32(x) < uint32(y)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagGT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPLconst (MOVLconst [x]) [y])
-       // cond: int32(x)>int32(y) && uint32(x)>uint32(y)
-       // result: (FlagGT_UGT)
+       // match: (CMOVLHI _ x (FlagLT_UGT))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int32(x) > int32(y) && uint32(x) > uint32(y)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagGT_UGT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPLconst (SHRLconst _ [c]) [n])
-       // cond: 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)
-       // result: (FlagLT_ULT)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVLLE_0(v *Value) bool {
+       // match: (CMOVLLE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLGE x y cond)
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               if !(0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLGE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPLconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= int32(m) && int32(m) < int32(n)
-       // result: (FlagLT_ULT)
+       // match: (CMOVLLE _ x (FlagEQ))
+       // cond:
+       // result: x
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               m := v_0.AuxInt
-               if !(0 <= int32(m) && int32(m) < int32(n)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPLconst (ANDL x y) [0])
+       // match: (CMOVLLE y _ (FlagGT_UGT))
        // cond:
-       // result: (TESTL x y)
+       // result: y
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64TESTL)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (CMPLconst (ANDLconst [c] x) [0])
+       // match: (CMOVLLE y _ (FlagGT_ULT))
        // cond:
-       // result: (TESTLconst [c] x)
+       // result: y
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64TESTLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPLconst x [0])
+       // match: (CMOVLLE _ x (FlagLT_ULT))
        // cond:
-       // result: (TESTL x x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64TESTL)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPLconst_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPLconst l:(MOVLload {sym} [off] ptr mem) [c])
-       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMOVLLE _ x (FlagLT_UGT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(c, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPLmem_0(v *Value) bool {
-       // match: (CMPLmem {sym} [off] ptr (MOVLconst [c]) mem)
-       // cond: validValAndOff(c,off)
-       // result: (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+func rewriteValueAMD64_OpAMD64CMOVLLS_0(v *Value) bool {
+       // match: (CMOVLLS x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLCC x y cond)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(c, off)) {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64CMPLconstmem)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLCC)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPQ_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (CMPQconst x [c])
+       // match: (CMOVLLS _ x (FlagEQ))
+       // cond:
+       // result: x
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64CMPQconst)
-               v.AuxInt = c
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (InvertFlags (CMPQconst x [c]))
+       // match: (CMOVLLS y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               c := v_0.AuxInt
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVLLS _ x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
                x := v.Args[1]
-               if !(is32Bit(c)) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPQmem {sym} [off] ptr x mem)
+       // match: (CMOVLLS _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
+               _ = v.Args[2]
                x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64CMPQmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (CMPQ x l:(MOVQload {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPQmem {sym} [off] ptr x mem))
+       // match: (CMOVLLS y _ (FlagLT_UGT))
+       // cond:
+       // result: y
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQmem, types.TypeFlags)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(x)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64CMPQconst_0(v *Value) bool {
-       // match: (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32])
+func rewriteValueAMD64_OpAMD64CMOVLLT_0(v *Value) bool {
+       // match: (CMOVLLT x y (InvertFlags cond))
        // cond:
-       // result: (FlagLT_ULT)
+       // result: (CMOVLGT x y cond)
        for {
-               if v.AuxInt != 32 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               if v_0_0.AuxInt != -16 {
-                       break
-               }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 15 {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLGT)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32])
+       // match: (CMOVLLT y _ (FlagEQ))
        // cond:
-       // result: (FlagLT_ULT)
+       // result: y
        for {
-               if v.AuxInt != 32 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               if v_0_0.AuxInt != -8 {
-                       break
-               }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 7 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x==y
-       // result: (FlagEQ)
+       // match: (CMOVLLT y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(x == y) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagEQ)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x<y && uint64(x)<uint64(y)
-       // result: (FlagLT_ULT)
+       // match: (CMOVLLT y _ (FlagGT_ULT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(x < y && uint64(x) < uint64(y)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x<y && uint64(x)>uint64(y)
-       // result: (FlagLT_UGT)
+       // match: (CMOVLLT _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(x < y && uint64(x) > uint64(y)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_UGT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x>y && uint64(x)<uint64(y)
-       // result: (FlagGT_ULT)
+       // match: (CMOVLLT _ x (FlagLT_UGT))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(x > y && uint64(x) < uint64(y)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagGT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQconst (MOVQconst [x]) [y])
-       // cond: x>y && uint64(x)>uint64(y)
-       // result: (FlagGT_UGT)
+       return false
+}
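// Note: the constant-flag folds above read most naturally once the operand
// convention is fixed. amd64 CMOVcc is resultInArg0, so (CMOVLLT x y flags)
// computes "flags report LT ? y : x". A minimal model in plain Go, assuming
// that convention (cmovLT is illustrative, not a function in this file):
//
//	func cmovLT(x, y int32, lt bool) int32 {
//		if lt {
//			return y // condition holds: CMOV writes its source operand
//		}
//		return x // condition fails: the destination (first arg) survives
//	}
//
// Under that reading, (CMOVLLT y _ (FlagEQ)) -> y is the "condition false"
// case, (CMOVLLT _ x (FlagLT_ULT)) -> x the "condition true" case, and
// (InvertFlags cond) swaps the comparison's operands, turning LT into GT.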
+func rewriteValueAMD64_OpAMD64CMOVLNE_0(v *Value) bool {
+       // match: (CMOVLNE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVLNE x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(x > y && uint64(x) > uint64(y)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagGT_UGT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVLNE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPQconst (MOVBQZX _) [c])
-       // cond: 0xFF < c
-       // result: (FlagLT_ULT)
+       // match: (CMOVLNE y _ (FlagEQ))
+       // cond:
+       // result: y
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQZX {
-                       break
-               }
-               if !(0xFF < c) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (MOVWQZX _) [c])
-       // cond: 0xFFFF < c
-       // result: (FlagLT_ULT)
+       // match: (CMOVLNE _ x (FlagGT_UGT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVWQZX {
-                       break
-               }
-               if !(0xFFFF < c) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQconst (MOVLQZX _) [c])
-       // cond: 0xFFFFFFFF < c
-       // result: (FlagLT_ULT)
+       // match: (CMOVLNE _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLQZX {
-                       break
-               }
-               if !(0xFFFFFFFF < c) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPQconst_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPQconst (SHRQconst _ [c]) [n])
-       // cond: 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)
-       // result: (FlagLT_ULT)
+       // match: (CMOVLNE _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               if !(0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQconst (ANDQconst _ [m]) [n])
-       // cond: 0 <= m && m < n
-       // result: (FlagLT_ULT)
+       // match: (CMOVLNE _ x (FlagLT_UGT))
+       // cond:
+       // result: x
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               m := v_0.AuxInt
-               if !(0 <= m && m < n) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPQconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= m && m < n
-       // result: (FlagLT_ULT)
+       return false
+}
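// Note: InvertFlags models swapping the two operands of the comparison that
// produced the flags. That flips LT/GT and ULT/UGT but leaves equality
// untouched, which is why CMOVLNE (and CMOVQEQ/CMOVQNE below) rewrite to
// themselves with the InvertFlags wrapper simply removed.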
+func rewriteValueAMD64_OpAMD64CMOVQCC_0(v *Value) bool {
+       // match: (CMOVQCC x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQLS x y cond)
        for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               m := v_0.AuxInt
-               if !(0 <= m && m < n) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQLS)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPQconst (ANDQ x y) [0])
+       // match: (CMOVQCC _ x (FlagEQ))
        // cond:
-       // result: (TESTQ x y)
+       // result: x
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDQ {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64TESTQ)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst (ANDQconst [c] x) [0])
+       // match: (CMOVQCC _ x (FlagGT_UGT))
        // cond:
-       // result: (TESTQconst [c] x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDQconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64TESTQconst)
-               v.AuxInt = c
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPQconst x [0])
+       // match: (CMOVQCC y _ (FlagGT_ULT))
        // cond:
-       // result: (TESTQ x x)
+       // result: y
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64TESTQ)
-               v.AddArg(x)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPQconst l:(MOVQload {sym} [off] ptr mem) [c])
-       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMOVQCC y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               c := v.AuxInt
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(c, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPQmem_0(v *Value) bool {
-       // match: (CMPQmem {sym} [off] ptr (MOVQconst [c]) mem)
-       // cond: validValAndOff(c,off)
-       // result: (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMOVQCC _ x (FlagLT_UGT))
+       // cond:
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(c, off)) {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64CMPQconstmem)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
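// Note: CC ("carry clear") is unsigned >=, so its InvertFlags dual is LS
// (unsigned <=). Being non-strict, it treats FlagEQ as satisfied and selects
// the second argument, where the strict conditions below fall back to the
// first.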
-func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPW x (MOVLconst [c]))
+func rewriteValueAMD64_OpAMD64CMOVQCS_0(v *Value) bool {
+       // match: (CMOVQCS x y (InvertFlags cond))
        // cond:
-       // result: (CMPWconst x [int64(int16(c))])
+       // result: (CMOVQHI x y cond)
        for {
-               _ = v.Args[1]
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64CMPWconst)
-               v.AuxInt = int64(int16(c))
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQHI)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPW (MOVLconst [c]) x)
+       // match: (CMOVQCS y _ (FlagEQ))
        // cond:
-       // result: (InvertFlags (CMPWconst x [int64(int16(c))]))
+       // result: y
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v0.AuxInt = int64(int16(c))
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (CMPWmem {sym} [off] ptr x mem)
+       // match: (CMOVQCS y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVWload {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQCS _ x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
                x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64CMPWmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (CMPW x l:(MOVWload {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (InvertFlags (CMPWmem {sym} [off] ptr x mem))
+       // match: (CMOVQCS _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVWload {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQCS y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64InvertFlags)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWmem, types.TypeFlags)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(x)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
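// Note: CS ("carry set") is strict unsigned <, the InvertFlags dual of HI.
// FlagEQ therefore folds to the first argument here, the opposite of the
// CMOVQCC case above.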
-func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)==int16(y)
-       // result: (FlagEQ)
+func rewriteValueAMD64_OpAMD64CMOVQEQ_0(v *Value) bool {
+       // match: (CMOVQEQ x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQEQ x y cond)
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int16(x) == int16(y)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64FlagEQ)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQEQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)<int16(y) && uint16(x)<uint16(y)
-       // result: (FlagLT_ULT)
+       // match: (CMOVQEQ _ x (FlagEQ))
+       // cond:
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               x := v_0.AuxInt
-               if !(int16(x) < int16(y) && uint16(x) < uint16(y)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)<int16(y) && uint16(x)>uint16(y)
-       // result: (FlagLT_UGT)
+       // match: (CMOVQEQ y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               x := v_0.AuxInt
-               if !(int16(x) < int16(y) && uint16(x) > uint16(y)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQEQ y _ (FlagGT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64FlagLT_UGT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)>int16(y) && uint16(x)<uint16(y)
-       // result: (FlagGT_ULT)
+       // match: (CMOVQEQ y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               x := v_0.AuxInt
-               if !(int16(x) > int16(y) && uint16(x) < uint16(y)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQEQ y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64FlagGT_ULT)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (CMPWconst (MOVLconst [x]) [y])
-       // cond: int16(x)>int16(y) && uint16(x)>uint16(y)
-       // result: (FlagGT_UGT)
+       // match: (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _))))
+       // cond: c != 0
+       // result: x
        for {
-               y := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v.Args[2]
+               x := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpSelect1 {
                        break
                }
-               x := v_0.AuxInt
-               if !(int16(x) > int16(y) && uint16(x) > uint16(y)) {
+               v_2_0 := v_2.Args[0]
+               if v_2_0.Op != OpAMD64BSFQ {
                        break
                }
-               v.reset(OpAMD64FlagGT_UGT)
-               return true
-       }
-       // match: (CMPWconst (ANDLconst _ [m]) [n])
-       // cond: 0 <= int16(m) && int16(m) < int16(n)
-       // result: (FlagLT_ULT)
-       for {
-               n := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               v_2_0_0 := v_2_0.Args[0]
+               if v_2_0_0.Op != OpAMD64ORQconst {
                        break
                }
-               m := v_0.AuxInt
-               if !(0 <= int16(m) && int16(m) < int16(n)) {
+               c := v_2_0_0.AuxInt
+               if !(c != 0) {
                        break
                }
-               v.reset(OpAMD64FlagLT_ULT)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (CMPWconst (ANDL x y) [0])
+       return false
+}
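// Note: the final CMOVQEQ rule is the only non-mechanical fold in this group.
// BSFQ sets ZF only when its input is zero, and (ORQconst [c] _) with c != 0
// can never be zero, so the EQ condition is statically false and the first
// argument is kept. For example, Select1 (BSFQ (ORQconst [8] x)) always
// reports a set bit, whatever x is.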
+func rewriteValueAMD64_OpAMD64CMOVQGE_0(v *Value) bool {
+       // match: (CMOVQGE x y (InvertFlags cond))
        // cond:
-       // result: (TESTW x y)
+       // result: (CMOVQLE x y cond)
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64TESTW)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQLE)
                v.AddArg(x)
                v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (CMPWconst (ANDLconst [c] x) [0])
+       // match: (CMOVQGE _ x (FlagEQ))
        // cond:
-       // result: (TESTWconst [int64(int16(c))] x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64TESTWconst)
-               v.AuxInt = int64(int16(c))
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPWconst x [0])
+       // match: (CMOVQGE _ x (FlagGT_UGT))
        // cond:
-       // result: (TESTW x x)
+       // result: x
        for {
-               if v.AuxInt != 0 {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64TESTW)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (CMPWconst l:(MOVWload {sym} [off] ptr mem) [c])
-       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
-       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // match: (CMOVQGE _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVWload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(c, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPWmem_0(v *Value) bool {
-       // match: (CMPWmem {sym} [off] ptr (MOVLconst [c]) mem)
-       // cond: validValAndOff(int64(int16(c)),off)
-       // result: (CMPWconstmem {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
+       // match: (CMOVQGE y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validValAndOff(int64(int16(c)), off)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQGE y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64CMPWconstmem)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
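// Note: GE is signed >=, the InvertFlags dual of LE. It keys on the signed
// half of the flag constants: FlagLT_UGT still folds to the first argument
// even though the unsigned comparison reads "greater".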
-func rewriteValueAMD64_OpAMD64CMPXCHGLlock_0(v *Value) bool {
-       // match: (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
-       // cond: is32Bit(off1+off2)
-       // result: (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)
+func rewriteValueAMD64_OpAMD64CMOVQGT_0(v *Value) bool {
+       // match: (CMOVQGT x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQLT x y cond)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               old := v.Args[1]
-               new_ := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(off1 + off2)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64CMPXCHGLlock)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(old)
-               v.AddArg(new_)
-               v.AddArg(mem)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQLT)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v *Value) bool {
-       // match: (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
-       // cond: is32Bit(off1+off2)
-       // result: (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
+       // match: (CMOVQGT y _ (FlagEQ))
+       // cond:
+       // result: y
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               old := v.Args[1]
-               new_ := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(off1 + off2)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64CMPXCHGQlock)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(old)
-               v.AddArg(new_)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64LEAL_0(v *Value) bool {
-       // match: (LEAL [c] {s} (ADDLconst [d] x))
-       // cond: is32Bit(c+d)
-       // result: (LEAL [c+d] {s} x)
+       // match: (CMOVQGT _ x (FlagGT_UGT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(is32Bit(c + d)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64LEAL)
-               v.AuxInt = c + d
-               v.Aux = s
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64LEAQ_0(v *Value) bool {
-       // match: (LEAQ [c] {s} (ADDQconst [d] x))
-       // cond: is32Bit(c+d)
-       // result: (LEAQ [c+d] {s} x)
+       // match: (CMOVQGT _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(is32Bit(c + d)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64LEAQ)
-               v.AuxInt = c + d
-               v.Aux = s
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (LEAQ [c] {s} (ADDQ x y))
-       // cond: x.Op != OpSB && y.Op != OpSB
-       // result: (LEAQ1 [c] {s} x y)
+       // match: (CMOVQGT y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               c := v.AuxInt
-               s := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(x.Op != OpSB && y.Op != OpSB) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
+       // match: (CMOVQGT y _ (FlagLT_UGT))
+       // cond:
+       // result: y
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64LEAQ)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       return false
+}
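// Note: GT is strict signed >, the dual of LT, so FlagEQ joins both FlagLT_*
// constants in selecting the first argument.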
+func rewriteValueAMD64_OpAMD64CMOVQHI_0(v *Value) bool {
+       // match: (CMOVQHI x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQCS x y cond)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQCS)
                v.AddArg(x)
                v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMOVQHI y _ (FlagEQ))
+       // cond:
+       // result: y
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ2 {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMOVQHI _ x (FlagGT_UGT))
+       // cond:
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQHI y _ (FlagGT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMOVQHI y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVQHI _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
        return false
 }
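// Note: HI is strict unsigned >, so only the U half of each flag constant
// matters: FlagGT_ULT folds to the first argument despite the signed
// comparison reading "greater".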
-func rewriteValueAMD64_OpAMD64LEAQ1_0(v *Value) bool {
-       // match: (LEAQ1 [c] {s} (ADDQconst [d] x) y)
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ1 [c+d] {s} x y)
+func rewriteValueAMD64_OpAMD64CMOVQLE_0(v *Value) bool {
+       // match: (CMOVQLE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQGE x y cond)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
+               _ = v.Args[2]
+               x := v.Args[0]
                y := v.Args[1]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c + d
-               v.Aux = s
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQGE)
                v.AddArg(x)
                v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (LEAQ1 [c] {s} y (ADDQconst [d] x))
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ1 [c+d] {s} x y)
+       // match: (CMOVQLE _ x (FlagEQ))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               x := v_1.Args[0]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = c + d
-               v.Aux = s
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (SHLQconst [1] y))
+       // match: (CMOVQLE y _ (FlagGT_UGT))
        // cond:
-       // result: (LEAQ2 [c] {s} x y)
+       // result: y
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_1.AuxInt != 1 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [1] y) x)
+       // match: (CMOVQLE y _ (FlagGT_ULT))
        // cond:
-       // result: (LEAQ2 [c] {s} x y)
+       // result: y
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_0.AuxInt != 1 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (SHLQconst [2] y))
+       // match: (CMOVQLE _ x (FlagLT_ULT))
        // cond:
-       // result: (LEAQ4 [c] {s} x y)
+       // result: x
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_1.AuxInt != 2 {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = c
-               v.Aux = s
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [2] y) x)
+       // match: (CMOVQLE _ x (FlagLT_UGT))
        // cond:
-       // result: (LEAQ4 [c] {s} x y)
+       // result: x
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_0.AuxInt != 2 {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = c
-               v.Aux = s
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [c] {s} x (SHLQconst [3] y))
+       return false
+}
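// Note: LE is signed <=, the non-strict mirror of CMOVQGT: FlagEQ and both
// FlagLT_* constants select the second argument, both FlagGT_* the first.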
+func rewriteValueAMD64_OpAMD64CMOVQLS_0(v *Value) bool {
+       // match: (CMOVQLS x y (InvertFlags cond))
        // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // result: (CMOVQCC x y cond)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_1.AuxInt != 3 {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c
-               v.Aux = s
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQCC)
                v.AddArg(x)
                v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (LEAQ1 [c] {s} (SHLQconst [3] y) x)
+       // match: (CMOVQLS _ x (FlagEQ))
        // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // result: x
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               if v_0.AuxInt != 3 {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQLS y _ (FlagGT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               y := v_0.Args[0]
-               x := v.Args[1]
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMOVQLS _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQLS _ x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ1 [off1] {sym1} y (LEAQ [off2] {sym2} x))
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMOVQLS y _ (FlagLT_UGT))
+       // cond:
+       // result: y
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                y := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               x := v_1.Args[0]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
        return false
 }
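// Note: LS is unsigned <=. As with CMOVQHI, only the unsigned half decides:
// FlagGT_ULT selects the second argument because the unsigned comparison
// reads "below".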
-func rewriteValueAMD64_OpAMD64LEAQ2_0(v *Value) bool {
-       // match: (LEAQ2 [c] {s} (ADDQconst [d] x) y)
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ2 [c+d] {s} x y)
+func rewriteValueAMD64_OpAMD64CMOVQLT_0(v *Value) bool {
+       // match: (CMOVQLT x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQGT x y cond)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
+               _ = v.Args[2]
+               x := v.Args[0]
                y := v.Args[1]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = c + d
-               v.Aux = s
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQGT)
                v.AddArg(x)
                v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (LEAQ2 [c] {s} x (ADDQconst [d] y))
-       // cond: is32Bit(c+2*d) && y.Op != OpSB
-       // result: (LEAQ2 [c+2*d] {s} x y)
+       // match: (CMOVQLT y _ (FlagEQ))
+       // cond:
+       // result: y
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+2*d) && y.Op != OpSB) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = c + 2*d
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ2 [c] {s} x (SHLQconst [1] y))
+       // match: (CMOVQLT y _ (FlagGT_UGT))
        // cond:
-       // result: (LEAQ4 [c] {s} x y)
+       // result: y
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_1.AuxInt != 1 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ2 [c] {s} x (SHLQconst [2] y))
+       // match: (CMOVQLT y _ (FlagGT_ULT))
        // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // result: y
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_1.AuxInt != 2 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMOVQLT _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQLT _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64LEAQ4_0(v *Value) bool {
-       // match: (LEAQ4 [c] {s} (ADDQconst [d] x) y)
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ4 [c+d] {s} x y)
+func rewriteValueAMD64_OpAMD64CMOVQNE_0(v *Value) bool {
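+       // Equality is insensitive to the operand order of the comparison, so
+       // folding InvertFlags keeps CMOVQNE itself; only the flags argument
+       // is replaced by the uninverted condition.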
+       // match: (CMOVQNE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVQNE x y cond)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
+               _ = v.Args[2]
+               x := v.Args[0]
                y := v.Args[1]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = c + d
-               v.Aux = s
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVQNE)
                v.AddArg(x)
                v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (LEAQ4 [c] {s} x (ADDQconst [d] y))
-       // cond: is32Bit(c+4*d) && y.Op != OpSB
-       // result: (LEAQ4 [c+4*d] {s} x y)
+       // match: (CMOVQNE y _ (FlagEQ))
+       // cond:
+       // result: y
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+4*d) && y.Op != OpSB) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = c + 4*d
-               v.Aux = s
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ4 [c] {s} x (SHLQconst [1] y))
+       // match: (CMOVQNE _ x (FlagGT_UGT))
        // cond:
-       // result: (LEAQ8 [c] {s} x y)
+       // result: x
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               if v_1.AuxInt != 1 {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQNE _ x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               y := v_1.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c
-               v.Aux = s
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMOVQNE _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVQNE _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64LEAQ8_0(v *Value) bool {
-       // match: (LEAQ8 [c] {s} (ADDQconst [d] x) y)
-       // cond: is32Bit(c+d) && x.Op != OpSB
-       // result: (LEAQ8 [c+d] {s} x y)
+func rewriteValueAMD64_OpAMD64CMOVWCC_0(v *Value) bool {
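+       // CC ("carry clear") is the unsigned >= condition; its inverse under
+       // InvertFlags is LS ("lower or same", unsigned <=). The constant flag
+       // ops record both orderings, e.g. FlagGT_ULT is signed-greater but
+       // unsigned-less. Because CC includes equality, FlagEQ selects arg1.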
+       // match: (CMOVWCC x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWLS x y cond)
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
+               _ = v.Args[2]
+               x := v.Args[0]
                y := v.Args[1]
-               if !(is32Bit(c+d) && x.Op != OpSB) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c + d
-               v.Aux = s
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWLS)
                v.AddArg(x)
                v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (LEAQ8 [c] {s} x (ADDQconst [d] y))
-       // cond: is32Bit(c+8*d) && y.Op != OpSB
-       // result: (LEAQ8 [c+8*d] {s} x y)
+       // match: (CMOVWCC _ x (FlagEQ))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               d := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(is32Bit(c+8*d) && y.Op != OpSB) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWCC _ x (FlagGT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = c + 8*d
-               v.Aux = s
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWCC y _ (FlagGT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
                v.AddArg(y)
                return true
        }
-       // match: (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
-       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       // match: (CMOVWCC y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               x := v_0.Args[0]
-               y := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWCC _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBQSX_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBQSX x:(MOVBload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+func rewriteValueAMD64_OpAMD64CMOVWCS_0(v *Value) bool {
+       // match: (CMOVWCS x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWHI x y cond)
        for {
+               _ = v.Args[2]
                x := v.Args[0]
-               if x.Op != OpAMD64MOVBload {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWHI)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVWCS y _ (FlagEQ))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQSX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (CMOVWCS y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (CMOVWCS _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (CMOVWCS _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBQSX (ANDLconst [c] x))
-       // cond: c & 0x80 == 0
-       // result: (ANDLconst [c & 0x7f] x)
+       // match: (CMOVWCS y _ (FlagLT_UGT))
+       // cond:
+       // result: y
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c&0x80 == 0) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
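+
+// Illustrative note (an assumed source pattern, not part of this change):
+// once branchelim converts a branch such as
+//
+//        if a < b {
+//                r = b
+//        } else {
+//                r = a
+//        }
+//
+// into a CondSelect, lowering emits a CMOV (here CMOVQLT for int64) fed by
+// the CMPQ flags, and the rules in this file only need to fold InvertFlags
+// and any constant flags exposed by earlier optimizations.
+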
+func rewriteValueAMD64_OpAMD64CMOVWEQ_0(v *Value) bool {
+       // match: (CMOVWEQ x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWEQ x y cond)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0x7f
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWEQ)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (MOVBQSX (MOVBQSX x))
+       // match: (CMOVWEQ _ x (FlagEQ))
        // cond:
-       // result: (MOVBQSX x)
+       // result: x
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQSX {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQSX)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBQSXload_0(v *Value) bool {
-       // match: (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVBQSX x)
+       // match: (CMOVWEQ y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBstore {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[2]
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64MOVBQSX)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (CMOVWEQ y _ (FlagGT_ULT))
+       // cond:
+       // result: y
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWEQ y _ (FlagLT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64MOVBQSXload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWEQ y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBQZX_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBQZX x:(MOVBload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+func rewriteValueAMD64_OpAMD64CMOVWGE_0(v *Value) bool {
+       // match: (CMOVWGE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWLE x y cond)
        for {
+               _ = v.Args[2]
                x := v.Args[0]
-               if x.Op != OpAMD64MOVBload {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWLE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVWGE _ x (FlagEQ))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBQZX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       // match: (CMOVWGE _ x (FlagGT_UGT))
+       // cond:
+       // result: x
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWload {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWGE _ x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBQZX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       // match: (CMOVWGE y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWGE y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQZX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWGT_0(v *Value) bool {
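+       // GT is strict, so on FlagEQ the condition fails and arg0 (the value
+       // kept when the move does not happen) is selected, unlike the
+       // GE/LE/CC/LS variants, where equality satisfies the condition.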
+       // match: (CMOVWGT x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWLT x y cond)
        for {
+               _ = v.Args[2]
                x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWLT)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVWGT y _ (FlagEQ))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       // match: (CMOVWGT _ x (FlagGT_UGT))
+       // cond:
+       // result: x
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVBloadidx1 {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWGT _ x (FlagGT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBloadidx1, v.Type)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBQZX (ANDLconst [c] x))
+       // match: (CMOVWGT y _ (FlagLT_ULT))
        // cond:
-       // result: (ANDLconst [c & 0xff] x)
+       // result: y
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0xff
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBQZX (MOVBQZX x))
+       // match: (CMOVWGT y _ (FlagLT_UGT))
        // cond:
-       // result: (MOVBQZX x)
+       // result: y
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQZX {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQZX)
-               v.AddArg(x)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBload_0(v *Value) bool {
-       // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVBQZX x)
+func rewriteValueAMD64_OpAMD64CMOVWHI_0(v *Value) bool {
+       // match: (CMOVWHI x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWCS x y cond)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBstore {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[2]
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64MOVBQZX)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWCS)
                v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       // match: (CMOVWHI y _ (FlagEQ))
+       // cond:
+       // result: y
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (CMOVWHI _ x (FlagGT_UGT))
+       // cond:
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWHI y _ (FlagGT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (CMOVWHI y _ (FlagLT_ULT))
+       // cond:
+       // result: y
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWHI _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVBloadidx1 [off] {sym} ptr idx mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWLE_0(v *Value) bool {
+       // match: (CMOVWLE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWGE x y cond)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWGE)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVWLE _ x (FlagEQ))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (CMOVWLE y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWLE y _ (FlagGT_ULT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       // match: (CMOVWLE _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWLE _ x (FlagLT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64MOVBload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBloadidx1_0(v *Value) bool {
-       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+func rewriteValueAMD64_OpAMD64CMOVWLS_0(v *Value) bool {
+       // match: (CMOVWLS x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWCC x y cond)
        for {
-               c := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWCC)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CMOVWLS _ x (FlagEQ))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (CMOVWLS y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               c := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               idx := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               d := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (CMOVWLS _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       // match: (CMOVWLS _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               c := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               d := v_0.AuxInt
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMOVWLS y _ (FlagLT_UGT))
+       // cond:
+       // result: y
+       for {
+               _ = v.Args[2]
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64MOVBloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBstore_0(v *Value) bool {
-       // match: (MOVBstore [off] {sym} ptr y:(SETL x) mem)
-       // cond: y.Uses == 1
-       // result: (SETLmem [off] {sym} ptr x mem)
+func rewriteValueAMD64_OpAMD64CMOVWLT_0(v *Value) bool {
+       // match: (CMOVWLT x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWGT x y cond)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
+               x := v.Args[0]
                y := v.Args[1]
-               if y.Op != OpAMD64SETL {
-                       break
-               }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64SETLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWGT)
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETLE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETLEmem [off] {sym} ptr x mem)
+       // match: (CMOVWLT y _ (FlagEQ))
+       // cond:
+       // result: y
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETLE {
-                       break
-               }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64SETLEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETG x) mem)
-       // cond: y.Uses == 1
-       // result: (SETGmem [off] {sym} ptr x mem)
+       // match: (CMOVWLT y _ (FlagGT_UGT))
+       // cond:
+       // result: y
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETG {
-                       break
-               }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64SETGmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETGE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETGEmem [off] {sym} ptr x mem)
+       // match: (CMOVWLT y _ (FlagGT_ULT))
+       // cond:
+       // result: y
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETGE {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWLT _ x (FlagLT_ULT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64SETGEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETEQ x) mem)
-       // cond: y.Uses == 1
-       // result: (SETEQmem [off] {sym} ptr x mem)
+       // match: (CMOVWLT _ x (FlagLT_UGT))
+       // cond:
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETEQ {
-                       break
-               }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64SETEQmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETNE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETNEmem [off] {sym} ptr x mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMOVWNE_0(v *Value) bool {
+       // match: (CMOVWNE x y (InvertFlags cond))
+       // cond:
+       // result: (CMOVWNE x y cond)
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
+               x := v.Args[0]
                y := v.Args[1]
-               if y.Op != OpAMD64SETNE {
-                       break
-               }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64SETNEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               cond := v_2.Args[0]
+               v.reset(OpAMD64CMOVWNE)
                v.AddArg(x)
-               v.AddArg(mem)
+               v.AddArg(y)
+               v.AddArg(cond)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETB x) mem)
-       // cond: y.Uses == 1
-       // result: (SETBmem [off] {sym} ptr x mem)
+       // match: (CMOVWNE y _ (FlagEQ))
+       // cond:
+       // result: y
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETB {
+               y := v.Args[0]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagEQ {
                        break
                }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               v.reset(OpCopy)
+               v.Type = y.Type
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMOVWNE _ x (FlagGT_UGT))
+       // cond:
+       // result: x
+       for {
+               _ = v.Args[2]
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETBE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETBEmem [off] {sym} ptr x mem)
+       // match: (CMOVWNE _ x (FlagGT_ULT))
+       // cond:
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETBE {
-                       break
-               }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64SETBEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETA x) mem)
-       // cond: y.Uses == 1
-       // result: (SETAmem [off] {sym} ptr x mem)
+       // match: (CMOVWNE _ x (FlagLT_ULT))
+       // cond:
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETA {
-                       break
-               }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64SETAmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr y:(SETAE x) mem)
-       // cond: y.Uses == 1
-       // result: (SETAEmem [off] {sym} ptr x mem)
+       // match: (CMOVWNE _ x (FlagLT_UGT))
+       // cond:
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               y := v.Args[1]
-               if y.Op != OpAMD64SETAE {
-                       break
-               }
-               x := y.Args[0]
-               mem := v.Args[2]
-               if !(y.Uses == 1) {
+               x := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool {
+func rewriteValueAMD64_OpAMD64CMPB_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MOVBstore [off] {sym} ptr (MOVBQSX x) mem)
+       // match: (CMPB x (MOVLconst [c]))
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (CMPBconst x [int64(int8(c))])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBQSX {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               c := v_1.AuxInt
+               v.reset(OpAMD64CMPBconst)
+               v.AuxInt = int64(int8(c))
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVBQZX x) mem)
+       // match: (CMPB (MOVLconst [c]) x)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr x mem)
+       // result: (InvertFlags (CMPBconst x [int64(int8(c))]))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVBQZX {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v0.AuxInt = int64(int8(c))
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
+       // match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPBmem {sym} [off] ptr x mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVBload {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64CMPBmem)
+               v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(val)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+       // match: (CMPB x l:(MOVBload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPBmem {sym} [off] ptr x mem))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVBload {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconst)
-               v.AuxInt = makeValAndOff(int64(int8(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBmem, types.TypeFlags)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(x)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
-       // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPBconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)==int8(y)
+       // result: (FlagEQ)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x := v_0.AuxInt
+               if !(int8(x) == int8(y)) {
                        break
                }
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (MOVBstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)<int8(y) && uint8(x)<uint8(y)
+       // result: (FlagLT_ULT)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x := v_0.AuxInt
+               if !(int8(x) < int8(y) && uint8(x) < uint8(y)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (MOVBstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVBstoreidx1 [off] {sym} ptr idx val mem)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)<int8(y) && uint8(x)>uint8(y)
+       // result: (FlagLT_UGT)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               y := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               x := v_0.AuxInt
+               if !(int8(x) < int8(y) && uint8(x) > uint8(y)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpAMD64FlagLT_UGT)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
-       // cond: x0.Uses == 1 && clobber(x0)
-       // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)>int8(y) && uint8(x)<uint8(y)
+       // result: (FlagGT_ULT)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               w := v.Args[1]
-               x0 := v.Args[2]
-               if x0.Op != OpAMD64MOVBstore {
-                       break
-               }
-               if x0.AuxInt != i-1 {
-                       break
-               }
-               if x0.Aux != s {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
+               x := v_0.AuxInt
+               if !(int8(x) > int8(y) && uint8(x) < uint8(y)) {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRWconst {
+               v.reset(OpAMD64FlagGT_ULT)
+               return true
+       }
+       // match: (CMPBconst (MOVLconst [x]) [y])
+       // cond: int8(x)>int8(y) && uint8(x)>uint8(y)
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               if x0_1.AuxInt != 8 {
+               x := v_0.AuxInt
+               if !(int8(x) > int8(y) && uint8(x) > uint8(y)) {
                        break
                }
-               if w != x0_1.Args[0] {
+               v.reset(OpAMD64FlagGT_UGT)
+               return true
+       }
+       // match: (CMPBconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= int8(m) && int8(m) < int8(n)
+       // result: (FlagLT_ULT)
+       for {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               mem := x0.Args[2]
-               if !(x0.Uses == 1 && clobber(x0)) {
+               m := v_0.AuxInt
+               if !(0 <= int8(m) && int8(m) < int8(n)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
-               v0.AuxInt = 8
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
+       // match: (CMPBconst (ANDL x y) [0])
+       // cond:
+       // result: (TESTB x y)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               w := v.Args[1]
-               x2 := v.Args[2]
-               if x2.Op != OpAMD64MOVBstore {
-                       break
-               }
-               if x2.AuxInt != i-1 {
+               if v.AuxInt != 0 {
                        break
                }
-               if x2.Aux != s {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               _ = x2.Args[2]
-               if p != x2.Args[0] {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64TESTB)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMPBconst (ANDLconst [c] x) [0])
+       // cond:
+       // result: (TESTBconst [int64(int8(c))] x)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpAMD64SHRLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if x2_1.AuxInt != 8 {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64TESTBconst)
+               v.AuxInt = int64(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPBconst x [0])
+       // cond:
+       // result: (TESTB x x)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               if w != x2_1.Args[0] {
+               x := v.Args[0]
+               v.reset(OpAMD64TESTB)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPBconst l:(MOVBload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVBload {
                        break
                }
-               x1 := x2.Args[2]
-               if x1.Op != OpAMD64MOVBstore {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
                        break
                }
-               if x1.AuxInt != i-2 {
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPBmem_0(v *Value) bool {
+       // match: (CMPBmem {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(int64(int8(c)),off)
+       // result: (CMPBconstmem {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               if x1.Aux != s {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(int64(int8(c)), off)) {
                        break
                }
-               _ = x1.Args[2]
-               if p != x1.Args[0] {
+               v.reset(OpAMD64CMPBconstmem)
+               v.AuxInt = makeValAndOff(int64(int8(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPL x (MOVLconst [c]))
+       // cond:
+       // result: (CMPLconst x [c])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpAMD64SHRLconst {
+               c := v_1.AuxInt
+               v.reset(OpAMD64CMPLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPL (MOVLconst [c]) x)
+       // cond:
+       // result: (InvertFlags (CMPLconst x [c]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               if x1_1.AuxInt != 16 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPLmem {sym} [off] ptr x mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               if w != x1_1.Args[0] {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               x0 := x1.Args[2]
-               if x0.Op != OpAMD64MOVBstore {
+               v.reset(OpAMD64CMPLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (CMPL x l:(MOVLload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPLmem {sym} [off] ptr x mem))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               if x0.AuxInt != i-3 {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               if x0.Aux != s {
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLmem, types.TypeFlags)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(x)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPLconst_0(v *Value) bool {
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)==int32(y)
+       // result: (FlagEQ)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
+               x := v_0.AuxInt
+               if !(int32(x) == int32(y)) {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRLconst {
+               v.reset(OpAMD64FlagEQ)
+               return true
+       }
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)<int32(y) && uint32(x)<uint32(y)
+       // result: (FlagLT_ULT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               if x0_1.AuxInt != 24 {
+               x := v_0.AuxInt
+               if !(int32(x) < int32(y) && uint32(x) < uint32(y)) {
                        break
                }
-               if w != x0_1.Args[0] {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)<int32(y) && uint32(x)>uint32(y)
+       // result: (FlagLT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               mem := x0.Args[2]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+               x := v_0.AuxInt
+               if !(int32(x) < int32(y) && uint32(x) > uint32(y)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 3
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64FlagLT_UGT)
                return true
        }
-       // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
-       // result: (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)>int32(y) && uint32(x)<uint32(y)
+       // result: (FlagGT_ULT)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               w := v.Args[1]
-               x6 := v.Args[2]
-               if x6.Op != OpAMD64MOVBstore {
-                       break
-               }
-               if x6.AuxInt != i-1 {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               if x6.Aux != s {
+               x := v_0.AuxInt
+               if !(int32(x) > int32(y) && uint32(x) < uint32(y)) {
                        break
                }
-               _ = x6.Args[2]
-               if p != x6.Args[0] {
+               v.reset(OpAMD64FlagGT_ULT)
+               return true
+       }
+       // match: (CMPLconst (MOVLconst [x]) [y])
+       // cond: int32(x)>int32(y) && uint32(x)>uint32(y)
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               x6_1 := x6.Args[1]
-               if x6_1.Op != OpAMD64SHRQconst {
+               x := v_0.AuxInt
+               if !(int32(x) > int32(y) && uint32(x) > uint32(y)) {
                        break
                }
-               if x6_1.AuxInt != 8 {
+               v.reset(OpAMD64FlagGT_UGT)
+               return true
+       }
+       // match: (CMPLconst (SHRLconst _ [c]) [n])
+       // cond: 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)
+       // result: (FlagLT_ULT)
+       for {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRLconst {
                        break
                }
-               if w != x6_1.Args[0] {
+               c := v_0.AuxInt
+               if !(0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n)) {
                        break
                }
-               x5 := x6.Args[2]
-               if x5.Op != OpAMD64MOVBstore {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPLconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= int32(m) && int32(m) < int32(n)
+       // result: (FlagLT_ULT)
+       for {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if x5.AuxInt != i-2 {
+               m := v_0.AuxInt
+               if !(0 <= int32(m) && int32(m) < int32(n)) {
                        break
                }
-               if x5.Aux != s {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPLconst (ANDL x y) [0])
+       // cond:
+       // result: (TESTL x y)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               _ = x5.Args[2]
-               if p != x5.Args[0] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               x5_1 := x5.Args[1]
-               if x5_1.Op != OpAMD64SHRQconst {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64TESTL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMPLconst (ANDLconst [c] x) [0])
+       // cond:
+       // result: (TESTLconst [c] x)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               if x5_1.AuxInt != 16 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if w != x5_1.Args[0] {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64TESTLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPLconst x [0])
+       // cond:
+       // result: (TESTL x x)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               x4 := x5.Args[2]
-               if x4.Op != OpAMD64MOVBstore {
+               x := v.Args[0]
+               v.reset(OpAMD64TESTL)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPLconst_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPLconst l:(MOVLload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               if x4.AuxInt != i-3 {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
                        break
                }
-               if x4.Aux != s {
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPLmem_0(v *Value) bool {
+       // match: (CMPLmem {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(c,off)
+       // result: (CMPLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               _ = x4.Args[2]
-               if p != x4.Args[0] {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(c, off)) {
                        break
                }
-               x4_1 := x4.Args[1]
-               if x4_1.Op != OpAMD64SHRQconst {
+               v.reset(OpAMD64CMPLconstmem)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (CMPQconst x [c])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               if x4_1.AuxInt != 24 {
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
                        break
                }
-               if w != x4_1.Args[0] {
+               v.reset(OpAMD64CMPQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (InvertFlags (CMPQconst x [c]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               x3 := x4.Args[2]
-               if x3.Op != OpAMD64MOVBstore {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
                        break
                }
-               if x3.AuxInt != i-4 {
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPQmem {sym} [off] ptr x mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               if x3.Aux != s {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               _ = x3.Args[2]
-               if p != x3.Args[0] {
+               v.reset(OpAMD64CMPQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (CMPQ x l:(MOVQload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPQmem {sym} [off] ptr x mem))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               x3_1 := x3.Args[1]
-               if x3_1.Op != OpAMD64SHRQconst {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               if x3_1.AuxInt != 32 {
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQmem, types.TypeFlags)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(x)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPQconst_0(v *Value) bool {
+       // match: (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32])
+       // cond:
+       // result: (FlagLT_ULT)
+       for {
+               if v.AuxInt != 32 {
                        break
                }
-               if w != x3_1.Args[0] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGQ {
                        break
                }
-               x2 := x3.Args[2]
-               if x2.Op != OpAMD64MOVBstore {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if x2.AuxInt != i-5 {
+               if v_0_0.AuxInt != -16 {
                        break
                }
-               if x2.Aux != s {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               _ = x2.Args[2]
-               if p != x2.Args[0] {
+               if v_0_0_0.AuxInt != 15 {
                        break
                }
-               x2_1 := x2.Args[1]
-               if x2_1.Op != OpAMD64SHRQconst {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32])
+       // cond:
+       // result: (FlagLT_ULT)
+       for {
+               if v.AuxInt != 32 {
                        break
                }
-               if x2_1.AuxInt != 40 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64NEGQ {
                        break
                }
-               if w != x2_1.Args[0] {
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x1 := x2.Args[2]
-               if x1.Op != OpAMD64MOVBstore {
+               if v_0_0.AuxInt != -8 {
                        break
                }
-               if x1.AuxInt != i-6 {
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if x1.Aux != s {
+               if v_0_0_0.AuxInt != 7 {
                        break
                }
-               _ = x1.Args[2]
-               if p != x1.Args[0] {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x==y
+       // result: (FlagEQ)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               x1_1 := x1.Args[1]
-               if x1_1.Op != OpAMD64SHRQconst {
+               x := v_0.AuxInt
+               if !(x == y) {
                        break
                }
-               if x1_1.AuxInt != 48 {
+               v.reset(OpAMD64FlagEQ)
+               return true
+       }
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x<y && uint64(x)<uint64(y)
+       // result: (FlagLT_ULT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               if w != x1_1.Args[0] {
+               x := v_0.AuxInt
+               if !(x < y && uint64(x) < uint64(y)) {
                        break
                }
-               x0 := x1.Args[2]
-               if x0.Op != OpAMD64MOVBstore {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x<y && uint64(x)>uint64(y)
+       // result: (FlagLT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               if x0.AuxInt != i-7 {
+               x := v_0.AuxInt
+               if !(x < y && uint64(x) > uint64(y)) {
                        break
                }
-               if x0.Aux != s {
+               v.reset(OpAMD64FlagLT_UGT)
+               return true
+       }
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x>y && uint64(x)<uint64(y)
+       // result: (FlagGT_ULT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               _ = x0.Args[2]
-               if p != x0.Args[0] {
+               x := v_0.AuxInt
+               if !(x > y && uint64(x) < uint64(y)) {
                        break
                }
-               x0_1 := x0.Args[1]
-               if x0_1.Op != OpAMD64SHRQconst {
+               v.reset(OpAMD64FlagGT_ULT)
+               return true
+       }
+       // match: (CMPQconst (MOVQconst [x]) [y])
+       // cond: x>y && uint64(x)>uint64(y)
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               if x0_1.AuxInt != 56 {
+               x := v_0.AuxInt
+               if !(x > y && uint64(x) > uint64(y)) {
                        break
                }
-               if w != x0_1.Args[0] {
+               v.reset(OpAMD64FlagGT_UGT)
+               return true
+       }
+       // match: (CMPQconst (MOVBQZX _) [c])
+       // cond: 0xFF < c
+       // result: (FlagLT_ULT)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVBQZX {
                        break
                }
-               mem := x0.Args[2]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+               if !(0xFF < c) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = i - 7
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBstore_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
+       // match: (CMPQconst (MOVWQZX _) [c])
+       // cond: 0xFFFF < c
+       // result: (FlagLT_ULT)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRWconst {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVWQZX {
                        break
                }
-               if v_1.AuxInt != 8 {
+               if !(0xFFFF < c) {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVBstore {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (MOVLQZX _) [c])
+       // cond: 0xFFFFFFFF < c
+       // result: (FlagLT_ULT)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLQZX {
                        break
                }
-               if x.AuxInt != i-1 {
+               if !(0xFFFFFFFF < c) {
                        break
                }
-               if x.Aux != s {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPQconst_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPQconst (SHRQconst _ [c]) [n])
+       // cond: 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)
+       // result: (FlagLT_ULT)
+       for {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               _ = x.Args[2]
-               if p != x.Args[0] {
+               c := v_0.AuxInt
+               if !(0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n)) {
                        break
                }
-               if w != x.Args[1] {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (ANDQconst _ [m]) [n])
+       // cond: 0 <= m && m < n
+       // result: (FlagLT_ULT)
+       for {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               m := v_0.AuxInt
+               if !(0 <= m && m < n) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v.reset(OpAMD64FlagLT_ULT)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
+       // match: (CMPQconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= m && m < n
+       // result: (FlagLT_ULT)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRLconst {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if v_1.AuxInt != 8 {
+               m := v_0.AuxInt
+               if !(0 <= m && m < n) {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVBstore {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPQconst (ANDQ x y) [0])
+       // cond:
+       // result: (TESTQ x y)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               if x.AuxInt != i-1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQ {
                        break
                }
-               if x.Aux != s {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64TESTQ)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMPQconst (ANDQconst [c] x) [0])
+       // cond:
+       // result: (TESTQconst [c] x)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               _ = x.Args[2]
-               if p != x.Args[0] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDQconst {
                        break
                }
-               if w != x.Args[1] {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64TESTQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPQconst x [0])
+       // cond:
+       // result: (TESTQ x x)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := v.Args[0]
+               v.reset(OpAMD64TESTQ)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPQconst l:(MOVQload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
-               v.AddArg(mem)
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
+                       break
+               }
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPQmem_0(v *Value) bool {
+       // match: (CMPQmem {sym} [off] ptr (MOVQconst [c]) mem)
+       // cond: validValAndOff(c,off)
+       // result: (CMPQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               p := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               if v_1.AuxInt != 8 {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(c, off)) {
                        break
                }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVBstore {
+               v.reset(OpAMD64CMPQconstmem)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPW_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPW x (MOVLconst [c]))
+       // cond:
+       // result: (CMPWconst x [int64(int16(c))])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               if x.AuxInt != i-1 {
+               c := v_1.AuxInt
+               v.reset(OpAMD64CMPWconst)
+               v.AuxInt = int64(int16(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPW (MOVLconst [c]) x)
+       // cond:
+       // result: (InvertFlags (CMPWconst x [int64(int16(c))]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               if x.Aux != s {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v0.AuxInt = int64(int16(c))
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (CMPWmem {sym} [off] ptr x mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               _ = x.Args[2]
-               if p != x.Args[0] {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               if w != x.Args[1] {
-                       break
-               }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
+               v.reset(OpAMD64CMPWmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w0 mem)
+       // match: (CMPW x l:(MOVWload {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (InvertFlags (CMPWmem {sym} [off] ptr x mem))
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRLconst {
-                       break
-               }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVBstore {
-                       break
-               }
-               if x.AuxInt != i-1 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRLconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               if w0.AuxInt != j-8 {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(OpAMD64InvertFlags)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWmem, types.TypeFlags)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(x)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
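
The CMPW rules above canonicalize 16-bit compares: a constant left operand moves behind an InvertFlags (x86 CMP is not symmetric), and a single-use MOVWload feeding either side merges into the new CMPWmem memory-operand form. A minimal source-level sketch of Go code that produces both shapes (function names are illustrative, and whether the load actually merges still depends on canMergeLoad):

    package main

    // lessThanConst is the (CMPW x (MOVLconst [c])) shape: the constant is
    // truncated via int64(int16(c)) and folded into a CMPWconst.
    func lessThanConst(x int16) bool { return x < 100 }

    // lessThanLoad is the (CMPW l:(MOVWload ...) x) shape: when the load
    // has no other use it is folded into CMPWmem, so the compare reads
    // a[i] straight from memory.
    func lessThanLoad(a []int16, i int, x int16) bool { return a[i] < x }

    func main() {
    	println(lessThanConst(42), lessThanLoad([]int16{1, 2, 3}, 1, 5))
    }
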
+func rewriteValueAMD64_OpAMD64CMPWconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)==int16(y)
+       // result: (FlagEQ)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := v_0.AuxInt
+               if !(int16(x) == int16(y)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstore [i-1] {s} p w0 mem)
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)<int16(y) && uint16(x)<uint16(y)
+       // result: (FlagLT_ULT)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVBstore {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               if x.AuxInt != i-1 {
+               x := v_0.AuxInt
+               if !(int16(x) < int16(y) && uint16(x) < uint16(y)) {
                        break
                }
-               if x.Aux != s {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)<int16(y) && uint16(x)>uint16(y)
+       // result: (FlagLT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               _ = x.Args[2]
-               if p != x.Args[0] {
+               x := v_0.AuxInt
+               if !(int16(x) < int16(y) && uint16(x) > uint16(y)) {
                        break
                }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst {
+               v.reset(OpAMD64FlagLT_UGT)
+               return true
+       }
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)>int16(y) && uint16(x)<uint16(y)
+       // result: (FlagGT_ULT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               if w0.AuxInt != j-8 {
+               x := v_0.AuxInt
+               if !(int16(x) > int16(y) && uint16(x) < uint16(y)) {
                        break
                }
-               if w != w0.Args[0] {
+               v.reset(OpAMD64FlagGT_ULT)
+               return true
+       }
+       // match: (CMPWconst (MOVLconst [x]) [y])
+       // cond: int16(x)>int16(y) && uint16(x)>uint16(y)
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := v_0.AuxInt
+               if !(int16(x) > int16(y) && uint16(x) > uint16(y)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
+               v.reset(OpAMD64FlagGT_UGT)
                return true
        }
-       // match: (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) mem2:(MOVBstore [i-1] {s} p x2:(MOVBload [j-1] {s2} p2 mem) mem))
-       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)
-       // result: (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)
+       // match: (CMPWconst (ANDLconst _ [m]) [n])
+       // cond: 0 <= int16(m) && int16(m) < int16(n)
+       // result: (FlagLT_ULT)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVBload {
-                       break
-               }
-               j := x1.AuxInt
-               s2 := x1.Aux
-               _ = x1.Args[1]
-               p2 := x1.Args[0]
-               mem := x1.Args[1]
-               mem2 := v.Args[2]
-               if mem2.Op != OpAMD64MOVBstore {
+               n := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if mem2.AuxInt != i-1 {
+               m := v_0.AuxInt
+               if !(0 <= int16(m) && int16(m) < int16(n)) {
                        break
                }
-               if mem2.Aux != s {
+               v.reset(OpAMD64FlagLT_ULT)
+               return true
+       }
+       // match: (CMPWconst (ANDL x y) [0])
+       // cond:
+       // result: (TESTW x y)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               _ = mem2.Args[2]
-               if p != mem2.Args[0] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
                        break
                }
-               x2 := mem2.Args[1]
-               if x2.Op != OpAMD64MOVBload {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64TESTW)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (CMPWconst (ANDLconst [c] x) [0])
+       // cond:
+       // result: (TESTWconst [int64(int16(c))] x)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               if x2.AuxInt != j-1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               if x2.Aux != s2 {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64TESTWconst)
+               v.AuxInt = int64(int16(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPWconst x [0])
+       // cond:
+       // result: (TESTW x x)
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               _ = x2.Args[1]
-               if p2 != x2.Args[0] {
+               x := v.Args[0]
+               v.reset(OpAMD64TESTW)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (CMPWconst l:(MOVWload {sym} [off] ptr mem) [c])
+       // cond: l.Uses == 1 && validValAndOff(c, off) && clobber(l)
+       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       for {
+               c := v.AuxInt
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               if mem != x2.Args[1] {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l.Uses == 1 && validValAndOff(c, off) && clobber(l)) {
                        break
                }
-               if mem != mem2.Args[2] {
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(c, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       return false
+}
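
When both CMPWconst operands are constants, the compare folds to one of the flag pseudo-ops, which record the signed and unsigned orderings separately because later branches may test either; compares against zero canonicalize to the shorter TESTW encoding instead. A small sketch of the double ordering those conditions compute, mirroring the rules above:

    package main

    import "fmt"

    // flagForCMPW mirrors the constant-folding conditions above: the
    // result records both the signed (int16) and unsigned (uint16)
    // ordering of x against y, just as the FlagLT_ULT / FlagLT_UGT /
    // FlagGT_ULT / FlagGT_UGT opcodes do.
    func flagForCMPW(x, y int64) string {
    	sx, sy := int16(x), int16(y)
    	ux, uy := uint16(x), uint16(y)
    	switch {
    	case sx == sy:
    		return "FlagEQ"
    	case sx < sy && ux < uy:
    		return "FlagLT_ULT"
    	case sx < sy && ux > uy:
    		return "FlagLT_UGT"
    	case sx > sy && ux < uy:
    		return "FlagGT_ULT"
    	default:
    		return "FlagGT_UGT"
    	}
    }

    func main() {
    	fmt.Println(flagForCMPW(-1, 1)) // FlagLT_UGT: -1 is 0xffff unsigned
    	fmt.Println(flagForCMPW(2, 7))  // FlagLT_ULT
    }
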
+func rewriteValueAMD64_OpAMD64CMPWmem_0(v *Value) bool {
+       // match: (CMPWmem {sym} [off] ptr (MOVLconst [c]) mem)
+       // cond: validValAndOff(int64(int16(c)),off)
+       // result: (CMPWconstmem {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               if !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)) {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(int64(int16(c)), off)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
-               v0.AuxInt = j - 1
-               v0.Aux = s2
-               v0.AddArg(p2)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               v.reset(OpAMD64CMPWconstmem)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       return false
+}
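
CMPWmem against a constant is tightened further into CMPWconstmem, whose single AuxInt carries both the int16-truncated compare value and the load offset. The real makeValAndOff/validValAndOff helpers live elsewhere in this package and also range-check both halves; the toy below only sketches the packing, assuming the usual high/low 32-bit split:

    package main

    import "fmt"

    // Toy re-implementation of the ValAndOff packing (a sketch only):
    // value in the high 32 bits, offset in the low 32.
    func pack(val, off int64) int64 { return val<<32 | off&0xffffffff }
    func unpackVal(x int64) int64   { return x >> 32 }
    func unpackOff(x int64) int64   { return int64(int32(x)) }

    func main() {
    	vo := pack(int64(int16(-5)), 24)
    	fmt.Println(unpackVal(vo), unpackOff(vo)) // -5 24
    }
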
+func rewriteValueAMD64_OpAMD64CMPXCHGLlock_0(v *Value) bool {
+       // match: (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
+       // cond: is32Bit(off1+off2)
+       // result: (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)
        for {
                off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
+               sym := v.Aux
+               _ = v.Args[3]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
                off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+               ptr := v_0.Args[0]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVBstore)
+               v.reset(OpAMD64CMPXCHGLlock)
                v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64CMPXCHGQlock_0(v *Value) bool {
+       // match: (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
+       // result: (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVBstore)
+               v.reset(OpAMD64CMPXCHGQlock)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(val)
+               v.AddArg(old)
+               v.AddArg(new_)
                v.AddArg(mem)
                return true
        }
        return false
 }
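
Both CMPXCHG lock rules just fold an ADDQconst into the instruction's displacement, the same offset-folding applied to ordinary loads and stores. The kind of source that can yield such an address is a compare-and-swap on a field at a nonzero offset, as in this illustrative sketch:

    package main

    import (
    	"fmt"
    	"sync/atomic"
    )

    // A CAS on a field at a nonzero offset can lower to a CMPXCHGQlock
    // whose pointer is (ADDQconst [off] base); the rule above folds that
    // offset into the instruction's displacement.
    type counter struct {
    	_ [8]byte // illustrative padding: puts n at offset 8
    	n int64
    }

    func main() {
    	var c counter
    	ok := atomic.CompareAndSwapInt64(&c.n, 0, 42)
    	fmt.Println(ok, c.n) // true 42
    }
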
-func rewriteValueAMD64_OpAMD64MOVBstoreconst_0(v *Value) bool {
-       // match: (MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+func rewriteValueAMD64_OpAMD64LEAL_0(v *Value) bool {
+       // match: (LEAL [c] {s} (ADDLconst [d] x))
+       // cond: is32Bit(c+d)
+       // result: (LEAL [c+d] {s} x)
        for {
-               sc := v.AuxInt
+               c := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(is32Bit(c + d)) {
+                       break
+               }
+               v.reset(OpAMD64LEAL)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64LEAQ_0(v *Value) bool {
+       // match: (LEAQ [c] {s} (ADDQconst [d] x))
+       // cond: is32Bit(c+d)
+       // result: (LEAQ [c+d] {s} x)
+       for {
+               c := v.AuxInt
                s := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
+               v.reset(OpAMD64LEAQ)
+               v.AuxInt = c + d
                v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (LEAQ [c] {s} (ADDQ x y))
+       // cond: x.Op != OpSB && y.Op != OpSB
+       // result: (LEAQ1 [c] {s} x y)
        for {
-               sc := v.AuxInt
+               c := v.AuxInt
+               s := v.Aux
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(x.Op != OpSB && y.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
+       for {
+               off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               x := v_0.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
+               v.reset(OpAMD64LEAQ)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVBstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVBstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               x := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstoreconst [x] {sym} (ADDQ ptr idx) mem)
-       // cond:
-       // result: (MOVBstoreconstidx1 [x] {sym} ptr idx mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               x := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
+               off1 := v.AuxInt
+               sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if v_0.Op != OpAMD64LEAQ2 {
                        break
                }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
                _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               v.reset(OpAMD64MOVBstoreconstidx1)
-               v.AuxInt = x
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               p := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64MOVBstoreconst {
-                       break
-               }
-               a := x.AuxInt
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[1]
-               if p != x.Args[0] {
-                       break
-               }
-               mem := x.Args[1]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               sc := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               if v_0.Op != OpAMD64LEAQ4 {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       // match: (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
        for {
-               sc := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
+               off1 := v.AuxInt
+               sym1 := v.Aux
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if v_0.Op != OpAMD64LEAQ8 {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
        return false
 }
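
The LEAL/LEAQ rules collapse chains of address arithmetic: constant adds fold into the displacement (guarded by is32Bit so it still encodes), a register-register ADDQ becomes LEAQ1 when neither operand is the SB pseudo-register (SB must stay in the base slot), and nested LEAQs merge offsets and symbols. The underlying identities, checked in a tiny sketch:

    package main

    import "fmt"

    // The LEAQ folds are plain associativity of address arithmetic:
    //	(x + d) + c       == x + (c + d)       // ADDQconst folded in
    //	x + y (+ c)       ->  base+index+disp  // ADDQ becomes LEAQ1
    //	(x + off2) + off1 == x + (off1 + off2) // nested LEAQs merge
    func lea(base, index, disp int64) int64 { return base + index + disp }

    func main() {
    	x, c, d := int64(1000), int64(16), int64(8)
    	fmt.Println(lea(x+d, 0, c) == lea(x, 0, c+d)) // true
    }
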
-func rewriteValueAMD64_OpAMD64MOVBstoreconstidx1_0(v *Value) bool {
-       // match: (MOVBstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+func rewriteValueAMD64_OpAMD64LEAQ1_0(v *Value) bool {
+       // match: (LEAQ1 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ1 [c+d] {s} x y)
        for {
-               x := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               if !(ValAndOff(x).canAdd(c)) {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // match: (LEAQ1 [c] {s} y (ADDQconst [d] x))
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ1 [c+d] {s} x y)
        for {
-               x := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               y := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               if !(ValAndOff(x).canAdd(c)) {
+               d := v_1.AuxInt
+               x := v_1.Args[0]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVBstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
+       // match: (LEAQ1 [c] {s} x (SHLQconst [1] y))
+       // cond:
+       // result: (LEAQ2 [c] {s} x y)
        for {
                c := v.AuxInt
                s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVBstoreconstidx1 {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if v_1.AuxInt != 1 {
                        break
                }
-               _ = x.Args[2]
-               if p != x.Args[0] {
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ1 [c] {s} (SHLQconst [1] y) x)
+       // cond:
+       // result: (LEAQ2 [c] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               if i != x.Args[1] {
+               if v_0.AuxInt != 1 {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ1 [c] {s} x (SHLQconst [2] y))
+       // cond:
+       // result: (LEAQ4 [c] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c
                v.Aux = s
-               v.AddArg(p)
-               v.AddArg(i)
-               v.AddArg(mem)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVBstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (LEAQ1 [c] {s} (SHLQconst [2] y) x)
+       // cond:
+       // result: (LEAQ4 [c] {s} x y)
        for {
                c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               if v_0.AuxInt != 2 {
                        break
                }
-               v.reset(OpAMD64MOVBstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (LEAQ1 [c] {s} x (SHLQconst [3] y))
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
        for {
                c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               ptr := v.Args[0]
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               if v_1.AuxInt != 3 {
                        break
                }
-               v.reset(OpAMD64MOVBstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx w x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
-       // cond: x0.Uses == 1 && clobber(x0)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
+       // match: (LEAQ1 [c] {s} (SHLQconst [3] y) x)
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
        for {
-               i := v.AuxInt
+               c := v.AuxInt
                s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x0 := v.Args[3]
-               if x0.Op != OpAMD64MOVBstoreidx1 {
-                       break
-               }
-               if x0.AuxInt != i-1 {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               if x0.Aux != s {
+               if v_0.AuxInt != 3 {
                        break
                }
-               _ = x0.Args[3]
-               if p != x0.Args[0] {
+               y := v_0.Args[0]
+               x := v.Args[1]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               if idx != x0.Args[1] {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               x0_2 := x0.Args[2]
-               if x0_2.Op != OpAMD64SHRWconst {
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ1 [off1] {sym1} y (LEAQ [off2] {sym2} x))
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               y := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               if x0_2.AuxInt != 8 {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               x := v_1.Args[0]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               if w != x0_2.Args[0] {
+               v.reset(OpAMD64LEAQ1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
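
LEAQ1's remaining rules recognize a shifted index: x + (y<<1) is x + 2*y, so a SHLQconst by 1, 2, or 3 in either operand upgrades the plain base+index form to the scaled LEAQ2/LEAQ4/LEAQ8 forms. Indexing a slice of 8-byte elements is the classic source of this shape, sketched below (whether the final instruction is LEAQ8 depends on the surrounding code):

    package main

    import "fmt"

    // &a[i] on a []int64 computes base + (i << 3), i.e. an
    // (ADDQ x (SHLQconst [3] y)) that folds first to LEAQ1 and then,
    // by the shift rules above, to the scaled LEAQ8 form.
    func elem(a []int64, i int) *int64 { return &a[i] }

    func main() {
    	a := make([]int64, 4)
    	*elem(a, 2) = 7
    	fmt.Println(a) // [0 0 7 0]
    }
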
+func rewriteValueAMD64_OpAMD64LEAQ2_0(v *Value) bool {
+       // match: (LEAQ2 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ2 [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := x0.Args[3]
-               if !(x0.Uses == 1 && clobber(x0)) {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c + d
                v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
-               v0.AuxInt = 8
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
-       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
+       // match: (LEAQ2 [c] {s} x (ADDQconst [d] y))
+       // cond: is32Bit(c+2*d) && y.Op != OpSB
+       // result: (LEAQ2 [c+2*d] {s} x y)
        for {
-               i := v.AuxInt
+               c := v.AuxInt
                s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x2 := v.Args[3]
-               if x2.Op != OpAMD64MOVBstoreidx1 {
-                       break
-               }
-               if x2.AuxInt != i-1 {
-                       break
-               }
-               if x2.Aux != s {
-                       break
-               }
-               _ = x2.Args[3]
-               if p != x2.Args[0] {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if idx != x2.Args[1] {
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is32Bit(c+2*d) && y.Op != OpSB) {
                        break
                }
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpAMD64SHRLconst {
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = c + 2*d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ2 [c] {s} x (SHLQconst [1] y))
+       // cond:
+       // result: (LEAQ4 [c] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x2_2.AuxInt != 8 {
+               if v_1.AuxInt != 1 {
                        break
                }
-               if w != x2_2.Args[0] {
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ2 [c] {s} x (SHLQconst [2] y))
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               x1 := x2.Args[3]
-               if x1.Op != OpAMD64MOVBstoreidx1 {
+               if v_1.AuxInt != 2 {
                        break
                }
-               if x1.AuxInt != i-2 {
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               if x1.Aux != s {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               _ = x1.Args[3]
-               if p != x1.Args[0] {
-                       break
-               }
-               if idx != x1.Args[1] {
-                       break
-               }
-               x1_2 := x1.Args[2]
-               if x1_2.Op != OpAMD64SHRLconst {
-                       break
-               }
-               if x1_2.AuxInt != 16 {
-                       break
-               }
-               if w != x1_2.Args[0] {
+               v.reset(OpAMD64LEAQ2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64LEAQ4_0(v *Value) bool {
+       // match: (LEAQ4 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ4 [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x0 := x1.Args[3]
-               if x0.Op != OpAMD64MOVBstoreidx1 {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               if x0.AuxInt != i-3 {
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c + d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ4 [c] {s} x (ADDQconst [d] y))
+       // cond: is32Bit(c+4*d) && y.Op != OpSB
+       // result: (LEAQ4 [c+4*d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if x0.Aux != s {
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is32Bit(c+4*d) && y.Op != OpSB) {
                        break
                }
-               _ = x0.Args[3]
-               if p != x0.Args[0] {
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = c + 4*d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ4 [c] {s} x (SHLQconst [1] y))
+       // cond:
+       // result: (LEAQ8 [c] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if idx != x0.Args[1] {
+               if v_1.AuxInt != 1 {
                        break
                }
-               x0_2 := x0.Args[2]
-               if x0_2.Op != OpAMD64SHRLconst {
+               y := v_1.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               if x0_2.AuxInt != 24 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               if w != x0_2.Args[0] {
+               v.reset(OpAMD64LEAQ4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64LEAQ8_0(v *Value) bool {
+       // match: (LEAQ8 [c] {s} (ADDQconst [d] x) y)
+       // cond: is32Bit(c+d) && x.Op != OpSB
+       // result: (LEAQ8 [c+d] {s} x y)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := x0.Args[3]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(c+d) && x.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 3
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c + d
                v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
-       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
-       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
+       // match: (LEAQ8 [c] {s} x (ADDQconst [d] y))
+       // cond: is32Bit(c+8*d) && y.Op != OpSB
+       // result: (LEAQ8 [c+8*d] {s} x y)
        for {
-               i := v.AuxInt
+               c := v.AuxInt
                s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               w := v.Args[2]
-               x6 := v.Args[3]
-               if x6.Op != OpAMD64MOVBstoreidx1 {
-                       break
-               }
-               if x6.AuxInt != i-1 {
-                       break
-               }
-               if x6.Aux != s {
-                       break
-               }
-               _ = x6.Args[3]
-               if p != x6.Args[0] {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if idx != x6.Args[1] {
+               d := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(is32Bit(c+8*d) && y.Op != OpSB) {
                        break
                }
-               x6_2 := x6.Args[2]
-               if x6_2.Op != OpAMD64SHRQconst {
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = c + 8*d
+               v.Aux = s
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB
+       // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               if x6_2.AuxInt != 8 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               x := v_0.Args[0]
+               y := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB) {
                        break
                }
-               if w != x6_2.Args[0] {
+               v.reset(OpAMD64LEAQ8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
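
For the scaled forms, a constant folded out of the index operand lands in the displacement multiplied by the scale: LEAQ2 uses c+2*d, LEAQ4 c+4*d, LEAQ8 c+8*d. The identity behind the LEAQ2 case, verified in a tiny sketch:

    package main

    import "fmt"

    // (LEAQ2 [c] {s} x (ADDQconst [d] y)) -> (LEAQ2 [c+2*d] {s} x y)
    // rests on the identity x + 2*(y+d) + c == x + 2*y + (c + 2*d);
    // the 4x and 8x forms scale the folded constant accordingly.
    func lea2(x, y, c int64) int64 { return x + 2*y + c }

    func main() {
    	x, y, c, d := int64(100), int64(5), int64(3), int64(7)
    	fmt.Println(lea2(x, y+d, c) == lea2(x, y, c+2*d)) // true
    }
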
+func rewriteValueAMD64_OpAMD64MOVBQSX_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBQSX x:(MOVBload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBload {
                        break
                }
-               x5 := x6.Args[3]
-               if x5.Op != OpAMD64MOVBstoreidx1 {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               if x5.AuxInt != i-2 {
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQSX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               if x5.Aux != s {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               _ = x5.Args[3]
-               if p != x5.Args[0] {
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQSX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               if idx != x5.Args[1] {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               x5_2 := x5.Args[2]
-               if x5_2.Op != OpAMD64SHRQconst {
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQSX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               if x5_2.AuxInt != 16 {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               if w != x5_2.Args[0] {
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQSX (ANDLconst [c] x))
+       // cond: c & 0x80 == 0
+       // result: (ANDLconst [c & 0x7f] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               x4 := x5.Args[3]
-               if x4.Op != OpAMD64MOVBstoreidx1 {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c&0x80 == 0) {
                        break
                }
-               if x4.AuxInt != i-3 {
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0x7f
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBQSX (MOVBQSX x))
+       // cond:
+       // result: (MOVBQSX x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVBQSX {
                        break
                }
-               if x4.Aux != s {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQSX)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
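
MOVBQSX follows the same pattern as the compare rules: a single-use load under the sign extension collapses into MOVBQSXload (emitted in the load's block via @x.Block), an ANDLconst that already clears bit 7 makes the extension redundant, and doubled extensions drop to one. Illustrative source shapes, with hypothetical function names:

    package main

    import "fmt"

    // ext is the (MOVBQSX x:(MOVBload ...)) shape: a single-use byte load
    // under a sign extension collapses into one MOVBQSXload.
    func ext(b []int8, i int) int64 { return int64(b[i]) }

    // masked is the (MOVBQSX (ANDLconst [c] x)) shape: with bit 7 of c
    // clear the byte is already non-negative, so the extension reduces
    // to the cheaper mask c & 0x7f.
    func masked(x int32) int64 { return int64(int8(x & 0x40)) }

    func main() {
    	fmt.Println(ext([]int8{-1}, 0), masked(0x1c0)) // -1 64
    }
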
+func rewriteValueAMD64_OpAMD64MOVBQSXload_0(v *Value) bool {
+       // match: (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVBQSX x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBstore {
                        break
                }
-               _ = x4.Args[3]
-               if p != x4.Args[0] {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[2]
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               if idx != x4.Args[1] {
+               v.reset(OpAMD64MOVBQSX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               x4_2 := x4.Args[2]
-               if x4_2.Op != OpAMD64SHRQconst {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               if x4_2.AuxInt != 24 {
+               v.reset(OpAMD64MOVBQSXload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
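+// The MOVBQZX rules mirror the sign-extension ones above: a wider load
+// feeding only a low-byte zero extension is narrowed to MOVBload
+// (which already zero-extends to 64 bits on amd64), ANDLconst absorbs
+// the extension as a [c & 0xff] mask, and nested MOVBQZX collapses to
+// one. E.g. (hypothetical): u := uint64(byte(*q)) needs one byte load.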
+func rewriteValueAMD64_OpAMD64MOVBQZX_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBQZX x:(MOVBload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBload {
                        break
                }
-               if w != x4_2.Args[0] {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               x3 := x4.Args[3]
-               if x3.Op != OpAMD64MOVBstoreidx1 {
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               if x3.AuxInt != i-4 {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               if x3.Aux != s {
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               _ = x3.Args[3]
-               if p != x3.Args[0] {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               if idx != x3.Args[1] {
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               x3_2 := x3.Args[2]
-               if x3_2.Op != OpAMD64SHRQconst {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               if x3_2.AuxInt != 32 {
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVBloadidx1 {
                        break
                }
-               if w != x3_2.Args[0] {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               x2 := x3.Args[3]
-               if x2.Op != OpAMD64MOVBstoreidx1 {
-                       break
-               }
-               if x2.AuxInt != i-5 {
-                       break
-               }
-               if x2.Aux != s {
-                       break
-               }
-               _ = x2.Args[3]
-               if p != x2.Args[0] {
-                       break
-               }
-               if idx != x2.Args[1] {
-                       break
-               }
-               x2_2 := x2.Args[2]
-               if x2_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if x2_2.AuxInt != 40 {
-                       break
-               }
-               if w != x2_2.Args[0] {
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVBQZX (ANDLconst [c] x))
+       // cond:
+       // result: (ANDLconst [c & 0xff] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               x1 := x2.Args[3]
-               if x1.Op != OpAMD64MOVBstoreidx1 {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0xff
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBQZX (MOVBQZX x))
+       // cond:
+       // result: (MOVBQZX x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVBQZX {
                        break
                }
-               if x1.AuxInt != i-6 {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQZX)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
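+// The MOVBload rules fold addressing arithmetic into the load itself:
+// ADDQconst/ADDLconst and LEAQ/LEAL displacements merge into
+// AuxInt/Aux (guarded by is32Bit and canMergeSym), while base+index
+// forms (LEAQ1, ADDQ) become MOVBloadidx1; the ptr.Op != OpSB check
+// keeps the static-base pseudo-register out of indexed addressing.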
+func rewriteValueAMD64_OpAMD64MOVBload_0(v *Value) bool {
+       // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVBQZX x)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBstore {
                        break
                }
-               if x1.Aux != s {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[2]
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               _ = x1.Args[3]
-               if p != x1.Args[0] {
+               v.reset(OpAMD64MOVBQZX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if idx != x1.Args[1] {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               x1_2 := x1.Args[2]
-               if x1_2.Op != OpAMD64SHRQconst {
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               if x1_2.AuxInt != 48 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               if w != x1_2.Args[0] {
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               x0 := x1.Args[3]
-               if x0.Op != OpAMD64MOVBstoreidx1 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               if x0.AuxInt != i-7 {
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off] {sym} (ADDQ ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVBloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               if x0.Aux != s {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
                        break
                }
-               _ = x0.Args[3]
-               if p != x0.Args[0] {
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               if idx != x0.Args[1] {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               x0_2 := x0.Args[2]
-               if x0_2.Op != OpAMD64SHRQconst {
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVBload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if x0_2.AuxInt != 56 {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               if w != x0_2.Args[0] {
+               v.reset(OpAMD64MOVBload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
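+// MOVBloadidx1 treats its two address operands symmetrically, so the
+// four rules below fold an ADDQconst found on either operand into the
+// displacement (when it still fits in 32 bits) and normalize the
+// remaining register into the ptr slot, e.g. turning the +4 in a
+// hypothetical b[i+4] into AuxInt rather than a separate ADD.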
+func rewriteValueAMD64_OpAMD64MOVBloadidx1_0(v *Value) bool {
+       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := x0.Args[3]
-               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 7
-               v.Aux = s
-               v.AddArg(p)
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(idx)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
-               v0.AddArg(w)
-               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       // match: (MOVBloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRWconst {
-                       break
-               }
-               if v_2.AuxInt != 8 {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 {
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               if x.AuxInt != i-1 {
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if x.Aux != s {
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               _ = x.Args[3]
-               if p != x.Args[0] {
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if idx != x.Args[1] {
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               if w != x.Args[2] {
+               v.reset(OpAMD64MOVBloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
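+// MOVBstore rules, part 1: each rule below fuses a single-use SETcc
+// (SETL, SETLE, SETG, ... SETAE) with the byte store that consumes it
+// into the corresponding SETccmem op, writing the flag-derived byte
+// straight to memory. A hypothetical source-level trigger:
+//
+//	*ok = a < b // SETL + MOVBstore fuse into one SETLmem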
+func rewriteValueAMD64_OpAMD64MOVBstore_0(v *Value) bool {
+       // match: (MOVBstore [off] {sym} ptr y:(SETL x) mem)
+       // cond: y.Uses == 1
+       // result: (SETLmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETL {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
+               v.reset(OpAMD64SETLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       // match: (MOVBstore [off] {sym} ptr y:(SETLE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETLEmem [off] {sym} ptr x mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRLconst {
-                       break
-               }
-               if v_2.AuxInt != 8 {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETLE {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 {
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
                        break
                }
-               if x.AuxInt != i-1 {
+               v.reset(OpAMD64SETLEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETG x) mem)
+       // cond: y.Uses == 1
+       // result: (SETGmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETG {
                        break
                }
-               if x.Aux != s {
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
                        break
                }
-               _ = x.Args[3]
-               if p != x.Args[0] {
+               v.reset(OpAMD64SETGmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETGE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETGEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETGE {
                        break
                }
-               if idx != x.Args[1] {
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
                        break
                }
-               if w != x.Args[2] {
+               v.reset(OpAMD64SETGEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [off] {sym} ptr y:(SETEQ x) mem)
+       // cond: y.Uses == 1
+       // result: (SETEQmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETEQ {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
+               v.reset(OpAMD64SETEQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
+       // match: (MOVBstore [off] {sym} ptr y:(SETNE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETNEmem [off] {sym} ptr x mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETNE {
                        break
                }
-               if v_2.AuxInt != 8 {
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
                        break
                }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 {
-                       break
-               }
-               if x.AuxInt != i-1 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               if w != x.Args[2] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
+               v.reset(OpAMD64SETNEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       // match: (MOVBstore [off] {sym} ptr y:(SETB x) mem)
+       // cond: y.Uses == 1
+       // result: (SETBmem [off] {sym} ptr x mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRLconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 {
-                       break
-               }
-               if x.AuxInt != i-1 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRLconst {
-                       break
-               }
-               if w0.AuxInt != j-8 {
-                       break
-               }
-               if w != w0.Args[0] {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETB {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
+               v.reset(OpAMD64SETBmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
+       // match: (MOVBstore [off] {sym} ptr y:(SETBE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETBEmem [off] {sym} ptr x mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVBstoreidx1 {
-                       break
-               }
-               if x.AuxInt != i-1 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if w0.AuxInt != j-8 {
-                       break
-               }
-               if w != w0.Args[0] {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETBE {
                        break
                }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
-               v.AuxInt = i - 1
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
+               v.reset(OpAMD64SETBEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLQSX_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (MOVBstore [off] {sym} ptr y:(SETA x) mem)
+       // cond: y.Uses == 1
+       // result: (SETAmem [off] {sym} ptr x mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETA {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64SETAmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVLQSX (ANDLconst [c] x))
-       // cond: c & 0x80000000 == 0
-       // result: (ANDLconst [c & 0x7fffffff] x)
+       // match: (MOVBstore [off] {sym} ptr y:(SETAE x) mem)
+       // cond: y.Uses == 1
+       // result: (SETAEmem [off] {sym} ptr x mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               y := v.Args[1]
+               if y.Op != OpAMD64SETAE {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c&0x80000000 == 0) {
+               x := y.Args[0]
+               mem := v.Args[2]
+               if !(y.Uses == 1) {
                        break
                }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0x7fffffff
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVLQSX (MOVLQSX x))
+       return false
+}
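+// MOVBstore rules, part 2: drop a MOVBQSX/MOVBQZX on the stored value
+// (a byte store writes only the low byte anyway), turn stores of
+// MOVLconst into MOVBstoreconst, fold addressing arithmetic exactly as
+// for MOVBload, and merge adjacent single-byte stores of one value's
+// shifted pieces into wider stores (the SHRxconst patterns below).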
+func rewriteValueAMD64_OpAMD64MOVBstore_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVBstore [off] {sym} ptr (MOVBQSX x) mem)
        // cond:
-       // result: (MOVLQSX x)
+       // result: (MOVBstore [off] {sym} ptr x mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLQSX {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBQSX {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVLQSX)
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVLQSX (MOVWQSX x))
+       // match: (MOVBstore [off] {sym} ptr (MOVBQZX x) mem)
        // cond:
-       // result: (MOVWQSX x)
+       // result: (MOVBstore [off] {sym} ptr x mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVWQSX {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVBQZX {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQSX)
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVLQSX (MOVBQSX x))
-       // cond:
-       // result: (MOVBQSX x)
+       // match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQSX {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQSX)
-               v.AddArg(x)
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLQSXload_0(v *Value) bool {
-       // match: (MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVLQSX x)
+       // match: (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLstore {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[2]
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLQSX)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVBstoreconst)
+               v.AuxInt = makeValAndOff(int64(int8(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVLQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64LEAQ {
                        break
@@ -8337,1139 +9392,439 @@ func rewriteValueAMD64_OpAMD64MOVLQSXload_0(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                base := v_0.Args[0]
-               mem := v.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVLQSXload)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLQZX_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVLQZX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+       // match: (MOVBstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64MOVBstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVLQZX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+       // match: (MOVBstore [off] {sym} (ADDQ ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVBstoreidx1 [off] {sym} ptr idx val mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64MOVBstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
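+       // The next rules recognize a 16-bit value stored byte-by-byte in
+       // big-endian order (high byte w>>8 at address i-1, low byte at i)
+       // and replace the pair with a single MOVWstore of the value with
+       // its two bytes swapped by ROLWconst [8]. A hypothetical trigger:
+       //
+       //      binary.BigEndian.PutUint16(buf, w)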
-       // match: (MOVLQZX x)
-       // cond: zeroUpper32Bits(x,3)
-       // result: x
+       // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
+       // cond: x0.Uses == 1 && clobber(x0)
+       // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
        for {
-               x := v.Args[0]
-               if !(zeroUpper32Bits(x, 3)) {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x0 := v.Args[2]
+               if x0.Op != OpAMD64MOVBstore {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLloadidx1 {
+               if x0.AuxInt != i-1 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if x0.Aux != s {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)
-       for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLloadidx4 {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx4, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (MOVLQZX (ANDLconst [c] x))
-       // cond:
-       // result: (ANDLconst [c] x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpAMD64SHRWconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQZX (MOVLQZX x))
-       // cond:
-       // result: (MOVLQZX x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLQZX {
+               if x0_1.AuxInt != 8 {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVLQZX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQZX (MOVWQZX x))
-       // cond:
-       // result: (MOVWQZX x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVWQZX {
+               if w != x0_1.Args[0] {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQZX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLQZX (MOVBQZX x))
-       // cond:
-       // result: (MOVBQZX x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQZX {
+               mem := x0.Args[2]
+               if !(x0.Uses == 1 && clobber(x0)) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQZX)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
+               v0.AuxInt = 8
+               v0.AddArg(w)
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLatomicload_0(v *Value) bool {
-       // match: (MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLatomicload [off1+off2] {sym} ptr mem)
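+       // The same idea extended to four bytes: w>>24 at i-3 down to w at
+       // i is a big-endian 32-bit store, collapsed into one MOVLstore of
+       // BSWAPL(w), e.g. a hypothetical binary.BigEndian.PutUint32 call.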
+       // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x2 := v.Args[2]
+               if x2.Op != OpAMD64MOVBstore {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if x2.AuxInt != i-1 {
                        break
                }
-               v.reset(OpAMD64MOVLatomicload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if x2.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               _ = x2.Args[2]
+               if p != x2.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLatomicload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLf2i_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVLf2i <t> (Arg [off] {sym}))
-       // cond:
-       // result: @b.Func.Entry (Arg <t> [off] {sym})
-       for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != OpArg {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpAMD64SHRLconst {
                        break
                }
-               off := v_0.AuxInt
-               sym := v_0.Aux
-               b = b.Func.Entry
-               v0 := b.NewValue0(v.Pos, OpArg, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLi2f_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVLi2f <t> (Arg [off] {sym}))
-       // cond:
-       // result: @b.Func.Entry (Arg <t> [off] {sym})
-       for {
-               t := v.Type
-               v_0 := v.Args[0]
-               if v_0.Op != OpArg {
+               if x2_1.AuxInt != 8 {
                        break
                }
-               off := v_0.AuxInt
-               sym := v_0.Aux
-               b = b.Func.Entry
-               v0 := b.NewValue0(v.Pos, OpArg, t)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLload_0(v *Value) bool {
-       // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVLQZX x)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLstore {
+               if w != x2_1.Args[0] {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[2]
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               x1 := x2.Args[2]
+               if x1.Op != OpAMD64MOVBstore {
                        break
                }
-               v.reset(OpAMD64MOVLQZX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVLload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if x1.AuxInt != i-2 {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               _ = x1.Args[2]
+               if p != x1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if x1_1.AuxInt != 16 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if w != x1_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               x0 := x1.Args[2]
+               if x0.Op != OpAMD64MOVBstore {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if x0.AuxInt != i-3 {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               if x0.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVLloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpAMD64SHRLconst {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
+               if x0_1.AuxInt != 24 {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               if w != x0_1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+               mem := x0.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
-               v.reset(OpAMD64MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
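
The loop that just completed is the 32-bit form of the byte-store merge: four MOVBstores of w, w>>8, w>>16 and w>>24 at offsets i down to i-3 are recognized as a big-endian layout and collapsed into a single MOVLstore of BSWAPL(w). A minimal Go-level sketch of the source pattern this targets (the function name and shape are illustrative, not taken from the compiler):

	func putUint32BE(b []byte, v uint32) {
		_ = b[3] // one bounds check covers all four stores
		b[0] = byte(v >> 24) // MOVBstore [i-3] (SHRLconst [24] w)
		b[1] = byte(v >> 16) // MOVBstore [i-2] (SHRLconst [16] w)
		b[2] = byte(v >> 8)  // MOVBstore [i-1] (SHRLconst [8] w)
		b[3] = byte(v)       // MOVBstore [i] w
	}

After the rewrite this compiles to one BSWAPL plus a single 4-byte store instead of three shifts and four byte stores.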
-       // match: (MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLload [off1+off2] {sym} ptr mem)
+       // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               w := v.Args[1]
+               x6 := v.Args[2]
+               if x6.Op != OpAMD64MOVBstore {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if x6.AuxInt != i-1 {
                        break
                }
-               v.reset(OpAMD64MOVLload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _))
-       // cond:
-       // result: (MOVLf2i val)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVSSstore {
+               if x6.Aux != s {
                        break
                }
-               if v_1.AuxInt != off {
+               _ = x6.Args[2]
+               if p != x6.Args[0] {
                        break
                }
-               if v_1.Aux != sym {
+               x6_1 := x6.Args[1]
+               if x6_1.Op != OpAMD64SHRQconst {
                        break
                }
-               _ = v_1.Args[2]
-               if ptr != v_1.Args[0] {
+               if x6_1.AuxInt != 8 {
                        break
                }
-               val := v_1.Args[1]
-               v.reset(OpAMD64MOVLf2i)
-               v.AddArg(val)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLloadidx1_0(v *Value) bool {
-       // match: (MOVLloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
-       // cond:
-       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if w != x6_1.Args[0] {
                        break
                }
-               if v_1.AuxInt != 2 {
+               x5 := x6.Args[2]
+               if x5.Op != OpAMD64MOVBstore {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} (SHLQconst [2] idx) ptr mem)
-       // cond:
-       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               if x5.AuxInt != i-2 {
                        break
                }
-               if v_0.AuxInt != 2 {
+               if x5.Aux != s {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
-       // cond:
-       // result: (MOVLloadidx8 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               _ = x5.Args[2]
+               if p != x5.Args[0] {
                        break
                }
-               if v_1.AuxInt != 3 {
+               x5_1 := x5.Args[1]
+               if x5_1.Op != OpAMD64SHRQconst {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLloadidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} (SHLQconst [3] idx) ptr mem)
-       // cond:
-       // result: (MOVLloadidx8 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
+               if x5_1.AuxInt != 16 {
                        break
                }
-               if v_0.AuxInt != 3 {
+               if w != x5_1.Args[0] {
                        break
                }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLloadidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x4 := x5.Args[2]
+               if x4.Op != OpAMD64MOVBstore {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               if x4.AuxInt != i-3 {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               idx := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if x4.Aux != s {
                        break
                }
-               d := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               _ = x4.Args[2]
+               if p != x4.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x4_1 := x4.Args[1]
+               if x4_1.Op != OpAMD64SHRQconst {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               if x4_1.AuxInt != 24 {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if w != x4_1.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               x3 := x4.Args[2]
+               if x3.Op != OpAMD64MOVBstore {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLloadidx4_0(v *Value) bool {
-       // match: (MOVLloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLloadidx4 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if x3.AuxInt != i-4 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               if x3.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+4*d)
-       // result: (MOVLloadidx4 [c+4*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               _ = x3.Args[2]
+               if p != x3.Args[0] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(c + 4*d)) {
+               x3_1 := x3.Args[1]
+               if x3_1.Op != OpAMD64SHRQconst {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLloadidx8_0(v *Value) bool {
-       // match: (MOVLloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLloadidx8 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if x3_1.AuxInt != 32 {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               if w != x3_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+8*d)
-       // result: (MOVLloadidx8 [c+8*d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x2 := x3.Args[2]
+               if x2.Op != OpAMD64MOVBstore {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(c + 8*d)) {
+               if x2.AuxInt != i-5 {
                        break
                }
-               v.reset(OpAMD64MOVLloadidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstore_0(v *Value) bool {
-       // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem)
-       // cond:
-       // result: (MOVLstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLQSX {
+               if x2.Aux != s {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem)
-       // cond:
-       // result: (MOVLstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLQZX {
+               _ = x2.Args[2]
+               if p != x2.Args[0] {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVLstore [off1+off2] {sym} ptr val mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               x2_1 := x2.Args[1]
+               if x2_1.Op != OpAMD64SHRQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if x2_1.AuxInt != 40 {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if w != x2_1.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
+               x1 := x2.Args[2]
+               if x1.Op != OpAMD64MOVBstore {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = makeValAndOff(int64(int32(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if x1.AuxInt != i-6 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if x1.Aux != s {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               _ = x1.Args[2]
+               if p != x1.Args[0] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x1_1 := x1.Args[1]
+               if x1_1.Op != OpAMD64SHRQconst {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               if x1_1.AuxInt != 48 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if w != x1_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVLstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               x0 := x1.Args[2]
+               if x0.Op != OpAMD64MOVBstore {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if x0.AuxInt != i-7 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if x0.Aux != s {
                        break
                }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
+               _ = x0.Args[2]
+               if p != x0.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
+               x0_1 := x0.Args[1]
+               if x0_1.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_1.AuxInt != 56 {
+                       break
+               }
+               if w != x0_1.Args[0] {
+                       break
+               }
+               mem := x0.Args[2]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
+       return false
+}
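
As with the rest of this file, these matchers are generated; each // match, // cond and // result comment is the source rule from gen/AMD64.rules reproduced verbatim. The rule that closes the function above is the 64-bit counterpart of the 32-bit merge: eight MOVBstores of w shifted right by 8 through 56 collapse into one MOVQstore of BSWAPQ(w). A hedged sketch of the Go pattern it covers, the same shape used for big-endian 64-bit encoding (names are illustrative):

	func putUint64BE(b []byte, v uint64) {
		_ = b[7] // single bounds check for all eight stores
		b[0] = byte(v >> 56)
		b[1] = byte(v >> 48)
		b[2] = byte(v >> 40)
		b[3] = byte(v >> 32)
		b[4] = byte(v >> 24)
		b[5] = byte(v >> 16)
		b[6] = byte(v >> 8)
		b[7] = byte(v)
	}

Each guard of the form xN.Uses == 1 && clobber(xN) ensures a partially overwritten intermediate store cannot be observed anywhere else before the whole chain is replaced.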
+func rewriteValueAMD64_OpAMD64MOVBstore_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstore [i-4] {s} p w mem)
+       // result: (MOVWstore [i-1] {s} p w mem)
        for {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
                p := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
+               if v_1.Op != OpAMD64SHRWconst {
                        break
                }
-               if v_1.AuxInt != 32 {
+               if v_1.AuxInt != 8 {
                        break
                }
                w := v_1.Args[0]
                x := v.Args[2]
-               if x.Op != OpAMD64MOVLstore {
+               if x.Op != OpAMD64MOVBstore {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x.AuxInt != i-1 {
                        break
                }
                if x.Aux != s {
@@ -9486,24 +9841,62 @@ func rewriteValueAMD64_OpAMD64MOVLstore_0(v *Value) bool {
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = i - 4
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
                v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
+       // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstore [i-4] {s} p w0 mem)
+       // result: (MOVWstore [i-1] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if v_1.AuxInt != 8 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstore [i-1] {s} p w mem)
        for {
                i := v.AuxInt
                s := v.Aux
@@ -9513,13 +9906,108 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                if v_1.Op != OpAMD64SHRQconst {
                        break
                }
+               if v_1.AuxInt != 8 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstore [i-1] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRLconst {
+                       break
+               }
                j := v_1.AuxInt
                w := v_1.Args[0]
                x := v.Args[2]
-               if x.Op != OpAMD64MOVLstore {
+               if x.Op != OpAMD64MOVBstore {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstore [i-1] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVBstore {
+                       break
+               }
+               if x.AuxInt != i-1 {
                        break
                }
                if x.Aux != s {
@@ -9533,7 +10021,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                if w0.Op != OpAMD64SHRQconst {
                        break
                }
-               if w0.AuxInt != j-32 {
+               if w0.AuxInt != j-8 {
                        break
                }
                if w != w0.Args[0] {
@@ -9543,24 +10031,24 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = i - 4
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
                v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore [i] {s} p x1:(MOVLload [j] {s2} p2 mem) mem2:(MOVLstore [i-4] {s} p x2:(MOVLload [j-4] {s2} p2 mem) mem))
+       // match: (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) mem2:(MOVBstore [i-1] {s} p x2:(MOVBload [j-1] {s2} p2 mem) mem))
        // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)
-       // result: (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
+       // result: (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)
        for {
                i := v.AuxInt
                s := v.Aux
                _ = v.Args[2]
                p := v.Args[0]
                x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVLload {
+               if x1.Op != OpAMD64MOVBload {
                        break
                }
                j := x1.AuxInt
@@ -9569,10 +10057,10 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                p2 := x1.Args[0]
                mem := x1.Args[1]
                mem2 := v.Args[2]
-               if mem2.Op != OpAMD64MOVLstore {
+               if mem2.Op != OpAMD64MOVBstore {
                        break
                }
-               if mem2.AuxInt != i-4 {
+               if mem2.AuxInt != i-1 {
                        break
                }
                if mem2.Aux != s {
@@ -9583,10 +10071,10 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                        break
                }
                x2 := mem2.Args[1]
-               if x2.Op != OpAMD64MOVLload {
+               if x2.Op != OpAMD64MOVBload {
                        break
                }
-               if x2.AuxInt != j-4 {
+               if x2.AuxInt != j-1 {
                        break
                }
                if x2.Aux != s2 {
@@ -9605,12 +10093,12 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                if !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = i - 4
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v0.AuxInt = j - 4
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
+               v0.AuxInt = j - 1
                v0.Aux = s2
                v0.AddArg(p2)
                v0.AddArg(mem)
@@ -9618,9 +10106,9 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
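
The pairing rule above applies the same widening to copies rather than shifts: two adjacent MOVBload/MOVBstore pairs threaded through the same memory state fuse into a single MOVWload feeding a single MOVWstore. In source form the pattern is simply (illustrative sketch):

	func copy2(dst, src []byte) {
		dst[0] = src[0] // MOVBload [j-1] / MOVBstore [i-1]
		dst[1] = src[1] // MOVBload [j]   / MOVBstore [i]
	}

The mem2.Uses == 1 condition matters here: nothing may observe memory between the two byte stores, otherwise merging them would change program behavior.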
-       // match: (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -9637,7 +10125,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -9645,9 +10133,9 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVLstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVBstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -9663,7 +10151,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
@@ -9671,77 +10159,12 @@ func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
-       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
-       // result: (ADDLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               a := v.Args[1]
-               if a.Op != OpAMD64ADDLconst {
-                       break
-               }
-               c := a.AuxInt
-               l := a.Args[0]
-               if l.Op != OpAMD64MOVLload {
-                       break
-               }
-               if l.AuxInt != off {
-                       break
-               }
-               if l.Aux != sym {
-                       break
-               }
-               _ = l.Args[1]
-               ptr2 := l.Args[0]
-               mem := l.Args[1]
-               if mem != v.Args[2] {
-                       break
-               }
-               if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
-                       break
-               }
-               v.reset(OpAMD64ADDLconstmem)
-               v.AuxInt = makeValAndOff(c, off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem)
-       // cond:
-       // result: (MOVSSstore [off] {sym} ptr val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLf2i {
-                       break
-               }
-               val := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSSstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
        return false
 }
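
The [j] / [j-8] variants handled in the function above generalize the pair merge to interior bytes: a byte store of w>>j adjacent to a byte store of w>>(j-8) becomes one MOVWstore of the narrower shift w0, valid for any j since only the low 16 bits of w0 are stored. A rough sketch for j = 24 on a 64-bit value (names are illustrative):

	// Writes bytes 2 and 3 of v; the two byte stores fuse into a
	// single MOVWstore of (v >> 16).
	func putMid16(b []byte, v uint64) {
		_ = b[1]
		b[0] = byte(v >> 16) // w0 = SHRQconst [j-8] w
		b[1] = byte(v >> 24) // SHRQconst [j] w
	}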
-func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+func rewriteValueAMD64_OpAMD64MOVBstoreconst_0(v *Value) bool {
+       // match: (MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
        // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
                sc := v.AuxInt
                s := v.Aux
@@ -9756,16 +10179,16 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
                if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
+               v.reset(OpAMD64MOVBstoreconst)
                v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = s
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // match: (MOVBstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
        // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
                sc := v.AuxInt
                sym1 := v.Aux
@@ -9781,16 +10204,16 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
+               v.reset(OpAMD64MOVBstoreconst)
                v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
+       // match: (MOVBstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
        // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVBstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                x := v.AuxInt
                sym1 := v.Aux
@@ -9808,35 +10231,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               x := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
-                       break
-               }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreconstidx4)
+               v.reset(OpAMD64MOVBstoreconstidx1)
                v.AuxInt = ValAndOff(x).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -9844,9 +10239,9 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [x] {sym} (ADDQ ptr idx) mem)
+       // match: (MOVBstoreconst [x] {sym} (ADDQ ptr idx) mem)
        // cond:
-       // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
+       // result: (MOVBstoreconstidx1 [x] {sym} ptr idx mem)
        for {
                x := v.AuxInt
                sym := v.Aux
@@ -9859,7 +10254,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.reset(OpAMD64MOVBstoreconstidx1)
                v.AuxInt = x
                v.Aux = sym
                v.AddArg(ptr)
@@ -9867,16 +10262,16 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+       // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
+       // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                p := v.Args[0]
                x := v.Args[1]
-               if x.Op != OpAMD64MOVLstoreconst {
+               if x.Op != OpAMD64MOVBstoreconst {
                        break
                }
                a := x.AuxInt
@@ -9888,22 +10283,19 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
                        break
                }
                mem := x.Args[1]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+               if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = ValAndOff(a).Off()
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
                v.Aux = s
                v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
-               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // match: (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
        // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
                sc := v.AuxInt
                sym1 := v.Aux
@@ -9919,16 +10311,16 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
+               v.reset(OpAMD64MOVBstoreconst)
                v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // match: (MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
        // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
                sc := v.AuxInt
                s := v.Aux
@@ -9943,7 +10335,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
                if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconst)
+               v.reset(OpAMD64MOVBstoreconst)
                v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = s
                v.AddArg(ptr)
@@ -9952,39 +10344,10 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
        }
        return false
 }
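
The MOVBstoreconst merge above packs two adjacent one-byte constant stores into a single two-byte constant store; the combined value is ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, so the byte at the lower offset lands in the low half of the 16-bit immediate. A toy sketch of that arithmetic (the helper name is ours, not the compiler's):

	// packBytes combines the byte constant stored at off (lo) with the
	// one stored at off+1 (hi) into the single little-endian 16-bit
	// constant that MOVWstoreconst writes at off.
	func packBytes(lo, hi int64) int64 {
		return lo&0xff | hi<<8
	}

For example, packBytes(0x12, 0x34) yields 0x3412: one MOVW immediate store replaces two MOVB immediate stores.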
-func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
-       // cond:
-       // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_1.AuxInt != 2 {
-                       break
-               }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
+func rewriteValueAMD64_OpAMD64MOVBstoreconstidx1_0(v *Value) bool {
+       // match: (MOVBstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
        // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
                x := v.AuxInt
                sym := v.Aux
@@ -10000,7 +10363,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
                if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.reset(OpAMD64MOVBstoreconstidx1)
                v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
@@ -10008,9 +10371,9 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // match: (MOVBstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
        // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
                x := v.AuxInt
                sym := v.Aux
@@ -10026,7 +10389,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
                if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.reset(OpAMD64MOVBstoreconstidx1)
                v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
@@ -10034,9 +10397,9 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+       // match: (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
+       // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
        for {
                c := v.AuxInt
                s := v.Aux
@@ -10044,7 +10407,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
                p := v.Args[0]
                i := v.Args[1]
                x := v.Args[2]
-               if x.Op != OpAMD64MOVLstoreconstidx1 {
+               if x.Op != OpAMD64MOVBstoreconstidx1 {
                        break
                }
                a := x.AuxInt
@@ -10059,237 +10422,454 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
                        break
                }
                mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+               if !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = ValAndOff(a).Off()
+               v.reset(OpAMD64MOVWstoreconstidx1)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
                v.Aux = s
                v.AddArg(p)
                v.AddArg(i)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
-               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
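
The final rule in this function pairs two adjacent one-byte constant stores into a single two-byte store. Because amd64 is little-endian, the byte at the lower offset (a) becomes the low byte of the merged 16-bit immediate, exactly as the makeValAndOff expression above computes. A tiny worked example of that arithmetic:

    package main

    import "fmt"

    func main() {
        // Two adjacent constant byte stores: 0x34 at off, 0x12 at off+1.
        aVal, cVal := int64(0x34), int64(0x12)

        // Immediate of the merged MOVWstoreconstidx1, built as in the rule:
        // ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8.
        merged := aVal&0xff | cVal<<8
        fmt.Printf("%#04x\n", merged) // 0x1234, stored little-endian at off
    }
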
-func rewriteValueAMD64_OpAMD64MOVLstoreconstidx4_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVBstoreidx1_0(v *Value) bool {
        b := v.Block
        _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // match: (MOVBstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                ptr := v_0.Args[0]
                idx := v.Args[1]
-               mem := v.Args[2]
-               if !(ValAndOff(x).canAdd(c)) {
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(c)
+               v.reset(OpAMD64MOVBstoreidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(4*c)
-       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
+       // match: (MOVBstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1.AuxInt
+               d := v_1.AuxInt
                idx := v_1.Args[0]
-               mem := v.Args[2]
-               if !(ValAndOff(x).canAdd(4 * c)) {
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx4)
-               v.AuxInt = ValAndOff(x).add(4 * c)
+               v.reset(OpAMD64MOVBstoreidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
+       // cond: x0.Uses == 1 && clobber(x0)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)
        for {
-               c := v.AuxInt
+               i := v.AuxInt
                s := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVLstoreconstidx4 {
+               idx := v.Args[1]
+               w := v.Args[2]
+               x0 := v.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               if x0.AuxInt != i-1 {
                        break
                }
-               _ = x.Args[2]
-               if p != x.Args[0] {
+               if x0.Aux != s {
                        break
                }
-               if i != x.Args[1] {
+               _ = x0.Args[3]
+               if p != x0.Args[0] {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+               if idx != x0.Args[1] {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = ValAndOff(a).Off()
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRWconst {
+                       break
+               }
+               if x0_2.AuxInt != 8 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && clobber(x0)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type)
-               v0.AuxInt = 2
-               v0.AddArg(i)
+               v.AddArg(idx)
+               v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type)
+               v0.AuxInt = 8
+               v0.AddArg(w)
                v.AddArg(v0)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v1.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
-               v.AddArg(v1)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
-       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem)
-       // cond:
-       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)
+       // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
+               i := v.AuxInt
+               s := v.Aux
                _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x2 := v.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if v_1.AuxInt != 2 {
+               if x2.AuxInt != i-1 {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
-       // cond:
-       // result: (MOVLstoreidx8 [c] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               if x2.Aux != s {
                        break
                }
-               if v_1.AuxInt != 3 {
+               _ = x2.Args[3]
+               if p != x2.Args[0] {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVLstoreidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if idx != x2.Args[1] {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x2_2.AuxInt != 8 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-2 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[3]
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x1_2.AuxInt != 16 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-3 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[3]
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if x0_2.AuxInt != 24 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2)) {
                        break
                }
                v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.AuxInt = i - 3
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
+       // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)
+       // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
+               i := v.AuxInt
+               s := v.Aux
                _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               p := v.Args[0]
+               idx := v.Args[1]
+               w := v.Args[2]
+               x6 := v.Args[3]
+               if x6.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               if x6.AuxInt != i-1 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
+               if x6.Aux != s {
+                       break
+               }
+               _ = x6.Args[3]
+               if p != x6.Args[0] {
+                       break
+               }
+               if idx != x6.Args[1] {
+                       break
+               }
+               x6_2 := x6.Args[2]
+               if x6_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x6_2.AuxInt != 8 {
+                       break
+               }
+               if w != x6_2.Args[0] {
+                       break
+               }
+               x5 := x6.Args[3]
+               if x5.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x5.AuxInt != i-2 {
+                       break
+               }
+               if x5.Aux != s {
+                       break
+               }
+               _ = x5.Args[3]
+               if p != x5.Args[0] {
+                       break
+               }
+               if idx != x5.Args[1] {
+                       break
+               }
+               x5_2 := x5.Args[2]
+               if x5_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x5_2.AuxInt != 16 {
+                       break
+               }
+               if w != x5_2.Args[0] {
+                       break
+               }
+               x4 := x5.Args[3]
+               if x4.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x4.AuxInt != i-3 {
+                       break
+               }
+               if x4.Aux != s {
+                       break
+               }
+               _ = x4.Args[3]
+               if p != x4.Args[0] {
+                       break
+               }
+               if idx != x4.Args[1] {
+                       break
+               }
+               x4_2 := x4.Args[2]
+               if x4_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x4_2.AuxInt != 24 {
+                       break
+               }
+               if w != x4_2.Args[0] {
+                       break
+               }
+               x3 := x4.Args[3]
+               if x3.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x3.AuxInt != i-4 {
+                       break
+               }
+               if x3.Aux != s {
+                       break
+               }
+               _ = x3.Args[3]
+               if p != x3.Args[0] {
+                       break
+               }
+               if idx != x3.Args[1] {
+                       break
+               }
+               x3_2 := x3.Args[2]
+               if x3_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x3_2.AuxInt != 32 {
+                       break
+               }
+               if w != x3_2.Args[0] {
+                       break
+               }
+               x2 := x3.Args[3]
+               if x2.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x2.AuxInt != i-5 {
+                       break
+               }
+               if x2.Aux != s {
+                       break
+               }
+               _ = x2.Args[3]
+               if p != x2.Args[0] {
+                       break
+               }
+               if idx != x2.Args[1] {
+                       break
+               }
+               x2_2 := x2.Args[2]
+               if x2_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x2_2.AuxInt != 40 {
+                       break
+               }
+               if w != x2_2.Args[0] {
+                       break
+               }
+               x1 := x2.Args[3]
+               if x1.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x1.AuxInt != i-6 {
+                       break
+               }
+               if x1.Aux != s {
+                       break
+               }
+               _ = x1.Args[3]
+               if p != x1.Args[0] {
+                       break
+               }
+               if idx != x1.Args[1] {
+                       break
+               }
+               x1_2 := x1.Args[2]
+               if x1_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x1_2.AuxInt != 48 {
+                       break
+               }
+               if w != x1_2.Args[0] {
+                       break
+               }
+               x0 := x1.Args[3]
+               if x0.Op != OpAMD64MOVBstoreidx1 {
+                       break
+               }
+               if x0.AuxInt != i-7 {
+                       break
+               }
+               if x0.Aux != s {
+                       break
+               }
+               _ = x0.Args[3]
+               if p != x0.Args[0] {
+                       break
+               }
+               if idx != x0.Args[1] {
+                       break
+               }
+               x0_2 := x0.Args[2]
+               if x0_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if x0_2.AuxInt != 56 {
+                       break
+               }
+               if w != x0_2.Args[0] {
+                       break
+               }
+               mem := x0.Args[3]
+               if !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4) && clobber(x5) && clobber(x6)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 7
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type)
+               v0.AddArg(w)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p idx w mem)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
@@ -10297,18 +10877,18 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                p := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               if v_2.Op != OpAMD64SHRWconst {
                        break
                }
-               if v_2.AuxInt != 32 {
+               if v_2.AuxInt != 8 {
                        break
                }
                w := v_2.Args[0]
                x := v.Args[3]
-               if x.Op != OpAMD64MOVLstoreidx1 {
+               if x.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x.AuxInt != i-1 {
                        break
                }
                if x.Aux != s {
@@ -10328,8 +10908,8 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
@@ -10337,9 +10917,9 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
@@ -10347,16 +10927,18 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                p := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               if v_2.Op != OpAMD64SHRLconst {
+                       break
+               }
+               if v_2.AuxInt != 8 {
                        break
                }
-               j := v_2.AuxInt
                w := v_2.Args[0]
                x := v.Args[3]
-               if x.Op != OpAMD64MOVLstoreidx1 {
+               if x.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x.AuxInt != i-1 {
                        break
                }
                if x.Aux != s {
@@ -10369,93 +10951,25 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
                if idx != x.Args[1] {
                        break
                }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if w0.AuxInt != j-32 {
-                       break
-               }
-               if w != w0.Args[0] {
+               if w != x.Args[2] {
                        break
                }
                mem := x.Args[3]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstoreidx4_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVLstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLstoreidx4 [c+d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVLstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+4*d)
-       // result: (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + 4*d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w mem)
        for {
                i := v.AuxInt
                s := v.Aux
@@ -10466,15 +10980,15 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx4_0(v *Value) bool {
                if v_2.Op != OpAMD64SHRQconst {
                        break
                }
-               if v_2.AuxInt != 32 {
+               if v_2.AuxInt != 8 {
                        break
                }
                w := v_2.Args[0]
                x := v.Args[3]
-               if x.Op != OpAMD64MOVLstoreidx4 {
+               if x.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x.AuxInt != i-1 {
                        break
                }
                if x.Aux != s {
@@ -10494,21 +11008,18 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx4_0(v *Value) bool {
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
                v.Aux = s
                v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx)
-               v.AddArg(v0)
+               v.AddArg(idx)
                v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
        for {
                i := v.AuxInt
                s := v.Aux
@@ -10516,16 +11027,16 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx4_0(v *Value) bool {
                p := v.Args[0]
                idx := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
+               if v_2.Op != OpAMD64SHRLconst {
                        break
                }
                j := v_2.AuxInt
                w := v_2.Args[0]
                x := v.Args[3]
-               if x.Op != OpAMD64MOVLstoreidx4 {
+               if x.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               if x.AuxInt != i-4 {
+               if x.AuxInt != i-1 {
                        break
                }
                if x.Aux != s {
@@ -10539,10 +11050,10 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx4_0(v *Value) bool {
                        break
                }
                w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
+               if w0.Op != OpAMD64SHRLconst {
                        break
                }
-               if w0.AuxInt != j-32 {
+               if w0.AuxInt != j-8 {
                        break
                }
                if w != w0.Args[0] {
@@ -10552,107 +11063,214 @@ func rewriteValueAMD64_OpAMD64MOVLstoreidx4_0(v *Value) bool {
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
-               v.AuxInt = i - 4
-               v.Aux = s
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
                v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 2
-               v0.AddArg(idx)
-               v.AddArg(v0)
+               v.AddArg(idx)
                v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstoreidx8_0(v *Value) bool {
-       // match: (MOVLstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVLstoreidx8 [c+d] {sym} ptr idx val mem)
+       // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
+               i := v.AuxInt
+               s := v.Aux
                _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVBstoreidx1 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
+               if x.AuxInt != i-1 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-8 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = i - 1
+               v.Aux = s
+               v.AddArg(p)
                v.AddArg(idx)
-               v.AddArg(val)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVLstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+8*d)
-       // result: (MOVLstoreidx8 [c+8*d] {sym} ptr idx val mem)
+       return false
+}
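
Taken together, the MOVBstoreidx1 rules recognize runs of single-byte stores of shifted copies of one value. When the low byte lands at the lowest address, the run merges into a plain wider store; when the bytes land in big-endian order, the rewrite inserts ROLWconst, BSWAPL, or BSWAPQ in front of a 2-, 4-, or 8-byte store. A hedged example of Go source that produces such byte-store patterns (whether a given function actually takes the idx1 forms depends on how earlier passes shape the addressing):

    package main

    import "fmt"

    // putUint32BE writes w in big-endian byte order. The four byte stores
    // of w>>24, w>>16, w>>8, w can merge into MOVLstoreidx1 + BSWAPL.
    func putUint32BE(b []byte, i int, w uint32) {
        b[i-3] = byte(w >> 24)
        b[i-2] = byte(w >> 16)
        b[i-1] = byte(w >> 8)
        b[i] = byte(w)
    }

    // putUint16LE writes w in little-endian order; the two byte stores
    // can merge into a single MOVWstoreidx1 with no byte swap.
    func putUint16LE(b []byte, i int, w uint16) {
        b[i-1] = byte(w)
        b[i] = byte(w >> 8)
    }

    func main() {
        buf := make([]byte, 8)
        putUint32BE(buf, 3, 0x01020304)
        putUint16LE(buf, 5, 0x0605)
        fmt.Printf("% x\n", buf) // 01 02 03 04 05 06 00 00
    }

This is the machinery that lets encoding/binary-style stores compile to a single MOV (plus a byte swap for big-endian order) instead of four or eight separate byte stores.
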
+func rewriteValueAMD64_OpAMD64MOVLQSX_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + 8*d)) {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQSXload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQSX (ANDLconst [c] x))
+       // cond: c & 0x80000000 == 0
+       // result: (ANDLconst [c & 0x7fffffff] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(c&0x80000000 == 0) {
+                       break
+               }
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0x7fffffff
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQSX (MOVLQSX x))
+       // cond:
+       // result: (MOVLQSX x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLQSX {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVLQSX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQSX (MOVWQSX x))
+       // cond:
+       // result: (MOVWQSX x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVWQSX {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVWQSX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQSX (MOVBQSX x))
+       // cond:
+       // result: (MOVBQSX x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVBQSX {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQSX)
+               v.AddArg(x)
                return true
        }
        return false
 }
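
The ANDLconst rule above rests on a sign-bit argument: if mask c has bit 31 clear, the 32-bit AND result is non-negative, so sign extension and zero extension agree; and since 32-bit ops on amd64 already zero the upper half of the register, the MOVLQSX can be dropped entirely (the c & 0x7fffffff in the result only normalizes the aux value, since the guard already ensures bit 31 is clear). A minimal check of the underlying arithmetic fact:

    package main

    import "fmt"

    func main() {
        x := int32(-123456789)
        const c = 0x7ffffff0 // c & 0x80000000 == 0

        masked := x & c // ANDLconst: bit 31 is clear, so masked >= 0
        // Sign-extension equals zero-extension for a non-negative value.
        fmt.Println(int64(masked) == int64(uint32(masked))) // true
    }
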
-func rewriteValueAMD64_OpAMD64MOVOload_0(v *Value) bool {
-       // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVOload [off1+off2] {sym} ptr mem)
+func rewriteValueAMD64_OpAMD64MOVLQSXload_0(v *Value) bool {
+       // match: (MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVLQSX x)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLstore {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[2]
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64MOVOload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.reset(OpAMD64MOVLQSX)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVLQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVLQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -10668,7 +11286,7 @@ func rewriteValueAMD64_OpAMD64MOVOload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVOload)
+               v.reset(OpAMD64MOVLQSXload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -10677,66 +11295,190 @@ func rewriteValueAMD64_OpAMD64MOVOload_0(v *Value) bool {
        }
        return false
 }
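
The first MOVLQSXload rule is store-to-load forwarding: a sign-extending load that reads back a value just stored at the same address is replaced by MOVLQSX of the stored register, skipping the memory round trip. (The unsigned counterpart appears further below, where MOVLload over a matching MOVLstore becomes MOVLQZX x.) A hedged sketch of source code with this shape; whether the rewrite actually fires depends on the exact SSA form:

    package main

    import "fmt"

    func storeThenLoad(p *int32, x int32) int64 {
        *p = x           // MOVLstore to *p
        return int64(*p) // sign-extending reload; forwarded to MOVLQSX x
    }

    func main() {
        var v int32
        fmt.Println(storeThenLoad(&v, -7)) // -7
    }
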
-func rewriteValueAMD64_OpAMD64MOVOstore_0(v *Value) bool {
-       // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVOstore [off1+off2] {sym} ptr val mem)
+func rewriteValueAMD64_OpAMD64MOVLQZX_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVLQZX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQZX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQZX x)
+       // cond: zeroUpper32Bits(x,3)
+       // result: x
+       for {
+               x := v.Args[0]
+               if !(zeroUpper32Bits(x, 3)) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLloadidx1 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)
+       for {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLloadidx4 {
+                       break
+               }
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx4, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
+               return true
+       }
+       // match: (MOVLQZX (ANDLconst [c] x))
+       // cond:
+       // result: (ANDLconst [c] x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQZX (MOVLQZX x))
+       // cond:
+       // result: (MOVLQZX x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLQZX {
                        break
                }
-               v.reset(OpAMD64MOVOstore)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVLQZX)
+               v.AddArg(x)
                return true
        }
-       // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (MOVLQZX (MOVWQZX x))
+       // cond:
+       // result: (MOVWQZX x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64MOVWQZX {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVWQZX)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MOVLQZX (MOVBQZX x))
+       // cond:
+       // result: (MOVBQZX x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVBQZX {
                        break
                }
-               v.reset(OpAMD64MOVOstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQZX)
+               v.AddArg(x)
                return true
        }
        return false
 }
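
The zeroUpper32Bits rule encodes the amd64 invariant that 32-bit instructions clear bits 63..32 of their destination: if x was produced by such an instruction (searched up to depth 3), the explicit MOVLQZX is a no-op and is removed. A hedged example of code that benefits:

    package main

    import "fmt"

    func widen(a, b uint32) uint64 {
        s := a + b       // 32-bit ADDL: upper 32 bits of the register are zeroed
        return uint64(s) // MOVLQZX s can be dropped entirely
    }

    func main() {
        fmt.Println(widen(1<<31, 1<<31)) // 0: 32-bit wraparound, then widened
    }

The surrounding load-fusing rules handle the memory case instead: a zero-extension of a one-use MOVLload (or its idx1/idx4 forms) collapses into the load itself, since a 32-bit load from memory already zero-extends.
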
-func rewriteValueAMD64_OpAMD64MOVQatomicload_0(v *Value) bool {
-       // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+func rewriteValueAMD64_OpAMD64MOVLatomicload_0(v *Value) bool {
+       // match: (MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVQatomicload [off1+off2] {sym} ptr mem)
+       // result: (MOVLatomicload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -10751,16 +11493,16 @@ func rewriteValueAMD64_OpAMD64MOVQatomicload_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVQatomicload)
+               v.reset(OpAMD64MOVLatomicload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // match: (MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+       // result: (MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -10776,7 +11518,7 @@ func rewriteValueAMD64_OpAMD64MOVQatomicload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQatomicload)
+               v.reset(OpAMD64MOVLatomicload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -10785,10 +11527,10 @@ func rewriteValueAMD64_OpAMD64MOVQatomicload_0(v *Value) bool {
        }
        return false
 }
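
As with the ordinary loads, the MOVLatomicload rules fold only address arithmetic (an ADDQconst or LEAQ feeding the pointer); the instruction itself stays a single aligned MOVL, which is naturally atomic on amd64. A hedged example of source whose field access plausibly lowers to an atomic load with a folded constant offset (the struct layout here is assumed purely for illustration):

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    type counters struct {
        _ [8]byte // padding so the field below sits at a constant offset
        n uint32
    }

    func read(c *counters) uint32 {
        // The field offset can fold into the atomic load's AuxInt.
        return atomic.LoadUint32(&c.n)
    }

    func main() {
        c := &counters{n: 42}
        fmt.Println(read(c)) // 42
    }
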
-func rewriteValueAMD64_OpAMD64MOVQf2i_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVLf2i_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MOVQf2i <t> (Arg [off] {sym}))
+       // match: (MOVLf2i <t> (Arg [off] {sym}))
        // cond:
        // result: @b.Func.Entry (Arg <t> [off] {sym})
        for {
@@ -10809,10 +11551,10 @@ func rewriteValueAMD64_OpAMD64MOVQf2i_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVQi2f_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVLi2f_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MOVQi2f <t> (Arg [off] {sym}))
+       // match: (MOVLi2f <t> (Arg [off] {sym}))
        // cond:
        // result: @b.Func.Entry (Arg <t> [off] {sym})
        for {
@@ -10833,17 +11575,17 @@ func rewriteValueAMD64_OpAMD64MOVQi2f_0(v *Value) bool {
        }
        return false
 }
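
MOVLf2i and MOVLi2f reinterpret 32 bits between the floating-point and integer register files. The Arg rule for each says that when the operand is an incoming function argument, the compiler can simply re-read the argument slot in the entry block with the other type instead of emitting a cross-register-file move. A hedged example of the canonical source of MOVLf2i:

    package main

    import (
        "fmt"
        "math"
    )

    func bits(f float32) uint32 {
        return math.Float32bits(f) // bit reinterpretation: MOVLf2i f
    }

    func main() {
        fmt.Printf("%#08x\n", bits(1.0)) // 0x3f800000
    }

math.Float32frombits is the mirror image and feeds MOVLi2f the same way.
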
-func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
-       // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _))
+func rewriteValueAMD64_OpAMD64MOVLload_0(v *Value) bool {
+       // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _))
        // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: x
+       // result: (MOVLQZX x)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[1]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQstore {
+               if v_1.Op != OpAMD64MOVLstore {
                        break
                }
                off2 := v_1.AuxInt
@@ -10854,14 +11596,13 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpAMD64MOVLQZX)
                v.AddArg(x)
                return true
        }
-       // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVLload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVQload [off1+off2] {sym} ptr mem)
+       // result: (MOVLload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -10876,16 +11617,16 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVQload)
+               v.reset(OpAMD64MOVLload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVLload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -10901,16 +11642,16 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQload)
+               v.reset(OpAMD64MOVLload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // match: (MOVLload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -10928,7 +11669,7 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx1)
+               v.reset(OpAMD64MOVLloadidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -10936,15 +11677,15 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
+       // match: (MOVLload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               if v_0.Op != OpAMD64LEAQ4 {
                        break
                }
                off2 := v_0.AuxInt
@@ -10956,7 +11697,7 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx8)
+               v.reset(OpAMD64MOVLloadidx4)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -10964,25 +11705,53 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVQloadidx1 [off] {sym} ptr idx mem)
+       // match: (MOVLload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
+               if v_0.Op != OpAMD64LEAQ8 {
                        break
                }
-               _ = v_0.Args[1]
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLloadidx8)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLload [off] {sym} (ADDQ ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVLloadidx1 [off] {sym} ptr idx mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx1)
+               v.reset(OpAMD64MOVLloadidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -10990,9 +11759,9 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
        // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -11008,16 +11777,16 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               v.reset(OpAMD64MOVQload)
+               v.reset(OpAMD64MOVLload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // match: (MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVQload [off1+off2] {sym} ptr mem)
+       // result: (MOVLload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -11032,23 +11801,23 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVQload)
+               v.reset(OpAMD64MOVLload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _))
+       // match: (MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _))
        // cond:
-       // result: (MOVQf2i val)
+       // result: (MOVLf2i val)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[1]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVSDstore {
+               if v_1.Op != OpAMD64MOVSSstore {
                        break
                }
                if v_1.AuxInt != off {
@@ -11062,16 +11831,66 @@ func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
                        break
                }
                val := v_1.Args[1]
-               v.reset(OpAMD64MOVQf2i)
+               v.reset(OpAMD64MOVLf2i)
                v.AddArg(val)
                return true
        }
        return false
 }
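
The MOVLload rules above mirror the old MOVQload set for 32-bit loads: a constant added to the pointer (ADDQconst/ADDLconst) folds into the load's offset, LEAQ/LEAL symbols merge via mergeSym, scaled LEAQ1/LEAQ4/LEAQ8 bases lower to the indexed load forms, and a MOVLload that reads back a just-stored MOVSSstore value becomes a register move (MOVLf2i). A minimal sketch of Go source that plausibly exercises the offset-folding rule on amd64 (names and types are illustrative, not from this patch):

package sketch

type header struct {
        tag  uint32
        size uint32
}

// Reading h.size is a 32-bit load at byte offset 4 from h; the
// (MOVLload [off1] (ADDQconst [off2] ptr)) rule folds that constant
// offset into the MOVLload displacement instead of emitting an add.
func loadSize(h *header) uint32 {
        return h.size
}
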
-func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
-       // match: (MOVQloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
+func rewriteValueAMD64_OpAMD64MOVLloadidx1_0(v *Value) bool {
+       // match: (MOVLloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
        // cond:
-       // result: (MOVQloadidx8 [c] {sym} ptr idx mem)
+       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx1 [c] {sym} (SHLQconst [2] idx) ptr mem)
+       // cond:
+       // result: (MOVLloadidx4 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLloadidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
+       // cond:
+       // result: (MOVLloadidx8 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11086,7 +11905,7 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                }
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVQloadidx8)
+               v.reset(OpAMD64MOVLloadidx8)
                v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
@@ -11094,9 +11913,9 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQloadidx1 [c] {sym} (SHLQconst [3] idx) ptr mem)
+       // match: (MOVLloadidx1 [c] {sym} (SHLQconst [3] idx) ptr mem)
        // cond:
-       // result: (MOVQloadidx8 [c] {sym} ptr idx mem)
+       // result: (MOVLloadidx8 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11111,7 +11930,7 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                idx := v_0.Args[0]
                ptr := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVQloadidx8)
+               v.reset(OpAMD64MOVLloadidx8)
                v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
@@ -11119,9 +11938,9 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
        // cond: is32Bit(c+d)
-       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11137,7 +11956,7 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx1)
+               v.reset(OpAMD64MOVLloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -11145,9 +11964,9 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // match: (MOVLloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
        // cond: is32Bit(c+d)
-       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11163,7 +11982,7 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx1)
+               v.reset(OpAMD64MOVLloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -11171,9 +11990,9 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // match: (MOVLloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
        // cond: is32Bit(c+d)
-       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11189,7 +12008,7 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx1)
+               v.reset(OpAMD64MOVLloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -11197,9 +12016,9 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
        // cond: is32Bit(c+d)
-       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11215,7 +12034,7 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx1)
+               v.reset(OpAMD64MOVLloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -11225,10 +12044,10 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
        }
        return false
 }
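
MOVLloadidx1 is the generic scale-1 indexed load; the rules above strength-reduce it when the index is pre-shifted (SHLQconst [2] gives MOVLloadidx4, SHLQconst [3] gives MOVLloadidx8, in either operand order) and fold ADDQconst displacements from either the pointer or the index. A hedged sketch of source that tends to produce the scale-4 form (illustrative only):

package sketch

// s[i] addresses base + 4*i. The generic lowering first yields a
// scale-1 indexed load of (i << 2); the SHLQconst [2] rule then folds
// the shift into the addressing mode as MOVLloadidx4.
func elem(s []uint32, i int) uint32 {
        return s[i]
}
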
-func rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v *Value) bool {
-       // match: (MOVQloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
+func rewriteValueAMD64_OpAMD64MOVLloadidx4_0(v *Value) bool {
+       // match: (MOVLloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem)
        // cond: is32Bit(c+d)
-       // result: (MOVQloadidx8 [c+d] {sym} ptr idx mem)
+       // result: (MOVLloadidx4 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11244,7 +12063,7 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx8)
+               v.reset(OpAMD64MOVLloadidx4)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -11252,9 +12071,64 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // match: (MOVLloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+4*d)
+       // result: (MOVLloadidx4 [c+4*d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(c + 4*d)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLloadidx4)
+               v.AuxInt = c + 4*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
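
For MOVLloadidx4 the displacement math accounts for the scale: a constant d added to the pointer contributes d bytes (c+d), while a constant d added to the index contributes 4*d bytes (c+4*d). With c=0 and s[i+2] below, for instance, the folded displacement is 4*2 = 8. Illustrative source, not from this patch:

package sketch

// The +2 on the index appears as ADDQconst [2] on the index value; the
// rule above folds it into the MOVLloadidx4 offset as 8 bytes, keeping
// the raw i in the index register.
func ahead(s []uint32, i int) uint32 {
        return s[i+2]
}
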
+func rewriteValueAMD64_OpAMD64MOVLloadidx8_0(v *Value) bool {
+       // match: (MOVLloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVLloadidx8 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLloadidx8)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
        // cond: is32Bit(c+8*d)
-       // result: (MOVQloadidx8 [c+8*d] {sym} ptr idx mem)
+       // result: (MOVLloadidx8 [c+8*d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11270,7 +12144,7 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v *Value) bool {
                if !(is32Bit(c + 8*d)) {
                        break
                }
-               v.reset(OpAMD64MOVQloadidx8)
+               v.reset(OpAMD64MOVLloadidx8)
                v.AuxInt = c + 8*d
                v.Aux = sym
                v.AddArg(ptr)
@@ -11280,10 +12154,54 @@ func rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v *Value) bool {
        }
        return false
 }
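
MOVLloadidx8 covers 32-bit loads with an 8-byte stride, which commonly arise when indexing a slice of 8-byte elements and reading a 4-byte field; its ADDQconst rules fold d bytes from the pointer (c+d) and 8*d bytes from the index (c+8*d). A plausible source-level trigger (illustrative):

package sketch

type entry struct {
        key uint32
        val uint32
}

// s[i].key loads 4 bytes at base + 8*i, which matches MOVLloadidx8
// once the LEAQ8/SHLQconst [3] rules have fired.
func keyOf(s []entry, i int) uint32 {
        return s[i].key
}
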
-func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
-       // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+func rewriteValueAMD64_OpAMD64MOVLstore_0(v *Value) bool {
+       // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem)
+       // cond:
+       // result: (MOVLstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLQSX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem)
+       // cond:
+       // result: (MOVLstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLQZX {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVQstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVLstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -11299,7 +12217,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
+               v.reset(OpAMD64MOVLstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
@@ -11307,33 +12225,33 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem)
-       // cond: validValAndOff(c,off)
-       // result: (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
+       // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
                mem := v.Args[2]
-               if !(validValAndOff(c, off)) {
+               if !(validOff(off)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
-               v.AuxInt = makeValAndOff(c, off)
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = makeValAndOff(int64(int32(c)), off)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -11350,7 +12268,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
+               v.reset(OpAMD64MOVLstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -11358,9 +12276,9 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVLstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -11379,7 +12297,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
+               v.reset(OpAMD64MOVLstoreidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -11388,9 +12306,39 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVLstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVQstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ4 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstoreidx4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVLstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -11409,7 +12357,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx8)
+               v.reset(OpAMD64MOVLstoreidx8)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -11418,9 +12366,9 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore [off] {sym} (ADDQ ptr idx) val mem)
+       // match: (MOVLstore [off] {sym} (ADDQ ptr idx) val mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVQstoreidx1 [off] {sym} ptr idx val mem)
+       // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -11437,7 +12385,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
+               v.reset(OpAMD64MOVLstoreidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -11446,9 +12394,178 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVQstore [i-4] {s} p w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_1.AuxInt != 32 {
+                       break
+               }
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVLstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if p != x.Args[0] {
+                       break
+               }
+               if w != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
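
The first two MOVLstore rules drop a MOVLQSX/MOVLQZX feeding the stored value: a 32-bit store only writes the low 32 bits, so the extension is dead. The SHRQconst [32] rule then merges a store of w's high half at [i] with a store of w at [i-4] into one MOVQstore. A sketch of source that plausibly hits both (illustrative names):

package sketch

// Only the low 32 bits are stored, so any widening of x to 64 bits
// before this store is eliminated by the MOVLQSX/MOVLQZX rules.
func store32(p *uint32, x int32) {
        *p = uint32(x)
}

// Writing the two halves of w to adjacent slots can fuse into a single
// 8-byte MOVQstore via the SHRQconst [32] pattern.
func splitStore(b *[2]uint32, w uint64) {
        b[0] = uint32(w)
        b[1] = uint32(w >> 32)
}
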
+func rewriteValueAMD64_OpAMD64MOVLstore_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVQstore [i-4] {s} p w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVLstore {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if p != x.Args[0] {
+                       break
+               }
+               w0 := x.Args[1]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [i] {s} p x1:(MOVLload [j] {s2} p2 mem) mem2:(MOVLstore [i-4] {s} p x2:(MOVLload [j-4] {s2} p2 mem) mem))
+       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)
+       // result: (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVLload {
+                       break
+               }
+               j := x1.AuxInt
+               s2 := x1.Aux
+               _ = x1.Args[1]
+               p2 := x1.Args[0]
+               mem := x1.Args[1]
+               mem2 := v.Args[2]
+               if mem2.Op != OpAMD64MOVLstore {
+                       break
+               }
+               if mem2.AuxInt != i-4 {
+                       break
+               }
+               if mem2.Aux != s {
+                       break
+               }
+               _ = mem2.Args[2]
+               if p != mem2.Args[0] {
+                       break
+               }
+               x2 := mem2.Args[1]
+               if x2.Op != OpAMD64MOVLload {
+                       break
+               }
+               if x2.AuxInt != j-4 {
+                       break
+               }
+               if x2.Aux != s2 {
+                       break
+               }
+               _ = x2.Args[1]
+               if p2 != x2.Args[0] {
+                       break
+               }
+               if mem != x2.Args[1] {
+                       break
+               }
+               if mem != mem2.Args[2] {
+                       break
+               }
+               if !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v0.AuxInt = j - 4
+               v0.Aux = s2
+               v0.AddArg(p2)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
        // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -11465,7 +12582,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
+               v.reset(OpAMD64MOVLstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -11473,9 +12590,9 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // match: (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVQstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVLstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -11491,7 +12608,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
+               v.reset(OpAMD64MOVLstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
@@ -11499,21 +12616,21 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
        // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
-       // result: (ADDQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
+       // result: (ADDLconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                a := v.Args[1]
-               if a.Op != OpAMD64ADDQconst {
+               if a.Op != OpAMD64ADDLconst {
                        break
                }
                c := a.AuxInt
                l := a.Args[0]
-               if l.Op != OpAMD64MOVQload {
+               if l.Op != OpAMD64MOVLload {
                        break
                }
                if l.AuxInt != off {
@@ -11531,28 +12648,28 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
                if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
                        break
                }
-               v.reset(OpAMD64ADDQconstmem)
+               v.reset(OpAMD64ADDLconstmem)
                v.AuxInt = makeValAndOff(c, off)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
+       // match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem)
        // cond:
-       // result: (MOVSDstore [off] {sym} ptr val mem)
+       // result: (MOVSSstore [off] {sym} ptr val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQf2i {
+               if v_1.Op != OpAMD64MOVLf2i {
                        break
                }
                val := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVSDstore)
+               v.reset(OpAMD64MOVSSstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -11562,14 +12679,14 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
        }
        return false
 }
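
The second _10 rule recognizes an 8-byte copy written as two 4-byte load/store pairs: when both loads and the inner store have a single use, the whole sequence collapses to one MOVQload feeding one MOVQstore. Illustrative source (hedged; the compiler may already take another path for this exact shape):

package sketch

// Copies 8 bytes; expressed as two 32-bit moves in the generic IR this
// matches the pairing rule and becomes a single 64-bit load/store.
func copy8(dst, src *[2]uint32) {
        dst[0] = src[0]
        dst[1] = src[1]
}
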
-func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVLstoreconst_0(v *Value) bool {
        b := v.Block
        _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
        // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
                sc := v.AuxInt
                s := v.Aux
@@ -11584,16 +12701,16 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
+               v.reset(OpAMD64MOVLstoreconst)
                v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = s
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // match: (MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
        // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
                sc := v.AuxInt
                sym1 := v.Aux
@@ -11609,16 +12726,16 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
+               v.reset(OpAMD64MOVLstoreconst)
                v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
+       // match: (MOVLstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
        // cond: canMergeSym(sym1, sym2)
-       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                x := v.AuxInt
                sym1 := v.Aux
@@ -11636,7 +12753,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.reset(OpAMD64MOVLstoreconstidx1)
                v.AuxInt = ValAndOff(x).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -11644,15 +12761,15 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem)
+       // match: (MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem)
        // cond: canMergeSym(sym1, sym2)
-       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                x := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               if v_0.Op != OpAMD64LEAQ4 {
                        break
                }
                off := v_0.AuxInt
@@ -11664,7 +12781,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.reset(OpAMD64MOVLstoreconstidx4)
                v.AuxInt = ValAndOff(x).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -11672,9 +12789,9 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreconst [x] {sym} (ADDQ ptr idx) mem)
+       // match: (MOVLstoreconst [x] {sym} (ADDQ ptr idx) mem)
        // cond:
-       // result: (MOVQstoreconstidx1 [x] {sym} ptr idx mem)
+       // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem)
        for {
                x := v.AuxInt
                sym := v.Aux
@@ -11687,7 +12804,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.reset(OpAMD64MOVLstoreconstidx1)
                v.AuxInt = x
                v.Aux = sym
                v.AddArg(ptr)
@@ -11695,19 +12812,19 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
-       // cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
-       // result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
+       // match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
+       // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
        for {
                c := v.AuxInt
                s := v.Aux
                _ = v.Args[1]
                p := v.Args[0]
                x := v.Args[1]
-               if x.Op != OpAMD64MOVQstoreconst {
+               if x.Op != OpAMD64MOVLstoreconst {
                        break
                }
-               c2 := x.AuxInt
+               a := x.AuxInt
                if x.Aux != s {
                        break
                }
@@ -11716,22 +12833,22 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                        break
                }
                mem := x.Args[1]
-               if !(config.useSSE && x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
+               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVOstore)
-               v.AuxInt = ValAndOff(c2).Off()
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = ValAndOff(a).Off()
                v.Aux = s
                v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVOconst, types.TypeInt128)
-               v0.AuxInt = 0
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
        // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
                sc := v.AuxInt
                sym1 := v.Aux
@@ -11747,16 +12864,16 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
+               v.reset(OpAMD64MOVLstoreconst)
                v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // match: (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
        // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
                sc := v.AuxInt
                s := v.Aux
@@ -11771,7 +12888,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
                if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconst)
+               v.reset(OpAMD64MOVLstoreconst)
                v.AuxInt = ValAndOff(sc).add(off)
                v.Aux = s
                v.AddArg(ptr)
@@ -11780,10 +12897,14 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
        }
        return false
 }
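
For constant stores the pairing rule works on the ValAndOff aux: two MOVLstoreconst ops 4 bytes apart combine into one MOVQstore of the 64-bit immediate ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32. For instance, storing 1 at offset 0 and 2 at offset 4 becomes a single store of 0x0000000200000001. Illustrative source:

package sketch

type pair struct {
        lo, hi uint32
}

// Two 4-byte constant stores at offsets 0 and 4; the combining rule can
// rewrite them as one MOVQstore of the constant 2<<32 | 1.
func initPair(p *pair) {
        p.lo = 1
        p.hi = 2
}
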
-func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
-       // match: (MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
+func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
        // cond:
-       // result: (MOVQstoreconstidx8 [c] {sym} ptr idx mem)
+       // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11793,12 +12914,12 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
                if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 3 {
+               if v_1.AuxInt != 2 {
                        break
                }
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.reset(OpAMD64MOVLstoreconstidx4)
                v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
@@ -11806,9 +12927,9 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // match: (MOVLstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
        // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
                x := v.AuxInt
                sym := v.Aux
@@ -11824,7 +12945,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
                if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.reset(OpAMD64MOVLstoreconstidx1)
                v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
@@ -11832,9 +12953,9 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
        // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
                x := v.AuxInt
                sym := v.Aux
@@ -11850,7 +12971,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
                if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.reset(OpAMD64MOVLstoreconstidx1)
                v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
@@ -11858,12 +12979,55 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
+       // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVLstoreconstidx1 {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if p != x.Args[0] {
+                       break
+               }
+               if i != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = ValAndOff(a).Off()
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(i)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
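
MOVLstoreconstidx1 gets the same treatment in indexed form: SHLQconst [2] on the index promotes to the scale-4 op, ADDQconst on either operand folds into the ValAndOff, and an adjacent constant pair fuses into MOVQstoreidx1 with the combined immediate. A rough byte-scaled sketch (unsafe is used purely for illustration; an assumption, not from the patch):

package sketch

import "unsafe"

// A 32-bit constant store at byte offset i from base; constant tweaks
// to base or i fold into the MOVLstoreconstidx1 ValAndOff.
func put32(base unsafe.Pointer, i uintptr) {
        *(*uint32)(unsafe.Add(base, i)) = 7
}
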
-func rewriteValueAMD64_OpAMD64MOVQstoreconstidx8_0(v *Value) bool {
-       // match: (MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem)
+func rewriteValueAMD64_OpAMD64MOVLstoreconstidx4_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem)
        // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
                x := v.AuxInt
                sym := v.Aux
@@ -11879,7 +13043,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx8_0(v *Value) bool {
                if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.reset(OpAMD64MOVLstoreconstidx4)
                v.AuxInt = ValAndOff(x).add(c)
                v.Aux = sym
                v.AddArg(ptr)
@@ -11887,9 +13051,9 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx8_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(8*c)
-       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
+       // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond: ValAndOff(x).canAdd(4*c)
+       // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
        for {
                x := v.AuxInt
                sym := v.Aux
@@ -11902,23 +13066,92 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconstidx8_0(v *Value) bool {
                c := v_1.AuxInt
                idx := v_1.Args[0]
                mem := v.Args[2]
-               if !(ValAndOff(x).canAdd(8 * c)) {
+               if !(ValAndOff(x).canAdd(4 * c)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreconstidx8)
-               v.AuxInt = ValAndOff(x).add(8 * c)
+               v.reset(OpAMD64MOVLstoreconstidx4)
+               v.AuxInt = ValAndOff(x).add(4 * c)
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
+       // match: (MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem))
+       // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVLstoreconstidx4 {
+                       break
+               }
+               a := x.AuxInt
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[2]
+               if p != x.Args[0] {
+                       break
+               }
+               if i != x.Args[1] {
+                       break
+               }
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = ValAndOff(a).Off()
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type)
+               v0.AuxInt = 2
+               v0.AddArg(i)
+               v.AddArg(v0)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v1.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
+               v.AddArg(v1)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
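
The scale-4 variant folds an index constant as 4*c bytes (ValAndOff(x).add(4*c)), and its pairing rule has no scale-4 MOVQ target, so it re-expands the index with SHLQconst [2] and emits MOVQstoreidx1. Illustrative source:

package sketch

// Zeroing two adjacent uint32 elements produces two MOVLstoreconstidx4
// ops 4 bytes apart; the rule above fuses them into one 8-byte constant
// store through a scale-1 address with the index shifted left by 2.
func clear2(s []uint32, i int) {
        s[i] = 0
        s[i+1] = 0
}
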
-func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
-       // match: (MOVQstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
+func rewriteValueAMD64_OpAMD64MOVLstoreidx1_0(v *Value) bool {
+       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem)
        // cond:
-       // result: (MOVQstoreidx8 [c] {sym} ptr idx val mem)
+       // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_1.AuxInt != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVLstoreidx4)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
+       // cond:
+       // result: (MOVLstoreidx8 [c] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11934,7 +13167,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
                idx := v_1.Args[0]
                val := v.Args[2]
                mem := v.Args[3]
-               v.reset(OpAMD64MOVQstoreidx8)
+               v.reset(OpAMD64MOVLstoreidx8)
                v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
@@ -11943,9 +13176,9 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // match: (MOVLstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
        // cond: is32Bit(c+d)
-       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11962,7 +13195,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
+               v.reset(OpAMD64MOVLstoreidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -11971,9 +13204,9 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // match: (MOVLstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
        // cond: is32Bit(c+d)
-       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -11990,7 +13223,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx1)
+               v.reset(OpAMD64MOVLstoreidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -11999,12 +13232,119 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
+       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if v_2.AuxInt != 32 {
+                       break
+               }
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               if w != x.Args[2] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
+                       break
+               }
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx1 {
+                       break
+               }
+               if x.AuxInt != i-4 {
+                       break
+               }
+               if x.Aux != s {
+                       break
+               }
+               _ = x.Args[3]
+               if p != x.Args[0] {
+                       break
+               }
+               if idx != x.Args[1] {
+                       break
+               }
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
+                       break
+               }
+               if w0.AuxInt != j-32 {
+                       break
+               }
+               if w != w0.Args[0] {
+                       break
+               }
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
        return false
 }
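
The two rules just above are store-combining rules: a 32-bit indexed store of the upper half of a value (SHRQconst [32] w, or more generally SHRQconst [j] w paired with SHRQconst [j-32] w), placed 4 bytes after a store of the lower half, is fused into a single 64-bit MOVQstoreidx1, provided the older store has no other uses (x.Uses == 1 && clobber(x)). A minimal sketch of amd64 source that produces this shape; put64 and the unsafe casts are illustrative, not part of the commit:

	package demo

	import "unsafe"

	// put64 writes w as two 32-bit halves at byte offsets i and i+4 of b.
	// The second store's value is w>>32, i.e. (SHRQconst [32] w) in SSA,
	// so the adjacent MOVLstoreidx1 pair can become one MOVQstoreidx1.
	func put64(b []byte, i int, w uint64) {
		*(*uint32)(unsafe.Pointer(&b[i])) = uint32(w)
		*(*uint32)(unsafe.Pointer(&b[i+4])) = uint32(w >> 32)
	}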
-func rewriteValueAMD64_OpAMD64MOVQstoreidx8_0(v *Value) bool {
-       // match: (MOVQstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+func rewriteValueAMD64_OpAMD64MOVLstoreidx4_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVLstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem)
        // cond: is32Bit(c+d)
-       // result: (MOVQstoreidx8 [c+d] {sym} ptr idx val mem)
+       // result: (MOVLstoreidx4 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -12021,7 +13361,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx8_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx8)
+               v.reset(OpAMD64MOVLstoreidx4)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -12030,9 +13370,9 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx8_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVQstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+8*d)
-       // result: (MOVQstoreidx8 [c+8*d] {sym} ptr idx val mem)
+       // match: (MOVLstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond: is32Bit(c+4*d)
+       // result: (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -12046,11 +13386,11 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx8_0(v *Value) bool {
                idx := v_1.Args[0]
                val := v.Args[2]
                mem := v.Args[3]
-               if !(is32Bit(c + 8*d)) {
+               if !(is32Bit(c + 4*d)) {
                        break
                }
-               v.reset(OpAMD64MOVQstoreidx8)
-               v.AuxInt = c + 8*d
+               v.reset(OpAMD64MOVLstoreidx4)
+               v.AuxInt = c + 4*d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
@@ -12058,257 +13398,127 @@ func rewriteValueAMD64_OpAMD64MOVQstoreidx8_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDload_0(v *Value) bool {
-       // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVSDload [off1+off2] {sym} ptr mem)
+       // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               if v_2.AuxInt != 32 {
                        break
                }
-               v.reset(OpAMD64MOVSDload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx4 {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if x.AuxInt != i-4 {
                        break
                }
-               v.reset(OpAMD64MOVSDload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if x.Aux != s {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               _ = x.Args[3]
+               if p != x.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-       for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               if idx != x.Args[1] {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if w != x.Args[2] {
                        break
                }
-               v.reset(OpAMD64MOVSDloadidx8)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDload [off] {sym} (ADDQ ptr idx) mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               mem := v.Args[1]
-               if !(ptr.Op != OpSB) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _))
-       // cond:
-       // result: (MOVQi2f val)
+       // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQstore {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
                        break
                }
-               if v_1.AuxInt != off {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVLstoreidx4 {
                        break
                }
-               if v_1.Aux != sym {
+               if x.AuxInt != i-4 {
                        break
                }
-               _ = v_1.Args[2]
-               if ptr != v_1.Args[0] {
+               if x.Aux != s {
                        break
                }
-               val := v_1.Args[1]
-               v.reset(OpAMD64MOVQi2f)
-               v.AddArg(val)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSDloadidx1_0(v *Value) bool {
-       // match: (MOVSDloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
-       // cond:
-       // result: (MOVSDloadidx8 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               _ = x.Args[3]
+               if p != x.Args[0] {
                        break
                }
-               if v_1.AuxInt != 3 {
+               if idx != x.Args[1] {
                        break
                }
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVSDloadidx8)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               if w0.AuxInt != j-32 {
                        break
                }
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if w != w0.Args[0] {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVSDloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = i - 4
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 2
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w0)
                v.AddArg(mem)
                return true
        }
        return false
 }
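
The MOVLstoreidx4 function applies the same fusion to 4-byte-scaled indices. Since amd64 has no 64-bit store with a ×4 index, the result rescales the index with (SHLQconst <idx.Type> [2] idx) and falls back to the ×1 form MOVQstoreidx1. A sketch of source that can produce the pattern (illustrative, not from the commit):

	package demo

	// storePair writes the halves of w into consecutive []uint32 elements.
	// Both stores are MOVLstoreidx4 with the same index; after fusion the
	// index i is shifted left by 2 so a single MOVQstoreidx1 can be used.
	func storePair(p []uint32, i int, w uint64) {
		p[i] = uint32(w)
		p[i+1] = uint32(w >> 32)
	}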
-func rewriteValueAMD64_OpAMD64MOVSDloadidx8_0(v *Value) bool {
-       // match: (MOVSDloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
+func rewriteValueAMD64_OpAMD64MOVLstoreidx8_0(v *Value) bool {
+       // match: (MOVLstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
        // cond: is32Bit(c+d)
-       // result: (MOVSDloadidx8 [c+d] {sym} ptr idx mem)
+       // result: (MOVLstoreidx8 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
@@ -12316,25 +13526,27 @@ func rewriteValueAMD64_OpAMD64MOVSDloadidx8_0(v *Value) bool {
                d := v_0.AuxInt
                ptr := v_0.Args[0]
                idx := v.Args[1]
-               mem := v.Args[2]
+               val := v.Args[2]
+               mem := v.Args[3]
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVSDloadidx8)
+               v.reset(OpAMD64MOVLstoreidx8)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // match: (MOVLstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
        // cond: is32Bit(c+8*d)
-       // result: (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem)
+       // result: (MOVLstoreidx8 [c+8*d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
@@ -12342,54 +13554,54 @@ func rewriteValueAMD64_OpAMD64MOVSDloadidx8_0(v *Value) bool {
                }
                d := v_1.AuxInt
                idx := v_1.Args[0]
-               mem := v.Args[2]
+               val := v.Args[2]
+               mem := v.Args[3]
                if !(is32Bit(c + 8*d)) {
                        break
                }
-               v.reset(OpAMD64MOVSDloadidx8)
+               v.reset(OpAMD64MOVLstoreidx8)
                v.AuxInt = c + 8*d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
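
MOVLstoreidx8 gets only the two displacement-folding rules: a constant added to the pointer folds as c+d, and one added to the index as c+8*d, in both cases guarded by is32Bit so the displacement still encodes. Illustrative source (the rec type is hypothetical):

	package demo

	type rec struct{ lo, hi uint32 }

	// Each rec is 8 bytes, so s[i+1].lo is a MOVLstoreidx8 whose index
	// carries an ADDQconst [1]; the rule folds it into the displacement
	// as c+8*1 instead of incrementing the index register.
	func setLo(s []rec, i int, x uint32) {
		s[i+1].lo = x
	}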
-func rewriteValueAMD64_OpAMD64MOVSDstore_0(v *Value) bool {
-       // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+func rewriteValueAMD64_OpAMD64MOVOload_0(v *Value) bool {
+       // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVOload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
                off2 := v_0.AuxInt
                ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVSDstore)
+               v.reset(OpAMD64MOVOload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64LEAQ {
                        break
@@ -12397,280 +13609,204 @@ func rewriteValueAMD64_OpAMD64MOVSDstore_0(v *Value) bool {
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
                base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
+               mem := v.Args[1]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVSDstore)
+               v.reset(OpAMD64MOVOload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVOstore_0(v *Value) bool {
+       // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVOstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
-               sym1 := v.Aux
+               sym := v.Aux
                _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ1 {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
                off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               _ = v_0.Args[1]
                ptr := v_0.Args[0]
-               idx := v_0.Args[1]
                val := v.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVSDstoreidx1)
+               v.reset(OpAMD64MOVOstore)
                v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ8 {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
                off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
+               base := v_0.Args[0]
                val := v.Args[1]
                mem := v.Args[2]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVSDstoreidx8)
+               v.reset(OpAMD64MOVOstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDstore [off] {sym} (ADDQ ptr idx) val mem)
-       // cond: ptr.Op != OpSB
-       // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               ptr := v_0.Args[0]
-               idx := v_0.Args[1]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(ptr.Op != OpSB) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDstoreidx1)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem)
-       // cond:
-       // result: (MOVQstore [off] {sym} ptr val mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQi2f {
-                       break
-               }
-               val := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               v.AddArg(base)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
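
MOVOload and MOVOstore are the 128-bit SSE moves the compiler uses for small copies, and their rules above only fold ADDQconst offsets and mergeable LEAQ symbols into the addressing mode. A typical source of these ops (illustrative):

	package demo

	type pair struct{ a, b uint64 }

	// A 16-byte struct copy is normally lowered to a MOVOload followed by
	// a MOVOstore; constant field offsets then fold into the displacement
	// via the rules above.
	func copyPair(dst, src *pair) {
		*dst = *src
	}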
-func rewriteValueAMD64_OpAMD64MOVSDstoreidx1_0(v *Value) bool {
-       // match: (MOVSDstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
-       // cond:
-       // result: (MOVSDstoreidx8 [c] {sym} ptr idx val mem)
+func rewriteValueAMD64_OpAMD64MOVQatomicload_0(v *Value) bool {
+       // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQatomicload [off1+off2] {sym} ptr mem)
        for {
-               c := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if v_1.AuxInt != 3 {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = c
+               v.reset(OpAMD64MOVQatomicload)
+               v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               d := v_0.AuxInt
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
                ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVSDstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
+               v.reset(OpAMD64MOVQatomicload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
+       return false
+}
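
MOVQatomicload keeps the same two address-folding rules as an ordinary load, so constant offsets and symbol bases fold into the atomic load without affecting its semantics. Illustrative source; the counters layout is hypothetical:

	package demo

	import "sync/atomic"

	type counters struct {
		pad [8]byte // puts n at a constant non-zero offset
		n   uint64
	}

	// &c.n is (ADDQconst [8] c), which the first rule folds into the
	// MOVQatomicload's displacement.
	func loadN(c *counters) uint64 {
		return atomic.LoadUint64(&c.n)
	}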
+func rewriteValueAMD64_OpAMD64MOVQf2i_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVQf2i <t> (Arg [off] {sym}))
+       // cond:
+       // result: @b.Func.Entry (Arg <t> [off] {sym})
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               t := v.Type
+               v_0 := v.Args[0]
+               if v_0.Op != OpArg {
                        break
                }
-               v.reset(OpAMD64MOVSDstoreidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               off := v_0.AuxInt
+               sym := v_0.Aux
+               b = b.Func.Entry
+               v0 := b.NewValue0(v.Pos, OpArg, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVSDstoreidx8_0(v *Value) bool {
-       // match: (MOVSDstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem)
+func rewriteValueAMD64_OpAMD64MOVQi2f_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVQi2f <t> (Arg [off] {sym}))
+       // cond:
+       // result: @b.Func.Entry (Arg <t> [off] {sym})
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
+               t := v.Type
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               if v_0.Op != OpArg {
                        break
                }
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               off := v_0.AuxInt
+               sym := v_0.Aux
+               b = b.Func.Entry
+               v0 := b.NewValue0(v.Pos, OpArg, t)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
                return true
        }
-       // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+8*d)
-       // result: (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem)
+       return false
+}
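
MOVQf2i and MOVQi2f move 64 bits between the floating-point and integer register files. When the operand is a function argument, the two rules above re-materialize the argument in the entry block (@b.Func.Entry) with the other type, so the value is reloaded from its argument slot instead of being shuffled between register files. These ops come from bit-reinterpreting conversions such as:

	package demo

	import "math"

	// Float64bits(f) on an argument is (MOVQf2i (Arg ...)); the rule
	// rewrites it to an integer-typed Arg in the entry block.
	func bits(f float64) uint64 { return math.Float64bits(f) }

	// Float64frombits is the symmetric MOVQi2f case.
	func frombits(u uint64) float64 { return math.Float64frombits(u) }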
+func rewriteValueAMD64_OpAMD64MOVQload_0(v *Value) bool {
+       // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: x
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[3]
+               _ = v.Args[1]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64MOVQstore {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + 8*d)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[2]
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64MOVSDstoreidx8)
-               v.AuxInt = c + 8*d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
-       // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVSSload [off1+off2] {sym} ptr mem)
+       // result: (MOVQload [off1+off2] {sym} ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -12685,16 +13821,16 @@ func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVSSload)
+               v.reset(OpAMD64MOVQload)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -12710,16 +13846,16 @@ func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVSSload)
+               v.reset(OpAMD64MOVQload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // match: (MOVQload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVQloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -12737,7 +13873,7 @@ func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVSSloadidx1)
+               v.reset(OpAMD64MOVQloadidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -12745,15 +13881,15 @@ func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem)
+       // match: (MOVQload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVQloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               if v_0.Op != OpAMD64LEAQ8 {
                        break
                }
                off2 := v_0.AuxInt
@@ -12765,7 +13901,7 @@ func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVSSloadidx4)
+               v.reset(OpAMD64MOVQloadidx8)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -12773,9 +13909,9 @@ func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off] {sym} (ADDQ ptr idx) mem)
+       // match: (MOVQload [off] {sym} (ADDQ ptr idx) mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
+       // result: (MOVQloadidx1 [off] {sym} ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -12791,7 +13927,7 @@ func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVSSloadidx1)
+               v.reset(OpAMD64MOVQloadidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -12799,19 +13935,68 @@ func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _))
-       // cond:
-       // result: (MOVLi2f val)
+       // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLstore {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               if v_1.AuxInt != off {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _))
+       // cond:
+       // result: (MOVQf2i val)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVSDstore {
+                       break
+               }
+               if v_1.AuxInt != off {
                        break
                }
                if v_1.Aux != sym {
@@ -12822,16 +14007,16 @@ func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
                        break
                }
                val := v_1.Args[1]
-               v.reset(OpAMD64MOVLi2f)
+               v.reset(OpAMD64MOVQf2i)
                v.AddArg(val)
                return true
        }
        return false
 }
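
The first and last MOVQload rules are store-to-load forwarding: a load that reads exactly what the preceding store wrote is replaced by the stored value, and an integer load of a slot just written by MOVSDstore becomes (MOVQf2i val) with no memory round trip. A minimal sketch of the first case:

	package demo

	// The reload of *p reads the value stored on the previous line at the
	// same address, so the rule rewrites the MOVQload to x.
	func roundTrip(p *uint64, x uint64) uint64 {
		*p = x
		return *p
	}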
-func rewriteValueAMD64_OpAMD64MOVSSloadidx1_0(v *Value) bool {
-       // match: (MOVSSloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
+func rewriteValueAMD64_OpAMD64MOVQloadidx1_0(v *Value) bool {
+       // match: (MOVQloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
        // cond:
-       // result: (MOVSSloadidx4 [c] {sym} ptr idx mem)
+       // result: (MOVQloadidx8 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -12841,12 +14026,12 @@ func rewriteValueAMD64_OpAMD64MOVSSloadidx1_0(v *Value) bool {
                if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 2 {
+               if v_1.AuxInt != 3 {
                        break
                }
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVSSloadidx4)
+               v.reset(OpAMD64MOVQloadidx8)
                v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
@@ -12854,9 +14039,34 @@ func rewriteValueAMD64_OpAMD64MOVSSloadidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // match: (MOVQloadidx1 [c] {sym} (SHLQconst [3] idx) ptr mem)
+       // cond:
+       // result: (MOVQloadidx8 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
+                       break
+               }
+               if v_0.AuxInt != 3 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQloadidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
        // cond: is32Bit(c+d)
-       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -12872,7 +14082,7 @@ func rewriteValueAMD64_OpAMD64MOVSSloadidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVSSloadidx1)
+               v.reset(OpAMD64MOVQloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -12880,9 +14090,35 @@ func rewriteValueAMD64_OpAMD64MOVSSloadidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // match: (MOVQloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
        // cond: is32Bit(c+d)
-       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               idx := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -12898,7 +14134,33 @@ func rewriteValueAMD64_OpAMD64MOVSSloadidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVSSloadidx1)
+               v.reset(OpAMD64MOVQloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -12908,10 +14170,10 @@ func rewriteValueAMD64_OpAMD64MOVSSloadidx1_0(v *Value) bool {
        }
        return false
 }
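
Because MOVQloadidx1's ptr and idx operands are interchangeable, each rule in this function appears in both operand orders; rulegen emits the variants explicitly rather than relying on a canonical order. The SHLQconst [3] rules strength-reduce a byte index of idx*8 into the ×8 addressing form, which is what ordinary slice indexing produces:

	package demo

	// p[i] computes a byte offset of i*8; the SHLQconst [3] rule turns the
	// MOVQloadidx1 into a MOVQloadidx8, i.e. (ptr)(idx*8) addressing.
	func get(p []uint64, i int) uint64 {
		return p[i]
	}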
-func rewriteValueAMD64_OpAMD64MOVSSloadidx4_0(v *Value) bool {
-       // match: (MOVSSloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem)
+func rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v *Value) bool {
+       // match: (MOVQloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
        // cond: is32Bit(c+d)
-       // result: (MOVSSloadidx4 [c+d] {sym} ptr idx mem)
+       // result: (MOVQloadidx8 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -12927,7 +14189,7 @@ func rewriteValueAMD64_OpAMD64MOVSSloadidx4_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVSSloadidx4)
+               v.reset(OpAMD64MOVQloadidx8)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -12935,9 +14197,9 @@ func rewriteValueAMD64_OpAMD64MOVSSloadidx4_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+4*d)
-       // result: (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem)
+       // match: (MOVQloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+8*d)
+       // result: (MOVQloadidx8 [c+8*d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -12950,11 +14212,11 @@ func rewriteValueAMD64_OpAMD64MOVSSloadidx4_0(v *Value) bool {
                d := v_1.AuxInt
                idx := v_1.Args[0]
                mem := v.Args[2]
-               if !(is32Bit(c + 4*d)) {
+               if !(is32Bit(c + 8*d)) {
                        break
                }
-               v.reset(OpAMD64MOVSSloadidx4)
-               v.AuxInt = c + 4*d
+               v.reset(OpAMD64MOVQloadidx8)
+               v.AuxInt = c + 8*d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
@@ -12963,10 +14225,10 @@ func rewriteValueAMD64_OpAMD64MOVSSloadidx4_0(v *Value) bool {
        }
        return false
 }
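
In the ×8 form a constant d added to the index contributes 8*d bytes, so it folds into the displacement as c+8*d, again guarded by is32Bit. Illustrative:

	package demo

	// The +1 becomes an ADDQconst on the index and folds into the
	// displacement: the load is emitted as 8(ptr)(idx*8) rather than
	// bumping the index register first.
	func next(p []uint64, i int) uint64 {
		return p[i+1]
	}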
-func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
-       // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
+       // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVQstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -12982,7 +14244,7 @@ func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVSSstore)
+               v.reset(OpAMD64MOVQstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
@@ -12990,9 +14252,33 @@ func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem)
+       // cond: validValAndOff(c,off)
+       // result: (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validValAndOff(c, off)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = makeValAndOff(c, off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
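
The MOVQstoreconst rule ending above replaces a store of a small constant with a store-immediate, dropping the separate MOVQconst materialization whenever the constant and offset pack into the AuxInt (validValAndOff). For instance (illustrative):

	package demo

	// The store compiles to MOVQ $0, (p): the constant is encoded in the
	// instruction instead of occupying a register.
	func zero(p *uint64) {
		*p = 0
	}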
+       // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -13009,7 +14295,7 @@ func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVSSstore)
+               v.reset(OpAMD64MOVQstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -13017,9 +14303,9 @@ func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVQstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVQstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -13038,7 +14324,7 @@ func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVSSstoreidx1)
+               v.reset(OpAMD64MOVQstoreidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -13047,15 +14333,15 @@ func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVQstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVQstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ4 {
+               if v_0.Op != OpAMD64LEAQ8 {
                        break
                }
                off2 := v_0.AuxInt
@@ -13068,7 +14354,7 @@ func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVSSstoreidx4)
+               v.reset(OpAMD64MOVQstoreidx8)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -13077,9 +14363,9 @@ func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off] {sym} (ADDQ ptr idx) val mem)
+       // match: (MOVQstore [off] {sym} (ADDQ ptr idx) val mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
+       // result: (MOVQstoreidx1 [off] {sym} ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -13096,7 +14382,7 @@ func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVSSstoreidx1)
+               v.reset(OpAMD64MOVQstoreidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -13105,611 +14391,673 @@ func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem)
-       // cond:
-       // result: (MOVLstore [off] {sym} ptr val mem)
+       // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLi2f {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               val := v_1.Args[0]
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVSSstoreidx1_0(v *Value) bool {
-       // match: (MOVSSstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem)
-       // cond:
-       // result: (MOVSSstoreidx4 [c] {sym} ptr idx val mem)
+       // match: (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVQstore [off1+off2] {sym} ptr val mem)
        for {
-               c := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if v_1.AuxInt != 2 {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = c
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+       // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+       // result: (ADDQconstmem {sym} [makeValAndOff(c,off)] ptr mem)
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[3]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               a := v.Args[1]
+               if a.Op != OpAMD64ADDQconst {
                        break
                }
-               d := v_0.AuxInt
-               ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               c := a.AuxInt
+               l := a.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               v.reset(OpAMD64MOVSSstoreidx1)
-               v.AuxInt = c + d
+               if l.AuxInt != off {
+                       break
+               }
+               if l.Aux != sym {
+                       break
+               }
+               _ = l.Args[1]
+               ptr2 := l.Args[0]
+               mem := l.Args[1]
+               if mem != v.Args[2] {
+                       break
+               }
+               if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconstmem)
+               v.AuxInt = makeValAndOff(c, off)
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
+       // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
+       // cond:
+       // result: (MOVSDstore [off] {sym} ptr val mem)
        for {
-               c := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
-               _ = v.Args[3]
+               _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               if v_1.Op != OpAMD64MOVQf2i {
                        break
                }
-               v.reset(OpAMD64MOVSSstoreidx1)
-               v.AuxInt = c + d
+               val := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVSDstore)
+               v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(idx)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
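
Editor's note: rewriteAMD64.go is machine-generated from the declarative rules in gen/AMD64.rules; each `// match / cond / result` comment trio above compiles into one of these linear matchers. Below is a minimal, self-contained sketch of the same shape on a toy IR — the Value type here is illustrative, not the compiler's.

package main

import "fmt"

type Op int

const (
	OpConst Op = iota
	OpAdd
)

// Value is a toy stand-in for the SSA value the matchers above walk.
type Value struct {
	Op     Op
	AuxInt int64
	Args   []*Value
}

// rewriteAdd applies (Add (Const [c]) (Const [d])) -> (Const [c+d]),
// mirroring the if-not-Op-break / reset / AddArg shape of the
// generated functions.
func rewriteAdd(v *Value) bool {
	if v.Op != OpAdd {
		return false
	}
	v_0, v_1 := v.Args[0], v.Args[1]
	if v_0.Op != OpConst || v_1.Op != OpConst {
		return false // match failed; the generated code falls through to the next rule
	}
	c, d := v_0.AuxInt, v_1.AuxInt
	v.Op = OpConst // v.reset(OpConst)
	v.AuxInt = c + d
	v.Args = nil
	return true
}

func main() {
	v := &Value{Op: OpAdd, Args: []*Value{
		{Op: OpConst, AuxInt: 2},
		{Op: OpConst, AuxInt: 3},
	}}
	fmt.Println(rewriteAdd(v), v.AuxInt) // true 5
}
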
-func rewriteValueAMD64_OpAMD64MOVSSstoreidx4_0(v *Value) bool {
-       // match: (MOVSSstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem)
+func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
+               sc := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               d := v_0.AuxInt
+               off := v_0.AuxInt
                ptr := v_0.Args[0]
-               idx := v.Args[1]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + d)) {
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = c + d
-               v.Aux = sym
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+4*d)
-       // result: (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem)
+       // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               d := v_1.AuxInt
-               idx := v_1.Args[0]
-               val := v.Args[2]
-               mem := v.Args[3]
-               if !(is32Bit(c + 4*d)) {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVSSstoreidx4)
-               v.AuxInt = c + 4*d
-               v.Aux = sym
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWQSX_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (MOVQstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWload {
+               x := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               x := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ8 {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+       // match: (MOVQstoreconst [x] {sym} (ADDQ ptr idx) mem)
+       // cond:
+       // result: (MOVQstoreconstidx1 [x] {sym} ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
-                       break
-               }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               x := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.AuxInt = x
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWQSX (ANDLconst [c] x))
-       // cond: c & 0x8000 == 0
-       // result: (ANDLconst [c & 0x7fff] x)
+       // match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
+       // cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
+       // result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               p := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64MOVQstoreconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(c&0x8000 == 0) {
+               c2 := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0x7fff
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQSX (MOVWQSX x))
-       // cond:
-       // result: (MOVWQSX x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVWQSX {
+               _ = x.Args[1]
+               if p != x.Args[0] {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQSX)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQSX (MOVBQSX x))
-       // cond:
-       // result: (MOVBQSX x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQSX {
+               mem := x.Args[1]
+               if !(config.useSSE && x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQSX)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVOstore)
+               v.AuxInt = ValAndOff(c2).Off()
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVOconst, types.TypeInt128)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWQSXload_0(v *Value) bool {
-       // match: (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVWQSX x)
+       // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               sc := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVWstore {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[2]
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVWQSX)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               sc := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
                mem := v.Args[1]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v.reset(OpAMD64MOVWQSXload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
+               v.reset(OpAMD64MOVQstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
        return false
 }
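
Editor's note: the MOVQstoreconst rules above revolve around ValAndOff, which packs a 32-bit constant value and a 32-bit displacement into one AuxInt. The sketch below reproduces that encoding from memory (the authoritative definitions live in cmd/compile/internal/ssa), plus the check performed by the rule that merges two adjacent 8-byte zero stores into one 16-byte MOVOstore.

package main

import "fmt"

// ValAndOff packs val<<32 | uint32(off) into an int64 AuxInt.
type ValAndOff int64

func makeValAndOff(val, off int64) int64 {
	return val<<32 | int64(uint32(off))
}

func (x ValAndOff) Val() int64 { return int64(x) >> 32 }
func (x ValAndOff) Off() int64 { return int64(int32(x)) }

// canAdd reports whether the displacement still fits in a signed
// 32-bit field after folding in another constant offset.
func (x ValAndOff) canAdd(off int64) bool {
	n := x.Off() + off
	return n == int64(int32(n))
}

func (x ValAndOff) add(off int64) int64 {
	return makeValAndOff(x.Val(), x.Off()+off)
}

// combinesToMOVO mirrors the condition of the zero-pair rule above:
// offsets 8 apart and both stored values zero.
func combinesToMOVO(c, c2 ValAndOff) bool {
	return c2.Off()+8 == c.Off() && c.Val() == 0 && c2.Val() == 0
}

func main() {
	lo := ValAndOff(makeValAndOff(0, 16)) // store $0 at offset 16
	hi := ValAndOff(makeValAndOff(0, 24)) // store $0 at offset 24
	fmt.Println(lo.Val(), lo.Off())       // 0 16
	fmt.Println(combinesToMOVO(hi, lo))   // true: becomes one MOVOstore [16]
}
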
-func rewriteValueAMD64_OpAMD64MOVWQZX_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1_0(v *Value) bool {
+       // match: (MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
+       // cond:
+       // result: (MOVQstoreconstidx8 [c] {sym} ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWload {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               if v_1.AuxInt != 3 {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       // match: (MOVQstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // cond: ValAndOff(x).canAdd(c)
+       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVLload {
+               x := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+       // match: (MOVQstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond: ValAndOff(x).canAdd(c)
+       // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVQload {
+               x := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[1]
-               ptr := x.Args[0]
-               mem := x.Args[1]
-               if !(x.Uses == 1 && clobber(x)) {
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64MOVQstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstoreconstidx8_0(v *Value) bool {
+       // match: (MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // cond: ValAndOff(x).canAdd(c)
+       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWloadidx1 {
+               x := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
+       // match: (MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond: ValAndOff(x).canAdd(8*c)
+       // result: (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
        for {
-               x := v.Args[0]
-               if x.Op != OpAMD64MOVWloadidx2 {
+               x := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               off := x.AuxInt
-               sym := x.Aux
-               _ = x.Args[2]
-               ptr := x.Args[0]
-               idx := x.Args[1]
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               if !(ValAndOff(x).canAdd(8 * c)) {
                        break
                }
-               b = x.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx2, v.Type)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = off
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(idx)
-               v0.AddArg(mem)
+               v.reset(OpAMD64MOVQstoreconstidx8)
+               v.AuxInt = ValAndOff(x).add(8 * c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWQZX (ANDLconst [c] x))
+       return false
+}
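
Editor's note: the idx1-to-idx8 and displacement-folding rules above are plain address arithmetic. An idx8 op addresses base + 8*idx + disp, so a left shift by 3 is absorbed into the hardware scale, and biasing the index by d is the same as biasing the displacement by 8*d. A quick check of both identities:

package main

import "fmt"

// ea8 models the x86 addressing mode disp(base)(idx*8) used by the
// *idx8 ops: effective address = base + 8*idx + disp.
func ea8(base, idx, disp int64) int64 { return base + 8*idx + disp }

func main() {
	base, idx, disp, d := int64(0x1000), int64(5), int64(16), int64(3)

	// SHLQconst [3] folding: an unscaled (idx1) access of idx<<3 is
	// exactly an 8-scaled access of idx.
	fmt.Println(base+(idx<<3)+disp == ea8(base, idx, disp)) // true

	// ADDQconst folding: shifting the index by d moves the address by
	// 8*d, so the rule rewrites [x] ... (ADDQconst [d] idx) to
	// [ValAndOff(x).add(8*d)] ... idx.
	fmt.Println(ea8(base, idx+d, disp) == ea8(base, idx, disp+8*d)) // true
}
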
+func rewriteValueAMD64_OpAMD64MOVQstoreidx1_0(v *Value) bool {
+       // match: (MOVQstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
        // cond:
-       // result: (ANDLconst [c & 0xffff] x)
+       // result: (MOVQstoreidx8 [c] {sym} ptr idx val mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDLconst {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64ANDLconst)
-               v.AuxInt = c & 0xffff
-               v.AddArg(x)
-               return true
-       }
-       // match: (MOVWQZX (MOVWQZX x))
-       // cond:
-       // result: (MOVWQZX x)
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVWQZX {
+               if v_1.AuxInt != 3 {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVWQZX)
-               v.AddArg(x)
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVQstoreidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWQZX (MOVBQZX x))
-       // cond:
-       // result: (MOVBQZX x)
+       // match: (MOVQstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVBQZX {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64MOVBQZX)
-               v.AddArg(x)
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MOVWload_0(v *Value) bool {
-       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
-       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-       // result: (MOVWQZX x)
+       // match: (MOVQstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
-               off := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[3]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVWstore {
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               _ = v_1.Args[2]
-               ptr2 := v_1.Args[0]
-               x := v_1.Args[1]
-               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWQZX)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVQstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWload [off1+off2] {sym} ptr mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstoreidx8_0(v *Value) bool {
+       // match: (MOVQstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVQstoreidx8 [c+d] {sym} ptr idx val mem)
        for {
-               off1 := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
-               _ = v.Args[1]
+               _ = v.Args[3]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := v_0.AuxInt
+               d := v_0.AuxInt
                ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64MOVQstoreidx8)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MOVQstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond: is32Bit(c+8*d)
+       // result: (MOVQstoreidx8 [c+8*d] {sym} ptr idx val mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + 8*d)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstoreidx8)
+               v.AuxInt = c + 8*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
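
Editor's note: every offset fold above is guarded by is32Bit because x86-64 memory operands carry only a signed 32-bit displacement; a combined offset that overflows int32 must stay as explicit address arithmetic. The guard is a round-trip check (shown here as in the ssa package, from memory):

package main

import "fmt"

// is32Bit reports whether n fits in a signed 32-bit displacement,
// i.e. it survives a round trip through int32.
func is32Bit(n int64) bool { return n == int64(int32(n)) }

func main() {
	fmt.Println(is32Bit(1 << 30))    // true:  fits in a displacement
	fmt.Println(is32Bit(-(1 << 31))) // true:  int32 min is representable
	fmt.Println(is32Bit(1 << 31))    // false: fold must not fire
}
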
+func rewriteValueAMD64_OpAMD64MOVSDload_0(v *Value) bool {
+       // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSDload [off1+off2] {sym} ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
                base := v_0.Args[0]
                mem := v.Args[1]
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWload)
+               v.reset(OpAMD64MOVSDload)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // match: (MOVSDload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -13727,7 +15075,7 @@ func rewriteValueAMD64_OpAMD64MOVWload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx1)
+               v.reset(OpAMD64MOVSDloadidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -13735,15 +15083,15 @@ func rewriteValueAMD64_OpAMD64MOVWload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem)
+       // match: (MOVSDload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
+       // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ2 {
+               if v_0.Op != OpAMD64LEAQ8 {
                        break
                }
                off2 := v_0.AuxInt
@@ -13755,7 +15103,7 @@ func rewriteValueAMD64_OpAMD64MOVWload_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx2)
+               v.reset(OpAMD64MOVSDloadidx8)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -13763,9 +15111,9 @@ func rewriteValueAMD64_OpAMD64MOVWload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off] {sym} (ADDQ ptr idx) mem)
+       // match: (MOVSDload [off] {sym} (ADDQ ptr idx) mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVWloadidx1 [off] {sym} ptr idx mem)
+       // result: (MOVSDloadidx1 [off] {sym} ptr idx mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -13781,7 +15129,7 @@ func rewriteValueAMD64_OpAMD64MOVWload_0(v *Value) bool {
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx1)
+               v.reset(OpAMD64MOVSDloadidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -13789,61 +15137,39 @@ func rewriteValueAMD64_OpAMD64MOVWload_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
+       // match: (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _))
+       // cond:
+       // result: (MOVQi2f val)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQstore {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+               if v_1.AuxInt != off {
                        break
                }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWload [off1+off2] {sym} ptr mem)
-       for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if v_1.Aux != sym {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(is32Bit(off1 + off2)) {
+               _ = v_1.Args[2]
+               if ptr != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVWload)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               val := v_1.Args[1]
+               v.reset(OpAMD64MOVQi2f)
+               v.AddArg(val)
                return true
        }
        return false
 }
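
Editor's note: the last MOVSDload rule above is part of this CL's FP plumbing — a MOVSDload that reads back what a MOVQstore just wrote to the same address is replaced by MOVQi2f, a direct GP-to-XMM bit move, skipping memory entirely. Semantically it reinterprets the 64 stored bits as a float64, which the sketch below models with math.Float64frombits:

package main

import (
	"fmt"
	"math"
)

func main() {
	// The integer bit pattern a MOVQstore would write...
	bits := uint64(0x400921FB54442D18)
	// ...is exactly what the eliminated MOVSDload would read back.
	// MOVQi2f performs this reinterpretation register-to-register.
	f := math.Float64frombits(bits)
	fmt.Println(f) // 3.141592653589793
}
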
-func rewriteValueAMD64_OpAMD64MOVWloadidx1_0(v *Value) bool {
-       // match: (MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem)
+func rewriteValueAMD64_OpAMD64MOVSDloadidx1_0(v *Value) bool {
+       // match: (MOVSDloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem)
        // cond:
-       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
+       // result: (MOVSDloadidx8 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -13853,37 +15179,12 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx1_0(v *Value) bool {
                if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if v_1.AuxInt != 3 {
                        break
                }
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVWloadidx2)
-               v.AuxInt = c
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx1 [c] {sym} (SHLQconst [1] idx) ptr mem)
-       // cond:
-       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               if v_0.AuxInt != 1 {
-                       break
-               }
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWloadidx2)
+               v.reset(OpAMD64MOVSDloadidx8)
                v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
@@ -13891,9 +15192,9 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // match: (MOVSDloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
        // cond: is32Bit(c+d)
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -13909,33 +15210,7 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               idx := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWloadidx1)
+               v.reset(OpAMD64MOVSDloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -13943,9 +15218,9 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // match: (MOVSDloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
        // cond: is32Bit(c+d)
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -13961,33 +15236,7 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx1)
-               v.AuxInt = c + d
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(idx)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
-       // cond: is32Bit(c+d)
-       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
-       for {
-               c := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               idx := v_0.Args[0]
-               ptr := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(c + d)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWloadidx1)
+               v.reset(OpAMD64MOVSDloadidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -13997,10 +15246,10 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx1_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVWloadidx2_0(v *Value) bool {
-       // match: (MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem)
+func rewriteValueAMD64_OpAMD64MOVSDloadidx8_0(v *Value) bool {
+       // match: (MOVSDloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem)
        // cond: is32Bit(c+d)
-       // result: (MOVWloadidx2 [c+d] {sym} ptr idx mem)
+       // result: (MOVSDloadidx8 [c+d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -14016,7 +15265,7 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx2_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx2)
+               v.reset(OpAMD64MOVSDloadidx8)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -14024,9 +15273,9 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx2_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem)
-       // cond: is32Bit(c+2*d)
-       // result: (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
+       // match: (MOVSDloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+8*d)
+       // result: (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -14039,11 +15288,11 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx2_0(v *Value) bool {
                d := v_1.AuxInt
                idx := v_1.Args[0]
                mem := v.Args[2]
-               if !(is32Bit(c + 2*d)) {
+               if !(is32Bit(c + 8*d)) {
                        break
                }
-               v.reset(OpAMD64MOVWloadidx2)
-               v.AuxInt = c + 2*d
+               v.reset(OpAMD64MOVSDloadidx8)
+               v.AuxInt = c + 8*d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
@@ -14052,54 +15301,10 @@ func rewriteValueAMD64_OpAMD64MOVWloadidx2_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
-       // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVWQSX {
-                       break
-               }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem)
-       // cond:
-       // result: (MOVWstore [off] {sym} ptr x mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVWQZX {
-                       break
-               }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(x)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+func rewriteValueAMD64_OpAMD64MOVSDstore_0(v *Value) bool {
+       // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       // result: (MOVSDstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -14115,7 +15320,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
+               v.reset(OpAMD64MOVSDstore)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(ptr)
@@ -14123,33 +15328,9 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
-       // cond: validOff(off)
-       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               mem := v.Args[2]
-               if !(validOff(off)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = makeValAndOff(int64(int16(c)), off)
-               v.Aux = sym
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -14166,7 +15347,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
+               v.reset(OpAMD64MOVSDstore)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -14174,9 +15355,9 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -14195,7 +15376,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
+               v.reset(OpAMD64MOVSDstoreidx1)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -14204,15 +15385,15 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem)
+       // match: (MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ2 {
+               if v_0.Op != OpAMD64LEAQ8 {
                        break
                }
                off2 := v_0.AuxInt
@@ -14225,7 +15406,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx2)
+               v.reset(OpAMD64MOVSDstoreidx8)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
@@ -14234,9 +15415,9 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off] {sym} (ADDQ ptr idx) val mem)
+       // match: (MOVSDstore [off] {sym} (ADDQ ptr idx) val mem)
        // cond: ptr.Op != OpSB
-       // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
+       // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -14253,7 +15434,7 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
                if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
+               v.reset(OpAMD64MOVSDstoreidx1)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -14262,436 +15443,286 @@ func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w mem)
+       // match: (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem)
+       // cond:
+       // result: (MOVQstore [off] {sym} ptr val mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               p := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRLconst {
-                       break
-               }
-               if v_1.AuxInt != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               if w != x.Args[1] {
-                       break
-               }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               if v_1.Op != OpAMD64MOVQi2f {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
+               val := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w mem)
+       return false
+}
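
The last MOVSDstore rule above is a cross-register-file peephole: a float64 store whose value arrived through MOVQi2f (a general-purpose-to-XMM move) becomes a plain MOVQstore from the integer register, so the bits never visit an XMM register. A minimal Go sketch, not part of this commit, that should reach this shape once the usual store-to-load forwarding has turned math.Float64frombits into MOVQi2f:

    package sketch

    import "math"

    // storeBits is hypothetical. Float64frombits materializes the bit
    // pattern of b as a float64 (MOVQi2f after forwarding); the rule
    // rewrites MOVSDstore(ptr, MOVQi2f(b)) into MOVQstore(ptr, b).
    func storeBits(p *float64, b uint64) {
            *p = math.Float64frombits(b)
    }
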
+func rewriteValueAMD64_OpAMD64MOVSDstoreidx1_0(v *Value) bool {
+       // match: (MOVSDstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem)
+       // cond:
+       // result: (MOVSDstoreidx8 [c] {sym} ptr idx val mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if v_1.AuxInt != 16 {
-                       break
-               }
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstore {
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if x.AuxInt != i-2 {
+               if v_1.AuxInt != 3 {
                        break
                }
-               if x.Aux != s {
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVSDstoreidx8)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               _ = x.Args[2]
-               if p != x.Args[0] {
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
                        break
                }
-               if w != x.Args[1] {
+               v.reset(OpAMD64MOVSDstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w)
+               v.reset(OpAMD64MOVSDstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
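
The SHLQconst rule at the top of this function recognizes an index that was pre-multiplied by 8 (shifted left by 3) and moves the scaling into the addressing mode itself, so the shift instruction is absorbed by the SIB byte. A minimal Go sketch, not part of this commit, of source that produces that shape:

    package sketch

    // put is hypothetical. Indexing a []float64 computes ptr + 8*i;
    // the MOVSDstoreidx1 -> MOVSDstoreidx8 rule folds the *8 into the
    // store, yielding a single MOVSD val, (ptr)(idx*8).
    func put(a []float64, i int, v float64) {
            a[i] = v
    }
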
-func rewriteValueAMD64_OpAMD64MOVWstore_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRLconst {
-                       break
-               }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRLconst {
-                       break
-               }
-               if w0.AuxInt != j-16 {
-                       break
-               }
-               if w != w0.Args[0] {
-                       break
-               }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstore [i-2] {s} p w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_1.AuxInt
-               w := v_1.Args[0]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstore {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               w0 := x.Args[1]
-               if w0.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if w0.AuxInt != j-16 {
-                       break
-               }
-               if w != w0.Args[0] {
-                       break
-               }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem))
-       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)
-       // result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               x1 := v.Args[1]
-               if x1.Op != OpAMD64MOVWload {
-                       break
-               }
-               j := x1.AuxInt
-               s2 := x1.Aux
-               _ = x1.Args[1]
-               p2 := x1.Args[0]
-               mem := x1.Args[1]
-               mem2 := v.Args[2]
-               if mem2.Op != OpAMD64MOVWstore {
-                       break
-               }
-               if mem2.AuxInt != i-2 {
-                       break
-               }
-               if mem2.Aux != s {
-                       break
-               }
-               _ = mem2.Args[2]
-               if p != mem2.Args[0] {
-                       break
-               }
-               x2 := mem2.Args[1]
-               if x2.Op != OpAMD64MOVWload {
-                       break
-               }
-               if x2.AuxInt != j-2 {
-                       break
-               }
-               if x2.Aux != s2 {
-                       break
-               }
-               _ = x2.Args[1]
-               if p2 != x2.Args[0] {
-                       break
-               }
-               if mem != x2.Args[1] {
-                       break
-               }
-               if mem != mem2.Args[2] {
-                       break
-               }
-               if !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AuxInt = j - 2
-               v0.Aux = s2
-               v0.AddArg(p2)
-               v0.AddArg(mem)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
-       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
-       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+func rewriteValueAMD64_OpAMD64MOVSDstoreidx8_0(v *Value) bool {
+       // match: (MOVSDstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
+               v.reset(OpAMD64MOVSDstoreidx8)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond: is32Bit(c+8*d)
+       // result: (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem)
        for {
-               off1 := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               ptr := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + 8*d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64MOVSDstoreidx8)
+               v.AuxInt = c + 8*d
                v.Aux = sym
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
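
The asymmetry between the two MOVSDstoreidx8 rules above is deliberate: a constant d folded out of the pointer moves the displacement by d, but one folded out of the index moves it by 8*d, since the effective address is ptr + c + 8*(idx+d) = ptr + (c+8*d) + 8*idx. In both cases the is32Bit guard keeps the combined displacement within the signed 32-bit range that an amd64 addressing mode can encode.
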
-func rewriteValueAMD64_OpAMD64MOVWstoreconst_0(v *Value) bool {
-       // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+func rewriteValueAMD64_OpAMD64MOVSSload_0(v *Value) bool {
+       // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSSload [off1+off2] {sym} ptr mem)
        for {
-               sc := v.AuxInt
-               s := v.Aux
+               off1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                ptr := v_0.Args[0]
                mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
+               v.reset(OpAMD64MOVSSload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               sc := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
-               ptr := v_0.Args[0]
+               base := v_0.Args[0]
                mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
+               v.reset(OpAMD64MOVSSload)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
+               v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVSSload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               x := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(off)
+               v.reset(OpAMD64MOVSSloadidx1)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem)
-       // cond: canMergeSym(sym1, sym2)
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       // match: (MOVSSload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               x := v.AuxInt
+               off1 := v.AuxInt
                sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ2 {
+               if v_0.Op != OpAMD64LEAQ4 {
                        break
                }
-               off := v_0.AuxInt
+               off2 := v_0.AuxInt
                sym2 := v_0.Aux
                _ = v_0.Args[1]
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(off)
+               v.reset(OpAMD64MOVSSloadidx4)
+               v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconst [x] {sym} (ADDQ ptr idx) mem)
-       // cond:
-       // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
+       // match: (MOVSSload [off] {sym} (ADDQ ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVSSloadidx1 [off] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
@@ -14702,100 +15733,50 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst_0(v *Value) bool {
                ptr := v_0.Args[0]
                idx := v_0.Args[1]
                mem := v.Args[1]
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = x
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSloadidx1)
+               v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               p := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64MOVWstoreconst {
-                       break
-               }
-               a := x.AuxInt
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[1]
-               if p != x.Args[0] {
-                       break
-               }
-               mem := x.Args[1]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreconst)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
-       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       // match: (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _))
+       // cond:
+       // result: (MOVLi2f val)
        for {
-               sc := v.AuxInt
-               sym1 := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAL {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLstore {
                        break
                }
-               off := v_0.AuxInt
-               sym2 := v_0.Aux
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
+               if v_1.AuxInt != off {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
-       // cond: ValAndOff(sc).canAdd(off)
-       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-       for {
-               sc := v.AuxInt
-               s := v.Aux
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDLconst {
+               if v_1.Aux != sym {
                        break
                }
-               off := v_0.AuxInt
-               ptr := v_0.Args[0]
-               mem := v.Args[1]
-               if !(ValAndOff(sc).canAdd(off)) {
+               _ = v_1.Args[2]
+               if ptr != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconst)
-               v.AuxInt = ValAndOff(sc).add(off)
-               v.Aux = s
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               val := v_1.Args[1]
+               v.reset(OpAMD64MOVLi2f)
+               v.AddArg(val)
                return true
        }
        return false
 }
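
The final MOVSSload rule is store-to-load forwarding across register files: a float32 reload from an address that was just written by a 32-bit integer store (same off, sym, and ptr) collapses into MOVLi2f of the stored value, with no memory traffic at all. A minimal Go sketch, not part of this commit, of a function that type-puns through memory in exactly this way:

    package sketch

    import "math"

    // bitsToFloat is hypothetical. Float32frombits punning goes
    // through a stack slot: a MOVLstore of b followed by a MOVSSload
    // of the same slot, which this rule forwards as MOVLi2f b.
    func bitsToFloat(b uint32) float32 {
            return math.Float32frombits(b)
    }
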
-func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1_0(v *Value) bool {
-       // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem)
+func rewriteValueAMD64_OpAMD64MOVSSloadidx1_0(v *Value) bool {
+       // match: (MOVSSloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem)
        // cond:
-       // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
+       // result: (MOVSSloadidx4 [c] {sym} ptr idx mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -14805,12 +15786,12 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1_0(v *Value) bool {
                if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if v_1.AuxInt != 2 {
                        break
                }
                idx := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVWstoreconstidx2)
+               v.reset(OpAMD64MOVSSloadidx4)
                v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
@@ -14818,37 +15799,37 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // match: (MOVSSloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                ptr := v_0.Args[0]
                idx := v.Args[1]
                mem := v.Args[2]
-               if !(ValAndOff(x).canAdd(c)) {
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
+               v.reset(OpAMD64MOVSSloadidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       // match: (MOVSSloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -14856,92 +15837,54 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1_0(v *Value) bool {
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1.AuxInt
+               d := v_1.AuxInt
                idx := v_1.Args[0]
                mem := v.Args[2]
-               if !(ValAndOff(x).canAdd(c)) {
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx1)
-               v.AuxInt = ValAndOff(x).add(c)
+               v.reset(OpAMD64MOVSSloadidx1)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
-       for {
-               c := v.AuxInt
-               s := v.Aux
-               _ = v.Args[2]
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstoreconstidx1 {
-                       break
-               }
-               a := x.AuxInt
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[2]
-               if p != x.Args[0] {
-                       break
-               }
-               if i != x.Args[1] {
-                       break
-               }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(i)
-               v.AddArg(mem)
-               return true
-       }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVWstoreconstidx2_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem)
-       // cond: ValAndOff(x).canAdd(c)
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+func rewriteValueAMD64_OpAMD64MOVSSloadidx4_0(v *Value) bool {
+       // match: (MOVSSloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVSSloadidx4 [c+d] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_0.AuxInt
+               d := v_0.AuxInt
                ptr := v_0.Args[0]
                idx := v.Args[1]
                mem := v.Args[2]
-               if !(ValAndOff(x).canAdd(c)) {
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(c)
+               v.reset(OpAMD64MOVSSloadidx4)
+               v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem)
-       // cond: ValAndOff(x).canAdd(2*c)
-       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
+       // match: (MOVSSloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+4*d)
+       // result: (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem)
        for {
-               x := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
@@ -14949,65 +15892,192 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconstidx2_0(v *Value) bool {
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1.AuxInt
+               d := v_1.AuxInt
                idx := v_1.Args[0]
                mem := v.Args[2]
-               if !(ValAndOff(x).canAdd(2 * c)) {
+               if !(is32Bit(c + 4*d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreconstidx2)
-               v.AuxInt = ValAndOff(x).add(2 * c)
+               v.reset(OpAMD64MOVSSloadidx4)
+               v.AuxInt = c + 4*d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
-       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
-       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVSSstore_0(v *Value) bool {
+       // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVSSstore [off1+off2] {sym} ptr val mem)
        for {
-               c := v.AuxInt
-               s := v.Aux
+               off1 := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               p := v.Args[0]
-               i := v.Args[1]
-               x := v.Args[2]
-               if x.Op != OpAMD64MOVWstoreconstidx2 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               a := x.AuxInt
-               if x.Aux != s {
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               _ = x.Args[2]
-               if p != x.Args[0] {
+               v.reset(OpAMD64MOVSSstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               if i != x.Args[1] {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               mem := x.Args[2]
-               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+               v.reset(OpAMD64MOVSSstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               v.reset(OpAMD64MOVLstoreconstidx1)
-               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type)
-               v0.AuxInt = 1
-               v0.AddArg(i)
-               v.AddArg(v0)
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ4 {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSstoreidx4)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off] {sym} (ADDQ ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem)
+       // cond:
+       // result: (MOVLstore [off] {sym} ptr val mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLi2f {
+                       break
+               }
+               val := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
        return false
 }
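
The MOVSSstore offset-folding rules mirror their integer counterparts: constant pointer arithmetic (ADDQconst, LEAQ, LEAQ1, LEAQ4) migrates into the store's displacement whenever the sum still fits in 32 bits. A minimal Go sketch, not part of this commit:

    package sketch

    // point and setY are hypothetical. The address of p.y is p+4, an
    // ADDQconst [4]; the first rule folds the 4 into the displacement,
    // producing MOVSS val, 4(ptr) with no separate address arithmetic.
    type point struct{ x, y float32 }

    func setY(p *point, v float32) {
            p.y = v
    }
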
-func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
-       // match: (MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem)
+func rewriteValueAMD64_OpAMD64MOVSSstoreidx1_0(v *Value) bool {
+       // match: (MOVSSstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem)
        // cond:
-       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       // result: (MOVSSstoreidx4 [c] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -15017,13 +16087,13 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               if v_1.AuxInt != 1 {
+               if v_1.AuxInt != 2 {
                        break
                }
                idx := v_1.Args[0]
                val := v.Args[2]
                mem := v.Args[3]
-               v.reset(OpAMD64MOVWstoreidx2)
+               v.reset(OpAMD64MOVSSstoreidx4)
                v.AuxInt = c
                v.Aux = sym
                v.AddArg(ptr)
@@ -15032,9 +16102,9 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // match: (MOVSSstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
        // cond: is32Bit(c+d)
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -15051,7 +16121,7 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
+               v.reset(OpAMD64MOVSSstoreidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -15060,9 +16130,9 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
        // cond: is32Bit(c+d)
-       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -15079,7 +16149,7 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx1)
+               v.reset(OpAMD64MOVSSstoreidx1)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -15088,224 +16158,12 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRLconst {
-                       break
-               }
-               if v_2.AuxInt != 16 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx1 {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               if w != x.Args[2] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if v_2.AuxInt != 16 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx1 {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               if w != x.Args[2] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRLconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx1 {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRLconst {
-                       break
-               }
-               if w0.AuxInt != j-16 {
-                       break
-               }
-               if w != w0.Args[0] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
-       // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
-       for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx1 {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if w0.AuxInt != j-16 {
-                       break
-               }
-               if w != w0.Args[0] {
-                       break
-               }
-               mem := x.Args[3]
-               if !(x.Uses == 1 && clobber(x)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v.AddArg(idx)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
        return false
 }
-func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+func rewriteValueAMD64_OpAMD64MOVSSstoreidx4_0(v *Value) bool {
+       // match: (MOVSSstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem)
        // cond: is32Bit(c+d)
-       // result: (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
+       // result: (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -15322,7 +16180,7 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
                if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx2)
+               v.reset(OpAMD64MOVSSstoreidx4)
                v.AuxInt = c + d
                v.Aux = sym
                v.AddArg(ptr)
@@ -15331,9 +16189,9 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem)
-       // cond: is32Bit(c+2*d)
-       // result: (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
+       // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond: is32Bit(c+4*d)
+       // result: (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem)
        for {
                c := v.AuxInt
                sym := v.Aux
@@ -15347,11 +16205,11 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
                idx := v_1.Args[0]
                val := v.Args[2]
                mem := v.Args[3]
-               if !(is32Bit(c + 2*d)) {
+               if !(is32Bit(c + 4*d)) {
                        break
                }
-               v.reset(OpAMD64MOVWstoreidx2)
-               v.AuxInt = c + 2*d
+               v.reset(OpAMD64MOVSSstoreidx4)
+               v.AuxInt = c + 4*d
                v.Aux = sym
                v.AddArg(ptr)
                v.AddArg(idx)
@@ -15359,2229 +16217,4316 @@ func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWQSX_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRLconst {
-                       break
-               }
-               if v_2.AuxInt != 16 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx2 {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               if w != x.Args[2] {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               mem := x.Args[3]
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
+       // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if v_2.AuxInt != 16 {
-                       break
-               }
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx2 {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               if w != x.Args[2] {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               mem := x.Args[3]
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(w)
-               v.AddArg(mem)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
+       // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem))
        // cond: x.Uses == 1 && clobber(x)
-       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
+       // result: @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
        for {
-               i := v.AuxInt
-               s := v.Aux
-               _ = v.Args[3]
-               p := v.Args[0]
-               idx := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64SHRQconst {
-                       break
-               }
-               j := v_2.AuxInt
-               w := v_2.Args[0]
-               x := v.Args[3]
-               if x.Op != OpAMD64MOVWstoreidx2 {
-                       break
-               }
-               if x.AuxInt != i-2 {
-                       break
-               }
-               if x.Aux != s {
-                       break
-               }
-               _ = x.Args[3]
-               if p != x.Args[0] {
-                       break
-               }
-               if idx != x.Args[1] {
-                       break
-               }
-               w0 := x.Args[2]
-               if w0.Op != OpAMD64SHRQconst {
-                       break
-               }
-               if w0.AuxInt != j-16 {
-                       break
-               }
-               if w != w0.Args[0] {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               mem := x.Args[3]
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
                if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVLstoreidx1)
-               v.AuxInt = i - 2
-               v.Aux = s
-               v.AddArg(p)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
-               v0.AuxInt = 1
-               v0.AddArg(idx)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQSXload, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(w0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULL_0(v *Value) bool {
-       // match: (MULL x (MOVLconst [c]))
-       // cond:
-       // result: (MULLconst [c] x)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               v.reset(OpAMD64MULLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MULL (MOVLconst [c]) x)
-       // cond:
-       // result: (MULLconst [c] x)
+       // match: (MOVWQSX (ANDLconst [c] x))
+       // cond: c & 0x8000 == 0
+       // result: (ANDLconst [c & 0x7fff] x)
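+       // With bit 15 of c clear, the AND result is non-negative in 16 bits,
+       // so the sign extension only has to produce zero upper bits; masking
+       // with c & 0x7fff does exactly that and the MOVWQSX is dropped.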
        for {
-               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
                c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64MULLconst)
-               v.AuxInt = c
+               x := v_0.Args[0]
+               if !(c&0x8000 == 0) {
+                       break
+               }
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0x7fff
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULLconst_0(v *Value) bool {
-       // match: (MULLconst [c] (MULLconst [d] x))
+       // match: (MOVWQSX (MOVWQSX x))
        // cond:
-       // result: (MULLconst [int64(int32(c * d))] x)
+       // result: (MOVWQSX x)
        for {
-               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MULLconst {
+               if v_0.Op != OpAMD64MOVWQSX {
                        break
                }
-               d := v_0.AuxInt
                x := v_0.Args[0]
-               v.reset(OpAMD64MULLconst)
-               v.AuxInt = int64(int32(c * d))
+               v.reset(OpAMD64MOVWQSX)
                v.AddArg(x)
                return true
        }
-       // match: (MULLconst [c] (MOVLconst [d]))
+       // match: (MOVWQSX (MOVBQSX x))
        // cond:
-       // result: (MOVLconst [int64(int32(c*d))])
+       // result: (MOVBQSX x)
        for {
-               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64MOVBQSX {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int64(int32(c * d))
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQSX)
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MULQ_0(v *Value) bool {
-       // match: (MULQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (MULQconst [c] x)
+func rewriteValueAMD64_OpAMD64MOVWQSXload_0(v *Value) bool {
+       // match: (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVWQSX x)
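+       // Store-to-load forwarding: the load reads back the halfword the
+       // preceding MOVWstore wrote at the same address, so the stored value
+       // is sign-extended directly and memory is never touched.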
        for {
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[1]
-               x := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVWstore {
                        break
                }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[2]
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
                        break
                }
-               v.reset(OpAMD64MULQconst)
-               v.AuxInt = c
+               v.reset(OpAMD64MOVWQSX)
                v.AddArg(x)
                return true
        }
-       // match: (MULQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (MULQconst [c] x)
+       // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(is32Bit(c)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64MULQconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64MOVWQSXload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64MOVWQZX_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (MULQconst [c] (MULQconst [d] x))
-       // cond: is32Bit(c*d)
-       // result: (MULQconst [c * d] x)
+       // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
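+       // Zero-extension counterpart of the MOVWQSX rules above: MOVWload
+       // already zero-extends the halfword into the register, so the MOVWQZX
+       // collapses into the (possibly narrowed) load itself.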
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MULQconst {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWload {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(is32Bit(c * d)) {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MULQconst)
-               v.AuxInt = c * d
-               v.AddArg(x)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MULQconst [-1] x)
-       // cond:
-       // result: (NEGQ x)
+       // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
        for {
-               if v.AuxInt != -1 {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVLload {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64NEGQ)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [0] _)
-       // cond:
-       // result: (MOVQconst [0])
-       for {
-               if v.AuxInt != 0 {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MULQconst [1] x)
-       // cond:
-       // result: x
+       // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
        for {
-               if v.AuxInt != 1 {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVQload {
                        break
                }
-               x := v.Args[0]
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[1]
+               ptr := x.Args[0]
+               mem := x.Args[1]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, v.Type)
                v.reset(OpCopy)
-               v.Type = x.Type
-               v.AddArg(x)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MULQconst [3] x)
-       // cond:
-       // result: (LEAQ2 x x)
+       // match: (MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
        for {
-               if v.AuxInt != 3 {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWloadidx1 {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ2)
-               v.AddArg(x)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [5] x)
-       // cond:
-       // result: (LEAQ4 x x)
-       for {
-               if v.AuxInt != 5 {
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ4)
-               v.AddArg(x)
-               v.AddArg(x)
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MULQconst [7] x)
-       // cond:
-       // result: (LEAQ8 (NEGQ <v.Type> x) x)
+       // match: (MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
        for {
-               if v.AuxInt != 7 {
+               x := v.Args[0]
+               if x.Op != OpAMD64MOVWloadidx2 {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, v.Type)
-               v0.AddArg(x)
+               off := x.AuxInt
+               sym := x.Aux
+               _ = x.Args[2]
+               ptr := x.Args[0]
+               idx := x.Args[1]
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               b = x.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx2, v.Type)
+               v.reset(OpCopy)
                v.AddArg(v0)
-               v.AddArg(x)
+               v0.AuxInt = off
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(idx)
+               v0.AddArg(mem)
                return true
        }
-       // match: (MULQconst [9] x)
+       // match: (MOVWQZX (ANDLconst [c] x))
        // cond:
-       // result: (LEAQ8 x x)
+       // result: (ANDLconst [c & 0xffff] x)
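+       // Zero-extending the low 16 bits is itself a mask with 0xffff, so the
+       // two masks combine: e.g. (MOVWQZX (ANDLconst [0x3ffff] x)) becomes
+       // (ANDLconst [0xffff] x).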
        for {
-               if v.AuxInt != 9 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDLconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AddArg(x)
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64ANDLconst)
+               v.AuxInt = c & 0xffff
                v.AddArg(x)
                return true
        }
-       // match: (MULQconst [11] x)
+       // match: (MOVWQZX (MOVWQZX x))
        // cond:
-       // result: (LEAQ2 x (LEAQ4 <v.Type> x x))
+       // result: (MOVWQZX x)
        for {
-               if v.AuxInt != 11 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVWQZX {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ2)
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVWQZX)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
                return true
        }
-       // match: (MULQconst [13] x)
+       // match: (MOVWQZX (MOVBQZX x))
        // cond:
-       // result: (LEAQ4 x (LEAQ2 <v.Type> x x))
+       // result: (MOVBQZX x)
        for {
-               if v.AuxInt != 13 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVBQZX {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ4)
+               x := v_0.Args[0]
+               v.reset(OpAMD64MOVBQZX)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MULQconst_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULQconst [21] x)
-       // cond:
-       // result: (LEAQ4 x (LEAQ4 <v.Type> x x))
+func rewriteValueAMD64_OpAMD64MOVWload_0(v *Value) bool {
+       // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
+       // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+       // result: (MOVWQZX x)
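+       // Same forwarding as MOVWQSXload above, but MOVWload's result is
+       // defined as zero-extended, so the forwarded value is wrapped in
+       // MOVWQZX.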
        for {
-               if v.AuxInt != 21 {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVWstore {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ4)
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               _ = v_1.Args[2]
+               ptr2 := v_1.Args[0]
+               x := v_1.Args[1]
+               if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWQZX)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
                return true
        }
-       // match: (MULQconst [25] x)
-       // cond:
-       // result: (LEAQ8 x (LEAQ2 <v.Type> x x))
+       // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWload [off1+off2] {sym} ptr mem)
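+       // Address folding: a constant pointer adjustment is absorbed into the
+       // load's displacement, as long as the sum still fits in the signed
+       // 32-bit displacement amd64 addressing allows (is32Bit).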
        for {
-               if v.AuxInt != 25 {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULQconst [37] x)
-       // cond:
-       // result: (LEAQ4 x (LEAQ8 <v.Type> x x))
+       // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               if v.AuxInt != 37 {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ4)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [41] x)
-       // cond:
-       // result: (LEAQ8 x (LEAQ4 <v.Type> x x))
-       for {
-               if v.AuxInt != 41 {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULQconst [73] x)
-       // cond:
-       // result: (LEAQ8 x (LEAQ8 <v.Type> x x))
+       // match: (MOVWload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               if v.AuxInt != 73 {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpAMD64LEAQ8)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SUBQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = log2(c + 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
+       // match: (MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ2 {
                        break
                }
-               v.reset(OpAMD64LEAQ1)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = log2(c - 1)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c-2) && c >= 34
-       // result: (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-2) && c >= 34) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64LEAQ2)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = log2(c - 2)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c-4) && c >= 68
-       // result: (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
+       // match: (MOVWload [off] {sym} (ADDQ ptr idx) mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWloadidx1 [off] {sym} ptr idx mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-4) && c >= 68) {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               v.reset(OpAMD64LEAQ4)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = log2(c - 4)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: isPowerOfTwo(c-8) && c >= 136
-       // result: (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(isPowerOfTwo(c-8) && c >= 136) {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(ptr.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64LEAQ8)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-               v0.AuxInt = log2(c - 8)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULQconst_20(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (MULQconst [c] x)
-       // cond: c%3 == 0 && isPowerOfTwo(c/3)
-       // result: (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
+       // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = log2(c / 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (MULQconst [c] x)
-       // cond: c%5 == 0 && isPowerOfTwo(c/5)
-       // result: (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
-       for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = log2(c / 5)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULQconst [c] x)
-       // cond: c%9 == 0 && isPowerOfTwo(c/9)
-       // result: (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
+       // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWload [off1+off2] {sym} ptr mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = log2(c / 9)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
-               v0.AddArg(x)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (MULQconst [c] (MOVQconst [d]))
+       return false
+}
+func rewriteValueAMD64_OpAMD64MOVWloadidx1_0(v *Value) bool {
+       // match: (MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem)
        // cond:
-       // result: (MOVQconst [c*d])
+       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
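+       // An index shifted left by 1 is idx*2, which the scale-2 addressing
+       // mode encodes directly, so the shift folds away:
+       //   (MOVWloadidx1 ptr (SHLQconst [1] idx)) => (MOVWloadidx2 ptr idx)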
        for {
                c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = c * d
+               if v_1.AuxInt != 1 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULSD_0(v *Value) bool {
-       // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (MULSDmem x [off] {sym} ptr mem)
+       // match: (MOVWloadidx1 [c] {sym} (SHLQconst [1] idx) ptr mem)
+       // cond:
+       // result: (MOVWloadidx2 [c] {sym} ptr idx mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVSDload {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               if v_0.AuxInt != 1 {
                        break
                }
-               v.reset(OpAMD64MULSDmem)
-               v.AuxInt = off
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = c
                v.Aux = sym
-               v.AddArg(x)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MULSD l:(MOVSDload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (MULSDmem x [off] {sym} ptr mem)
+       // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVSDload {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MULSDmem)
-               v.AuxInt = off
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = c + d
                v.Aux = sym
-               v.AddArg(x)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64MULSDmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (MULSDmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MULSDmem [off1+off2] {sym} val base mem)
+       // match: (MOVWloadidx1 [c] {sym} idx (ADDQconst [d] ptr) mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               off1 := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               val := v.Args[0]
+               idx := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
+               d := v_1.AuxInt
+               ptr := v_1.Args[0]
                mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MULSDmem)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = c + d
                v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MULSDmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MULSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (MOVWloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               c := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               val := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MULSDmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MULSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _))
-       // cond:
-       // result: (MULSD x (MOVQi2f y))
+       // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] idx) ptr mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem)
        for {
-               off := v.AuxInt
+               c := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVQstore {
-                       break
-               }
-               if v_2.AuxInt != off {
-                       break
-               }
-               if v_2.Aux != sym {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               d := v_0.AuxInt
+               idx := v_0.Args[0]
+               ptr := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64MULSD)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQi2f, typ.Float64)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVWloadidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MULSS_0(v *Value) bool {
-       // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (MULSSmem x [off] {sym} ptr mem)
+func rewriteValueAMD64_OpAMD64MOVWloadidx2_0(v *Value) bool {
+       // match: (MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVWloadidx2 [c+d] {sym} ptr idx mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVSSload {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64MULSSmem)
-               v.AuxInt = off
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = c + d
                v.Aux = sym
-               v.AddArg(x)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
-       // match: (MULSS l:(MOVSSload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (MULSSmem x [off] {sym} ptr mem)
+       // match: (MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem)
+       // cond: is32Bit(c+2*d)
+       // result: (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
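+       // With scale 2, a constant d added to the index moves the address by
+       // 2*d, so it lands in the displacement as c+2*d (again guarded by
+       // is32Bit).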
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVSSload {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(c + 2*d)) {
                        break
                }
-               v.reset(OpAMD64MULSSmem)
-               v.AuxInt = off
+               v.reset(OpAMD64MOVWloadidx2)
+               v.AuxInt = c + 2*d
                v.Aux = sym
-               v.AddArg(x)
                v.AddArg(ptr)
+               v.AddArg(idx)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64MULSSmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (MULSSmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (MULSSmem [off1+off2] {sym} val base mem)
+func rewriteValueAMD64_OpAMD64MOVWstore_0(v *Value) bool {
+       // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
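+       // A halfword store only writes the low 16 bits of its source, so a
+       // sign or zero extension feeding it is dead and the unextended value
+       // is stored directly (this rule and the MOVWQZX rule below).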
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               val := v.Args[0]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64MOVWQSX {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
+               x := v_1.Args[0]
                mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem)
+       // cond:
+       // result: (MOVWstore [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVWQZX {
                        break
                }
-               v.reset(OpAMD64MULSSmem)
-               v.AuxInt = off1 + off2
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off
                v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
+               v.AddArg(ptr)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (MULSSmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (MULSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
        for {
                off1 := v.AuxInt
-               sym1 := v.Aux
+               sym := v.Aux
                _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64MULSSmem)
+               v.reset(OpAMD64MOVWstore)
                v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
+               v.Aux = sym
+               v.AddArg(ptr)
                v.AddArg(val)
-               v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (MULSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _))
-       // cond:
-       // result: (MULSS x (MOVLi2f y))
+       // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem)
+       // cond: validOff(off)
+       // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
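+       // A constant store becomes MOVWstoreconst; makeValAndOff packs the
+       // value (truncated to int16) and the offset into a single AuxInt,
+       // which is why the offset must satisfy validOff.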
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVLstore {
-                       break
-               }
-               if v_2.AuxInt != off {
-                       break
-               }
-               if v_2.Aux != sym {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               c := v_1.AuxInt
+               mem := v.Args[2]
+               if !(validOff(off)) {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64MULSS)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLi2f, typ.Float32)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = makeValAndOff(int64(int16(c)), off)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NEGL_0(v *Value) bool {
-       // match: (NEGL (MOVLconst [c]))
-       // cond:
-       // result: (MOVLconst [int64(int32(-c))])
+       // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int64(int32(-c))
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NEGQ_0(v *Value) bool {
-       // match: (NEGQ (MOVQconst [c]))
-       // cond:
-       // result: (MOVQconst [-c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = -c
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (NEGQ (ADDQconst [c] (NEGQ x)))
-       // cond: c != -(1<<31)
-       // result: (ADDQconst [-c] x)
+       // match: (MOVWstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64NEGQ {
+               if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               x := v_0_0.Args[0]
-               if !(c != -(1 << 31)) {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = -c
-               v.AddArg(x)
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NOTL_0(v *Value) bool {
-       // match: (NOTL (MOVLconst [c]))
-       // cond:
-       // result: (MOVLconst [^c])
+       // match: (MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64LEAQ2 {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = ^c
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64NOTQ_0(v *Value) bool {
-       // match: (NOTQ (MOVQconst [c]))
-       // cond:
-       // result: (MOVQconst [^c])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               c := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = ^c
+               v.reset(OpAMD64MOVWstoreidx2)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64ORL_0(v *Value) bool {
-       // match: (ORL x (MOVLconst [c]))
-       // cond:
-       // result: (ORLconst [c] x)
+       // match: (MOVWstore [off] {sym} (ADDQ ptr idx) val mem)
+       // cond: ptr.Op != OpSB
+       // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64ORLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL (MOVLconst [c]) x)
-       // cond:
-       // result: (ORLconst [c] x)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(ptr.Op != OpSB) {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64ORLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL (SHLLconst x [c]) (SHRLconst x [d]))
-       // cond: d==32-c
-       // result: (ROLLconst x [c])
+       // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstore [i-2] {s} p w mem)
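+       // Store merging: w is written at [i-2] and w>>16 at [i], i.e. the low
+       // and high halves of a 32-bit value laid out little-endian, so the
+       // pair becomes a single MOVLstore at [i-2].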
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64SHRLconst {
                        break
                }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
+               if v_1.AuxInt != 16 {
                        break
                }
-               if !(d == 32-c) {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVWstore {
                        break
                }
-               v.reset(OpAMD64ROLLconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL (SHRLconst x [d]) (SHLLconst x [c]))
-       // cond: d==32-c
-       // result: (ROLLconst x [c])
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRLconst {
+               if x.AuxInt != i-2 {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
+               if x.Aux != s {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               _ = x.Args[2]
+               if p != x.Args[0] {
                        break
                }
-               if !(d == 32-c) {
+               if w != x.Args[1] {
                        break
                }
-               v.reset(OpAMD64ROLLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
-       // cond: d==16-c && c < 16 && t.Size() == 2
-       // result: (ROLWconst x [c])
+       // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstore [i-2] {s} p w mem)
        for {
-               t := v.Type
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRWconst {
+               if v_1.AuxInt != 16 {
                        break
                }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVWstore {
                        break
                }
-               if !(d == 16-c && c < 16 && t.Size() == 2) {
+               if x.AuxInt != i-2 {
                        break
                }
-               v.reset(OpAMD64ROLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
-       // cond: d==16-c && c < 16 && t.Size() == 2
-       // result: (ROLWconst x [c])
-       for {
-               t := v.Type
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRWconst {
+               if x.Aux != s {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
+               _ = x.Args[2]
+               if p != x.Args[0] {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               if w != x.Args[1] {
                        break
                }
-               if !(d == 16-c && c < 16 && t.Size() == 2) {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64ROLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
-       // cond: d==8-c && c < 8 && t.Size() == 1
-       // result: (ROLBconst x [c])
+       return false
+}
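The pair of rules directly above finishes the 16-bit store merging in this function: a MOVWstore of the high half of w (via SHRLconst or SHRQconst [16]) sitting two bytes above a MOVWstore of w itself, with the older store otherwise unused, is fused into a single 32-bit MOVLstore. A minimal sketch of Go source that lowers to this shape (the function name and the use of encoding/binary are illustrative assumptions, not taken from this commit):

        package demo

        import "encoding/binary"

        // putUint32 stores v as two little-endian 16-bit halves; the byte-store
        // rules first combine each pair of byte stores into a MOVWstore, and the
        // rules above then fuse the two MOVWstores into one MOVLstore.
        func putUint32(b []byte, v uint32) {
                binary.LittleEndian.PutUint16(b[0:], uint16(v))
                binary.LittleEndian.PutUint16(b[2:], uint16(v>>16))
        }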
+func rewriteValueAMD64_OpAMD64MOVWstore_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstore [i-2] {s} p w0 mem)
        for {
-               t := v.Type
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRBconst {
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVWstore {
                        break
                }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
+               if x.AuxInt != i-2 {
                        break
                }
-               if !(d == 8-c && c < 8 && t.Size() == 1) {
+               if x.Aux != s {
                        break
                }
-               v.reset(OpAMD64ROLBconst)
-               v.AuxInt = c
-               v.AddArg(x)
-               return true
-       }
-       // match: (ORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
-       // cond: d==8-c && c < 8 && t.Size() == 1
-       // result: (ROLBconst x [c])
-       for {
-               t := v.Type
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRBconst {
+               _ = x.Args[2]
+               if p != x.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
+               w0 := x.Args[1]
+               if w0.Op != OpAMD64SHRLconst {
                        break
                }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               if w0.AuxInt != j-16 {
                        break
                }
-               if !(d == 8-c && c < 8 && t.Size() == 1) {
+               if w != w0.Args[0] {
                        break
                }
-               v.reset(OpAMD64ROLBconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL (SHLL x y) (ANDL (SHRL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))))
-       // cond:
-       // result: (ROLL x y)
+       // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstore [i-2] {s} p w0 mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLL {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDL {
+               j := v_1.AuxInt
+               w := v_1.Args[0]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVWstore {
                        break
                }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHRL {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               if x != v_1_0.Args[0] {
-                       break
-               }
-               v_1_0_1 := v_1_0.Args[1]
-               if v_1_0_1.Op != OpAMD64NEGQ {
-                       break
-               }
-               if y != v_1_0_1.Args[0] {
-                       break
-               }
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SBBLcarrymask {
-                       break
-               }
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64CMPQconst {
-                       break
-               }
-               if v_1_1_0.AuxInt != 32 {
+               if x.AuxInt != i-2 {
                        break
                }
-               v_1_1_0_0 := v_1_1_0.Args[0]
-               if v_1_1_0_0.Op != OpAMD64NEGQ {
+               if x.Aux != s {
                        break
                }
-               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-               if v_1_1_0_0_0.Op != OpAMD64ADDQconst {
+               _ = x.Args[2]
+               if p != x.Args[0] {
                        break
                }
-               if v_1_1_0_0_0.AuxInt != -32 {
+               w0 := x.Args[1]
+               if w0.Op != OpAMD64SHRQconst {
                        break
                }
-               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst {
+               if w0.AuxInt != j-16 {
                        break
                }
-               if v_1_1_0_0_0_0.AuxInt != 31 {
+               if w != w0.Args[0] {
                        break
                }
-               if y != v_1_1_0_0_0_0.Args[0] {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64ROLL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(w0)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL (SHLL x y) (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHRL x (NEGQ y))))
-       // cond:
-       // result: (ROLL x y)
+       // match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem))
+       // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)
+       // result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLL {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               x1 := v.Args[1]
+               if x1.Op != OpAMD64MOVWload {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDL {
+               j := x1.AuxInt
+               s2 := x1.Aux
+               _ = x1.Args[1]
+               p2 := x1.Args[0]
+               mem := x1.Args[1]
+               mem2 := v.Args[2]
+               if mem2.Op != OpAMD64MOVWstore {
                        break
                }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SBBLcarrymask {
+               if mem2.AuxInt != i-2 {
                        break
                }
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64CMPQconst {
+               if mem2.Aux != s {
                        break
                }
-               if v_1_0_0.AuxInt != 32 {
+               _ = mem2.Args[2]
+               if p != mem2.Args[0] {
                        break
                }
-               v_1_0_0_0 := v_1_0_0.Args[0]
-               if v_1_0_0_0.Op != OpAMD64NEGQ {
+               x2 := mem2.Args[1]
+               if x2.Op != OpAMD64MOVWload {
                        break
                }
-               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
-               if v_1_0_0_0_0.Op != OpAMD64ADDQconst {
+               if x2.AuxInt != j-2 {
                        break
                }
-               if v_1_0_0_0_0.AuxInt != -32 {
+               if x2.Aux != s2 {
                        break
                }
-               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
-               if v_1_0_0_0_0_0.Op != OpAMD64ANDQconst {
+               _ = x2.Args[1]
+               if p2 != x2.Args[0] {
                        break
                }
-               if v_1_0_0_0_0_0.AuxInt != 31 {
+               if mem != x2.Args[1] {
                        break
                }
-               if y != v_1_0_0_0_0_0.Args[0] {
+               if mem != mem2.Args[2] {
                        break
                }
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHRL {
+               if !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1) && clobber(x2) && clobber(mem2)) {
                        break
                }
-               _ = v_1_1.Args[1]
-               if x != v_1_1.Args[0] {
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AuxInt = j - 2
+               v0.Aux = s2
+               v0.AddArg(p2)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
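The rule above is the copy-through case: two 16-bit loads from one buffer feeding two adjacent 16-bit stores into another, all sharing the same memory state, become a single MOVLload/MOVLstore pair. A sketch of source code that produces this pattern (names are illustrative, not from the commit):

        package demo

        import "encoding/binary"

        // copy4 moves four bytes as two 16-bit chunks; after the rewrite the
        // backend issues one 32-bit load and one 32-bit store instead of two of each.
        func copy4(dst, src []byte) {
                binary.LittleEndian.PutUint16(dst[0:], binary.LittleEndian.Uint16(src[0:]))
                binary.LittleEndian.PutUint16(dst[2:], binary.LittleEndian.Uint16(src[2:]))
        }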
+       // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
+       // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
+       // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               v_1_1_1 := v_1_1.Args[1]
-               if v_1_1_1.Op != OpAMD64NEGQ {
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
                        break
                }
-               if y != v_1_1_1.Args[0] {
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MOVWstore [off1+off2] {sym} ptr val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               v.reset(OpAMD64ROLL)
-               v.AddArg(x)
-               v.AddArg(y)
+               off2 := v_0.AuxInt
+               ptr := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ORL_10(v *Value) bool {
-       // match: (ORL (ANDL (SHRL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))) (SHLL x y))
-       // cond:
-       // result: (ROLL x y)
+func rewriteValueAMD64_OpAMD64MOVWstoreconst_0(v *Value) bool {
+       // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
        for {
+               sc := v.AuxInt
+               s := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHRL {
-                       break
-               }
-               _ = v_0_0.Args[1]
-               x := v_0_0.Args[0]
-               v_0_0_1 := v_0_0.Args[1]
-               if v_0_0_1.Op != OpAMD64NEGQ {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               y := v_0_0_1.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SBBLcarrymask {
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64CMPQconst {
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+       for {
+               sc := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               if v_0_1_0.AuxInt != 32 {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v_0_1_0_0 := v_0_1_0.Args[0]
-               if v_0_1_0_0.Op != OpAMD64NEGQ {
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ1 {
                        break
                }
-               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
-               if v_0_1_0_0_0.Op != OpAMD64ADDQconst {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               if v_0_1_0_0_0.AuxInt != -32 {
+               v.reset(OpAMD64MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem)
+       // cond: canMergeSym(sym1, sym2)
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ2 {
                        break
                }
-               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
-               if v_0_1_0_0_0_0.Op != OpAMD64ANDQconst {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2)) {
                        break
                }
-               if v_0_1_0_0_0_0.AuxInt != 31 {
+               v.reset(OpAMD64MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [x] {sym} (ADDQ ptr idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQ {
                        break
                }
-               if y != v_0_1_0_0_0_0.Args[0] {
+               _ = v_0.Args[1]
+               ptr := v_0.Args[0]
+               idx := v_0.Args[1]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVWstoreconstidx1)
+               v.AuxInt = x
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
+       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               p := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64MOVWstoreconst {
                        break
                }
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLL {
+               a := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               _ = v_1.Args[1]
-               if x != v_1.Args[0] {
+               _ = x.Args[1]
+               if p != x.Args[0] {
                        break
                }
-               if y != v_1.Args[1] {
+               mem := x.Args[1]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64ROLL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64MOVLstoreconst)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHRL x (NEGQ y))) (SHLL x y))
-       // cond:
-       // result: (ROLL x y)
+       // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
+       // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
        for {
+               sc := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               if v_0.Op != OpAMD64LEAL {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SBBLcarrymask {
+               off := v_0.AuxInt
+               sym2 := v_0.Aux
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64CMPQconst {
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem)
+       // cond: ValAndOff(sc).canAdd(off)
+       // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+       for {
+               sc := v.AuxInt
+               s := v.Aux
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDLconst {
                        break
                }
-               if v_0_0_0.AuxInt != 32 {
+               off := v_0.AuxInt
+               ptr := v_0.Args[0]
+               mem := v.Args[1]
+               if !(ValAndOff(sc).canAdd(off)) {
                        break
                }
-               v_0_0_0_0 := v_0_0_0.Args[0]
-               if v_0_0_0_0.Op != OpAMD64NEGQ {
+               v.reset(OpAMD64MOVWstoreconst)
+               v.AuxInt = ValAndOff(sc).add(off)
+               v.Aux = s
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
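In the constant-store merge above, the combined immediate places the value of the lower store (a) in the low 16 bits and the value of the store two bytes higher (c) in the high 16 bits, matching the little-endian layout of the resulting 32-bit constant. A self-contained illustration of that arithmetic (mergeWords is a hypothetical helper, not part of the compiler):

        package main

        import "fmt"

        // mergeWords mirrors ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16
        // from the rule's result expression.
        func mergeWords(aVal, cVal int64) int64 {
                return aVal&0xffff | cVal<<16
        }

        func main() {
                fmt.Printf("%#x\n", mergeWords(0x1234, 0x5678)) // prints 0x56781234
        }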
+func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1_0(v *Value) bool {
+       // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem)
+       // cond:
+       // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
-               if v_0_0_0_0_0.Op != OpAMD64ADDQconst {
+               if v_1.AuxInt != 1 {
                        break
                }
-               if v_0_0_0_0_0.AuxInt != -32 {
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVWstoreconstidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // cond: ValAndOff(x).canAdd(c)
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
-               if v_0_0_0_0_0_0.Op != OpAMD64ANDQconst {
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               if v_0_0_0_0_0_0.AuxInt != 31 {
+               v.reset(OpAMD64MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond: ValAndOff(x).canAdd(c)
+       // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               y := v_0_0_0_0_0_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHRL {
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               _ = v_0_1.Args[1]
-               x := v_0_1.Args[0]
-               v_0_1_1 := v_0_1.Args[1]
-               if v_0_1_1.Op != OpAMD64NEGQ {
+               v.reset(OpAMD64MOVWstoreconstidx1)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
+       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVWstoreconstidx1 {
                        break
                }
-               if y != v_0_1_1.Args[0] {
+               a := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLL {
+               _ = x.Args[2]
+               if p != x.Args[0] {
                        break
                }
-               _ = v_1.Args[1]
-               if x != v_1.Args[0] {
+               if i != x.Args[1] {
                        break
                }
-               if y != v_1.Args[1] {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64ROLL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(i)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL (SHLL x y) (ANDL (SHRL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))))
-       // cond:
-       // result: (ROLL x y)
-       for {
-               _ = v.Args[1]
+       return false
+}
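The ADDQconst folds in this function are guarded by ValAndOff(x).canAdd(c), which checks that the folded byte offset still fits in the 32-bit offset half of the packed AuxInt. A simplified sketch of that guard, assuming off is the already-unpacked offset:

        package demo

        // canAdd reports whether off+c still fits in a signed 32-bit offset,
        // so the constant can be folded without overflowing the offset field.
        func canAdd(off, c int64) bool {
                sum := off + c
                return sum == int64(int32(sum))
        }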
+func rewriteValueAMD64_OpAMD64MOVWstoreconstidx2_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem)
+       // cond: ValAndOff(x).canAdd(c)
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDL {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHRL {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               _ = v_1_0.Args[1]
-               if x != v_1_0.Args[0] {
+               c := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               mem := v.Args[2]
+               if !(ValAndOff(x).canAdd(c)) {
                        break
                }
-               v_1_0_1 := v_1_0.Args[1]
-               if v_1_0_1.Op != OpAMD64NEGL {
+               v.reset(OpAMD64MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem)
+       // cond: ValAndOff(x).canAdd(2*c)
+       // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
+       for {
+               x := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if y != v_1_0_1.Args[0] {
+               c := v_1.AuxInt
+               idx := v_1.Args[0]
+               mem := v.Args[2]
+               if !(ValAndOff(x).canAdd(2 * c)) {
                        break
                }
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SBBLcarrymask {
+               v.reset(OpAMD64MOVWstoreconstidx2)
+               v.AuxInt = ValAndOff(x).add(2 * c)
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
+       // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+       // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem)
+       for {
+               c := v.AuxInt
+               s := v.Aux
+               _ = v.Args[2]
+               p := v.Args[0]
+               i := v.Args[1]
+               x := v.Args[2]
+               if x.Op != OpAMD64MOVWstoreconstidx2 {
                        break
                }
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64CMPLconst {
+               a := x.AuxInt
+               if x.Aux != s {
                        break
                }
-               if v_1_1_0.AuxInt != 32 {
+               _ = x.Args[2]
+               if p != x.Args[0] {
                        break
                }
-               v_1_1_0_0 := v_1_1_0.Args[0]
-               if v_1_1_0_0.Op != OpAMD64NEGL {
+               if i != x.Args[1] {
                        break
                }
-               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-               if v_1_1_0_0_0.Op != OpAMD64ADDLconst {
+               mem := x.Args[2]
+               if !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
                        break
                }
-               if v_1_1_0_0_0.AuxInt != -32 {
+               v.reset(OpAMD64MOVLstoreconstidx1)
+               v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type)
+               v0.AuxInt = 1
+               v0.AddArg(i)
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
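Note that the MOVWstoreconstidx2 merge just above emits MOVLstoreconstidx1 with a fresh SHLQconst [1] on the index instead of keeping the 2-scaled form; the 32-bit store apparently has no 2-scaled indexed variant in this op set, so the rewrite preserves the address by doubling the index explicitly and dropping to scale 1. A trivial check of the identity it relies on:

        package main

        import "fmt"

        func main() {
                p, idx, off := int64(1000), int64(7), int64(4)
                // scale-2 addressing rewritten as scale-1 with a doubled index
                fmt.Println(p+2*idx+off == p+(idx<<1)+off) // prints true
        }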
+func rewriteValueAMD64_OpAMD64MOVWstoreidx1_0(v *Value) bool {
+       // match: (MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem)
+       // cond:
+       // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
                        break
                }
-               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst {
+               if v_1.AuxInt != 1 {
                        break
                }
-               if v_1_1_0_0_0_0.AuxInt != 31 {
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64MOVWstoreidx2)
+               v.AuxInt = c
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if y != v_1_1_0_0_0_0.Args[0] {
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
                        break
                }
-               v.reset(OpAMD64ROLL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL (SHLL x y) (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])) (SHRL x (NEGL y))))
-       // cond:
-       // result: (ROLL x y)
+       // match: (MOVWstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLL {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDL {
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
                        break
                }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SBBLcarrymask {
+               v.reset(OpAMD64MOVWstoreidx1)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64CMPLconst {
+               if v_2.AuxInt != 16 {
                        break
                }
-               if v_1_0_0.AuxInt != 32 {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
                        break
                }
-               v_1_0_0_0 := v_1_0_0.Args[0]
-               if v_1_0_0_0.Op != OpAMD64NEGL {
+               if x.AuxInt != i-2 {
                        break
                }
-               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
-               if v_1_0_0_0_0.Op != OpAMD64ADDLconst {
+               if x.Aux != s {
                        break
                }
-               if v_1_0_0_0_0.AuxInt != -32 {
+               _ = x.Args[3]
+               if p != x.Args[0] {
                        break
                }
-               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
-               if v_1_0_0_0_0_0.Op != OpAMD64ANDLconst {
+               if idx != x.Args[1] {
                        break
                }
-               if v_1_0_0_0_0_0.AuxInt != 31 {
+               if w != x.Args[2] {
                        break
                }
-               if y != v_1_0_0_0_0_0.Args[0] {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHRL {
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
                        break
                }
-               _ = v_1_1.Args[1]
-               if x != v_1_1.Args[0] {
+               if v_2.AuxInt != 16 {
                        break
                }
-               v_1_1_1 := v_1_1.Args[1]
-               if v_1_1_1.Op != OpAMD64NEGL {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
                        break
                }
-               if y != v_1_1_1.Args[0] {
+               if x.AuxInt != i-2 {
                        break
                }
-               v.reset(OpAMD64ROLL)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (ORL (ANDL (SHRL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))) (SHLL x y))
-       // cond:
-       // result: (ROLL x y)
-       for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               if x.Aux != s {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHRL {
+               _ = x.Args[3]
+               if p != x.Args[0] {
                        break
                }
-               _ = v_0_0.Args[1]
-               x := v_0_0.Args[0]
-               v_0_0_1 := v_0_0.Args[1]
-               if v_0_0_1.Op != OpAMD64NEGL {
+               if idx != x.Args[1] {
                        break
                }
-               y := v_0_0_1.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SBBLcarrymask {
+               if w != x.Args[2] {
                        break
                }
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64CMPLconst {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               if v_0_1_0.AuxInt != 32 {
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
+       for {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRLconst {
                        break
                }
-               v_0_1_0_0 := v_0_1_0.Args[0]
-               if v_0_1_0_0.Op != OpAMD64NEGL {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
                        break
                }
-               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
-               if v_0_1_0_0_0.Op != OpAMD64ADDLconst {
+               if x.AuxInt != i-2 {
                        break
                }
-               if v_0_1_0_0_0.AuxInt != -32 {
+               if x.Aux != s {
                        break
                }
-               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
-               if v_0_1_0_0_0_0.Op != OpAMD64ANDLconst {
+               _ = x.Args[3]
+               if p != x.Args[0] {
                        break
                }
-               if v_0_1_0_0_0_0.AuxInt != 31 {
+               if idx != x.Args[1] {
                        break
                }
-               if y != v_0_1_0_0_0_0.Args[0] {
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRLconst {
                        break
                }
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLL {
+               if w0.AuxInt != j-16 {
                        break
                }
-               _ = v_1.Args[1]
-               if x != v_1.Args[0] {
+               if w != w0.Args[0] {
                        break
                }
-               if y != v_1.Args[1] {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64ROLL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])) (SHRL x (NEGL y))) (SHLL x y))
-       // cond:
-       // result: (ROLL x y)
+       // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SBBLcarrymask {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx1 {
                        break
                }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64CMPLconst {
+               if x.AuxInt != i-2 {
                        break
                }
-               if v_0_0_0.AuxInt != 32 {
+               if x.Aux != s {
                        break
                }
-               v_0_0_0_0 := v_0_0_0.Args[0]
-               if v_0_0_0_0.Op != OpAMD64NEGL {
+               _ = x.Args[3]
+               if p != x.Args[0] {
                        break
                }
-               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
-               if v_0_0_0_0_0.Op != OpAMD64ADDLconst {
+               if idx != x.Args[1] {
                        break
                }
-               if v_0_0_0_0_0.AuxInt != -32 {
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
                        break
                }
-               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
-               if v_0_0_0_0_0_0.Op != OpAMD64ANDLconst {
+               if w0.AuxInt != j-16 {
                        break
                }
-               if v_0_0_0_0_0_0.AuxInt != 31 {
+               if w != w0.Args[0] {
                        break
                }
-               y := v_0_0_0_0_0_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHRL {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               _ = v_0_1.Args[1]
-               x := v_0_1.Args[0]
-               v_0_1_1 := v_0_1.Args[1]
-               if v_0_1_1.Op != OpAMD64NEGL {
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v.AddArg(idx)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
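The displacement folds in the indexed-store functions use is32Bit(c+d) as their guard; unlike canAdd above, it operates on a plain offset rather than a packed ValAndOff. Its behavior amounts to the following check (a sketch consistent with how the rules use it):

        package demo

        // is32Bit reports whether n is representable as a signed 32-bit
        // displacement, the limit for an x86-64 addressing-mode offset.
        func is32Bit(n int64) bool {
                return n == int64(int32(n))
        }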
+func rewriteValueAMD64_OpAMD64MOVWstoreidx2_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem)
+       // cond: is32Bit(c+d)
+       // result: (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               if y != v_0_1_1.Args[0] {
+               d := v_0.AuxInt
+               ptr := v_0.Args[0]
+               idx := v.Args[1]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + d)) {
                        break
                }
+               v.reset(OpAMD64MOVWstoreidx2)
+               v.AuxInt = c + d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem)
+       // cond: is32Bit(c+2*d)
+       // result: (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
+       for {
+               c := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[3]
+               ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1.Args[1]
-               if x != v_1.Args[0] {
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if y != v_1.Args[1] {
+               d := v_1.AuxInt
+               idx := v_1.Args[0]
+               val := v.Args[2]
+               mem := v.Args[3]
+               if !(is32Bit(c + 2*d)) {
                        break
                }
-               v.reset(OpAMD64ROLL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64MOVWstoreidx2)
+               v.AuxInt = c + 2*d
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(idx)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL (SHRL x y) (ANDL (SHLL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))))
-       // cond:
-       // result: (RORL x y)
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRL {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRLconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDL {
+               if v_2.AuxInt != 16 {
                        break
                }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLL {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx2 {
                        break
                }
-               _ = v_1_0.Args[1]
-               if x != v_1_0.Args[0] {
+               if x.AuxInt != i-2 {
                        break
                }
-               v_1_0_1 := v_1_0.Args[1]
-               if v_1_0_1.Op != OpAMD64NEGQ {
+               if x.Aux != s {
                        break
                }
-               if y != v_1_0_1.Args[0] {
-                       break
-               }
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SBBLcarrymask {
-                       break
-               }
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64CMPQconst {
-                       break
-               }
-               if v_1_1_0.AuxInt != 32 {
-                       break
-               }
-               v_1_1_0_0 := v_1_1_0.Args[0]
-               if v_1_1_0_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-               if v_1_1_0_0_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               if v_1_1_0_0_0.AuxInt != -32 {
+               _ = x.Args[3]
+               if p != x.Args[0] {
                        break
                }
-               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst {
+               if idx != x.Args[1] {
                        break
                }
-               if v_1_1_0_0_0_0.AuxInt != 31 {
+               if w != x.Args[2] {
                        break
                }
-               if y != v_1_1_0_0_0_0.Args[0] {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64RORL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
-       // match: (ORL (SHRL x y) (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHLL x (NEGQ y))))
-       // cond:
-       // result: (RORL x y)
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRL {
-                       break
-               }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDL {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SBBLcarrymask {
-                       break
-               }
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64CMPQconst {
-                       break
-               }
-               if v_1_0_0.AuxInt != 32 {
-                       break
-               }
-               v_1_0_0_0 := v_1_0_0.Args[0]
-               if v_1_0_0_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
-               if v_1_0_0_0_0.Op != OpAMD64ADDQconst {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
                        break
                }
-               if v_1_0_0_0_0.AuxInt != -32 {
+               if v_2.AuxInt != 16 {
                        break
                }
-               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
-               if v_1_0_0_0_0_0.Op != OpAMD64ANDQconst {
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx2 {
                        break
                }
-               if v_1_0_0_0_0_0.AuxInt != 31 {
+               if x.AuxInt != i-2 {
                        break
                }
-               if y != v_1_0_0_0_0_0.Args[0] {
+               if x.Aux != s {
                        break
                }
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHLL {
+               _ = x.Args[3]
+               if p != x.Args[0] {
                        break
                }
-               _ = v_1_1.Args[1]
-               if x != v_1_1.Args[0] {
+               if idx != x.Args[1] {
                        break
                }
-               v_1_1_1 := v_1_1.Args[1]
-               if v_1_1_1.Op != OpAMD64NEGQ {
+               if w != x.Args[2] {
                        break
                }
-               if y != v_1_1_1.Args[0] {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
-               v.reset(OpAMD64RORL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w)
+               v.AddArg(mem)
                return true
        }
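+       // Same merge for interior halves of a wider value: the two 16-bit
+       // stores hold w>>j and w>>(j-16), so a single 32-bit store of w>>(j-16)
+       // at the lower address writes exactly the same bits.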
-       // match: (ORL (ANDL (SHLL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))) (SHRL x y))
-       // cond:
-       // result: (RORL x y)
+       // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
+       // cond: x.Uses == 1 && clobber(x)
+       // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_0.Args[1]
-               x := v_0_0.Args[0]
-               v_0_0_1 := v_0_0.Args[1]
-               if v_0_0_1.Op != OpAMD64NEGQ {
+               i := v.AuxInt
+               s := v.Aux
+               _ = v.Args[3]
+               p := v.Args[0]
+               idx := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SHRQconst {
                        break
                }
-               y := v_0_0_1.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SBBLcarrymask {
+               j := v_2.AuxInt
+               w := v_2.Args[0]
+               x := v.Args[3]
+               if x.Op != OpAMD64MOVWstoreidx2 {
                        break
                }
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64CMPQconst {
+               if x.AuxInt != i-2 {
                        break
                }
-               if v_0_1_0.AuxInt != 32 {
+               if x.Aux != s {
                        break
                }
-               v_0_1_0_0 := v_0_1_0.Args[0]
-               if v_0_1_0_0.Op != OpAMD64NEGQ {
+               _ = x.Args[3]
+               if p != x.Args[0] {
                        break
                }
-               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
-               if v_0_1_0_0_0.Op != OpAMD64ADDQconst {
+               if idx != x.Args[1] {
                        break
                }
-               if v_0_1_0_0_0.AuxInt != -32 {
+               w0 := x.Args[2]
+               if w0.Op != OpAMD64SHRQconst {
                        break
                }
-               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
-               if v_0_1_0_0_0_0.Op != OpAMD64ANDQconst {
+               if w0.AuxInt != j-16 {
                        break
                }
-               if v_0_1_0_0_0_0.AuxInt != 31 {
+               if w != w0.Args[0] {
                        break
                }
-               if y != v_0_1_0_0_0_0.Args[0] {
+               mem := x.Args[3]
+               if !(x.Uses == 1 && clobber(x)) {
                        break
                }
+               v.reset(OpAMD64MOVLstoreidx1)
+               v.AuxInt = i - 2
+               v.Aux = s
+               v.AddArg(p)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type)
+               v0.AuxInt = 1
+               v0.AddArg(idx)
+               v.AddArg(v0)
+               v.AddArg(w0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
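+// MULL with a constant operand is canonicalized to MULLconst; both argument
+// orders are matched because multiplication is commutative.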
+func rewriteValueAMD64_OpAMD64MULL_0(v *Value) bool {
+       // match: (MULL x (MOVLconst [c]))
+       // cond:
+       // result: (MULLconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRL {
-                       break
-               }
-               _ = v_1.Args[1]
-               if x != v_1.Args[0] {
-                       break
-               }
-               if y != v_1.Args[1] {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64RORL)
+               c := v_1.AuxInt
+               v.reset(OpAMD64MULLconst)
+               v.AuxInt = c
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (ORL (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHLL x (NEGQ y))) (SHRL x y))
+       // match: (MULL (MOVLconst [c]) x)
        // cond:
-       // result: (RORL x y)
+       // result: (MULLconst [c] x)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SBBLcarrymask {
-                       break
-               }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64CMPQconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 32 {
-                       break
-               }
-               v_0_0_0_0 := v_0_0_0.Args[0]
-               if v_0_0_0_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
-               if v_0_0_0_0_0.Op != OpAMD64ADDQconst {
-                       break
-               }
-               if v_0_0_0_0_0.AuxInt != -32 {
-                       break
-               }
-               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
-               if v_0_0_0_0_0_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               if v_0_0_0_0_0_0.AuxInt != 31 {
-                       break
-               }
-               y := v_0_0_0_0_0_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_1.Args[1]
-               x := v_0_1.Args[0]
-               v_0_1_1 := v_0_1.Args[1]
-               if v_0_1_1.Op != OpAMD64NEGQ {
-                       break
-               }
-               if y != v_0_1_1.Args[0] {
-                       break
-               }
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRL {
-                       break
-               }
-               _ = v_1.Args[1]
-               if x != v_1.Args[0] {
-                       break
-               }
-               if y != v_1.Args[1] {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64RORL)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64MULLconst)
+               v.AuxInt = c
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64ORL_20(v *Value) bool {
-       // match: (ORL (SHRL x y) (ANDL (SHLL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))))
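+// MULLconst folds constant multiplies. The result goes through
+// int64(int32(...)) so the folded constant wraps exactly as a 32-bit
+// multiply would.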
+func rewriteValueAMD64_OpAMD64MULLconst_0(v *Value) bool {
+       // match: (MULLconst [c] (MULLconst [d] x))
        // cond:
-       // result: (RORL x y)
+       // result: (MULLconst [int64(int32(c * d))] x)
        for {
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRL {
+               if v_0.Op != OpAMD64MULLconst {
                        break
                }
-               _ = v_0.Args[1]
+               d := v_0.AuxInt
                x := v_0.Args[0]
-               y := v_0.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDL {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               if x != v_1_0.Args[0] {
-                       break
-               }
-               v_1_0_1 := v_1_0.Args[1]
-               if v_1_0_1.Op != OpAMD64NEGL {
-                       break
-               }
-               if y != v_1_0_1.Args[0] {
-                       break
-               }
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SBBLcarrymask {
-                       break
-               }
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64CMPLconst {
-                       break
-               }
-               if v_1_1_0.AuxInt != 32 {
-                       break
-               }
-               v_1_1_0_0 := v_1_1_0.Args[0]
-               if v_1_1_0_0.Op != OpAMD64NEGL {
-                       break
-               }
-               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
-               if v_1_1_0_0_0.Op != OpAMD64ADDLconst {
-                       break
-               }
-               if v_1_1_0_0_0.AuxInt != -32 {
-                       break
-               }
-               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
-               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst {
-                       break
-               }
-               if v_1_1_0_0_0_0.AuxInt != 31 {
-                       break
-               }
-               if y != v_1_1_0_0_0_0.Args[0] {
-                       break
-               }
-               v.reset(OpAMD64RORL)
+               v.reset(OpAMD64MULLconst)
+               v.AuxInt = int64(int32(c * d))
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (ORL (SHRL x y) (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])) (SHLL x (NEGL y))))
+       // match: (MULLconst [c] (MOVLconst [d]))
        // cond:
-       // result: (RORL x y)
+       // result: (MOVLconst [int64(int32(c*d))])
        for {
-               _ = v.Args[1]
+               c := v.AuxInt
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRL {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int64(int32(c * d))
+               return true
+       }
+       return false
+}
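+// MULQ with a constant operand becomes MULQconst only under is32Bit(c):
+// the immediate form of 64-bit IMUL takes a sign-extended 32-bit operand,
+// so larger constants must stay in a register.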
+func rewriteValueAMD64_OpAMD64MULQ_0(v *Value) bool {
+       // match: (MULQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (MULQconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDL {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SBBLcarrymask {
-                       break
-               }
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64CMPLconst {
-                       break
-               }
-               if v_1_0_0.AuxInt != 32 {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v_1_0_0_0 := v_1_0_0.Args[0]
-               if v_1_0_0_0.Op != OpAMD64NEGL {
+               c := v_1.AuxInt
+               if !(is32Bit(c)) {
                        break
                }
-               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
-               if v_1_0_0_0_0.Op != OpAMD64ADDLconst {
+               v.reset(OpAMD64MULQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (MULQconst [c] x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               if v_1_0_0_0_0.AuxInt != -32 {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
                        break
                }
-               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
-               if v_1_0_0_0_0_0.Op != OpAMD64ANDLconst {
+               v.reset(OpAMD64MULQconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
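+// MULQconst strength-reduces small constant multiplies into LEA instructions,
+// where LEAQ1 a b = a+b, LEAQ2 a b = a+2*b, LEAQ4 a b = a+4*b and
+// LEAQ8 a b = a+8*b. For example, (MULQconst [3] x) becomes (LEAQ2 x x)
+// = x+2*x, and (MULQconst [7] x) becomes (LEAQ8 (NEGQ x) x) = -x+8*x.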
+func rewriteValueAMD64_OpAMD64MULQconst_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULQconst [c] (MULQconst [d] x))
+       // cond: is32Bit(c*d)
+       // result: (MULQconst [c * d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MULQconst {
                        break
                }
-               if v_1_0_0_0_0_0.AuxInt != 31 {
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(is32Bit(c * d)) {
                        break
                }
-               if y != v_1_0_0_0_0_0.Args[0] {
+               v.reset(OpAMD64MULQconst)
+               v.AuxInt = c * d
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [-1] x)
+       // cond:
+       // result: (NEGQ x)
+       for {
+               if v.AuxInt != -1 {
                        break
                }
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHLL {
+               x := v.Args[0]
+               v.reset(OpAMD64NEGQ)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [0] _)
+       // cond:
+       // result: (MOVQconst [0])
+       for {
+               if v.AuxInt != 0 {
                        break
                }
-               _ = v_1_1.Args[1]
-               if x != v_1_1.Args[0] {
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (MULQconst [1] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 1 {
                        break
                }
-               v_1_1_1 := v_1_1.Args[1]
-               if v_1_1_1.Op != OpAMD64NEGL {
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [3] x)
+       // cond:
+       // result: (LEAQ2 x x)
+       for {
+               if v.AuxInt != 3 {
                        break
                }
-               if y != v_1_1_1.Args[0] {
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [5] x)
+       // cond:
+       // result: (LEAQ4 x x)
+       for {
+               if v.AuxInt != 5 {
                        break
                }
-               v.reset(OpAMD64RORL)
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (ORL (ANDL (SHLL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))) (SHRL x y))
+       // match: (MULQconst [7] x)
        // cond:
-       // result: (RORL x y)
+       // result: (LEAQ8 (NEGQ <v.Type> x) x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ANDL {
+               if v.AuxInt != 7 {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLL {
-                       break
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, v.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [9] x)
+       // cond:
+       // result: (LEAQ8 x x)
+       for {
+               if v.AuxInt != 9 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [11] x)
+       // cond:
+       // result: (LEAQ2 x (LEAQ4 <v.Type> x x))
+       for {
+               if v.AuxInt != 11 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [13] x)
+       // cond:
+       // result: (LEAQ4 x (LEAQ2 <v.Type> x x))
+       for {
+               if v.AuxInt != 13 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
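+// Strength reduction continues with nested LEAs for 21, 25, 37, 41 and 73
+// (e.g. (LEAQ4 x (LEAQ4 x x)) = x + 4*(5*x) = 21*x), and with shift-based
+// forms for constants a small step from a power of two:
+// c = 2^n-1 gives (2^n)*x - x, and c = 2^n+k for k in {1,2,4,8} gives
+// (2^n)*x + k*x via LEAQ1/2/4/8.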
+func rewriteValueAMD64_OpAMD64MULQconst_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULQconst [21] x)
+       // cond:
+       // result: (LEAQ4 x (LEAQ4 <v.Type> x x))
+       for {
+               if v.AuxInt != 21 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [25] x)
+       // cond:
+       // result: (LEAQ8 x (LEAQ2 <v.Type> x x))
+       for {
+               if v.AuxInt != 25 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [37] x)
+       // cond:
+       // result: (LEAQ4 x (LEAQ8 <v.Type> x x))
+       for {
+               if v.AuxInt != 37 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [41] x)
+       // cond:
+       // result: (LEAQ8 x (LEAQ4 <v.Type> x x))
+       for {
+               if v.AuxInt != 41 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [73] x)
+       // cond:
+       // result: (LEAQ8 x (LEAQ8 <v.Type> x x))
+       for {
+               if v.AuxInt != 73 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c+1) && c >= 15
+       // result: (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c+1) && c >= 15) {
+                       break
+               }
+               v.reset(OpAMD64SUBQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-1) && c >= 17
+       // result: (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-1) && c >= 17) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ1)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-2) && c >= 34
+       // result: (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-2) && c >= 34) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ2)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 2)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-4) && c >= 68
+       // result: (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-4) && c >= 68) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ4)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 4)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-8) && c >= 136
+       // result: (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-8) && c >= 136) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 8)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
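+// When c is 3, 5 or 9 times a power of two, the small factor is built with
+// one LEA and the power of two with a shift, e.g. 48*x = (3*x)<<4 via
+// (SHLQconst [4] (LEAQ2 x x)). A constant multiply of a constant operand
+// folds away entirely.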
+func rewriteValueAMD64_OpAMD64MULQconst_20(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (MULQconst [c] x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] (MOVQconst [d]))
+       // cond:
+       // result: (MOVQconst [c*d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = c * d
+               return true
+       }
+       return false
+}
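+// MULSD folds a load into the multiply's memory operand (MULSDmem) when
+// canMergeLoad proves the fold is safe; both argument orders are matched.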
+func rewriteValueAMD64_OpAMD64MULSD_0(v *Value) bool {
+       // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSDmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVSDload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64MULSDmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSD l:(MOVSDload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSDmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVSDload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64MULSDmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
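+// MULSDmem folds addressing arithmetic (ADDQconst, LEAQ) into its
+// offset/symbol. The last rule forwards a value that was just stored to the
+// same address back into the multiply through MOVQi2f, a register-to-register
+// bit move, instead of reloading it from memory.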
+func rewriteValueAMD64_OpAMD64MULSDmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (MULSDmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MULSDmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MULSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSDmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MULSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MULSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _))
+       // cond:
+       // result: (MULSD x (MOVQi2f y))
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVQstore {
+                       break
+               }
+               if v_2.AuxInt != off {
+                       break
+               }
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64MULSD)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQi2f, typ.Float64)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULSS_0(v *Value) bool {
+       // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSSmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVSSload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64MULSSmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSS l:(MOVSSload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (MULSSmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVSSload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64MULSSmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64MULSSmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (MULSSmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (MULSSmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64MULSSmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSSmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (MULSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64MULSSmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (MULSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _))
+       // cond:
+       // result: (MULSS x (MOVLi2f y))
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVLstore {
+                       break
+               }
+               if v_2.AuxInt != off {
+                       break
+               }
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64MULSS)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLi2f, typ.Float32)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64NEGL_0(v *Value) bool {
+       // match: (NEGL (MOVLconst [c]))
+       // cond:
+       // result: (MOVLconst [int64(int32(-c))])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int64(int32(-c))
+               return true
+       }
+       return false
+}
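+// NEGQ also simplifies NEGQ(ADDQconst [c] (NEGQ x)) to ADDQconst [-c] x.
+// The c != -(1<<31) guard presumably keeps -c within the 32-bit immediate
+// range that ADDQconst requires.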
+func rewriteValueAMD64_OpAMD64NEGQ_0(v *Value) bool {
+       // match: (NEGQ (MOVQconst [c]))
+       // cond:
+       // result: (MOVQconst [-c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = -c
+               return true
+       }
+       // match: (NEGQ (ADDQconst [c] (NEGQ x)))
+       // cond: c != -(1<<31)
+       // result: (ADDQconst [-c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               x := v_0_0.Args[0]
+               if !(c != -(1 << 31)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = -c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64NOTL_0(v *Value) bool {
+       // match: (NOTL (MOVLconst [c]))
+       // cond:
+       // result: (MOVLconst [^c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = ^c
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64NOTQ_0(v *Value) bool {
+       // match: (NOTQ (MOVQconst [c]))
+       // cond:
+       // result: (MOVQconst [^c])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = ^c
+               return true
+       }
+       return false
+}
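+// ORL canonicalizes constants into ORLconst and recognizes rotates: a pair of
+// opposite shifts whose counts sum to the operand width becomes ROLLconst
+// (with 16- and 8-bit variants below), and the long SBBLcarrymask patterns
+// further down collapse the generic variable-amount rotate lowering into ROLL.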
+func rewriteValueAMD64_OpAMD64ORL_0(v *Value) bool {
+       // match: (ORL x (MOVLconst [c]))
+       // cond:
+       // result: (ORLconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpAMD64ORLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (MOVLconst [c]) x)
+       // cond:
+       // result: (ORLconst [c] x)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64ORLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (SHLLconst x [c]) (SHRLconst x [d]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRLconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL (SHRLconst x [d]) (SHLLconst x [c]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRLconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
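+       // The 16- and 8-bit rotate forms additionally require c < 16 (resp.
+       // c < 8) and a matching result type size, since only the low bits of
+       // the value take part in the rotation.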
+       // match: (ORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: d==8-c && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRBconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: d==8-c && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
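+       // Variable rotate: (SHLL x y) supplies the high bits and
+       // (SHRL x (NEGQ y)) the low bits, since shift counts are taken mod 32,
+       // so x>>(-y) == x>>((32-y)&31). The SBBLcarrymask term appears to zero
+       // out the right-shift contribution when y&31 == 0, where the SHRL
+       // would otherwise wrongly contribute x itself.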
+       // match: (ORL (SHLL x y) (ANDL (SHRL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
+                       break
+               }
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               if y != v_1_0_1.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPQconst {
+                       break
+               }
+               if v_1_1_0.AuxInt != 32 {
+                       break
+               }
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_1_1_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_1_1_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1_0_0_0_0.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (SHLL x y) (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHRL x (NEGQ y))))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPQconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 32 {
+                       break
+               }
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_1_0_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_1_0_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_0_0_0_0_0.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               if x != v_1_1.Args[0] {
+                       break
+               }
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               if y != v_1_1_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
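+// The remaining variants match the same variable-rotate pattern with the
+// outer ORL and inner ANDL operands commuted, and with 32-bit shift-count
+// arithmetic (NEGL/CMPLconst/ADDLconst) in place of the 64-bit forms.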
+func rewriteValueAMD64_OpAMD64ORL_10(v *Value) bool {
+       // match: (ORL (ANDL (SHRL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))) (SHLL x y))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPQconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 32 {
+                       break
+               }
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_0_1_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_0_1_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_0_1_0_0_0_0.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHRL x (NEGQ y))) (SHLL x y))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 32 {
+                       break
+               }
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_0_0_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_0_0_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_0_1.Args[1]
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               if y != v_0_1_1.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (SHLL x y) (ANDL (SHRL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
+                       break
+               }
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGL {
+                       break
+               }
+               if y != v_1_0_1.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPLconst {
+                       break
+               }
+               if v_1_1_0.AuxInt != 32 {
+                       break
+               }
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGL {
+                       break
+               }
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               if v_1_1_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               if v_1_1_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1_0_0_0_0.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (SHLL x y) (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])) (SHRL x (NEGL y))))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPLconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 32 {
+                       break
+               }
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGL {
+                       break
+               }
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               if v_1_0_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               if v_1_0_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_0_0_0_0_0.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               if x != v_1_1.Args[0] {
+                       break
+               }
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGL {
+                       break
+               }
+               if y != v_1_1_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SHRL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))) (SHLL x y))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGL {
+                       break
+               }
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPLconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 32 {
+                       break
+               }
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGL {
+                       break
+               }
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               if v_0_1_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               if v_0_1_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_0_1_0_0_0_0.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])) (SHRL x (NEGL y))) (SHLL x y))
+       // cond:
+       // result: (ROLL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPLconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 32 {
+                       break
+               }
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGL {
+                       break
+               }
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               if v_0_0_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               if v_0_0_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_0_1.Args[1]
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGL {
+                       break
+               }
+               if y != v_0_1_1.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64ROLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
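+       // Editorial note: the RORL rules below are the mirror image of the ROLL
+       // ones above: the primary shift is SHRL and the masked complement is
+       // SHLL x (NEG y), matching (as an assumed illustration) the rotate-right
+       // idiom x>>s | x<<(32-s) with s = y&31.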
+       // match: (ORL (SHRL x y) (ANDL (SHLL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))))
+       // cond:
+       // result: (RORL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
+                       break
+               }
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               if y != v_1_0_1.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPQconst {
+                       break
+               }
+               if v_1_1_0.AuxInt != 32 {
+                       break
+               }
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_1_1_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_1_1_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1_0_0_0_0.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (SHRL x y) (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHLL x (NEGQ y))))
+       // cond:
+       // result: (RORL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPQconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 32 {
+                       break
+               }
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_1_0_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_1_0_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_0_0_0_0_0.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               if x != v_1_1.Args[0] {
+                       break
+               }
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               if y != v_1_1_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SHLL x (NEGQ y)) (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32]))) (SHRL x y))
+       // cond:
+       // result: (RORL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               x := v_0_0.Args[0]
+               v_0_0_1 := v_0_0.Args[1]
+               if v_0_0_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               y := v_0_0_1.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64CMPQconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 32 {
+                       break
+               }
+               v_0_1_0_0 := v_0_1_0.Args[0]
+               if v_0_1_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_0_1_0_0_0 := v_0_1_0_0.Args[0]
+               if v_0_1_0_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_0_1_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_0_1_0_0_0_0 := v_0_1_0_0_0.Args[0]
+               if v_0_1_0_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_0_1_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_0_1_0_0_0_0.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SBBLcarrymask (CMPQconst (NEGQ (ADDQconst (ANDQconst y [31]) [-32])) [32])) (SHLL x (NEGQ y))) (SHRL x y))
+       // cond:
+       // result: (RORL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64CMPQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 32 {
+                       break
+               }
+               v_0_0_0_0 := v_0_0_0.Args[0]
+               if v_0_0_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_0_0_0_0_0 := v_0_0_0_0.Args[0]
+               if v_0_0_0_0_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               if v_0_0_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
+               if v_0_0_0_0_0_0.Op != OpAMD64ANDQconst {
+                       break
+               }
+               if v_0_0_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               y := v_0_0_0_0_0_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0_1.Args[1]
+               x := v_0_1.Args[0]
+               v_0_1_1 := v_0_1.Args[1]
+               if v_0_1_1.Op != OpAMD64NEGQ {
+                       break
+               }
+               if y != v_0_1_1.Args[0] {
+                       break
+               }
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               if y != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
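+// Editorial note: the rule generator splits each op's rewrites into numbered
+// helper functions (_0, _10, _20, ...) that the per-op dispatcher tries in
+// order until one fires, keeping each generated function a manageable size.
+// The _20 chunk continues the RORL recognition with the 32-bit NEGL/CMPLconst
+// operand forms.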
+func rewriteValueAMD64_OpAMD64ORL_20(v *Value) bool {
+       // match: (ORL (SHRL x y) (ANDL (SHLL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))))
+       // cond:
+       // result: (RORL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               if x != v_1_0.Args[0] {
+                       break
+               }
+               v_1_0_1 := v_1_0.Args[1]
+               if v_1_0_1.Op != OpAMD64NEGL {
+                       break
+               }
+               if y != v_1_0_1.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64CMPLconst {
+                       break
+               }
+               if v_1_1_0.AuxInt != 32 {
+                       break
+               }
+               v_1_1_0_0 := v_1_1_0.Args[0]
+               if v_1_1_0_0.Op != OpAMD64NEGL {
+                       break
+               }
+               v_1_1_0_0_0 := v_1_1_0_0.Args[0]
+               if v_1_1_0_0_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               if v_1_1_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_1_1_0_0_0_0 := v_1_1_0_0_0.Args[0]
+               if v_1_1_0_0_0_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               if v_1_1_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_1_0_0_0_0.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (SHRL x y) (ANDL (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32])) (SHLL x (NEGL y))))
+       // cond:
+       // result: (RORL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRL {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SBBLcarrymask {
+                       break
+               }
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64CMPLconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 32 {
+                       break
+               }
+               v_1_0_0_0 := v_1_0_0.Args[0]
+               if v_1_0_0_0.Op != OpAMD64NEGL {
+                       break
+               }
+               v_1_0_0_0_0 := v_1_0_0_0.Args[0]
+               if v_1_0_0_0_0.Op != OpAMD64ADDLconst {
+                       break
+               }
+               if v_1_0_0_0_0.AuxInt != -32 {
+                       break
+               }
+               v_1_0_0_0_0_0 := v_1_0_0_0_0.Args[0]
+               if v_1_0_0_0_0_0.Op != OpAMD64ANDLconst {
+                       break
+               }
+               if v_1_0_0_0_0_0.AuxInt != 31 {
+                       break
+               }
+               if y != v_1_0_0_0_0_0.Args[0] {
+                       break
+               }
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               if x != v_1_1.Args[0] {
+                       break
+               }
+               v_1_1_1 := v_1_1.Args[1]
+               if v_1_1_1.Op != OpAMD64NEGL {
+                       break
+               }
+               if y != v_1_1_1.Args[0] {
+                       break
+               }
+               v.reset(OpAMD64RORL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (ORL (ANDL (SHLL x (NEGL y)) (SBBLcarrymask (CMPLconst (NEGL (ADDLconst (ANDLconst y [31]) [-32])) [32]))) (SHRL x y))
+       // cond:
+       // result: (RORL x y)
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ANDL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLL {
+                       break
                }
                _ = v_0_0.Args[1]
                x := v_0_0.Args[0]
@@ -37128,122 +40073,664 @@ func rewriteValueAMD64_OpAMD64SBBLcarrymask_0(v *Value) bool {
                v.AuxInt = -1
                return true
        }
-       // match: (SBBLcarrymask (FlagLT_UGT))
+       // match: (SBBLcarrymask (FlagLT_UGT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SBBLcarrymask (FlagGT_ULT))
+       // cond:
+       // result: (MOVLconst [-1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (SBBLcarrymask (FlagGT_UGT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
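+// Editorial note: SBB{L,Q}carrymask models "SBB reg, reg", which materializes
+// 0 or -1 from the carry flag. Once the flags operand has been folded to one
+// of the Flag* pseudo-values, these rules replace the mask with the matching
+// MOV{L,Q}const: the carry-set states (*_ULT) give -1, the carry-clear states
+// (FlagEQ, *_UGT) give 0. As a Go-level sketch of the semantics (illustrative,
+// not compiler source):
+//
+//      mask := int64(0)
+//      if borrow { // the unsigned compare produced a carry
+//              mask = -1
+//      }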
+func rewriteValueAMD64_OpAMD64SBBQcarrymask_0(v *Value) bool {
+       // match: (SBBQcarrymask (FlagEQ))
+       // cond:
+       // result: (MOVQconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagEQ {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SBBQcarrymask (FlagLT_ULT))
+       // cond:
+       // result: (MOVQconst [-1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (SBBQcarrymask (FlagLT_UGT))
+       // cond:
+       // result: (MOVQconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SBBQcarrymask (FlagGT_ULT))
+       // cond:
+       // result: (MOVQconst [-1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = -1
+               return true
+       }
+       // match: (SBBQcarrymask (FlagGT_UGT))
+       // cond:
+       // result: (MOVQconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
+               return true
+       }
+       return false
+}
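+// Editorial note: the SET* rules below all follow one template. A SETcc fed by
+// (InvertFlags x) becomes the condition with swapped comparison operands
+// (A<->B and AE<->BE for the unsigned orders), and a SETcc fed by a known
+// Flag* state folds to MOVLconst 0 or 1: SETA ("strictly above") is 1 only in
+// the *_UGT states, while SETAE ("above or equal") is 1 in every state except
+// *_ULT.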
+func rewriteValueAMD64_OpAMD64SETA_0(v *Value) bool {
+       // match: (SETA (InvertFlags x))
+       // cond:
+       // result: (SETB x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64InvertFlags {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETB)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SETA (FlagEQ))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagEQ {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SETA (FlagLT_ULT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SETA (FlagLT_UGT))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (SETA (FlagGT_ULT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SETA (FlagGT_UGT))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64SETAE_0(v *Value) bool {
+       // match: (SETAE (InvertFlags x))
+       // cond:
+       // result: (SETBE x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64InvertFlags {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETBE)
+               v.AddArg(x)
+               return true
+       }
+       // match: (SETAE (FlagEQ))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagEQ {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (SETAE (FlagLT_ULT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SETAE (FlagLT_UGT))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (SETAE (FlagGT_ULT))
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SETAE (FlagGT_UGT))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       return false
+}
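+// Editorial note: the *mem variants fuse the flag test with the byte store
+// (SETcc to an m8 destination). They mirror the register forms (the
+// InvertFlags swap and the Flag* folds, which here store a MOVLconst 0/1
+// through MOVBstore) and additionally fold addressing: an ADDQconst or LEAQ
+// base merges into the offset when the combined displacement is still 32-bit
+// (is32Bit) and any symbols can be merged (canMergeSym/mergeSym).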
+func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SETAEmem [off] {sym} ptr (InvertFlags x) mem)
+       // cond:
+       // result: (SETBEmem [off] {sym} ptr x mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64InvertFlags {
+                       break
+               }
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64SETBEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SETAEmem [off1+off2] {sym} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SETAEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off] {sym} ptr x:(FlagEQ) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagEQ {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_ULT {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_UGT {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_ULT {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SETAEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_UGT {
+                       break
+               }
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
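+// Editorial note: InvertFlags stands for the same comparison with its operands
+// exchanged, so an unsigned "above" test on the original flags is a "below"
+// test on the inverted ones: a >u b == b <u a. That identity is why every
+// InvertFlags rule in this family pairs SETA with SETB and SETAE with SETBE,
+// in both the register and the memory forms.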
+func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SETAmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (MOVLconst [0])
+       // result: (SETBmem [off] {sym} ptr x mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_UGT {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               x := v_1.Args[0]
+               mem := v.Args[2]
+               v.reset(OpAMD64SETBmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v.AddArg(x)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBLcarrymask (FlagGT_ULT))
-       // cond:
-       // result: (MOVLconst [-1])
+       // match: (SETAmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SETAmem [off1+off2] {sym} base val mem)
        for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_ULT {
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = -1
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64SETAmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBLcarrymask (FlagGT_UGT))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETAmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SETAmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_UGT {
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64SETAmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SBBQcarrymask_0(v *Value) bool {
-       // match: (SBBQcarrymask (FlagEQ))
+       // match: (SETAmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
-       // result: (MOVQconst [0])
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagEQ {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBQcarrymask (FlagLT_ULT))
+       // match: (SETAmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVQconst [-1])
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_ULT {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = -1
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBQcarrymask (FlagLT_UGT))
+       // match: (SETAmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
-       // result: (MOVQconst [0])
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_UGT {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBQcarrymask (FlagGT_ULT))
+       // match: (SETAmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVQconst [-1])
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_ULT {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = -1
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SBBQcarrymask (FlagGT_UGT))
+       // match: (SETAmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVQconst [0])
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_UGT {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
+               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SETA_0(v *Value) bool {
-       // match: (SETA (InvertFlags x))
+func rewriteValueAMD64_OpAMD64SETB_0(v *Value) bool {
+       // match: (SETB (InvertFlags x))
        // cond:
-       // result: (SETB x)
+       // result: (SETA x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETB)
+               v.reset(OpAMD64SETA)
                v.AddArg(x)
                return true
        }
-       // match: (SETA (FlagEQ))
+       // match: (SETB (FlagEQ))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -37255,71 +40742,71 @@ func rewriteValueAMD64_OpAMD64SETA_0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (SETA (FlagLT_ULT))
+       // match: (SETB (FlagLT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETA (FlagLT_UGT))
+       // match: (SETB (FlagLT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETA (FlagGT_ULT))
+       // match: (SETB (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETA (FlagGT_UGT))
+       // match: (SETB (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SETAE_0(v *Value) bool {
-       // match: (SETAE (InvertFlags x))
+func rewriteValueAMD64_OpAMD64SETBE_0(v *Value) bool {
+       // match: (SETBE (InvertFlags x))
        // cond:
-       // result: (SETBE x)
+       // result: (SETAE x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETBE)
+               v.reset(OpAMD64SETAE)
                v.AddArg(x)
                return true
        }
-       // match: (SETAE (FlagEQ))
+       // match: (SETBE (FlagEQ))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -37331,62 +40818,62 @@ func rewriteValueAMD64_OpAMD64SETAE_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (SETAE (FlagLT_ULT))
+       // match: (SETBE (FlagLT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETAE (FlagLT_UGT))
+       // match: (SETBE (FlagLT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETAE (FlagGT_ULT))
+       // match: (SETBE (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETAE (FlagGT_UGT))
+       // match: (SETBE (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETBEmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETAEmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETBEmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETBEmem [off] {sym} ptr x mem)
+       // result: (SETAEmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37398,7 +40885,7 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETBEmem)
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -37406,9 +40893,9 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETBEmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETAEmem [off1+off2] {sym} base val mem)
+       // result: (SETBEmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -37424,7 +40911,7 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64SETBEmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -37432,9 +40919,9 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (SETBEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETAEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETBEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -37451,7 +40938,7 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64SETBEmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -37459,7 +40946,7 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETBEmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
@@ -37482,9 +40969,9 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETBEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37500,14 +40987,14 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETBEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37523,14 +41010,14 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETBEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37546,14 +41033,14 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETBEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37569,19 +41056,19 @@ func rewriteValueAMD64_OpAMD64SETAEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETAmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETBmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETBmem [off] {sym} ptr x mem)
+       // result: (SETAmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37593,7 +41080,7 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETBmem)
+               v.reset(OpAMD64SETAmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -37601,9 +41088,9 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETBmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETAmem [off1+off2] {sym} base val mem)
+       // result: (SETBmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -37619,7 +41106,7 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETAmem)
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -37627,9 +41114,9 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (SETBmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETAmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETBmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -37646,7 +41133,7 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETAmem)
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -37654,7 +41141,7 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETBmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -37677,9 +41164,9 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETBmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37695,14 +41182,14 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETBmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37718,14 +41205,14 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETBmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37741,14 +41228,14 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETAmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETBmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -37764,104 +41251,254 @@ func rewriteValueAMD64_OpAMD64SETAmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
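
The InvertFlags rules above encode an operand-swap symmetry: InvertFlags stands for a comparison with its arguments exchanged, and exchanging the arguments of an unsigned compare turns "below" into "above" (and below-or-equal into above-or-equal), which is why SETBmem(InvertFlags x) becomes SETAmem x. A quick check of the identity on ordinary integers (illustration only, not compiler code):

package main

import "fmt"

func main() {
	// SETB after CMP a,b asks a < b; SETA after CMP b,a asks b > a.
	// Same question, so inverting the flags flips B to A.
	pairs := [][2]uint64{{1, 2}, {2, 1}, {7, 7}, {0, ^uint64(0)}}
	for _, p := range pairs {
		a, b := p[0], p[1]
		fmt.Println(a < b, b > a) // the two booleans always match
	}
}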
-func rewriteValueAMD64_OpAMD64SETB_0(v *Value) bool {
-       // match: (SETB (InvertFlags x))
-       // cond:
-       // result: (SETA x)
+func rewriteValueAMD64_OpAMD64SETEQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (SETEQ (TESTL (SHLL (MOVLconst [1]) x) y))
+       // cond: !config.nacl
+       // result: (SETAE (BTL x y))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64InvertFlags {
+               if v_0.Op != OpAMD64TESTL {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETA)
-               v.AddArg(x)
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_0.Args[1]
+               y := v_0.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETB (FlagEQ))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETEQ (TESTL y (SHLL (MOVLconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETAE (BTL x y))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagEQ {
+               if v_0.Op != OpAMD64TESTL {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0_1.Args[1]
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETB (FlagLT_ULT))
-       // cond:
-       // result: (MOVLconst [1])
+       // match: (SETEQ (TESTQ (SHLQ (MOVQconst [1]) x) y))
+       // cond: !config.nacl
+       // result: (SETAE (BTQ x y))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_ULT {
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_0.Args[1]
+               y := v_0.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (TESTQ y (SHLQ (MOVQconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETAE (BTQ x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_0_1.Args[1]
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETEQ (TESTLconst [c] x))
+       // cond: isUint32PowerOfTwo(c) && !config.nacl
+       // result: (SETAE (BTLconst [log2uint32(c)] x))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isUint32PowerOfTwo(c) && !config.nacl) {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
+               v0.AuxInt = log2uint32(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETB (FlagLT_UGT))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETEQ (TESTQconst [c] x))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAE (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_UGT {
+               if v_0.Op != OpAMD64TESTQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETB (FlagGT_ULT))
-       // cond:
-       // result: (MOVLconst [1])
+       // match: (SETEQ (TESTQ (MOVQconst [c]) x))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAE (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_ULT {
+               if v_0.Op != OpAMD64TESTQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETB (FlagGT_UGT))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETEQ (TESTQ x (MOVQconst [c])))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAE (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_UGT {
+               if v_0.Op != OpAMD64TESTQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAE)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETBE_0(v *Value) bool {
-       // match: (SETBE (InvertFlags x))
+       // match: (SETEQ (InvertFlags x))
        // cond:
-       // result: (SETAE x)
+       // result: (SETEQ x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETAE)
+               v.reset(OpAMD64SETEQ)
                v.AddArg(x)
                return true
        }
-       // match: (SETBE (FlagEQ))
+       // match: (SETEQ (FlagEQ))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -37873,19 +41510,22 @@ func rewriteValueAMD64_OpAMD64SETBE_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (SETBE (FlagLT_ULT))
+       return false
+}
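
The TESTQconst/TESTLconst rules above fire only when the constant mask has a single set bit: x AND (1<<k) is then determined by bit k alone, which a BTconst instruction copies into the carry flag. Plausible shapes for the helper predicates those conditions rely on, inferred from how the rules use them (assumptions, not copies of the real compiler helpers):

package main

import "fmt"

// isUint64PowerOfTwo reports whether c, viewed as a uint64,
// has exactly one bit set (the condition the rules need).
func isUint64PowerOfTwo(c int64) bool {
	u := uint64(c)
	return u != 0 && u&(u-1) == 0
}

// log2 returns k such that 1<<k == c, for single-bit c.
func log2(c int64) int64 {
	var k int64
	for u := uint64(c); u > 1; u >>= 1 {
		k++
	}
	return k
}

func main() {
	c := int64(1) << 37
	x := int64(0xdeadbeef) << 20
	// TEST x,c sets ZF iff x&c == 0; BT x,log2(c) sets CF to that bit.
	fmt.Println(x&c == 0, (uint64(x)>>uint(log2(c)))&1 == 0,
		isUint64PowerOfTwo(c)) // the first two always agree
}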
+func rewriteValueAMD64_OpAMD64SETEQ_10(v *Value) bool {
+       // match: (SETEQ (FlagLT_ULT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETBE (FlagLT_UGT))
+       // match: (SETEQ (FlagLT_UGT))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -37897,19 +41537,19 @@ func rewriteValueAMD64_OpAMD64SETBE_0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (SETBE (FlagGT_ULT))
+       // match: (SETEQ (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETBE (FlagGT_UGT))
+       // match: (SETEQ (FlagGT_UGT))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -37923,207 +41563,308 @@ func rewriteValueAMD64_OpAMD64SETBE_0(v *Value) bool {
        }
        return false
 }
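
The BT rewrites above, and their memory forms below, are the variable-index version of the same idea: y AND (1<<x) is zero exactly when bit x of y is zero. TEST reports that fact through ZF, which SETEQ reads; BT reports it through CF, which SETAE ("carry clear") reads, so the condition code flips from EQ to AE while the computed answer stays the same. The equivalence on plain integers, as a sketch:

package main

import "fmt"

func main() {
	y := uint32(0b1010_0110)
	for x := uint(0); x < 8; x++ {
		setEQ := y&(1<<x) == 0 // what (SETEQ (TESTL (SHLL 1 x) y)) computes
		setAE := (y>>x)&1 == 0 // what (SETAE (BTL x y)) computes: CF clear
		fmt.Println(x, setEQ, setAE) // the two booleans always match
	}
}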
-func rewriteValueAMD64_OpAMD64SETBEmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETBEmem [off] {sym} ptr (InvertFlags x) mem)
-       // cond:
-       // result: (SETAEmem [off] {sym} ptr x mem)
+       config := b.Func.Config
+       _ = config
+       // match: (SETEQmem [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
+       // cond: !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTL x y) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64InvertFlags {
+               if v_1.Op != OpAMD64TESTL {
                        break
                }
-               x := v_1.Args[0]
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_0.Args[1]
+               y := v_1.Args[1]
                mem := v.Args[2]
+               if !(!config.nacl) {
+                       break
+               }
                v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off1] {sym} (ADDQconst [off2] base) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SETBEmem [off1+off2] {sym} base val mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTL y (SHLL (MOVLconst [1]) x)) mem)
+       // cond: !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTL x y) mem)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTL {
                        break
                }
-               off2 := v_0.AuxInt
-               base := v_0.Args[0]
-               val := v.Args[1]
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_1_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_1.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64SETBEmem)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off
                v.Aux = sym
-               v.AddArg(base)
-               v.AddArg(val)
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETBEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
+       // cond: !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTQ x y) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_0.Args[1]
+               y := v_1.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64SETBEmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
+               v.reset(OpAMD64SETAEmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off] {sym} ptr x:(FlagEQ) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTQ y (SHLQ (MOVQconst [1]) x)) mem)
+       // cond: !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTQ x y) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagEQ {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_1_1_0.AuxInt != 1 {
                        break
                }
+               x := v_1_1.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTLconst [c] x) mem)
+       // cond: isUint32PowerOfTwo(c) && !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_ULT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTLconst {
                        break
                }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
+               v0.AuxInt = log2uint32(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTQconst [c] x) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_UGT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQconst {
                        break
                }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_ULT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64MOVQconst {
                        break
                }
+               c := v_1_0.AuxInt
+               x := v_1.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // match: (SETEQmem [off] {sym} ptr (TESTQ x (MOVQconst [c])) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_UGT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
                        break
                }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETAEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SETBmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETEQmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETAmem [off] {sym} ptr x mem)
+       // result: (SETEQmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -38135,7 +41876,7 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETAmem)
+               v.reset(OpAMD64SETEQmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -38143,9 +41884,9 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETEQmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETBmem [off1+off2] {sym} base val mem)
+       // result: (SETEQmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -38161,7 +41902,7 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
+               v.reset(OpAMD64SETEQmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -38169,9 +41910,14 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       return false
+}
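
The ADDQconst and LEAQ rules fold an address computation into the instruction's displacement, and the is32Bit(off1+off2) guard exists because an x86-64 addressing mode carries at most a sign-extended 32-bit displacement. A plausible definition of that guard, matching how the rules use it (an assumption, not a copy of the real helper):

package main

import "fmt"

// is32Bit reports whether n survives a round trip through int32,
// i.e. fits in a sign-extended 32-bit addressing displacement.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

func main() {
	fmt.Println(is32Bit(1<<31 - 1)) // true: largest positive displacement
	fmt.Println(is32Bit(1 << 31))   // false: the fold must not fire
}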
+func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SETEQmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETBmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETEQmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -38188,7 +41934,7 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
+               v.reset(OpAMD64SETEQmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -38196,9 +41942,9 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETEQmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -38214,14 +41960,14 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETEQmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -38237,12 +41983,12 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETEQmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -38265,30 +42011,7 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETBmem [off] {sym} ptr x:(FlagGT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_ULT {
-                       break
-               }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (SETBmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETEQmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -38297,7 +42020,7 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                _ = v.Args[2]
                ptr := v.Args[0]
                x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_UGT {
+               if x.Op != OpAMD64FlagGT_ULT {
                        break
                }
                mem := v.Args[2]
@@ -38308,252 +42031,125 @@ func rewriteValueAMD64_OpAMD64SETBmem_0(v *Value) bool {
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
                v0.AuxInt = 0
                v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETEQ_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (SETEQ (TESTL (SHLL (MOVLconst [1]) x) y))
-       // cond: !config.nacl
-       // result: (SETAE (BTL x y))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_0.Args[1]
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_0.Args[1]
-               y := v_0.Args[1]
-               if !(!config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (SETEQ (TESTL y (SHLL (MOVLconst [1]) x)))
-       // cond: !config.nacl
-       // result: (SETAE (BTL x y))
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_0.Args[1]
-               y := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_1.Args[1]
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_0_1_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_1.Args[1]
-               if !(!config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SETEQ (TESTQ (SHLQ (MOVQconst [1]) x) y))
-       // cond: !config.nacl
-       // result: (SETAE (BTQ x y))
+       // match: (SETEQmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_0_0.Args[1]
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_0.Args[1]
-               y := v_0.Args[1]
-               if !(!config.nacl) {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
+               mem := v.Args[2]
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
                v.AddArg(v0)
+               v.AddArg(mem)
                return true
        }
-       // match: (SETEQ (TESTQ y (SHLQ (MOVQconst [1]) x)))
-       // cond: !config.nacl
-       // result: (SETAE (BTQ x y))
+       return false
+}
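
SETEQmem's rules no longer fit in a single function: the rule generator emits them in chunks of ten (the _0, _10, ... suffixes) and a dispatcher tries the chunks in order until one fires. The composition, sketched with stand-in functions (the exact form of the real dispatcher, a large switch over v.Op elsewhere in this file, is assumed rather than quoted):

package main

import "fmt"

type rule func() bool

// dispatch chains the per-chunk matchers; the first rewrite that
// succeeds wins and the remaining chunks are skipped.
func dispatch(chunks ...rule) bool {
	for _, r := range chunks {
		if r() {
			return true
		}
	}
	return false
}

func main() {
	setEQmem0 := func() bool { return false } // rules 0..9: no match
	setEQmem10 := func() bool { return true } // rules 10..: fires
	fmt.Println(dispatch(setEQmem0, setEQmem10)) // true
}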
+func rewriteValueAMD64_OpAMD64SETG_0(v *Value) bool {
+       // match: (SETG (InvertFlags x))
+       // cond:
+       // result: (SETL x)
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               y := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_0_1.Args[1]
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               if v_0_1_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_1.Args[1]
-               if !(!config.nacl) {
+               if v_0.Op != OpAMD64InvertFlags {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETL)
+               v.AddArg(x)
                return true
        }
-       // match: (SETEQ (TESTLconst [c] x))
-       // cond: isUint32PowerOfTwo(c) && !config.nacl
-       // result: (SETAE (BTLconst [log2uint32(c)] x))
+       // match: (SETG (FlagEQ))
+       // cond:
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+               if v_0.Op != OpAMD64FlagEQ {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-               v0.AuxInt = log2uint32(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (SETEQ (TESTQconst [c] x))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAE (BTQconst [log2(c)] x))
+       // match: (SETG (FlagLT_ULT))
+       // cond:
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (SETEQ (TESTQ (MOVQconst [c]) x))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAE (BTQconst [log2(c)] x))
+       // match: (SETG (FlagLT_UGT))
+       // cond:
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_0_0.AuxInt
-               x := v_0.Args[1]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               if v_0.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (SETEQ (TESTQ x (MOVQconst [c])))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAE (BTQconst [log2(c)] x))
+       // match: (SETG (FlagGT_ULT))
+       // cond:
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               c := v_0_1.AuxInt
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
+               return true
+       }
+       // match: (SETG (FlagGT_UGT))
+       // cond:
+       // result: (MOVLconst [1])
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               v.reset(OpAMD64SETAE)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 1
                return true
        }
-       // match: (SETEQ (InvertFlags x))
+       return false
+}
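
The Flag* constants fold comparisons whose outcome is known at compile time, and each one carries both orderings at once: FlagLT_ULT means "signed less, unsigned below", while FlagLT_UGT means "signed less but unsigned above", a combination that occurs when the operands differ in sign. SETG reads only the signed half, which is why every non-GT constant above folds to 0. One concrete operand pair that lands in the FlagLT_UGT case:

package main

import "fmt"

func main() {
	// Comparing these is a compile-time FlagLT_UGT:
	// signed -1 < 1 (LT), unsigned 0xFFFF...F > 1 (UGT).
	a, b := int64(-1), int64(1)
	fmt.Println(a > b)                 // SETG folds to 0
	fmt.Println(uint64(a) > uint64(b)) // SETA would fold to 1
}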
+func rewriteValueAMD64_OpAMD64SETGE_0(v *Value) bool {
+       // match: (SETGE (InvertFlags x))
        // cond:
-       // result: (SETEQ x)
+       // result: (SETLE x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETEQ)
+               v.reset(OpAMD64SETLE)
                v.AddArg(x)
                return true
        }
-       // match: (SETEQ (FlagEQ))
+       // match: (SETGE (FlagEQ))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -38565,10 +42161,7 @@ func rewriteValueAMD64_OpAMD64SETEQ_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETEQ_10(v *Value) bool {
-       // match: (SETEQ (FlagLT_ULT))
+       // match: (SETGE (FlagLT_ULT))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -38580,7 +42173,7 @@ func rewriteValueAMD64_OpAMD64SETEQ_10(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (SETEQ (FlagLT_UGT))
+       // match: (SETGE (FlagLT_UGT))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -38592,334 +42185,233 @@ func rewriteValueAMD64_OpAMD64SETEQ_10(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (SETEQ (FlagGT_ULT))
+       // match: (SETGE (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETEQ (FlagGT_UGT))
+       // match: (SETGE (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
        return false
 }
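
The memory forms that follow push the same folding one step further: once the flags are a compile-time constant, the SETGEmem collapses into a one-byte store of 0 or 1 (the MOVBstore of a MOVLconst seen in the rules, with FlagEQ yielding 1 since greater-or-equal includes equality). What that amounts to at the source level, as a sketch:

package main

import "fmt"

// b2u8 is what a folded (MOVBstore ptr (MOVLconst [0|1])) amounts to:
// the comparison result is already a byte constant when it is stored.
func b2u8(b bool) uint8 {
	if b {
		return 1
	}
	return 0
}

func main() {
	var out [3]uint8
	a, b := 5, 5
	out[0] = b2u8(a >= b) // FlagEQ case: stores 1
	a, b = 3, 5
	out[1] = b2u8(a >= b) // FlagLT_* case: stores 0
	a, b = 7, 5
	out[2] = b2u8(a >= b) // FlagGT_* case: stores 1
	fmt.Println(out) // [1 0 1]
}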
-func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (SETEQmem [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
-       // cond: !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTL x y) mem)
+       // match: (SETGEmem [off] {sym} ptr (InvertFlags x) mem)
+       // cond:
+       // result: (SETLEmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_1_0_0.AuxInt != 1 {
+               if v_1.Op != OpAMD64InvertFlags {
                        break
                }
-               x := v_1_0.Args[1]
-               y := v_1.Args[1]
+               x := v_1.Args[0]
                mem := v.Args[2]
-               if !(!config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64SETLEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(x)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTL y (SHLL (MOVLconst [1]) x)) mem)
-       // cond: !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTL x y) mem)
+       // match: (SETGEmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SETGEmem [off1+off2] {sym} base val mem)
        for {
-               off := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_1.Args[1]
-               y := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1_1.Args[1]
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_1_1_0.AuxInt != 1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_1_1.Args[1]
+               off2 := v_0.AuxInt
+               base := v_0.Args[0]
+               val := v.Args[1]
                mem := v.Args[2]
-               if !(!config.nacl) {
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
-               v.AuxInt = off
+               v.reset(OpAMD64SETGEmem)
+               v.AuxInt = off1 + off2
                v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(base)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
-       // cond: !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTQ x y) mem)
+       // match: (SETGEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SETGEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               off1 := v.AuxInt
+               sym1 := v.Aux
                _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               if v_1_0_0.AuxInt != 1 {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64LEAQ {
                        break
                }
-               x := v_1_0.Args[1]
-               y := v_1.Args[1]
+               off2 := v_0.AuxInt
+               sym2 := v_0.Aux
+               base := v_0.Args[0]
+               val := v.Args[1]
                mem := v.Args[2]
-               if !(!config.nacl) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETAEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.reset(OpAMD64SETGEmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(base)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTQ y (SHLQ (MOVQconst [1]) x)) mem)
-       // cond: !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTQ x y) mem)
+       // match: (SETGEmem [off] {sym} ptr x:(FlagEQ) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               y := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_1_1.Args[1]
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               if v_1_1_0.AuxInt != 1 {
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagEQ {
                        break
                }
-               x := v_1_1.Args[1]
                mem := v.Args[2]
-               if !(!config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTLconst [c] x) mem)
-       // cond: isUint32PowerOfTwo(c) && !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
+       // match: (SETGEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTLconst {
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_ULT {
                        break
                }
-               c := v_1.AuxInt
-               x := v_1.Args[0]
                mem := v.Args[2]
-               if !(isUint32PowerOfTwo(c) && !config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-               v0.AuxInt = log2uint32(c)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTQconst [c] x) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SETGEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQconst {
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagLT_UGT {
                        break
                }
-               c := v_1.AuxInt
-               x := v_1.Args[0]
                mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SETGEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64MOVQconst {
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_ULT {
                        break
                }
-               c := v_1_0.AuxInt
-               x := v_1.Args[1]
                mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (TESTQ x (MOVQconst [c])) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETAEmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SETGEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // cond:
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64MOVQconst {
+               x := v.Args[1]
+               if x.Op != OpAMD64FlagGT_UGT {
                        break
                }
-               c := v_1_1.AuxInt
                mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETAEmem)
+               v.reset(OpAMD64MOVBstore)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr (InvertFlags x) mem)
+       return false
+}
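The five Flag* cases above fold a SETGEmem whose flags argument is already a compile-time constant into a direct one-byte store of the condition's value: signed >= holds for FlagEQ and both FlagGT states and fails for both FlagLT states, regardless of the unsigned (ULT/UGT) half of the flag. A minimal sketch of the truth function these generated rules encode (hypothetical helper, not compiler code):

        // setGE returns the byte SETGEmem stores for each constant flag
        // state: 1 for FlagEQ, FlagGT_ULT, FlagGT_UGT; 0 for FlagLT_*.
        func setGE(signedLT, eq bool) byte {
                if eq || !signedLT {
                        return 1
                }
                return 0
        }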
+func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SETGmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETEQmem [off] {sym} ptr x mem)
+       // result: (SETLmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -38931,7 +42423,7 @@ func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETEQmem)
+               v.reset(OpAMD64SETLmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -38939,9 +42431,9 @@ func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETGmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETEQmem [off1+off2] {sym} base val mem)
+       // result: (SETGmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -38957,7 +42449,7 @@ func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETEQmem)
+               v.reset(OpAMD64SETGmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -38965,14 +42457,9 @@ func rewriteValueAMD64_OpAMD64SETEQmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SETEQmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (SETGmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETEQmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETGmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -38989,7 +42476,7 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETEQmem)
+               v.reset(OpAMD64SETGmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -38997,9 +42484,9 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETGmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39015,12 +42502,12 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETGmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -39043,7 +42530,7 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETGmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -39066,9 +42553,9 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETGmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39084,14 +42571,14 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETEQmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETGmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39107,28 +42594,28 @@ func rewriteValueAMD64_OpAMD64SETEQmem_10(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
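The InvertFlags rule that opens each of these functions relies on InvertFlags marking flags produced with the comparison operands swapped, so the signed conditions pair up as G with L and GE with LE, while EQ and NE map to themselves. An illustrative sketch of the pairing (the names here are not compiler identifiers):

        // invertCond gives the condition to test once the operands of
        // the underlying CMP have been swapped (InvertFlags).
        func invertCond(c string) string {
                switch c {
                case "G":
                        return "L"
                case "L":
                        return "G"
                case "GE":
                        return "LE"
                case "LE":
                        return "GE"
                }
                return c // EQ and NE are symmetric under operand swap
        }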
-func rewriteValueAMD64_OpAMD64SETG_0(v *Value) bool {
-       // match: (SETG (InvertFlags x))
+func rewriteValueAMD64_OpAMD64SETL_0(v *Value) bool {
+       // match: (SETL (InvertFlags x))
        // cond:
-       // result: (SETL x)
+       // result: (SETG x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETL)
+               v.reset(OpAMD64SETG)
                v.AddArg(x)
                return true
        }
-       // match: (SETG (FlagEQ))
+       // match: (SETL (FlagEQ))
        // cond:
        // result: (MOVLconst [0])
        for {
@@ -39140,71 +42627,71 @@ func rewriteValueAMD64_OpAMD64SETG_0(v *Value) bool {
                v.AuxInt = 0
                return true
        }
-       // match: (SETG (FlagLT_ULT))
+       // match: (SETL (FlagLT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETG (FlagLT_UGT))
+       // match: (SETL (FlagLT_UGT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETG (FlagGT_ULT))
+       // match: (SETL (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETG (FlagGT_UGT))
+       // match: (SETL (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
        return false
 }
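The constant-flag rules in SETL (and its siblings) matter because earlier rewrites can reduce a comparison to a known flag value after the SETcc has already been generated; for example, a rule of the form (CMPQconst (MOVQconst [3]) [5]) => (FlagLT_ULT) elsewhere in this file leaves a dependent (SETL (FlagLT_ULT)) to be folded here to (MOVLconst [1]).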
-func rewriteValueAMD64_OpAMD64SETGE_0(v *Value) bool {
-       // match: (SETGE (InvertFlags x))
+func rewriteValueAMD64_OpAMD64SETLE_0(v *Value) bool {
+       // match: (SETLE (InvertFlags x))
        // cond:
-       // result: (SETLE x)
+       // result: (SETGE x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETLE)
+               v.reset(OpAMD64SETGE)
                v.AddArg(x)
                return true
        }
-       // match: (SETGE (FlagEQ))
+       // match: (SETLE (FlagEQ))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -39216,62 +42703,62 @@ func rewriteValueAMD64_OpAMD64SETGE_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (SETGE (FlagLT_ULT))
+       // match: (SETLE (FlagLT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETGE (FlagLT_UGT))
+       // match: (SETLE (FlagLT_UGT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagLT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETGE (FlagGT_ULT))
+       // match: (SETLE (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETGE (FlagGT_UGT))
+       // match: (SETLE (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
        return false
 }
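Taken together, SETL, SETLE, SETG and SETGE cover the full signed-condition truth table over the five constant flag states; only the signed half of each flag matters:

        flag        SETL  SETLE  SETG  SETGE
        FlagEQ       0     1      0     1
        FlagLT_ULT   1     1      0     0
        FlagLT_UGT   1     1      0     0
        FlagGT_ULT   0     0      1     1
        FlagGT_UGT   0     0      1     1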
-func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETLEmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETGEmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETLEmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETLEmem [off] {sym} ptr x mem)
+       // result: (SETGEmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39283,7 +42770,7 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETLEmem)
+               v.reset(OpAMD64SETGEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -39291,9 +42778,9 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETLEmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETGEmem [off1+off2] {sym} base val mem)
+       // result: (SETLEmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -39309,7 +42796,7 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETGEmem)
+               v.reset(OpAMD64SETLEmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -39317,9 +42804,9 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (SETLEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETGEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETLEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -39336,7 +42823,7 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETGEmem)
+               v.reset(OpAMD64SETLEmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -39344,7 +42831,7 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETLEmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
@@ -39367,9 +42854,9 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETLEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39385,14 +42872,14 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETLEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39408,14 +42895,14 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETLEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39431,14 +42918,14 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETLEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39454,19 +42941,19 @@ func rewriteValueAMD64_OpAMD64SETGEmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
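Each SETccmem variant repeats the flag rules of its register twin and adds two addressing rewrites: ADDQconst folds a constant offset into AuxInt, and LEAQ merges a symbol-plus-offset base. The is32Bit guard keeps the combined displacement encodable in the 32-bit offset field of an amd64 addressing mode; a plausible sketch of that check (assumed implementation, matching the guard's intent):

        // is32BitSketch reports whether n survives a round trip through
        // int32, i.e. fits in a signed 32-bit displacement.
        func is32BitSketch(n int64) bool {
                return n == int64(int32(n))
        }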
-func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETGmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETLmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETLmem [off] {sym} ptr x mem)
+       // result: (SETGmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39478,7 +42965,7 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETLmem)
+               v.reset(OpAMD64SETGmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -39486,9 +42973,9 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETLmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETGmem [off1+off2] {sym} base val mem)
+       // result: (SETLmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -39504,7 +42991,7 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETGmem)
+               v.reset(OpAMD64SETLmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -39512,9 +42999,9 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       // match: (SETLmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETGmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETLmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -39531,7 +43018,7 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETGmem)
+               v.reset(OpAMD64SETLmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -39539,7 +43026,7 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETLmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -39562,9 +43049,9 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETLmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39580,14 +43067,14 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETLmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39603,14 +43090,14 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETLmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39626,14 +43113,14 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETGmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETLmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -39649,116 +43136,269 @@ func rewriteValueAMD64_OpAMD64SETGmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0.AuxInt = 0
+               v.AddArg(v0)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
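The SETNE rules that follow are the not-equal half of the bit-test lowering: a single-bit test x & (1<<n) != 0 sets ZF from exactly one bit, and (outside NaCl, hence the !config.nacl guard) BT copies that bit into the carry flag, so SETNE of the TEST becomes SETB of a BT. For example:

        // bitSet compiles on amd64 to BTQ plus SETB under these rules;
        // the SETEQ counterpart (removed above) used SETAE for the
        // inverted test.
        func bitSet(x uint64, n uint) bool {
                return x&(1<<n) != 0
        }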
+func rewriteValueAMD64_OpAMD64SETNE_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (SETNE (TESTL (SHLL (MOVLconst [1]) x) y))
+       // cond: !config.nacl
+       // result: (SETB (BTL x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTL {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_0.Args[1]
+               y := v_0.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETNE (TESTL y (SHLL (MOVLconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETB (BTL x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTL {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_0_1.Args[1]
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETNE (TESTQ (SHLQ (MOVQconst [1]) x) y))
+       // cond: !config.nacl
+       // result: (SETB (BTQ x y))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_0_0.Args[1]
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_0.Args[1]
+               y := v_0.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
                v.AddArg(v0)
-               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETL_0(v *Value) bool {
-       // match: (SETL (InvertFlags x))
-       // cond:
-       // result: (SETG x)
+       // match: (SETNE (TESTQ y (SHLQ (MOVQconst [1]) x)))
+       // cond: !config.nacl
+       // result: (SETB (BTQ x y))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64InvertFlags {
+               if v_0.Op != OpAMD64TESTQ {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (SETL (FlagEQ))
-       // cond:
-       // result: (MOVLconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagEQ {
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64SHLQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               _ = v_0_1.Args[1]
+               v_0_1_0 := v_0_1.Args[0]
+               if v_0_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_0_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_0_1.Args[1]
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETL (FlagLT_ULT))
-       // cond:
-       // result: (MOVLconst [1])
+       // match: (SETNE (TESTLconst [c] x))
+       // cond: isUint32PowerOfTwo(c) && !config.nacl
+       // result: (SETB (BTLconst [log2uint32(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_ULT {
+               if v_0.Op != OpAMD64TESTLconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
+               v0.AuxInt = log2uint32(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETL (FlagLT_UGT))
-       // cond:
-       // result: (MOVLconst [1])
+       // match: (SETNE (TESTQconst [c] x))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETB (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_UGT {
+               if v_0.Op != OpAMD64TESTQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETL (FlagGT_ULT))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETNE (TESTQ (MOVQconst [c]) x))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETB (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_ULT {
+               if v_0.Op != OpAMD64TESTQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               _ = v_0.Args[1]
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_0.AuxInt
+               x := v_0.Args[1]
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       // match: (SETL (FlagGT_UGT))
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (SETNE (TESTQ x (MOVQconst [c])))
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETB (BTQconst [log2(c)] x))
        for {
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_UGT {
+               if v_0.Op != OpAMD64TESTQ {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               v_0_1 := v_0.Args[1]
+               if v_0_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0_1.AuxInt
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETLE_0(v *Value) bool {
-       // match: (SETLE (InvertFlags x))
+       // match: (SETNE (InvertFlags x))
        // cond:
-       // result: (SETGE x)
+       // result: (SETNE x)
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64InvertFlags {
                        break
                }
                x := v_0.Args[0]
-               v.reset(OpAMD64SETGE)
+               v.reset(OpAMD64SETNE)
                v.AddArg(x)
                return true
        }
-       // match: (SETLE (FlagEQ))
+       // match: (SETNE (FlagEQ))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (MOVLconst [0])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagEQ {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.AuxInt = 0
                return true
        }
-       // match: (SETLE (FlagLT_ULT))
+       return false
+}
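The TESTLconst/TESTQconst rules above fire only when the constant has exactly one bit set: TEST with c = 1<<k sets ZF iff bit k of x is clear, so SETNE of the TEST equals SETB of BTconst [k]. log2 and log2uint32 recover k from c; a sketch of the power-of-two guard and exponent (hypothetical helpers mirroring isUint64PowerOfTwo/log2):

        import "math/bits"

        // isPow2 reports whether c has exactly one bit set; log2p2
        // returns the index of that bit.
        func isPow2(c uint64) bool { return c != 0 && c&(c-1) == 0 }
        func log2p2(c uint64) int  { return bits.TrailingZeros64(c) }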
+func rewriteValueAMD64_OpAMD64SETNE_10(v *Value) bool {
+       // match: (SETNE (FlagLT_ULT))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -39770,7 +43410,7 @@ func rewriteValueAMD64_OpAMD64SETLE_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (SETLE (FlagLT_UGT))
+       // match: (SETNE (FlagLT_UGT))
        // cond:
        // result: (MOVLconst [1])
        for {
@@ -39782,233 +43422,334 @@ func rewriteValueAMD64_OpAMD64SETLE_0(v *Value) bool {
                v.AuxInt = 1
                return true
        }
-       // match: (SETLE (FlagGT_ULT))
+       // match: (SETNE (FlagGT_ULT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_ULT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
-       // match: (SETLE (FlagGT_UGT))
+       // match: (SETNE (FlagGT_UGT))
        // cond:
-       // result: (MOVLconst [0])
+       // result: (MOVLconst [1])
        for {
                v_0 := v.Args[0]
                if v_0.Op != OpAMD64FlagGT_UGT {
                        break
                }
                v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               v.AuxInt = 1
                return true
        }
        return false
 }
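SETNE over constant flags is the complement of SETEQ: only FlagEQ folds to 0, and all four inequality states fold to 1, whatever their signed/unsigned halves. As a one-line sketch:

        // setNE returns the value SETNE folds to for a constant flag.
        func setNE(eq bool) byte {
                if eq {
                        return 0
                }
                return 1
        }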
-func rewriteValueAMD64_OpAMD64SETLEmem_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SETNEmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SETLEmem [off] {sym} ptr (InvertFlags x) mem)
-       // cond:
-       // result: (SETGEmem [off] {sym} ptr x mem)
+       config := b.Func.Config
+       _ = config
+       // match: (SETNEmem [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
+       // cond: !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTL x y) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64InvertFlags {
+               if v_1.Op != OpAMD64TESTL {
                        break
                }
-               x := v_1.Args[0]
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_0.Args[1]
+               y := v_1.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64SETGEmem)
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off1] {sym} (ADDQconst [off2] base) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SETLEmem [off1+off2] {sym} base val mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTL y (SHLL (MOVLconst [1]) x)) mem)
+       // cond: !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTL x y) mem)
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTL {
                        break
                }
-               off2 := v_0.AuxInt
-               base := v_0.Args[0]
-               val := v.Args[1]
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLL {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               if v_1_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_1.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64SETLEmem)
-               v.AuxInt = off1 + off2
+               v.reset(OpAMD64SETBmem)
+               v.AuxInt = off
                v.Aux = sym
-               v.AddArg(base)
-               v.AddArg(val)
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETLEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
+       // cond: !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTQ x y) mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               off := v.AuxInt
+               sym := v.Aux
                _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               ptr := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_1_0.Args[1]
+               v_1_0_0 := v_1_0.Args[0]
+               if v_1_0_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               if v_1_0_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_0.Args[1]
+               y := v_1.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(!config.nacl) {
                        break
                }
-               v.reset(OpAMD64SETLEmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
+               v.reset(OpAMD64SETBmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(ptr)
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off] {sym} ptr x:(FlagEQ) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTQ y (SHLQ (MOVQconst [1]) x)) mem)
+       // cond: !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTQ x y) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagEQ {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_1.Args[1]
+               y := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               _ = v_1_1.Args[1]
+               v_1_1_0 := v_1_1.Args[0]
+               if v_1_1_0.Op != OpAMD64MOVQconst {
                        break
                }
+               if v_1_1_0.AuxInt != 1 {
+                       break
+               }
+               x := v_1_1.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(!config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
+               v0.AddArg(x)
+               v0.AddArg(y)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTLconst [c] x) mem)
+       // cond: isUint32PowerOfTwo(c) && !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_ULT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTLconst {
                        break
                }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
+               v0.AuxInt = log2uint32(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTQconst [c] x) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_UGT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQconst {
                        break
                }
+               c := v_1.AuxInt
+               x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_ULT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
                        break
                }
+               _ = v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_1_0.AuxInt
+               x := v_1.Args[1]
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // match: (SETNEmem [off] {sym} ptr (TESTQ x (MOVQconst [c])) mem)
+       // cond: isUint64PowerOfTwo(c) && !config.nacl
+       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
                ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_UGT {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64TESTQ {
+                       break
+               }
+               _ = v_1.Args[1]
+               x := v_1.Args[0]
+               v_1_1 := v_1.Args[1]
+               if v_1_1.Op != OpAMD64MOVQconst {
                        break
                }
+               c := v_1_1.AuxInt
                mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
+               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+                       break
+               }
+               v.reset(OpAMD64SETBmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
+               v0.AuxInt = log2(c)
+               v0.AddArg(x)
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SETLmem [off] {sym} ptr (InvertFlags x) mem)
+       // match: (SETNEmem [off] {sym} ptr (InvertFlags x) mem)
        // cond:
-       // result: (SETGmem [off] {sym} ptr x mem)
+       // result: (SETNEmem [off] {sym} ptr x mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -40020,7 +43761,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                }
                x := v_1.Args[0]
                mem := v.Args[2]
-               v.reset(OpAMD64SETGmem)
+               v.reset(OpAMD64SETNEmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(ptr)
@@ -40028,9 +43769,9 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off1] {sym} (ADDQconst [off2] base) val mem)
+       // match: (SETNEmem [off1] {sym} (ADDQconst [off2] base) val mem)
        // cond: is32Bit(off1+off2)
-       // result: (SETLmem [off1+off2] {sym} base val mem)
+       // result: (SETNEmem [off1+off2] {sym} base val mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -40046,7 +43787,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SETLmem)
+               v.reset(OpAMD64SETNEmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(base)
@@ -40054,9 +43795,14 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+       return false
+}
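The rule generator emits each op's rewrites in chunks of ten, which is why SETNEmem continues in a _10 function; the top-level dispatcher chains the chunks until one fires. An assumed sketch of that dispatch (simplified from the generated switch):

        // rewriteSETNEmem shows the assumed shape of the generated
        // dispatch: try the first chunk of rules, then the next.
        func rewriteSETNEmem(v *Value) bool {
                return rewriteValueAMD64_OpAMD64SETNEmem_0(v) ||
                        rewriteValueAMD64_OpAMD64SETNEmem_10(v)
        }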
+func rewriteValueAMD64_OpAMD64SETNEmem_10(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SETNEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
        // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETLmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // result: (SETNEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -40073,7 +43819,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SETLmem)
+               v.reset(OpAMD64SETNEmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(base)
@@ -40081,7 +43827,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off] {sym} ptr x:(FlagEQ) mem)
+       // match: (SETNEmem [off] {sym} ptr x:(FlagEQ) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
        for {
@@ -40104,7 +43850,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off] {sym} ptr x:(FlagLT_ULT) mem)
+       // match: (SETNEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
@@ -40127,7 +43873,7 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off] {sym} ptr x:(FlagLT_UGT) mem)
+       // match: (SETNEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
        // cond:
        // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
@@ -40150,9 +43896,9 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off] {sym} ptr x:(FlagGT_ULT) mem)
+       // match: (SETNEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -40168,14 +43914,14 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
-       // match: (SETLmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       // match: (SETNEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
        for {
                off := v.AuxInt
                sym := v.Aux
@@ -40191,820 +43937,592 @@ func rewriteValueAMD64_OpAMD64SETLmem_0(v *Value) bool {
                v.Aux = sym
                v.AddArg(ptr)
                v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
+               v0.AuxInt = 1
                v.AddArg(v0)
                v.AddArg(mem)
                return true
        }
        return false
 }
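+
+// The register form follows the same shape; as a hypothetical sketch,
+//
+//	b := x&(1<<3) != 0
+//
+// lowers to BTQconst [3] x + SETB rather than TESTQconst + SETNE, and the
+// constant-flag rules fold SETNE of an already-known flag straight to
+// MOVLconst [0] or [1].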
-func rewriteValueAMD64_OpAMD64SETNE_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
        b := v.Block
        _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (SETNE (TESTL (SHLL (MOVLconst [1]) x) y))
-       // cond: !config.nacl
-       // result: (SETB (BTL x y))
+       // match: (SHLL x (MOVQconst [c]))
+       // cond:
+       // result: (SHLLconst [c&31] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_0.Args[1]
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_0_0_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_0.Args[1]
-               y := v_0.Args[1]
-               if !(!config.nacl) {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               c := v_1.AuxInt
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = c & 31
+               v.AddArg(x)
                return true
        }
-       // match: (SETNE (TESTL y (SHLL (MOVLconst [1]) x)))
-       // cond: !config.nacl
-       // result: (SETB (BTL x y))
+       // match: (SHLL x (MOVLconst [c]))
+       // cond:
+       // result: (SHLLconst [c&31] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTL {
-                       break
-               }
-               _ = v_0.Args[1]
-               y := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_0_1.Args[1]
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_0_1_0.AuxInt != 1 {
-                       break
-               }
-               x := v_0_1.Args[1]
-               if !(!config.nacl) {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               c := v_1.AuxInt
+               v.reset(OpAMD64SHLLconst)
+               v.AuxInt = c & 31
+               v.AddArg(x)
                return true
        }
-       // match: (SETNE (TESTQ (SHLQ (MOVQconst [1]) x) y))
-       // cond: !config.nacl
-       // result: (SETB (BTQ x y))
+       // match: (SHLL x (ADDQconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SHLL x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64SHLQ {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
                        break
                }
-               _ = v_0_0.Args[1]
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64MOVQconst {
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHLL x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHLL x (NEGQ <t> y))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               if v_0_0_0.AuxInt != 1 {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_0_0.Args[1]
-               y := v_0.Args[1]
-               if !(!config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SETNE (TESTQ y (SHLQ (MOVQconst [1]) x)))
-       // cond: !config.nacl
-       // result: (SETB (BTQ x y))
+       // match: (SHLL x (ANDQconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHLL x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDQconst {
                        break
                }
-               _ = v_0.Args[1]
-               y := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64SHLQ {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
                        break
                }
-               _ = v_0_1.Args[1]
-               v_0_1_0 := v_0_1.Args[0]
-               if v_0_1_0.Op != OpAMD64MOVQconst {
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       // match: (SHLL x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHLL x (NEGQ <t> y))
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               if v_0_1_0.AuxInt != 1 {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
                        break
                }
-               x := v_0_1.Args[1]
-               if !(!config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SETNE (TESTLconst [c] x))
-       // cond: isUint32PowerOfTwo(c) && !config.nacl
-       // result: (SETB (BTLconst [log2uint32(c)] x))
+       // match: (SHLL x (ADDLconst [c] y))
+       // cond: c & 31 == 0
+       // result: (SHLL x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTLconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-               v0.AuxInt = log2uint32(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SETNE (TESTQconst [c] x))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETB (BTQconst [log2(c)] x))
+       // match: (SHLL x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 31 == 0
+       // result: (SHLL x (NEGL <t> y))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SETNE (TESTQ (MOVQconst [c]) x))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETB (BTQconst [log2(c)] x))
+       // match: (SHLL x (ANDLconst [c] y))
+       // cond: c & 31 == 31
+       // result: (SHLL x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_0.Args[1]
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64MOVQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ANDLconst {
                        break
                }
-               c := v_0_0.AuxInt
-               x := v_0.Args[1]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SETNE (TESTQ x (MOVQconst [c])))
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETB (BTQconst [log2(c)] x))
+       // match: (SHLL x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 31 == 31
+       // result: (SHLL x (NEGL <t> y))
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64TESTQ {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               _ = v_0.Args[1]
-               x := v_0.Args[0]
-               v_0_1 := v_0.Args[1]
-               if v_0_1.Op != OpAMD64MOVQconst {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
                        break
                }
-               c := v_0_1.AuxInt
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SETNE (InvertFlags x))
+       return false
+}
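+
+// The SHLL count rules lean on the machine op using only the low 5 bits of
+// the shift count, so (as a sketch, hypothetical source)
+//
+//	y := x << uint((n+32)&31)
+//
+// reduces to a bare SHLL x n: the ANDQconst mask already covers bits 0-4 and
+// the ADDQconst adds a multiple of 32, so neither can change the count.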
+func rewriteValueAMD64_OpAMD64SHLLconst_0(v *Value) bool {
+       // match: (SHLLconst x [0])
        // cond:
-       // result: (SETNE x)
+       // result: x
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64InvertFlags {
+               if v.AuxInt != 0 {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETNE)
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
                return true
        }
-       // match: (SETNE (FlagEQ))
-       // cond:
-       // result: (MOVLconst [0])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagEQ {
-                       break
-               }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
-               return true
-       }
        return false
 }
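+
+// SHLLconst [0] is the degenerate case left over once constant counts have
+// been folded (SHLL x (MOVQconst [c]) -> SHLLconst [c&31] x); when c is a
+// multiple of 32 the masked count is zero and the rule above turns the
+// shift into a plain copy of x.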
-func rewriteValueAMD64_OpAMD64SETNE_10(v *Value) bool {
-       // match: (SETNE (FlagLT_ULT))
+func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SHLQ x (MOVQconst [c]))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (SHLQconst [c&63] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_ULT {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               c := v_1.AuxInt
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
                return true
        }
-       // match: (SETNE (FlagLT_UGT))
+       // match: (SHLQ x (MOVLconst [c]))
        // cond:
-       // result: (MOVLconst [1])
+       // result: (SHLQconst [c&63] x)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagLT_UGT {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               c := v_1.AuxInt
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = c & 63
+               v.AddArg(x)
                return true
        }
-       // match: (SETNE (FlagGT_ULT))
-       // cond:
-       // result: (MOVLconst [1])
+       // match: (SHLQ x (ADDQconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHLQ x y)
        for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_ULT {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
-               return true
-       }
-       // match: (SETNE (FlagGT_UGT))
-       // cond:
-       // result: (MOVLconst [1])
-       for {
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64FlagGT_UGT {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 1
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETNEmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (SETNEmem [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
-       // cond: !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTL x y) mem)
+       // match: (SHLQ x (NEGQ <t> (ADDQconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHLQ x (NEGQ <t> y))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTL {
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               _ = v_1.Args[1]
+               t := v_1.Type
                v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_1_0_0.AuxInt != 1 {
+               if v_1_0.Op != OpAMD64ADDQconst {
                        break
                }
-               x := v_1_0.Args[1]
-               y := v_1.Args[1]
-               mem := v.Args[2]
-               if !(!config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
-               v.AddArg(mem)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTL y (SHLL (MOVLconst [1]) x)) mem)
-       // cond: !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTL x y) mem)
+       // match: (SHLQ x (ANDQconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SHLQ x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTL {
+               if v_1.Op != OpAMD64ANDQconst {
                        break
                }
-               _ = v_1.Args[1]
+               c := v_1.AuxInt
                y := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHLL {
-                       break
-               }
-               _ = v_1_1.Args[1]
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               if v_1_1_0.AuxInt != 1 {
-                       break
-               }
-               x := v_1_1.Args[1]
-               mem := v.Args[2]
-               if !(!config.nacl) {
-                       break
-               }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (SETNEmem [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
-       // cond: !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTQ x y) mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_1_0.Args[1]
-               v_1_0_0 := v_1_0.Args[0]
-               if v_1_0_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               if v_1_0_0.AuxInt != 1 {
-                       break
-               }
-               x := v_1_0.Args[1]
-               y := v_1.Args[1]
-               mem := v.Args[2]
-               if !(!config.nacl) {
+               if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTQ y (SHLQ (MOVQconst [1]) x)) mem)
-       // cond: !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTQ x y) mem)
+       // match: (SHLQ x (NEGQ <t> (ANDQconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHLQ x (NEGQ <t> y))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               y := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64SHLQ {
-                       break
-               }
-               _ = v_1_1.Args[1]
-               v_1_1_0 := v_1_1.Args[0]
-               if v_1_1_0.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64NEGQ {
                        break
                }
-               if v_1_1_0.AuxInt != 1 {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDQconst {
                        break
                }
-               x := v_1_1.Args[1]
-               mem := v.Args[2]
-               if !(!config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
-               v.AddArg(mem)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTLconst [c] x) mem)
-       // cond: isUint32PowerOfTwo(c) && !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
+       // match: (SHLQ x (ADDLconst [c] y))
+       // cond: c & 63 == 0
+       // result: (SHLQ x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTLconst {
+               if v_1.Op != OpAMD64ADDLconst {
                        break
                }
                c := v_1.AuxInt
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               if !(isUint32PowerOfTwo(c) && !config.nacl) {
+               y := v_1.Args[0]
+               if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
-               v0.AuxInt = log2uint32(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTQconst [c] x) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SHLQ x (NEGL <t> (ADDLconst [c] y)))
+       // cond: c & 63 == 0
+       // result: (SHLQ x (NEGL <t> y))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQconst {
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               c := v_1.AuxInt
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ADDLconst {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 0) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
                v.AddArg(v0)
-               v.AddArg(mem)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SHLQ x (ANDLconst [c] y))
+       // cond: c & 63 == 63
+       // result: (SHLQ x y)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
-                       break
-               }
-               _ = v_1.Args[1]
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64ANDLconst {
                        break
                }
-               c := v_1_0.AuxInt
-               x := v_1.Args[1]
-               mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               c := v_1.AuxInt
+               y := v_1.Args[0]
+               if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (TESTQ x (MOVQconst [c])) mem)
-       // cond: isUint64PowerOfTwo(c) && !config.nacl
-       // result: (SETBmem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+       // match: (SHLQ x (NEGL <t> (ANDLconst [c] y)))
+       // cond: c & 63 == 63
+       // result: (SHLQ x (NEGL <t> y))
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
+               _ = v.Args[1]
+               x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64TESTQ {
+               if v_1.Op != OpAMD64NEGL {
                        break
                }
-               _ = v_1.Args[1]
-               x := v_1.Args[0]
-               v_1_1 := v_1.Args[1]
-               if v_1_1.Op != OpAMD64MOVQconst {
+               t := v_1.Type
+               v_1_0 := v_1.Args[0]
+               if v_1_0.Op != OpAMD64ANDLconst {
                        break
                }
-               c := v_1_1.AuxInt
-               mem := v.Args[2]
-               if !(isUint64PowerOfTwo(c) && !config.nacl) {
+               c := v_1_0.AuxInt
+               y := v_1_0.Args[0]
+               if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SETBmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
-               v0.AuxInt = log2(c)
-               v0.AddArg(x)
+               v.reset(OpAMD64SHLQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
+               v0.AddArg(y)
                v.AddArg(v0)
-               v.AddArg(mem)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr (InvertFlags x) mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64SHLQconst_0(v *Value) bool {
+       // match: (SHLQconst x [0])
        // cond:
-       // result: (SETNEmem [off] {sym} ptr x mem)
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64InvertFlags {
+               if v.AuxInt != 0 {
                        break
                }
-               x := v_1.Args[0]
-               mem := v.Args[2]
-               v.reset(OpAMD64SETNEmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
                v.AddArg(x)
-               v.AddArg(mem)
                return true
        }
-       // match: (SETNEmem [off1] {sym} (ADDQconst [off2] base) val mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SETNEmem [off1+off2] {sym} base val mem)
+       return false
+}
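+
+// SHLQ/SHLQconst mirror the 32-bit rules with a 6-bit count mask; e.g. the
+// hypothetical
+//
+//	y := x << uint(n&63)
+//
+// keeps only SHLQ x n, since the ANDQconst [63] cannot change the low 6 bits
+// the hardware actually uses, and a zero effective count again becomes a copy.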
+func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
+       // match: (SHRB x (MOVQconst [c]))
+       // cond: c&31 < 8
+       // result: (SHRBconst [c&31] x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64ADDQconst {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               off2 := v_0.AuxInt
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               c := v_1.AuxInt
+               if !(c&31 < 8) {
                        break
                }
-               v.reset(OpAMD64SETNEmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpAMD64SHRBconst)
+               v.AuxInt = c & 31
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SETNEmem_10(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SETNEmem [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SETNEmem [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+       // match: (SHRB x (MOVLconst [c]))
+       // cond: c&31 < 8
+       // result: (SHRBconst [c&31] x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64LEAQ {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               off2 := v_0.AuxInt
-               sym2 := v_0.Aux
-               base := v_0.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               c := v_1.AuxInt
+               if !(c&31 < 8) {
                        break
                }
-               v.reset(OpAMD64SETNEmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(base)
-               v.AddArg(val)
-               v.AddArg(mem)
+               v.reset(OpAMD64SHRBconst)
+               v.AuxInt = c & 31
+               v.AddArg(x)
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr x:(FlagEQ) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
+       // match: (SHRB _ (MOVQconst [c]))
+       // cond: c&31 >= 8
+       // result: (MOVLconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagEQ {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 0
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (SETNEmem [off] {sym} ptr x:(FlagLT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_ULT {
+               c := v_1.AuxInt
+               if !(c&31 >= 8) {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr x:(FlagLT_UGT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // match: (SHRB _ (MOVLconst [c]))
+       // cond: c&31 >= 8
+       // result: (MOVLconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagLT_UGT {
+               _ = v.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
-               v.AddArg(v0)
-               v.AddArg(mem)
-               return true
-       }
-       // match: (SETNEmem [off] {sym} ptr x:(FlagGT_ULT) mem)
-       // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
-       for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_ULT {
+               c := v_1.AuxInt
+               if !(c&31 >= 8) {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
-               v.AddArg(v0)
-               v.AddArg(mem)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (SETNEmem [off] {sym} ptr x:(FlagGT_UGT) mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64SHRBconst_0(v *Value) bool {
+       // match: (SHRBconst x [0])
        // cond:
-       // result: (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
+       // result: x
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               x := v.Args[1]
-               if x.Op != OpAMD64FlagGT_UGT {
+               if v.AuxInt != 0 {
                        break
                }
-               mem := v.Args[2]
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(ptr)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, x.Type)
-               v0.AuxInt = 1
-               v.AddArg(v0)
-               v.AddArg(mem)
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
        return false
 }
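+
+// SHRB keeps only constant-count forms: as a sketch, for a uint8 b the shift
+// b >> 3 becomes SHRBconst [3] b, while any constant count of 8 or more
+// (after the 5-bit mask) cannot leave set bits and folds to MOVLconst [0].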
-func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SHRL_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SHLL x (MOVQconst [c]))
+       // match: (SHRL x (MOVQconst [c]))
        // cond:
-       // result: (SHLLconst [c&31] x)
+       // result: (SHRLconst [c&31] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41013,14 +44531,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHLLconst)
+               v.reset(OpAMD64SHRLconst)
                v.AuxInt = c & 31
                v.AddArg(x)
                return true
        }
-       // match: (SHLL x (MOVLconst [c]))
+       // match: (SHRL x (MOVLconst [c]))
        // cond:
-       // result: (SHLLconst [c&31] x)
+       // result: (SHRLconst [c&31] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41029,14 +44547,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHLLconst)
+               v.reset(OpAMD64SHRLconst)
                v.AuxInt = c & 31
                v.AddArg(x)
                return true
        }
-       // match: (SHLL x (ADDQconst [c] y))
+       // match: (SHRL x (ADDQconst [c] y))
        // cond: c & 31 == 0
-       // result: (SHLL x y)
+       // result: (SHRL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41049,14 +44567,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLL x (NEGQ <t> (ADDQconst [c] y)))
+       // match: (SHRL x (NEGQ <t> (ADDQconst [c] y)))
        // cond: c & 31 == 0
-       // result: (SHLL x (NEGQ <t> y))
+       // result: (SHRL x (NEGQ <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41074,16 +44592,16 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLL x (ANDQconst [c] y))
+       // match: (SHRL x (ANDQconst [c] y))
        // cond: c & 31 == 31
-       // result: (SHLL x y)
+       // result: (SHRL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41096,14 +44614,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLL x (NEGQ <t> (ANDQconst [c] y)))
+       // match: (SHRL x (NEGQ <t> (ANDQconst [c] y)))
        // cond: c & 31 == 31
-       // result: (SHLL x (NEGQ <t> y))
+       // result: (SHRL x (NEGQ <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41121,16 +44639,16 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLL x (ADDLconst [c] y))
+       // match: (SHRL x (ADDLconst [c] y))
        // cond: c & 31 == 0
-       // result: (SHLL x y)
+       // result: (SHRL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41143,14 +44661,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLL x (NEGL <t> (ADDLconst [c] y)))
+       // match: (SHRL x (NEGL <t> (ADDLconst [c] y)))
        // cond: c & 31 == 0
-       // result: (SHLL x (NEGL <t> y))
+       // result: (SHRL x (NEGL <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41168,16 +44686,16 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLL x (ANDLconst [c] y))
+       // match: (SHRL x (ANDLconst [c] y))
        // cond: c & 31 == 31
-       // result: (SHLL x y)
+       // result: (SHRL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41190,14 +44708,14 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLL x (NEGL <t> (ANDLconst [c] y)))
+       // match: (SHRL x (NEGL <t> (ANDLconst [c] y)))
        // cond: c & 31 == 31
-       // result: (SHLL x (NEGL <t> y))
+       // result: (SHRL x (NEGL <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41215,7 +44733,7 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
                if !(c&31 == 31) {
                        break
                }
-               v.reset(OpAMD64SHLL)
+               v.reset(OpAMD64SHRL)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
                v0.AddArg(y)
@@ -41224,8 +44742,8 @@ func rewriteValueAMD64_OpAMD64SHLL_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHLLconst_0(v *Value) bool {
-       // match: (SHLLconst x [0])
+func rewriteValueAMD64_OpAMD64SHRLconst_0(v *Value) bool {
+       // match: (SHRLconst x [0])
        // cond:
        // result: x
        for {
@@ -41240,12 +44758,12 @@ func rewriteValueAMD64_OpAMD64SHLLconst_0(v *Value) bool {
        }
        return false
 }
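+
+// SHRL/SHRLconst repeat the SHLL treatment for 32-bit logical right shifts:
+// constant counts fold to SHRLconst [c&31], ADD/AND (and NEG-wrapped) count
+// adjustments that cannot touch bits 0-4 are stripped, and a zero count is a
+// copy; e.g. the sketch x >> uint(n&31) ends up as a bare SHRL x n.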
-func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SHRQ_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SHLQ x (MOVQconst [c]))
+       // match: (SHRQ x (MOVQconst [c]))
        // cond:
-       // result: (SHLQconst [c&63] x)
+       // result: (SHRQconst [c&63] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41254,14 +44772,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHLQconst)
+               v.reset(OpAMD64SHRQconst)
                v.AuxInt = c & 63
                v.AddArg(x)
                return true
        }
-       // match: (SHLQ x (MOVLconst [c]))
+       // match: (SHRQ x (MOVLconst [c]))
        // cond:
-       // result: (SHLQconst [c&63] x)
+       // result: (SHRQconst [c&63] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41270,14 +44788,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHLQconst)
+               v.reset(OpAMD64SHRQconst)
                v.AuxInt = c & 63
                v.AddArg(x)
                return true
        }
-       // match: (SHLQ x (ADDQconst [c] y))
+       // match: (SHRQ x (ADDQconst [c] y))
        // cond: c & 63 == 0
-       // result: (SHLQ x y)
+       // result: (SHRQ x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41290,14 +44808,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLQ x (NEGQ <t> (ADDQconst [c] y)))
+       // match: (SHRQ x (NEGQ <t> (ADDQconst [c] y)))
        // cond: c & 63 == 0
-       // result: (SHLQ x (NEGQ <t> y))
+       // result: (SHRQ x (NEGQ <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41315,16 +44833,16 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLQ x (ANDQconst [c] y))
+       // match: (SHRQ x (ANDQconst [c] y))
        // cond: c & 63 == 63
-       // result: (SHLQ x y)
+       // result: (SHRQ x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41337,14 +44855,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLQ x (NEGQ <t> (ANDQconst [c] y)))
+       // match: (SHRQ x (NEGQ <t> (ANDQconst [c] y)))
        // cond: c & 63 == 63
-       // result: (SHLQ x (NEGQ <t> y))
+       // result: (SHRQ x (NEGQ <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41362,16 +44880,16 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLQ x (ADDLconst [c] y))
+       // match: (SHRQ x (ADDLconst [c] y))
        // cond: c & 63 == 0
-       // result: (SHLQ x y)
+       // result: (SHRQ x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41384,14 +44902,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLQ x (NEGL <t> (ADDLconst [c] y)))
+       // match: (SHRQ x (NEGL <t> (ADDLconst [c] y)))
        // cond: c & 63 == 0
-       // result: (SHLQ x (NEGL <t> y))
+       // result: (SHRQ x (NEGL <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41409,16 +44927,16 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 0) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHLQ x (ANDLconst [c] y))
+       // match: (SHRQ x (ANDLconst [c] y))
        // cond: c & 63 == 63
-       // result: (SHLQ x y)
+       // result: (SHRQ x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41431,14 +44949,14 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v.AddArg(y)
                return true
        }
-       // match: (SHLQ x (NEGL <t> (ANDLconst [c] y)))
+       // match: (SHRQ x (NEGL <t> (ANDLconst [c] y)))
        // cond: c & 63 == 63
-       // result: (SHLQ x (NEGL <t> y))
+       // result: (SHRQ x (NEGL <t> y))
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41456,7 +44974,7 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
                if !(c&63 == 63) {
                        break
                }
-               v.reset(OpAMD64SHLQ)
+               v.reset(OpAMD64SHRQ)
                v.AddArg(x)
                v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
                v0.AddArg(y)
@@ -41465,8 +44983,8 @@ func rewriteValueAMD64_OpAMD64SHLQ_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHLQconst_0(v *Value) bool {
-       // match: (SHLQconst x [0])
+func rewriteValueAMD64_OpAMD64SHRQconst_0(v *Value) bool {
+       // match: (SHRQconst x [0])
        // cond:
        // result: x
        for {
@@ -41481,10 +44999,10 @@ func rewriteValueAMD64_OpAMD64SHLQconst_0(v *Value) bool {
        }
        return false
 }
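+
+// SHRQ is the 64-bit analogue; e.g. the hypothetical
+//
+//	y := x >> uint((n+64)&63)
+//
+// drops first the ANDQconst [63] (its mask covers bits 0-5) and then the
+// ADDQconst [64] (a multiple of 64), leaving SHRQ x n.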
-func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
-       // match: (SHRB x (MOVQconst [c]))
-       // cond: c&31 < 8
-       // result: (SHRBconst [c&31] x)
+func rewriteValueAMD64_OpAMD64SHRW_0(v *Value) bool {
+       // match: (SHRW x (MOVQconst [c]))
+       // cond: c&31 < 16
+       // result: (SHRWconst [c&31] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41493,17 +45011,17 @@ func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(c&31 < 8) {
+               if !(c&31 < 16) {
                        break
                }
-               v.reset(OpAMD64SHRBconst)
+               v.reset(OpAMD64SHRWconst)
                v.AuxInt = c & 31
                v.AddArg(x)
                return true
        }
-       // match: (SHRB x (MOVLconst [c]))
-       // cond: c&31 < 8
-       // result: (SHRBconst [c&31] x)
+       // match: (SHRW x (MOVLconst [c]))
+       // cond: c&31 < 16
+       // result: (SHRWconst [c&31] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41512,16 +45030,16 @@ func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(c&31 < 8) {
+               if !(c&31 < 16) {
                        break
                }
-               v.reset(OpAMD64SHRBconst)
+               v.reset(OpAMD64SHRWconst)
                v.AuxInt = c & 31
                v.AddArg(x)
                return true
        }
-       // match: (SHRB _ (MOVQconst [c]))
-       // cond: c&31 >= 8
+       // match: (SHRW _ (MOVQconst [c]))
+       // cond: c&31 >= 16
        // result: (MOVLconst [0])
        for {
                _ = v.Args[1]
@@ -41530,15 +45048,15 @@ func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(c&31 >= 8) {
+               if !(c&31 >= 16) {
                        break
                }
                v.reset(OpAMD64MOVLconst)
                v.AuxInt = 0
                return true
        }
-       // match: (SHRB _ (MOVLconst [c]))
-       // cond: c&31 >= 8
+       // match: (SHRW _ (MOVLconst [c]))
+       // cond: c&31 >= 16
        // result: (MOVLconst [0])
        for {
                _ = v.Args[1]
@@ -41547,7 +45065,7 @@ func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               if !(c&31 >= 8) {
+               if !(c&31 >= 16) {
                        break
                }
                v.reset(OpAMD64MOVLconst)
@@ -41556,8 +45074,8 @@ func rewriteValueAMD64_OpAMD64SHRB_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRBconst_0(v *Value) bool {
-       // match: (SHRBconst x [0])
+func rewriteValueAMD64_OpAMD64SHRWconst_0(v *Value) bool {
+       // match: (SHRWconst x [0])
        // cond:
        // result: x
        for {
@@ -41572,269 +45090,702 @@ func rewriteValueAMD64_OpAMD64SHRBconst_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRL_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64SUBL_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SHRL x (MOVQconst [c]))
+       // match: (SUBL x (MOVLconst [c]))
        // cond:
-       // result: (SHRLconst [c&31] x)
+       // result: (SUBLconst x [c])
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHRLconst)
-               v.AuxInt = c & 31
+               v.reset(OpAMD64SUBLconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRL x (MOVLconst [c]))
+       // match: (SUBL (MOVLconst [c]) x)
        // cond:
-       // result: (SHRLconst [c&31] x)
+       // result: (NEGL (SUBLconst <v.Type> x [c]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64NEGL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SUBLconst, v.Type)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SUBL x x)
+       // cond:
+       // result: (MOVLconst [0])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SUBL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (SUBLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SUBLmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64SUBLconst_0(v *Value) bool {
+       // match: (SUBLconst [c] x)
+       // cond: int32(c) == 0
+       // result: x
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(int32(c) == 0) {
+                       break
+               }
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBLconst [c] x)
+       // cond:
+       // result: (ADDLconst [int64(int32(-c))] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               v.reset(OpAMD64ADDLconst)
+               v.AuxInt = int64(int32(-c))
+               v.AddArg(x)
+               return true
+       }
+}
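// SUBLconst never survives: subtracting c is adding -c, and the
// int64(int32(-c)) conversion keeps the AuxInt in the sign-extended
// 32-bit form the 32-bit ops use. A sketch of the corner case, relying
// only on Go's conversion rules:
//
//	c := int64(-1) << 31    // MinInt32, stored sign-extended
//	neg := int64(int32(-c)) // int32 negation wraps: neg == c
//
// The wraparound is harmless because ADDLconst computes mod 2^32 anyway,
// which is why, unlike SUBQconst below, no guard is needed here.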
+func rewriteValueAMD64_OpAMD64SUBLmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (SUBLmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBLmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64SUBLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SUBLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64SUBLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
+       // cond:
+       // result: (SUBL x (MOVLf2i y))
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVSSstore {
+                       break
+               }
+               if v_2.AuxInt != off {
+                       break
+               }
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64SUBL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
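// The first two SUBLmem rules are the address-folding pattern repeated
// for every *mem op in this file: a constant pointer adjustment
// (ADDQconst) or a symbolic address (LEAQ) is absorbed into the
// instruction's own offset and symbol, as long as the combined
// displacement still fits the 32-bit addressing field. Roughly, at the
// assembly level:
//
//	// before: LEAQ off2(base), R; SUBL off1(R), AX
//	// after:  SUBL off1+off2(base), AX
//
// The third rule is store-to-load forwarding across register files: a
// value just stored by MOVSSstore at the same address is reused
// directly, with MOVLf2i reinterpreting the XMM bits as an integer.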
+func rewriteValueAMD64_OpAMD64SUBQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (SUBQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (SUBQconst x [c])
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHRLconst)
-               v.AuxInt = c & 31
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64SUBQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRL x (ADDQconst [c] y))
-       // cond: c & 31 == 0
-       // result: (SHRL x y)
+       // match: (SUBQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (NEGQ (SUBQconst <v.Type> x [c]))
+       for {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
+                       break
+               }
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SUBQconst, v.Type)
+               v0.AuxInt = c
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SUBQ x x)
+       // cond:
+       // result: (MOVQconst [0])
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               if x != v.Args[1] {
+                       break
+               }
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
+               return true
+       }
+       // match: (SUBQ x l:(MOVQload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (SUBQmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVQload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SUBQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
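// SUBQ folds a constant only when it fits the sign-extended 32-bit
// immediate that the instruction encodes; is32Bit, defined elsewhere in
// this package, captures exactly that:
//
//	func is32Bit(n int64) bool { return n == int64(int32(n)) }
//
// The reversed operand order uses c - x == -(x - c), hence the
// NEGQ (SUBQconst x [c]) rewrite, subject to the same encoding limit.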
+func rewriteValueAMD64_OpAMD64SUBQconst_0(v *Value) bool {
+       // match: (SUBQconst [0] x)
+       // cond:
+       // result: x
+       for {
+               if v.AuxInt != 0 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBQconst [c] x)
+       // cond: c != -(1<<31)
+       // result: (ADDQconst [-c] x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c != -(1 << 31)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = -c
+               v.AddArg(x)
+               return true
+       }
+       // match: (SUBQconst (MOVQconst [d]) [c])
+       // cond:
+       // result: (MOVQconst [d-c])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = d - c
+               return true
+       }
+       // match: (SUBQconst (SUBQconst x [d]) [c])
+       // cond: is32Bit(-c-d)
+       // result: (ADDQconst [-c-d] x)
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SUBQconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               if !(is32Bit(-c - d)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = -c - d
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
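// The c != -(1<<31) guard on the ADDQconst rewrite marks the one
// constant whose negation does not fit: -c would be 1<<31, which no
// sign-extended 32-bit immediate can represent, so that SUBQconst is
// kept as-is. A one-line check, assuming the is32Bit helper above:
//
//	is32Bit(-(int64(-1) << 31)) // false: 1<<31 overflows int32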
+func rewriteValueAMD64_OpAMD64SUBQmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (SUBQmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBQmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
                if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&31 == 0) {
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
+               v.reset(OpAMD64SUBQmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SUBQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64SUBQmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (SUBQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
+       // cond:
+       // result: (SUBQ x (MOVQf2i y))
+       for {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVSDstore {
+                       break
+               }
+               if v_2.AuxInt != off {
+                       break
+               }
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64SUBQ)
                v.AddArg(x)
-               v.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
-       // match: (SHRL x (NEGQ <t> (ADDQconst [c] y)))
-       // cond: c & 31 == 0
-       // result: (SHRL x (NEGQ <t> y))
+       return false
+}
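// The MOVSDstore rule above is the 64-bit forwarding case: an integer
// subtract reading bytes that a float store just wrote takes the value
// straight from the XMM register via MOVQf2i rather than through
// memory. The software analogue, as a sketch:
//
//	bits := math.Float64bits(y) // same 64 bits, reinterpreted
//
// MOVQf2i performs that reinterpretation as a register-to-register move.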
+func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool {
+       // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (SUBSDmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVSDload {
+                       break
+               }
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64SUBSDmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       return false
+}
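// As with the integer ops, a float load feeding the subtract folds into
// the memory-operand form once canMergeLoad shows the load can safely
// move into the subtract (no other uses, no conflicting memory ops in
// between) and clobber marks it dead. Roughly, at the assembly level:
//
//	// before: MOVSS off(ptr), X1; SUBSS X1, X0
//	// after:  SUBSS off(ptr), X0
//
// one instruction and one register saved.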
+func rewriteValueAMD64_OpAMD64SUBSDmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (SUBSDmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBSDmem [off1+off2] {sym} val base mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGQ {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&31 == 0) {
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.reset(OpAMD64SUBSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (SHRL x (ANDQconst [c] y))
-       // cond: c & 31 == 31
-       // result: (SHRL x y)
+       // match: (SUBSDmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SUBSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDQconst {
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&31 == 31) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64SUBSDmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (SHRL x (NEGQ <t> (ANDQconst [c] y)))
-       // cond: c & 31 == 31
-       // result: (SHRL x (NEGQ <t> y))
+       // match: (SUBSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _))
+       // cond:
+       // result: (SUBSD x (MOVQi2f y))
        for {
-               _ = v.Args[1]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGQ {
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVQstore {
                        break
                }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ANDQconst {
+               if v_2.AuxInt != off {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&31 == 31) {
+               if v_2.Aux != sym {
                        break
                }
-               v.reset(OpAMD64SHRL)
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64SUBSD)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQi2f, typ.Float64)
                v0.AddArg(y)
                v.AddArg(v0)
                return true
        }
-       // match: (SHRL x (ADDLconst [c] y))
-       // cond: c & 31 == 0
-       // result: (SHRL x y)
+       return false
+}
+func rewriteValueAMD64_OpAMD64SUBSS_0(v *Value) bool {
+       // match: (SUBSS x l:(MOVSSload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (SUBSSmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDLconst {
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVSSload {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&31 == 0) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
+               v.reset(OpAMD64SUBSSmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
-               v.AddArg(y)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (SHRL x (NEGL <t> (ADDLconst [c] y)))
-       // cond: c & 31 == 0
-       // result: (SHRL x (NEGL <t> y))
+       return false
+}
+func rewriteValueAMD64_OpAMD64SUBSSmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (SUBSSmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (SUBSSmem [off1+off2] {sym} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGL {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ADDLconst {
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&31 == 0) {
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.reset(OpAMD64SUBSSmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (SHRL x (ANDLconst [c] y))
-       // cond: c & 31 == 31
-       // result: (SHRL x y)
+       // match: (SUBSSmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (SUBSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDLconst {
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&31 == 31) {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64SHRL)
-               v.AddArg(x)
-               v.AddArg(y)
+               v.reset(OpAMD64SUBSSmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (SHRL x (NEGL <t> (ANDLconst [c] y)))
-       // cond: c & 31 == 31
-       // result: (SHRL x (NEGL <t> y))
+       // match: (SUBSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _))
+       // cond:
+       // result: (SUBSS x (MOVLi2f y))
        for {
-               _ = v.Args[1]
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGL {
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVLstore {
                        break
                }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ANDLconst {
+               if v_2.AuxInt != off {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&31 == 31) {
+               if v_2.Aux != sym {
                        break
                }
-               v.reset(OpAMD64SHRL)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
-               v0.AddArg(y)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SHRLconst_0(v *Value) bool {
-       // match: (SHRLconst x [0])
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 0 {
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               y := v_2.Args[1]
+               v.reset(OpAMD64SUBSS)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLi2f, typ.Float32)
+               v0.AddArg(y)
+               v.AddArg(v0)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRQ_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64TESTB_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SHRQ x (MOVQconst [c]))
+       // match: (TESTB (MOVLconst [c]) x)
        // cond:
-       // result: (SHRQconst [c&63] x)
+       // result: (TESTBconst [c] x)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64SHRQconst)
-               v.AuxInt = c & 63
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64TESTBconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRQ x (MOVLconst [c]))
+       // match: (TESTB x (MOVLconst [c]))
        // cond:
-       // result: (SHRQconst [c&63] x)
+       // result: (TESTBconst [c] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
@@ -41843,366 +45794,380 @@ func rewriteValueAMD64_OpAMD64SHRQ_0(v *Value) bool {
                        break
                }
                c := v_1.AuxInt
-               v.reset(OpAMD64SHRQconst)
-               v.AuxInt = c & 63
+               v.reset(OpAMD64TESTBconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRQ x (ADDQconst [c] y))
-       // cond: c & 63 == 0
-       // result: (SHRQ x y)
+       // match: (TESTB l:(MOVBload {sym} [off] ptr mem) l2)
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVBload {
                        break
                }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&63 == 0) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               l2 := v.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg(x)
-               v.AddArg(y)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (SHRQ x (NEGQ <t> (ADDQconst [c] y)))
-       // cond: c & 63 == 0
-       // result: (SHRQ x (NEGQ <t> y))
+       // match: (TESTB l2 l:(MOVBload {sym} [off] ptr mem))
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGQ {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ADDQconst {
+               l2 := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVBload {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&63 == 0) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
-               v0.AddArg(y)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
+               v.reset(OpCopy)
                v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (SHRQ x (ANDQconst [c] y))
-       // cond: c & 63 == 63
-       // result: (SHRQ x y)
+       return false
+}
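// TESTB x x sets flags according to x == 0, so when x is a byte load
// whose only uses are the two TESTB operands (hence l.Uses == 2), the
// load and the test fuse into one memory compare against zero, built in
// the load's block (the @l.Block form). Roughly:
//
//	// before: MOVBLZX off(ptr), AX; TESTB AX, AX
//	// after:  CMPB $0, off(ptr)
//
// the typical shape of branching on a bool read from memory.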
+func rewriteValueAMD64_OpAMD64TESTBconst_0(v *Value) bool {
+       // match: (TESTBconst [-1] x)
+       // cond:
+       // result: (TESTB x x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&63 == 63) {
+               if v.AuxInt != -1 {
                        break
                }
-               v.reset(OpAMD64SHRQ)
+               x := v.Args[0]
+               v.reset(OpAMD64TESTB)
+               v.AddArg(x)
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (SHRQ x (NEGQ <t> (ANDQconst [c] y)))
-       // cond: c & 63 == 63
-       // result: (SHRQ x (NEGQ <t> y))
+       return false
+}
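// Testing against an all-ones mask tests the value itself, since
// x & -1 == x, so the immediate form reverts to the two-register TESTB,
// avoiding the immediate operand. The identity, as a line of Go:
//
//	var x int8 = 42
//	_ = x&-1 == x // always true; the flags of TESTB x,x match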
+func rewriteValueAMD64_OpAMD64TESTL_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (TESTL (MOVLconst [c]) x)
+       // cond:
+       // result: (TESTLconst [c] x)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGQ {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ANDQconst {
-                       break
-               }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&63 == 63) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               v.reset(OpAMD64SHRQ)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64TESTLconst)
+               v.AuxInt = c
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
-               v0.AddArg(y)
-               v.AddArg(v0)
                return true
        }
-       // match: (SHRQ x (ADDLconst [c] y))
-       // cond: c & 63 == 0
-       // result: (SHRQ x y)
+       // match: (TESTL x (MOVLconst [c]))
+       // cond:
+       // result: (TESTLconst [c] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDLconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&63 == 0) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
+               v.reset(OpAMD64TESTLconst)
+               v.AuxInt = c
                v.AddArg(x)
-               v.AddArg(y)
                return true
        }
-       // match: (SHRQ x (NEGL <t> (ADDLconst [c] y)))
-       // cond: c & 63 == 0
-       // result: (SHRQ x (NEGL <t> y))
+       // match: (TESTL l:(MOVLload {sym} [off] ptr mem) l2)
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGL {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ADDLconst {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&63 == 0) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               l2 := v.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
-               v0.AddArg(y)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
+               v.reset(OpCopy)
                v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (SHRQ x (ANDLconst [c] y))
-       // cond: c & 63 == 63
-       // result: (SHRQ x y)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ANDLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               y := v_1.Args[0]
-               if !(c&63 == 63) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg(x)
-               v.AddArg(y)
-               return true
-       }
-       // match: (SHRQ x (NEGL <t> (ANDLconst [c] y)))
-       // cond: c & 63 == 63
-       // result: (SHRQ x (NEGL <t> y))
+       // match: (TESTL l2 l:(MOVLload {sym} [off] ptr mem))
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64NEGL {
-                       break
-               }
-               t := v_1.Type
-               v_1_0 := v_1.Args[0]
-               if v_1_0.Op != OpAMD64ANDLconst {
+               l2 := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               c := v_1_0.AuxInt
-               y := v_1_0.Args[0]
-               if !(c&63 == 63) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t)
-               v0.AddArg(y)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
+               v.reset(OpCopy)
                v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
        return false
 }
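// TESTL is commutative, and the rule generator expands each commutative
// match into both operand orders, which is why every constant and load
// pattern in these TEST functions appears twice. Each pair presumably
// comes from a single rules-file entry of roughly this shape:
//
//	// (TESTL (MOVLconst [c]) x) -> (TESTLconst [c] x)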
-func rewriteValueAMD64_OpAMD64SHRQconst_0(v *Value) bool {
-       // match: (SHRQconst x [0])
+func rewriteValueAMD64_OpAMD64TESTLconst_0(v *Value) bool {
+       // match: (TESTLconst [-1] x)
        // cond:
-       // result: x
+       // result: (TESTL x x)
        for {
-               if v.AuxInt != 0 {
+               if v.AuxInt != -1 {
                        break
                }
                x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpAMD64TESTL)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRW_0(v *Value) bool {
-       // match: (SHRW x (MOVQconst [c]))
-       // cond: c&31 < 16
-       // result: (SHRWconst [c&31] x)
+func rewriteValueAMD64_OpAMD64TESTQ_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       // match: (TESTQ (MOVQconst [c]) x)
+       // cond: is32Bit(c)
+       // result: (TESTQconst [c] x)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               c := v_1.AuxInt
-               if !(c&31 < 16) {
+               c := v_0.AuxInt
+               x := v.Args[1]
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpAMD64SHRWconst)
-               v.AuxInt = c & 31
+               v.reset(OpAMD64TESTQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRW x (MOVLconst [c]))
-       // cond: c&31 < 16
-       // result: (SHRWconst [c&31] x)
+       // match: (TESTQ x (MOVQconst [c]))
+       // cond: is32Bit(c)
+       // result: (TESTQconst [c] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
                c := v_1.AuxInt
-               if !(c&31 < 16) {
+               if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpAMD64SHRWconst)
-               v.AuxInt = c & 31
+               v.reset(OpAMD64TESTQconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SHRW _ (MOVQconst [c]))
-       // cond: c&31 >= 16
-       // result: (MOVLconst [0])
+       // match: (TESTQ l:(MOVQload {sym} [off] ptr mem) l2)
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               c := v_1.AuxInt
-               if !(c&31 >= 16) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               l2 := v.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (SHRW _ (MOVLconst [c]))
-       // cond: c&31 >= 16
-       // result: (MOVLconst [0])
+       // match: (TESTQ l2 l:(MOVQload {sym} [off] ptr mem))
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               l2 := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
-               c := v_1.AuxInt
-               if !(c&31 >= 16) {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SHRWconst_0(v *Value) bool {
-       // match: (SHRWconst x [0])
+func rewriteValueAMD64_OpAMD64TESTQconst_0(v *Value) bool {
+       // match: (TESTQconst [-1] x)
        // cond:
-       // result: x
+       // result: (TESTQ x x)
        for {
-               if v.AuxInt != 0 {
+               if v.AuxInt != -1 {
                        break
                }
                x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpAMD64TESTQ)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SUBL_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64TESTW_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (SUBL x (MOVLconst [c]))
+       // match: (TESTW (MOVLconst [c]) x)
        // cond:
-       // result: (SUBLconst x [c])
+       // result: (TESTWconst [c] x)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64SUBLconst)
+               c := v_0.AuxInt
+               x := v.Args[1]
+               v.reset(OpAMD64TESTWconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SUBL (MOVLconst [c]) x)
+       // match: (TESTW x (MOVLconst [c]))
        // cond:
-       // result: (NEGL (SUBLconst <v.Type> x [c]))
+       // result: (TESTWconst [c] x)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64NEGL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SUBLconst, v.Type)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
+               c := v_1.AuxInt
+               v.reset(OpAMD64TESTWconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (SUBL x x)
-       // cond:
-       // result: (MOVLconst [0])
+       // match: (TESTW l:(MOVWload {sym} [off] ptr mem) l2)
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVWload {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               l2 := v.Args[1]
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+                       break
+               }
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
-       // match: (SUBL x l:(MOVLload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (SUBLmem x [off] {sym} ptr mem)
+       // match: (TESTW l2 l:(MOVWload {sym} [off] ptr mem))
+       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
+       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(0,off)] ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
+               l2 := v.Args[0]
                l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               if l.Op != OpAMD64MOVWload {
                        break
                }
                off := l.AuxInt
@@ -42210,54 +46175,70 @@ func rewriteValueAMD64_OpAMD64SUBL_0(v *Value) bool {
                _ = l.Args[1]
                ptr := l.Args[0]
                mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SUBLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               b = l.Block
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
+               v.reset(OpCopy)
+               v.AddArg(v0)
+               v0.AuxInt = makeValAndOff(0, off)
+               v0.Aux = sym
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SUBLconst_0(v *Value) bool {
-       // match: (SUBLconst [c] x)
-       // cond: int32(c) == 0
-       // result: x
+func rewriteValueAMD64_OpAMD64TESTWconst_0(v *Value) bool {
+       // match: (TESTWconst [-1] x)
+       // cond:
+       // result: (TESTW x x)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(int32(c) == 0) {
+               if v.AuxInt != -1 {
                        break
                }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               x := v.Args[0]
+               v.reset(OpAMD64TESTW)
+               v.AddArg(x)
                v.AddArg(x)
                return true
        }
-       // match: (SUBLconst [c] x)
-       // cond:
-       // result: (ADDLconst [int64(int32(-c))] x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64XADDLlock_0(v *Value) bool {
+       // match: (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XADDLlock [off1+off2] {sym} val ptr mem)
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               v.reset(OpAMD64ADDLconst)
-               v.AuxInt = int64(int32(-c))
-               v.AddArg(x)
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XADDLlock)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAMD64SUBLmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (SUBLmem [off1] {sym} val (ADDQconst [off2] base) mem)
+func rewriteValueAMD64_OpAMD64XADDQlock_0(v *Value) bool {
+       // match: (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
        // cond: is32Bit(off1+off2)
-       // result: (SUBLmem [off1+off2] {sym} val base mem)
+       // result: (XADDQlock [off1+off2] {sym} val ptr mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -42268,22 +46249,51 @@ func rewriteValueAMD64_OpAMD64SUBLmem_0(v *Value) bool {
                        break
                }
                off2 := v_1.AuxInt
-               base := v_1.Args[0]
+               ptr := v_1.Args[0]
                mem := v.Args[2]
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64SUBLmem)
+               v.reset(OpAMD64XADDQlock)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(val)
-               v.AddArg(base)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (SUBLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SUBLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64XCHGL_0(v *Value) bool {
+       // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XCHGL [off1+off2] {sym} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XCHGL)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+       // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -42295,299 +46305,325 @@ func rewriteValueAMD64_OpAMD64SUBLmem_0(v *Value) bool {
                }
                off2 := v_1.AuxInt
                sym2 := v_1.Aux
-               base := v_1.Args[0]
+               ptr := v_1.Args[0]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
                        break
                }
-               v.reset(OpAMD64SUBLmem)
+               v.reset(OpAMD64XCHGL)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(val)
-               v.AddArg(base)
+               v.AddArg(ptr)
                v.AddArg(mem)
                return true
        }
-       // match: (SUBLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
-       // cond:
-       // result: (SUBL x (MOVLf2i y))
+       return false
+}
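// XCHG does fold LEAQ addresses, but with one extra condition:
// ptr.Op != OpSB excludes the static-base pseudo-register, so an
// address formed directly off a global symbol keeps its LEAQ instead of
// merging into the exchange. Only the mechanical condition is visible
// in this generated file; sketched:
//
//	// folded:     XCHGL AX, off1+off2(ptr)     (ptr not SB)
//	// not folded: LEAQ sym(SB), R; XCHGL AX, (R)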
+func rewriteValueAMD64_OpAMD64XCHGQ_0(v *Value) bool {
+       // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XCHGQ [off1+off2] {sym} val ptr mem)
        for {
-               off := v.AuxInt
+               off1 := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSSstore {
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               if v_2.AuxInt != off {
+               off2 := v_1.AuxInt
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
                        break
                }
-               if v_2.Aux != sym {
+               v.reset(OpAMD64XCHGQ)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+       // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+       // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       for {
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               ptr := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64SUBL)
-               v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.reset(OpAMD64XCHGQ)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
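
All of these folds are guarded by `is32Bit(off1+off2)`, since x86-64 addressing-mode displacements are signed 32-bit immediates. The helper in rewrite.go amounts to the following (reproduced as a sketch; see rewrite.go for the authoritative definition):

    // is32Bit reports whether n can be represented as a signed 32-bit integer.
    func is32Bit(n int64) bool {
        return n == int64(int32(n))
    }
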
-func rewriteValueAMD64_OpAMD64SUBQ_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (SUBQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (SUBQconst x [c])
+func rewriteValueAMD64_OpAMD64XORL_0(v *Value) bool {
+       // match: (XORL x (MOVLconst [c]))
+       // cond:
+       // result: (XORLconst [c] x)
        for {
                _ = v.Args[1]
                x := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               if v_1.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_1.AuxInt
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64SUBQconst)
+               v.reset(OpAMD64XORLconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SUBQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (NEGQ (SUBQconst <v.Type> x [c]))
+       // match: (XORL (MOVLconst [c]) x)
+       // cond:
+       // result: (XORLconst [c] x)
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
                c := v_0.AuxInt
                x := v.Args[1]
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SUBQconst, v.Type)
-               v0.AuxInt = c
-               v0.AddArg(x)
-               v.AddArg(v0)
+               v.reset(OpAMD64XORLconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (SUBQ x x)
-       // cond:
-       // result: (MOVQconst [0])
+       // match: (XORL (SHLLconst x [c]) (SHRLconst x [d]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
                        break
                }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (SUBQ x l:(MOVQload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (SUBQmem x [off] {sym} ptr mem)
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRLconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64SUBQmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SUBQconst_0(v *Value) bool {
-       // match: (SUBQconst [0] x)
-       // cond:
-       // result: x
-       for {
-               if v.AuxInt != 0 {
+               if !(d == 32-c) {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (SUBQconst [c] x)
-       // cond: c != -(1<<31)
-       // result: (ADDQconst [-c] x)
+       // match: (XORL (SHRLconst x [d]) (SHLLconst x [c]))
+       // cond: d==32-c
+       // result: (ROLLconst x [c])
        for {
-               c := v.AuxInt
-               x := v.Args[0]
-               if !(c != -(1 << 31)) {
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRLconst {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = -c
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLLconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
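
This pair of rules (one per operand order, since the XOR arguments may arrive either way) recognizes the shift-and-xor spelling of a rotate. Roughly, at the source level:

    func rotl5(x uint32) uint32 {
        // Compiles to a single ROLL $5 instead of SHLL+SHRL+XORL.
        return x<<5 ^ x>>(32-5)
    }

The same recognition exists for the `|` and `+` spellings under ORL and ADDL.
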
-       // match: (SUBQconst (MOVQconst [d]) [c])
-       // cond:
-       // result: (MOVQconst [d-c])
+       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
-               c := v.AuxInt
+               t := v.Type
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = d - c
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (SUBQconst (SUBQconst x [d]) [c])
-       // cond: is32Bit(-c-d)
-       // result: (ADDQconst [-c-d] x)
+       // match: (XORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
+       // cond: d==16-c && c < 16 && t.Size() == 2
+       // result: (ROLWconst x [c])
        for {
-               c := v.AuxInt
+               t := v.Type
+               _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SUBQconst {
+               if v_0.Op != OpAMD64SHRWconst {
                        break
                }
                d := v_0.AuxInt
                x := v_0.Args[0]
-               if !(is32Bit(-c - d)) {
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = -c - d
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 16-c && c < 16 && t.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = c
                v.AddArg(x)
                return true
        }
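
The 16-bit form (and the 8-bit form below) carries an extra guard: the shifts execute in 32-bit registers, so the pattern is a true ROLW/ROLB only when the value's type is genuinely 2 (or 1) bytes wide, hence `t.Size() == 2`, and the count stays inside that width (`c < 16`). For example:

    func rotl3(x uint16) uint16 {
        return x<<3 ^ x>>(16-3) // ROLW $3; sound only because x is uint16
    }
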
-       return false
-}
-func rewriteValueAMD64_OpAMD64SUBQmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (SUBQmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SUBQmem [off1+off2] {sym} val base mem)
+       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
+       // cond: d==8-c && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLLconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v_1.Op != OpAMD64SHRBconst {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64SUBQmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (SUBQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SUBQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (XORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
+       // cond: d==8-c && c < 8 && t.Size() == 1
+       // result: (ROLBconst x [c])
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHRBconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               if v_1.Op != OpAMD64SHLLconst {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               v.reset(OpAMD64SUBQmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               if !(d == 8-c && c < 8 && t.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64ROLBconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       // match: (SUBQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
+       // match: (XORL x x)
        // cond:
-       // result: (SUBQ x (MOVQf2i y))
+       // result: (MOVLconst [0])
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
+               _ = v.Args[1]
                x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSDstore {
-                       break
-               }
-               if v_2.AuxInt != off {
+               if x != v.Args[1] {
                        break
                }
-               if v_2.Aux != sym {
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = 0
+               return true
+       }
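
`x ^ x` is folded to a zero constant here; the amd64 back end in turn emits a zero MOVLconst as a self-XOR, so `y ^= y` and `y = 0` end up as the same instruction:

    var y uint32
    y ^= y // (XORL y y) becomes (MOVLconst [0])
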
+       // match: (XORL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (XORLmem x [off] {sym} ptr mem)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               off := l.AuxInt
+               sym := l.Aux
+               _ = l.Args[1]
+               ptr := l.Args[0]
+               mem := l.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64SUBQ)
+               v.reset(OpAMD64XORLmem)
+               v.AuxInt = off
+               v.Aux = sym
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
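
The trailing rules absorb a single-use load into the XOR itself (`canMergeLoad` checks the merge is safe; `clobber` retires the load). The mirrored operand order lands in the `..._10` overflow function below, because the generator splits each op's rules into functions of ten. At the source level:

    func f(x uint32, p *uint32) uint32 {
        return x ^ *p // one XORL from memory instead of MOVL + XORL
    }
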
-func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool {
-       // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem))
+func rewriteValueAMD64_OpAMD64XORL_10(v *Value) bool {
+       // match: (XORL l:(MOVLload [off] {sym} ptr mem) x)
        // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (SUBSDmem x [off] {sym} ptr mem)
+       // result: (XORLmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVSDload {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVLload {
                        break
                }
                off := l.AuxInt
@@ -42595,10 +46631,11 @@ func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool {
                _ = l.Args[1]
                ptr := l.Args[0]
                mem := l.Args[1]
+               x := v.Args[1]
                if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               v.reset(OpAMD64SUBSDmem)
+               v.reset(OpAMD64XORLmem)
                v.AuxInt = off
                v.Aux = sym
                v.AddArg(x)
@@ -42608,544 +46645,421 @@ func rewriteValueAMD64_OpAMD64SUBSD_0(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64SUBSDmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (SUBSDmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SUBSDmem [off1+off2] {sym} val base mem)
+func rewriteValueAMD64_OpAMD64XORLconst_0(v *Value) bool {
+       // match: (XORLconst [1] (SETNE x))
+       // cond:
+       // result: (SETEQ x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v.AuxInt != 1 {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETNE {
                        break
                }
-               v.reset(OpAMD64SUBSDmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETEQ)
+               v.AddArg(x)
                return true
        }
-       // match: (SUBSDmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SUBSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (XORLconst [1] (SETEQ x))
+       // cond:
+       // result: (SETNE x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               if v.AuxInt != 1 {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETEQ {
                        break
                }
-               v.reset(OpAMD64SUBSDmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETNE)
+               v.AddArg(x)
                return true
        }
-       // match: (SUBSDmem x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _))
+       // match: (XORLconst [1] (SETL x))
        // cond:
-       // result: (SUBSD x (MOVQi2f y))
+       // result: (SETGE x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVQstore {
+               if v.AuxInt != 1 {
                        break
                }
-               if v_2.AuxInt != off {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETL {
                        break
                }
-               if v_2.Aux != sym {
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETGE)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETGE x))
+       // cond:
+       // result: (SETL x)
+       for {
+               if v.AuxInt != 1 {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETGE {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64SUBSD)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETL)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQi2f, typ.Float64)
-               v0.AddArg(y)
-               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SUBSS_0(v *Value) bool {
-       // match: (SUBSS x l:(MOVSSload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (SUBSSmem x [off] {sym} ptr mem)
+       // match: (XORLconst [1] (SETLE x))
+       // cond:
+       // result: (SETG x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVSSload {
+               if v.AuxInt != 1 {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETLE {
                        break
                }
-               v.reset(OpAMD64SUBSSmem)
-               v.AuxInt = off
-               v.Aux = sym
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETG)
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64SUBSSmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (SUBSSmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (SUBSSmem [off1+off2] {sym} val base mem)
+       // match: (XORLconst [1] (SETG x))
+       // cond:
+       // result: (SETLE x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v.AuxInt != 1 {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETG {
                        break
                }
-               v.reset(OpAMD64SUBSSmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETLE)
+               v.AddArg(x)
                return true
        }
-       // match: (SUBSSmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (SUBSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (XORLconst [1] (SETB x))
+       // cond:
+       // result: (SETAE x)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               if v.AuxInt != 1 {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETB {
                        break
                }
-               v.reset(OpAMD64SUBSSmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETAE)
+               v.AddArg(x)
                return true
        }
-       // match: (SUBSSmem x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _))
+       // match: (XORLconst [1] (SETAE x))
        // cond:
-       // result: (SUBSS x (MOVLi2f y))
+       // result: (SETB x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVLstore {
+               if v.AuxInt != 1 {
                        break
                }
-               if v_2.AuxInt != off {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETAE {
                        break
                }
-               if v_2.Aux != sym {
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETB)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETBE x))
+       // cond:
+       // result: (SETA x)
+       for {
+               if v.AuxInt != 1 {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SETBE {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64SUBSS)
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETA)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLi2f, typ.Float32)
-               v0.AddArg(y)
-               v.AddArg(v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64TESTB_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (TESTB (MOVLconst [c]) x)
+       // match: (XORLconst [1] (SETA x))
        // cond:
-       // result: (TESTBconst [c] x)
+       // result: (SETBE x)
        for {
-               _ = v.Args[1]
+               if v.AuxInt != 1 {
+                       break
+               }
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64SETA {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64TESTBconst)
-               v.AuxInt = c
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETBE)
                v.AddArg(x)
                return true
        }
-       // match: (TESTB x (MOVLconst [c]))
+       return false
+}
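
The ten `XORLconst [1] (SETcc x)` rules in this function implement boolean negation by inverting the condition code instead of materializing the flag byte and XORing it:

    func ge(a, b int) bool {
        return !(a < b) // SETL + XORL $1 collapses to a single SETGE
    }
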
+func rewriteValueAMD64_OpAMD64XORLconst_10(v *Value) bool {
+       // match: (XORLconst [c] (XORLconst [d] x))
        // cond:
-       // result: (TESTBconst [c] x)
+       // result: (XORLconst [c ^ d] x)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64XORLconst {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64TESTBconst)
-               v.AuxInt = c
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64XORLconst)
+               v.AuxInt = c ^ d
                v.AddArg(x)
                return true
        }
-       // match: (TESTB l:(MOVBload {sym} [off] ptr mem) l2)
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORLconst [c] x)
+       // cond: int32(c)==0
+       // result: x
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVBload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               l2 := v.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(int32(c) == 0) {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
                v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.Type = x.Type
+               v.AddArg(x)
                return true
        }
-       // match: (TESTB l2 l:(MOVBload {sym} [off] ptr mem))
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPBconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORLconst [c] (MOVLconst [d]))
+       // cond:
+       // result: (MOVLconst [c^d])
        for {
-               _ = v.Args[1]
-               l2 := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVBload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVLconst {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPBconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = c ^ d
                return true
        }
        return false
 }
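
XORLconst also folds stacked constants and drops no-ops: nested constant XORs combine with `^`, and a constant that is zero in its low 32 bits (`int32(c) == 0`) disappears. Worked example:

    func f(x uint32) uint32 {
        return (x ^ 0x0f) ^ 0xff // 0x0f ^ 0xff = 0xf0, so a single XORL $0xf0
    }
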
-func rewriteValueAMD64_OpAMD64TESTL_0(v *Value) bool {
+func rewriteValueAMD64_OpAMD64XORLmem_0(v *Value) bool {
        b := v.Block
        _ = b
-       // match: (TESTL (MOVLconst [c]) x)
-       // cond:
-       // result: (TESTLconst [c] x)
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (XORLmem [off1] {sym} val (ADDQconst [off2] base) mem)
+       // cond: is32Bit(off1+off2)
+       // result: (XORLmem [off1+off2] {sym} val base mem)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               off1 := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64ADDQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64TESTLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               off2 := v_1.AuxInt
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1 + off2)) {
+                       break
+               }
+               v.reset(OpAMD64XORLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = sym
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (TESTL x (MOVLconst [c]))
-       // cond:
-       // result: (TESTLconst [c] x)
+       // match: (XORLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (XORLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
-               _ = v.Args[1]
-               x := v.Args[0]
+               off1 := v.AuxInt
+               sym1 := v.Aux
+               _ = v.Args[2]
+               val := v.Args[0]
                v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if v_1.Op != OpAMD64LEAQ {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64TESTLconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               off2 := v_1.AuxInt
+               sym2 := v_1.Aux
+               base := v_1.Args[0]
+               mem := v.Args[2]
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+                       break
+               }
+               v.reset(OpAMD64XORLmem)
+               v.AuxInt = off1 + off2
+               v.Aux = mergeSym(sym1, sym2)
+               v.AddArg(val)
+               v.AddArg(base)
+               v.AddArg(mem)
                return true
        }
-       // match: (TESTL l:(MOVLload {sym} [off] ptr mem) l2)
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
+       // cond:
+       // result: (XORL x (MOVLf2i y))
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
+               off := v.AuxInt
+               sym := v.Aux
+               _ = v.Args[2]
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVSSstore {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               l2 := v.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               if v_2.AuxInt != off {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (TESTL l2 l:(MOVLload {sym} [off] ptr mem))
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPLconstmem {sym} [makeValAndOff(0,off)] ptr mem)
-       for {
-               _ = v.Args[1]
-               l2 := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
+               if v_2.Aux != sym {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconstmem, types.TypeFlags)
-               v.reset(OpCopy)
+               y := v_2.Args[1]
+               v.reset(OpAMD64XORL)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
+               v0.AddArg(y)
                v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
                return true
        }
        return false
 }
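
The last XORLmem rule is store-to-load forwarding across register files: when the memory operand was just written from an XMM register (a MOVSSstore to the same address), the bits are pulled back with MOVLf2i and XORed in registers rather than reloaded through memory. A plausible trigger, since bit reinterpretation round-trips through a stack slot (hypothetical example):

    import "math"

    func xorbits(x uint32, f float32) uint32 {
        return x ^ math.Float32bits(f)
    }
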
-func rewriteValueAMD64_OpAMD64TESTQ_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (TESTQ (MOVQconst [c]) x)
+func rewriteValueAMD64_OpAMD64XORQ_0(v *Value) bool {
+       // match: (XORQ x (MOVQconst [c]))
        // cond: is32Bit(c)
-       // result: (TESTQconst [c] x)
+       // result: (XORQconst [c] x)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
+               c := v_1.AuxInt
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpAMD64TESTQconst)
+               v.reset(OpAMD64XORQconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (TESTQ x (MOVQconst [c]))
+       // match: (XORQ (MOVQconst [c]) x)
        // cond: is32Bit(c)
-       // result: (TESTQconst [c] x)
+       // result: (XORQconst [c] x)
        for {
                _ = v.Args[1]
-               x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               c := v_1.AuxInt
+               c := v_0.AuxInt
+               x := v.Args[1]
                if !(is32Bit(c)) {
                        break
                }
-               v.reset(OpAMD64TESTQconst)
+               v.reset(OpAMD64XORQconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (TESTQ l:(MOVQload {sym} [off] ptr mem) l2)
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORQ (SHLQconst x [c]) (SHRQconst x [d]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64SHLQconst {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               l2 := v.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHRQconst {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               return true
-       }
-       // match: (TESTQ l2 l:(MOVQload {sym} [off] ptr mem))
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPQconstmem {sym} [makeValAndOff(0,off)] ptr mem)
-       for {
-               _ = v.Args[1]
-               l2 := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               if !(d == 64-c) {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64ROLQconst)
+               v.AuxInt = c
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64TESTW_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       // match: (TESTW (MOVLconst [c]) x)
-       // cond:
-       // result: (TESTWconst [c] x)
+       // match: (XORQ (SHRQconst x [d]) (SHLQconst x [c]))
+       // cond: d==64-c
+       // result: (ROLQconst x [c])
        for {
                _ = v.Args[1]
                v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
+               if v_0.Op != OpAMD64SHRQconst {
                        break
                }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64TESTWconst)
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpAMD64SHLQconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpAMD64ROLQconst)
                v.AuxInt = c
                v.AddArg(x)
                return true
        }
-       // match: (TESTW x (MOVLconst [c]))
+       // match: (XORQ x x)
        // cond:
-       // result: (TESTWconst [c] x)
+       // result: (MOVQconst [0])
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
+               if x != v.Args[1] {
                        break
                }
-               c := v_1.AuxInt
-               v.reset(OpAMD64TESTWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = 0
                return true
        }
-       // match: (TESTW l:(MOVWload {sym} [off] ptr mem) l2)
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORQ x l:(MOVQload [off] {sym} ptr mem))
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (XORQmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVWload {
+               x := v.Args[0]
+               l := v.Args[1]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
                off := l.AuxInt
@@ -43153,28 +47067,24 @@ func rewriteValueAMD64_OpAMD64TESTW_0(v *Value) bool {
                _ = l.Args[1]
                ptr := l.Args[0]
                mem := l.Args[1]
-               l2 := v.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64XORQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (TESTW l2 l:(MOVWload {sym} [off] ptr mem))
-       // cond: l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l)
-       // result: @l.Block (CMPWconstmem {sym} [makeValAndOff(0,off)] ptr mem)
+       // match: (XORQ l:(MOVQload [off] {sym} ptr mem) x)
+       // cond: canMergeLoad(v, l, x) && clobber(l)
+       // result: (XORQmem x [off] {sym} ptr mem)
        for {
                _ = v.Args[1]
-               l2 := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVWload {
+               l := v.Args[0]
+               if l.Op != OpAMD64MOVQload {
                        break
                }
                off := l.AuxInt
@@ -43182,83 +47092,74 @@ func rewriteValueAMD64_OpAMD64TESTW_0(v *Value) bool {
                _ = l.Args[1]
                ptr := l.Args[0]
                mem := l.Args[1]
-               if !(l == l2 && l.Uses == 2 && validValAndOff(0, off) && clobber(l)) {
+               x := v.Args[1]
+               if !(canMergeLoad(v, l, x) && clobber(l)) {
                        break
                }
-               b = l.Block
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPWconstmem, types.TypeFlags)
-               v.reset(OpCopy)
-               v.AddArg(v0)
-               v0.AuxInt = makeValAndOff(0, off)
-               v0.Aux = sym
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
+               v.reset(OpAMD64XORQmem)
+               v.AuxInt = off
+               v.Aux = sym
+               v.AddArg(x)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
        return false
 }
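
Both XORQ-with-constant rules are guarded by `is32Bit(c)`: 64-bit ALU instructions accept at most a sign-extended 32-bit immediate, so only such constants become XORQconst and wider ones stay in a register:

    x ^ 0x7fffffff   // XORQ $0x7fffffff, AX
    x ^ 0x123456789a // MOVQ $0x123456789a, CX; XORQ CX, AX
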
-func rewriteValueAMD64_OpAMD64XADDLlock_0(v *Value) bool {
-       // match: (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (XADDLlock [off1+off2] {sym} val ptr mem)
+func rewriteValueAMD64_OpAMD64XORQconst_0(v *Value) bool {
+       // match: (XORQconst [c] (XORQconst [d] x))
+       // cond:
+       // result: (XORQconst [c ^ d] x)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64XORQconst {
                        break
                }
-               v.reset(OpAMD64XADDLlock)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v.reset(OpAMD64XORQconst)
+               v.AuxInt = c ^ d
+               v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64XADDQlock_0(v *Value) bool {
-       // match: (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (XADDQlock [off1+off2] {sym} val ptr mem)
+       // match: (XORQconst [0] x)
+       // cond:
+       // result: x
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               if v.AuxInt != 0 {
                        break
                }
-               off2 := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               x := v.Args[0]
+               v.reset(OpCopy)
+               v.Type = x.Type
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORQconst [c] (MOVQconst [d]))
+       // cond:
+       // result: (MOVQconst [c^d])
+       for {
+               c := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64XADDQlock)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               d := v_0.AuxInt
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = c ^ d
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64XCHGL_0(v *Value) bool {
-       // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem)
+func rewriteValueAMD64_OpAMD64XORQmem_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (XORQmem [off1] {sym} val (ADDQconst [off2] base) mem)
        // cond: is32Bit(off1+off2)
-       // result: (XCHGL [off1+off2] {sym} val ptr mem)
+       // result: (XORQmem [off1+off2] {sym} val base mem)
        for {
                off1 := v.AuxInt
                sym := v.Aux
@@ -43269,22 +47170,22 @@ func rewriteValueAMD64_OpAMD64XCHGL_0(v *Value) bool {
                        break
                }
                off2 := v_1.AuxInt
-               ptr := v_1.Args[0]
+               base := v_1.Args[0]
                mem := v.Args[2]
                if !(is32Bit(off1 + off2)) {
                        break
                }
-               v.reset(OpAMD64XCHGL)
+               v.reset(OpAMD64XORQmem)
                v.AuxInt = off1 + off2
                v.Aux = sym
                v.AddArg(val)
-               v.AddArg(ptr)
+               v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
-       // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       // match: (XORQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
+       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+       // result: (XORQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
        for {
                off1 := v.AuxInt
                sym1 := v.Aux
@@ -43296,1629 +47197,1806 @@ func rewriteValueAMD64_OpAMD64XCHGL_0(v *Value) bool {
                }
                off2 := v_1.AuxInt
                sym2 := v_1.Aux
-               ptr := v_1.Args[0]
+               base := v_1.Args[0]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
                        break
                }
-               v.reset(OpAMD64XCHGL)
+               v.reset(OpAMD64XORQmem)
                v.AuxInt = off1 + off2
                v.Aux = mergeSym(sym1, sym2)
                v.AddArg(val)
-               v.AddArg(ptr)
+               v.AddArg(base)
                v.AddArg(mem)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64XCHGQ_0(v *Value) bool {
-       // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (XCHGQ [off1+off2] {sym} val ptr mem)
+       // match: (XORQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
+       // cond:
+       // result: (XORQ x (MOVQf2i y))
        for {
-               off1 := v.AuxInt
+               off := v.AuxInt
                sym := v.Aux
                _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x := v.Args[0]
+               ptr := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64MOVSDstore {
                        break
                }
-               off2 := v_1.AuxInt
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               if v_2.AuxInt != off {
                        break
                }
-               v.reset(OpAMD64XCHGQ)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               if v_2.Aux != sym {
+                       break
+               }
+               _ = v_2.Args[2]
+               if ptr != v_2.Args[0] {
+                       break
+               }
+               y := v_2.Args[1]
+               v.reset(OpAMD64XORQ)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
+               v0.AddArg(y)
+               v.AddArg(v0)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAdd16_0(v *Value) bool {
+       // match: (Add16 x y)
+       // cond:
+       // result: (ADDL x y)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ADDL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAdd32_0(v *Value) bool {
+       // match: (Add32 x y)
+       // cond:
+       // result: (ADDL x y)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ADDL)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAdd32F_0(v *Value) bool {
+       // match: (Add32F x y)
+       // cond:
+       // result: (ADDSS x y)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ADDSS)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
-       // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+}
+func rewriteValueAMD64_OpAdd64_0(v *Value) bool {
+       // match: (Add64 x y)
+       // cond:
+       // result: (ADDQ x y)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               ptr := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
-                       break
-               }
-               v.reset(OpAMD64XCHGQ)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ADDQ)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORL_0(v *Value) bool {
-       // match: (XORL x (MOVLconst [c]))
+func rewriteValueAMD64_OpAdd64F_0(v *Value) bool {
+       // match: (Add64F x y)
        // cond:
-       // result: (XORLconst [c] x)
+       // result: (ADDSD x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = c
+               y := v.Args[1]
+               v.reset(OpAMD64ADDSD)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL (MOVLconst [c]) x)
+}
+func rewriteValueAMD64_OpAdd8_0(v *Value) bool {
+       // match: (Add8 x y)
        // cond:
-       // result: (XORLconst [c] x)
+       // result: (ADDL x y)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ADDL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL (SHLLconst x [c]) (SHRLconst x [d]))
-       // cond: d==32-c
-       // result: (ROLLconst x [c])
+}
+func rewriteValueAMD64_OpAddPtr_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (AddPtr x y)
+       // cond: config.PtrSize == 8
+       // result: (ADDQ x y)
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRLconst {
-                       break
-               }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 32-c) {
+               x := v.Args[0]
+               y := v.Args[1]
+               if !(config.PtrSize == 8) {
                        break
                }
-               v.reset(OpAMD64ROLLconst)
-               v.AuxInt = c
+               v.reset(OpAMD64ADDQ)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL (SHRLconst x [d]) (SHLLconst x [c]))
-       // cond: d==32-c
-       // result: (ROLLconst x [c])
+       // match: (AddPtr x y)
+       // cond: config.PtrSize == 4
+       // result: (ADDL x y)
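+       // config.PtrSize == 4 corresponds to the amd64p32 port, where pointers
+       // are 32 bits wide even though the registers are 64-bit; ordinary
+       // amd64 is handled by the ADDQ rule above.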
        for {
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRLconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 32-c) {
+               x := v.Args[0]
+               y := v.Args[1]
+               if !(config.PtrSize == 4) {
                        break
                }
-               v.reset(OpAMD64ROLLconst)
-               v.AuxInt = c
+               v.reset(OpAMD64ADDL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRWconst x [d]))
-       // cond: d==16-c && c < 16 && t.Size() == 2
-       // result: (ROLWconst x [c])
+       return false
+}
+func rewriteValueAMD64_OpAddr_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (Addr {sym} base)
+       // cond: config.PtrSize == 8
+       // result: (LEAQ {sym} base)
        for {
-               t := v.Type
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRWconst {
-                       break
-               }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
+               sym := v.Aux
+               base := v.Args[0]
+               if !(config.PtrSize == 8) {
                        break
                }
-               if !(d == 16-c && c < 16 && t.Size() == 2) {
+               v.reset(OpAMD64LEAQ)
+               v.Aux = sym
+               v.AddArg(base)
+               return true
+       }
+       // match: (Addr {sym} base)
+       // cond: config.PtrSize == 4
+       // result: (LEAL {sym} base)
+       for {
+               sym := v.Aux
+               base := v.Args[0]
+               if !(config.PtrSize == 4) {
                        break
                }
-               v.reset(OpAMD64ROLWconst)
-               v.AuxInt = c
-               v.AddArg(x)
+               v.reset(OpAMD64LEAL)
+               v.Aux = sym
+               v.AddArg(base)
                return true
        }
-       // match: (XORL <t> (SHRWconst x [d]) (SHLLconst x [c]))
-       // cond: d==16-c && c < 16 && t.Size() == 2
-       // result: (ROLWconst x [c])
+       return false
+}
+func rewriteValueAMD64_OpAnd16_0(v *Value) bool {
+       // match: (And16 x y)
+       // cond:
+       // result: (ANDL x y)
        for {
-               t := v.Type
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRWconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 16-c && c < 16 && t.Size() == 2) {
-                       break
-               }
-               v.reset(OpAMD64ROLWconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ANDL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL <t> (SHLLconst x [c]) (SHRBconst x [d]))
-       // cond: d==8-c && c < 8 && t.Size() == 1
-       // result: (ROLBconst x [c])
+}
+func rewriteValueAMD64_OpAnd32_0(v *Value) bool {
+       // match: (And32 x y)
+       // cond:
+       // result: (ANDL x y)
        for {
-               t := v.Type
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRBconst {
-                       break
-               }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 8-c && c < 8 && t.Size() == 1) {
-                       break
-               }
-               v.reset(OpAMD64ROLBconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ANDL)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL <t> (SHRBconst x [d]) (SHLLconst x [c]))
-       // cond: d==8-c && c < 8 && t.Size() == 1
-       // result: (ROLBconst x [c])
+}
+func rewriteValueAMD64_OpAnd64_0(v *Value) bool {
+       // match: (And64 x y)
+       // cond:
+       // result: (ANDQ x y)
        for {
-               t := v.Type
                _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRBconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLLconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 8-c && c < 8 && t.Size() == 1) {
-                       break
-               }
-               v.reset(OpAMD64ROLBconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64ANDQ)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL x x)
+}
+func rewriteValueAMD64_OpAnd8_0(v *Value) bool {
+       // match: (And8 x y)
        // cond:
-       // result: (MOVLconst [0])
+       // result: (ANDL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               if x != v.Args[1] {
-                       break
-               }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = 0
+               y := v.Args[1]
+               v.reset(OpAMD64ANDL)
+               v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORL x l:(MOVLload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (XORLmem x [off] {sym} ptr mem)
+}
+func rewriteValueAMD64_OpAndB_0(v *Value) bool {
+       // match: (AndB x y)
+       // cond:
+       // result: (ANDL x y)
        for {
                _ = v.Args[1]
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64XORLmem)
-               v.AuxInt = off
-               v.Aux = sym
+               y := v.Args[1]
+               v.reset(OpAMD64ANDL)
                v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicAdd32_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (AtomicAdd32 ptr val mem)
+       // cond:
+       // result: (AddTupleFirst32 val (XADDLlock val ptr mem))
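+       // LOCK XADD leaves the old memory value in its register operand, but
+       // AtomicAdd must return the new value, so AddTupleFirst32 adds val back
+       // into the data half of the XADDLlock result tuple.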
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64AddTupleFirst32)
+               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicAdd64_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (AtomicAdd64 ptr val mem)
+       // cond:
+       // result: (AddTupleFirst64 val (XADDQlock val ptr mem))
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64AddTupleFirst64)
+               v.AddArg(val)
+               v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicAnd8_0(v *Value) bool {
+       // match: (AtomicAnd8 ptr val mem)
+       // cond:
+       // result: (ANDBlock ptr val mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64ANDBlock)
                v.AddArg(ptr)
+               v.AddArg(val)
                v.AddArg(mem)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORL_10(v *Value) bool {
-       // match: (XORL l:(MOVLload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (XORLmem x [off] {sym} ptr mem)
+func rewriteValueAMD64_OpAtomicCompareAndSwap32_0(v *Value) bool {
+       // match: (AtomicCompareAndSwap32 ptr old new_ mem)
+       // cond:
+       // result: (CMPXCHGLlock ptr old new_ mem)
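+       // Lowers directly to LOCK CMPXCHG; the instruction's requirement that
+       // the expected value live in AX is expressed by the op's register
+       // constraints, not by this rewrite.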
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVLload {
-                       break
-               }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64XORLmem)
-               v.AuxInt = off
-               v.Aux = sym
-               v.AddArg(x)
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64CMPXCHGLlock)
                v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
                v.AddArg(mem)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORLconst_0(v *Value) bool {
-       // match: (XORLconst [1] (SETNE x))
+func rewriteValueAMD64_OpAtomicCompareAndSwap64_0(v *Value) bool {
+       // match: (AtomicCompareAndSwap64 ptr old new_ mem)
        // cond:
-       // result: (SETEQ x)
+       // result: (CMPXCHGQlock ptr old new_ mem)
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETNE {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETEQ)
-               v.AddArg(x)
+               _ = v.Args[3]
+               ptr := v.Args[0]
+               old := v.Args[1]
+               new_ := v.Args[2]
+               mem := v.Args[3]
+               v.reset(OpAMD64CMPXCHGQlock)
+               v.AddArg(ptr)
+               v.AddArg(old)
+               v.AddArg(new_)
+               v.AddArg(mem)
                return true
        }
-       // match: (XORLconst [1] (SETEQ x))
+}
+func rewriteValueAMD64_OpAtomicExchange32_0(v *Value) bool {
+       // match: (AtomicExchange32 ptr val mem)
        // cond:
-       // result: (SETNE x)
+       // result: (XCHGL val ptr mem)
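+       // XCHG with a memory operand is implicitly LOCKed on x86, so a plain
+       // XCHGL already performs an atomic exchange; note that the machine op
+       // takes val before ptr.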
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETEQ {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64XCHGL)
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicExchange64_0(v *Value) bool {
+       // match: (AtomicExchange64 ptr val mem)
+       // cond:
+       // result: (XCHGQ val ptr mem)
+       for {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64XCHGQ)
+               v.AddArg(val)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicLoad32_0(v *Value) bool {
+       // match: (AtomicLoad32 ptr mem)
+       // cond:
+       // result: (MOVLatomicload ptr mem)
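+       // On amd64 an aligned MOVL load is already atomic; MOVLatomicload is a
+       // distinct op, presumably so later rewrites cannot merge or reorder it
+       // the way they can an ordinary load.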
+       for {
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVLatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicLoad64_0(v *Value) bool {
+       // match: (AtomicLoad64 ptr mem)
+       // cond:
+       // result: (MOVQatomicload ptr mem)
+       for {
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
+               return true
+       }
+}
+func rewriteValueAMD64_OpAtomicLoadPtr_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       // match: (AtomicLoadPtr ptr mem)
+       // cond: config.PtrSize == 8
+       // result: (MOVQatomicload ptr mem)
+       for {
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               if !(config.PtrSize == 8) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETNE)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (XORLconst [1] (SETL x))
-       // cond:
-       // result: (SETGE x)
+       // match: (AtomicLoadPtr ptr mem)
+       // cond: config.PtrSize == 4
+       // result: (MOVLatomicload ptr mem)
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETL {
+               _ = v.Args[1]
+               ptr := v.Args[0]
+               mem := v.Args[1]
+               if !(config.PtrSize == 4) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETGE)
-               v.AddArg(x)
+               v.reset(OpAMD64MOVLatomicload)
+               v.AddArg(ptr)
+               v.AddArg(mem)
                return true
        }
-       // match: (XORLconst [1] (SETGE x))
+       return false
+}
+func rewriteValueAMD64_OpAtomicOr8_0(v *Value) bool {
+       // match: (AtomicOr8 ptr val mem)
        // cond:
-       // result: (SETL x)
+       // result: (ORBlock ptr val mem)
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETGE {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETL)
-               v.AddArg(x)
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpAMD64ORBlock)
+               v.AddArg(ptr)
+               v.AddArg(val)
+               v.AddArg(mem)
                return true
        }
-       // match: (XORLconst [1] (SETLE x))
+}
+func rewriteValueAMD64_OpAtomicStore32_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (AtomicStore32 ptr val mem)
        // cond:
-       // result: (SETG x)
+       // result: (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
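+       // An atomic store is an exchange whose old value is discarded: the
+       // implicitly locked XCHGL provides the required memory barrier, and
+       // Select1 extracts just the memory result of the tuple.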
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETLE {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETG)
-               v.AddArg(x)
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
-       // match: (XORLconst [1] (SETG x))
+}
+func rewriteValueAMD64_OpAtomicStore64_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (AtomicStore64 ptr val mem)
        // cond:
-       // result: (SETLE x)
+       // result: (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETG {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETLE)
-               v.AddArg(x)
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
-       // match: (XORLconst [1] (SETB x))
-       // cond:
-       // result: (SETAE x)
+}
+func rewriteValueAMD64_OpAtomicStorePtrNoWB_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       config := b.Func.Config
+       _ = config
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // cond: config.PtrSize == 8
+       // result: (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETB {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(config.PtrSize == 8) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETAE)
-               v.AddArg(x)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
-       // match: (XORLconst [1] (SETAE x))
-       // cond:
-       // result: (SETB x)
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // cond: config.PtrSize == 4
+       // result: (Select1 (XCHGL <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETAE {
+               _ = v.Args[2]
+               ptr := v.Args[0]
+               val := v.Args[1]
+               mem := v.Args[2]
+               if !(config.PtrSize == 4) {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETB)
-               v.AddArg(x)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.BytePtr, types.TypeMem))
+               v0.AddArg(val)
+               v0.AddArg(ptr)
+               v0.AddArg(mem)
+               v.AddArg(v0)
                return true
        }
-       // match: (XORLconst [1] (SETBE x))
+       return false
+}
+func rewriteValueAMD64_OpAvg64u_0(v *Value) bool {
+       // match: (Avg64u x y)
        // cond:
-       // result: (SETA x)
+       // result: (AVGQU x y)
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETBE {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETA)
+               _ = v.Args[1]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpAMD64AVGQU)
                v.AddArg(x)
+               v.AddArg(y)
                return true
        }
-       // match: (XORLconst [1] (SETA x))
+}
+func rewriteValueAMD64_OpBitLen32_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (BitLen32 x)
        // cond:
-       // result: (SETBE x)
+       // result: (BitLen64 (MOVLQZX <typ.UInt64> x))
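+       // Zero-extending to 64 bits cannot change the position of the highest
+       // set bit, so the 32-bit case simply reuses the BitLen64 lowering.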
        for {
-               if v.AuxInt != 1 {
-                       break
-               }
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SETA {
-                       break
-               }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETBE)
-               v.AddArg(x)
+               x := v.Args[0]
+               v.reset(OpBitLen64)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORLconst_10(v *Value) bool {
-       // match: (XORLconst [c] (XORLconst [d] x))
+func rewriteValueAMD64_OpBitLen64_0(v *Value) bool {
+       b := v.Block
+       _ = b
+       typ := &b.Func.Config.Types
+       _ = typ
+       // match: (BitLen64 <t> x)
        // cond:
-       // result: (XORLconst [c ^ d] x)
+       // result: (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
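+       // BSR leaves its destination undefined and sets ZF when the source is
+       // zero, so CMOVQEQ substitutes -1 in that case; after ADDQconst [1] the
+       // result is 0 for a zero input and 1 + the index of the highest set bit
+       // otherwise.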
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64XORLconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = c ^ d
-               v.AddArg(x)
+               t := v.Type
+               x := v.Args[0]
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = 1
+               v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t)
+               v1 := b.NewValue0(v.Pos, OpSelect0, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
+               v3.AuxInt = -1
+               v0.AddArg(v3)
+               v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v5 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v5.AddArg(x)
+               v4.AddArg(v5)
+               v0.AddArg(v4)
+               v.AddArg(v0)
                return true
        }
-       // match: (XORLconst [c] x)
-       // cond: int32(c)==0
-       // result: x
+}
+func rewriteValueAMD64_OpBswap32_0(v *Value) bool {
+       // match: (Bswap32 x)
+       // cond:
+       // result: (BSWAPL x)
        for {
-               c := v.AuxInt
                x := v.Args[0]
-               if !(int32(c) == 0) {
-                       break
-               }
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpAMD64BSWAPL)
                v.AddArg(x)
                return true
        }
-       // match: (XORLconst [c] (MOVLconst [d]))
+}
+func rewriteValueAMD64_OpBswap64_0(v *Value) bool {
+       // match: (Bswap64 x)
        // cond:
-       // result: (MOVLconst [c^d])
+       // result: (BSWAPQ x)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVLconst {
-                       break
-               }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = c ^ d
+               x := v.Args[0]
+               v.reset(OpAMD64BSWAPQ)
+               v.AddArg(x)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORLmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (XORLmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (XORLmem [off1+off2] {sym} val base mem)
+func rewriteValueAMD64_OpCeil_0(v *Value) bool {
+       // match: (Ceil x)
+       // cond:
+       // result: (ROUNDSD [2] x)
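+       // The AuxInt is the SSE4.1 ROUNDSD rounding-mode immediate: 2 selects
+       // round toward +Inf, which is exactly Ceil.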
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
-                       break
-               }
-               v.reset(OpAMD64XORLmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               x := v.Args[0]
+               v.reset(OpAMD64ROUNDSD)
+               v.AuxInt = 2
+               v.AddArg(x)
                return true
        }
-       // match: (XORLmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (XORLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+}
+func rewriteValueAMD64_OpClosureCall_0(v *Value) bool {
+       // match: (ClosureCall [argwid] entry closure mem)
+       // cond:
+       // result: (CALLclosure [argwid] entry closure mem)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               argwid := v.AuxInt
                _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
+               entry := v.Args[0]
+               closure := v.Args[1]
                mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
-                       break
-               }
-               v.reset(OpAMD64XORLmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
+               v.reset(OpAMD64CALLclosure)
+               v.AuxInt = argwid
+               v.AddArg(entry)
+               v.AddArg(closure)
                v.AddArg(mem)
                return true
        }
-       // match: (XORLmem x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _))
+}
+func rewriteValueAMD64_OpCom16_0(v *Value) bool {
+       // match: (Com16 x)
        // cond:
-       // result: (XORL x (MOVLf2i y))
+       // result: (NOTL x)
        for {
-               off := v.AuxInt
-               sym := v.Aux
-               _ = v.Args[2]
                x := v.Args[0]
-               ptr := v.Args[1]
-               v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSSstore {
-                       break
-               }
-               if v_2.AuxInt != off {
-                       break
-               }
-               if v_2.Aux != sym {
-                       break
-               }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
-                       break
-               }
-               y := v_2.Args[1]
-               v.reset(OpAMD64XORL)
+               v.reset(OpAMD64NOTL)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
-               v0.AddArg(y)
-               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpAMD64XORQ_0(v *Value) bool {
-       // match: (XORQ x (MOVQconst [c]))
-       // cond: is32Bit(c)
-       // result: (XORQconst [c] x)
+func rewriteValueAMD64_OpCom32_0(v *Value) bool {
+       // match: (Com32 x)
+       // cond:
+       // result: (NOTL x)
        for {
-               _ = v.Args[1]
                x := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64XORQconst)
-               v.AuxInt = c
+               v.reset(OpAMD64NOTL)
                v.AddArg(x)
                return true
        }
-       // match: (XORQ (MOVQconst [c]) x)
-       // cond: is32Bit(c)
-       // result: (XORQconst [c] x)
+}
+func rewriteValueAMD64_OpCom64_0(v *Value) bool {
+       // match: (Com64 x)
+       // cond:
+       // result: (NOTQ x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v.Args[1]
-               if !(is32Bit(c)) {
-                       break
-               }
-               v.reset(OpAMD64XORQconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               v.reset(OpAMD64NOTQ)
                v.AddArg(x)
                return true
        }
-       // match: (XORQ (SHLQconst x [c]) (SHRQconst x [d]))
-       // cond: d==64-c
-       // result: (ROLQconst x [c])
+}
+func rewriteValueAMD64_OpCom8_0(v *Value) bool {
+       // match: (Com8 x)
+       // cond:
+       // result: (NOTL x)
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHLQconst {
-                       break
-               }
-               c := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHRQconst {
-                       break
-               }
-               d := v_1.AuxInt
-               if x != v_1.Args[0] {
-                       break
-               }
-               if !(d == 64-c) {
-                       break
-               }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = c
+               x := v.Args[0]
+               v.reset(OpAMD64NOTL)
                v.AddArg(x)
                return true
        }
-       // match: (XORQ (SHRQconst x [d]) (SHLQconst x [c]))
-       // cond: d==64-c
-       // result: (ROLQconst x [c])
+}
+func rewriteValueAMD64_OpCondSelect_0(v *Value) bool {
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQEQ y x cond)
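+       // CondSelect x y b yields x when b is true. CMOV moves its second
+       // argument in when the flags satisfy the condition and otherwise keeps
+       // the first, so the operands are passed swapped as (y, x, cond): x is
+       // moved in exactly when SETEQ would have been true.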
        for {
-               _ = v.Args[1]
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64SHRQconst {
-                       break
-               }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64SHLQconst {
-                       break
-               }
-               c := v_1.AuxInt
-               if x != v_1.Args[0] {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQ {
                        break
                }
-               if !(d == 64-c) {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               v.reset(OpAMD64ROLQconst)
-               v.AuxInt = c
+               v.reset(OpAMD64CMOVQEQ)
+               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQ x x)
-       // cond:
-       // result: (MOVQconst [0])
-       for {
-               _ = v.Args[1]
-               x := v.Args[0]
-               if x != v.Args[1] {
-                       break
-               }
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = 0
-               return true
-       }
-       // match: (XORQ x l:(MOVQload [off] {sym} ptr mem))
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (XORQmem x [off] {sym} ptr mem)
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNE y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               l := v.Args[1]
-               if l.Op != OpAMD64MOVQload {
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNE {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               v.reset(OpAMD64XORQmem)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpAMD64CMOVQNE)
+               v.AddArg(y)
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQ l:(MOVQload [off] {sym} ptr mem) x)
-       // cond: canMergeLoad(v, l, x) && clobber(l)
-       // result: (XORQmem x [off] {sym} ptr mem)
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLT y x cond)
        for {
-               _ = v.Args[1]
-               l := v.Args[0]
-               if l.Op != OpAMD64MOVQload {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETL {
                        break
                }
-               off := l.AuxInt
-               sym := l.Aux
-               _ = l.Args[1]
-               ptr := l.Args[0]
-               mem := l.Args[1]
-               x := v.Args[1]
-               if !(canMergeLoad(v, l, x) && clobber(l)) {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               v.reset(OpAMD64XORQmem)
-               v.AuxInt = off
-               v.Aux = sym
+               v.reset(OpAMD64CMOVQLT)
+               v.AddArg(y)
                v.AddArg(x)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64XORQconst_0(v *Value) bool {
-       // match: (XORQconst [c] (XORQconst [d] x))
-       // cond:
-       // result: (XORQconst [c ^ d] x)
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGT y x cond)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64XORQconst {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETG {
                        break
                }
-               d := v_0.AuxInt
-               x := v_0.Args[0]
-               v.reset(OpAMD64XORQconst)
-               v.AuxInt = c ^ d
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGT)
+               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQconst [0] x)
-       // cond:
-       // result: x
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLE y x cond)
        for {
-               if v.AuxInt != 0 {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETLE {
                        break
                }
-               x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQLE)
+               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQconst [c] (MOVQconst [d]))
-       // cond:
-       // result: (MOVQconst [c^d])
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGE y x cond)
        for {
-               c := v.AuxInt
-               v_0 := v.Args[0]
-               if v_0.Op != OpAMD64MOVQconst {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGE {
                        break
                }
-               d := v_0.AuxInt
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = c ^ d
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGE)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64XORQmem_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (XORQmem [off1] {sym} val (ADDQconst [off2] base) mem)
-       // cond: is32Bit(off1+off2)
-       // result: (XORQmem [off1+off2] {sym} val base mem)
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQHI y x cond)
        for {
-               off1 := v.AuxInt
-               sym := v.Aux
+               t := v.Type
                _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64ADDQconst {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETA {
                        break
                }
-               off2 := v_1.AuxInt
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1 + off2)) {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               v.reset(OpAMD64XORQmem)
-               v.AuxInt = off1 + off2
-               v.Aux = sym
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64CMOVQHI)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQmem [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
-       // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-       // result: (XORQmem [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQCS y x cond)
        for {
-               off1 := v.AuxInt
-               sym1 := v.Aux
+               t := v.Type
                _ = v.Args[2]
-               val := v.Args[0]
-               v_1 := v.Args[1]
-               if v_1.Op != OpAMD64LEAQ {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETB {
                        break
                }
-               off2 := v_1.AuxInt
-               sym2 := v_1.Aux
-               base := v_1.Args[0]
-               mem := v.Args[2]
-               if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               v.reset(OpAMD64XORQmem)
-               v.AuxInt = off1 + off2
-               v.Aux = mergeSym(sym1, sym2)
-               v.AddArg(val)
-               v.AddArg(base)
-               v.AddArg(mem)
+               v.reset(OpAMD64CMOVQCS)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (XORQmem x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _))
-       // cond:
-       // result: (XORQ x (MOVQf2i y))
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQCC y x cond)
        for {
-               off := v.AuxInt
-               sym := v.Aux
+               t := v.Type
                _ = v.Args[2]
                x := v.Args[0]
-               ptr := v.Args[1]
+               y := v.Args[1]
                v_2 := v.Args[2]
-               if v_2.Op != OpAMD64MOVSDstore {
+               if v_2.Op != OpAMD64SETAE {
                        break
                }
-               if v_2.AuxInt != off {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               if v_2.Aux != sym {
+               v.reset(OpAMD64CMOVQCC)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
+               return true
+       }
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLS y x cond)
+       for {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETBE {
                        break
                }
-               _ = v_2.Args[2]
-               if ptr != v_2.Args[0] {
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
                        break
                }
-               y := v_2.Args[1]
-               v.reset(OpAMD64XORQ)
+               v.reset(OpAMD64CMOVQLS)
+               v.AddArg(y)
                v.AddArg(x)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
-               v0.AddArg(y)
-               v.AddArg(v0)
+               v.AddArg(cond)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAdd16_0(v *Value) bool {
-       // match: (Add16 x y)
-       // cond:
-       // result: (ADDL x y)
+func rewriteValueAMD64_OpCondSelect_10(v *Value) bool {
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQEQF y x cond)
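+       // SETEQF and friends come from floating-point comparisons, where an
+       // unordered (NaN) result must also be considered; the separate CMOVQ*F
+       // pseudo-ops keep that distinction so code generation can expand them
+       // with the parity flag taken into account.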
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQEQF)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAdd32_0(v *Value) bool {
-       // match: (Add32 x y)
-       // cond:
-       // result: (ADDL x y)
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNEF y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQNEF)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAdd32F_0(v *Value) bool {
-       // match: (Add32F x y)
-       // cond:
-       // result: (ADDSS x y)
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGTF y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDSS)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGTF)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAdd64_0(v *Value) bool {
-       // match: (Add64 x y)
-       // cond:
-       // result: (ADDQ x y)
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGEF y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDQ)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGEF)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAdd64F_0(v *Value) bool {
-       // match: (Add64F x y)
-       // cond:
-       // result: (ADDSD x y)
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLEQ y x cond)
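+       // From here on the same ladder repeats for 32-bit integers; only the
+       // operand width changes, CMOVQxx becoming CMOVLxx.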
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDSD)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQ {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLEQ)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAdd8_0(v *Value) bool {
-       // match: (Add8 x y)
-       // cond:
-       // result: (ADDL x y)
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLNE y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ADDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNE)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAddPtr_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (AddPtr x y)
-       // cond: config.PtrSize == 8
-       // result: (ADDQ x y)
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLT y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               if !(config.PtrSize == 8) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETL {
                        break
                }
-               v.reset(OpAMD64ADDQ)
-               v.AddArg(x)
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLLT)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (AddPtr x y)
-       // cond: config.PtrSize == 4
-       // result: (ADDL x y)
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGT y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               if !(config.PtrSize == 4) {
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETG {
                        break
                }
-               v.reset(OpAMD64ADDL)
-               v.AddArg(x)
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGT)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAddr_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (Addr {sym} base)
-       // cond: config.PtrSize == 8
-       // result: (LEAQ {sym} base)
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLE y x cond)
        for {
-               sym := v.Aux
-               base := v.Args[0]
-               if !(config.PtrSize == 8) {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETLE {
                        break
                }
-               v.reset(OpAMD64LEAQ)
-               v.Aux = sym
-               v.AddArg(base)
-               return true
-       }
-       // match: (Addr {sym} base)
-       // cond: config.PtrSize == 4
-       // result: (LEAL {sym} base)
-       for {
-               sym := v.Aux
-               base := v.Args[0]
-               if !(config.PtrSize == 4) {
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
                        break
                }
-               v.reset(OpAMD64LEAL)
-               v.Aux = sym
-               v.AddArg(base)
+               v.reset(OpAMD64CMOVLLE)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAnd16_0(v *Value) bool {
-       // match: (And16 x y)
-       // cond:
-       // result: (ANDL x y)
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGE y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ANDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGE)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAnd32_0(v *Value) bool {
-       // match: (And32 x y)
-       // cond:
-       // result: (ANDL x y)
+func rewriteValueAMD64_OpCondSelect_20(v *Value) bool {
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLHI y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ANDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETA {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLHI)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAnd64_0(v *Value) bool {
-       // match: (And64 x y)
-       // cond:
-       // result: (ANDQ x y)
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLCS y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ANDQ)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETB {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLCS)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAnd8_0(v *Value) bool {
-       // match: (And8 x y)
-       // cond:
-       // result: (ANDL x y)
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLCC y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ANDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETAE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLCC)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndB_0(v *Value) bool {
-       // match: (AndB x y)
-       // cond:
-       // result: (ANDL x y)
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLS y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64ANDL)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETBE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLLS)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicAdd32_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (AtomicAdd32 ptr val mem)
-       // cond:
-       // result: (AddTupleFirst32 val (XADDLlock val ptr mem))
-       for {
-               _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64AddTupleFirst32)
-               v.AddArg(val)
-               v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicAdd64_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (AtomicAdd64 ptr val mem)
-       // cond:
-       // result: (AddTupleFirst64 val (XADDQlock val ptr mem))
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLEQF y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64AddTupleFirst64)
-               v.AddArg(val)
-               v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLEQF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicAnd8_0(v *Value) bool {
-       // match: (AtomicAnd8 ptr val mem)
-       // cond:
-       // result: (ANDBlock ptr val mem)
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLNEF y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64ANDBlock)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicCompareAndSwap32_0(v *Value) bool {
-       // match: (AtomicCompareAndSwap32 ptr old new_ mem)
-       // cond:
-       // result: (CMPXCHGLlock ptr old new_ mem)
-       for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               old := v.Args[1]
-               new_ := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64CMPXCHGLlock)
-               v.AddArg(ptr)
-               v.AddArg(old)
-               v.AddArg(new_)
-               v.AddArg(mem)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNEF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicCompareAndSwap64_0(v *Value) bool {
-       // match: (AtomicCompareAndSwap64 ptr old new_ mem)
-       // cond:
-       // result: (CMPXCHGQlock ptr old new_ mem)
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGTF y x cond)
        for {
-               _ = v.Args[3]
-               ptr := v.Args[0]
-               old := v.Args[1]
-               new_ := v.Args[2]
-               mem := v.Args[3]
-               v.reset(OpAMD64CMPXCHGQlock)
-               v.AddArg(ptr)
-               v.AddArg(old)
-               v.AddArg(new_)
-               v.AddArg(mem)
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGTF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicExchange32_0(v *Value) bool {
-       // match: (AtomicExchange32 ptr val mem)
-       // cond:
-       // result: (XCHGL val ptr mem)
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGEF y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64XCHGL)
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGEF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicExchange64_0(v *Value) bool {
-       // match: (AtomicExchange64 ptr val mem)
-       // cond:
-       // result: (XCHGQ val ptr mem)
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWEQ y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64XCHGQ)
-               v.AddArg(val)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQ {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWEQ)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicLoad32_0(v *Value) bool {
-       // match: (AtomicLoad32 ptr mem)
-       // cond:
-       // result: (MOVLatomicload ptr mem)
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWNE y x cond)
        for {
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               mem := v.Args[1]
-               v.reset(OpAMD64MOVLatomicload)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNE)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAtomicLoad64_0(v *Value) bool {
-       // match: (AtomicLoad64 ptr mem)
-       // cond:
-       // result: (MOVQatomicload ptr mem)
+func rewriteValueAMD64_OpCondSelect_30(v *Value) bool {
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLT y x cond)
        for {
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               mem := v.Args[1]
-               v.reset(OpAMD64MOVQatomicload)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETL {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLT)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicLoadPtr_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       // match: (AtomicLoadPtr ptr mem)
-       // cond: config.PtrSize == 8
-       // result: (MOVQatomicload ptr mem)
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGT y x cond)
        for {
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               mem := v.Args[1]
-               if !(config.PtrSize == 8) {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETG {
                        break
                }
-               v.reset(OpAMD64MOVQatomicload)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGT)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (AtomicLoadPtr ptr mem)
-       // cond: config.PtrSize == 4
-       // result: (MOVLatomicload ptr mem)
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLE y x cond)
        for {
-               _ = v.Args[1]
-               ptr := v.Args[0]
-               mem := v.Args[1]
-               if !(config.PtrSize == 4) {
+               t := v.Type
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETLE {
                        break
                }
-               v.reset(OpAMD64MOVLatomicload)
-               v.AddArg(ptr)
-               v.AddArg(mem)
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLE)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAtomicOr8_0(v *Value) bool {
-       // match: (AtomicOr8 ptr val mem)
-       // cond:
-       // result: (ORBlock ptr val mem)
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGE y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64ORBlock)
-               v.AddArg(ptr)
-               v.AddArg(val)
-               v.AddArg(mem)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGE)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicStore32_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (AtomicStore32 ptr val mem)
-       // cond:
-       // result: (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWHI y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETA {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWHI)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicStore64_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (AtomicStore64 ptr val mem)
-       // cond:
-       // result: (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWCS y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETB {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWCS)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicStorePtrNoWB_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       config := b.Func.Config
-       _ = config
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (AtomicStorePtrNoWB ptr val mem)
-       // cond: config.PtrSize == 8
-       // result: (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWCC y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(config.PtrSize == 8) {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETAE {
                        break
                }
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWCC)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       // match: (AtomicStorePtrNoWB ptr val mem)
-       // cond: config.PtrSize == 4
-       // result: (Select1 (XCHGL <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLS y x cond)
        for {
+               t := v.Type
                _ = v.Args[2]
-               ptr := v.Args[0]
-               val := v.Args[1]
-               mem := v.Args[2]
-               if !(config.PtrSize == 4) {
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETBE {
                        break
                }
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.BytePtr, types.TypeMem))
-               v0.AddArg(val)
-               v0.AddArg(ptr)
-               v0.AddArg(mem)
-               v.AddArg(v0)
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLS)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAvg64u_0(v *Value) bool {
-       // match: (Avg64u x y)
-       // cond:
-       // result: (AVGQU x y)
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWEQF y x cond)
        for {
-               _ = v.Args[1]
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
                y := v.Args[1]
-               v.reset(OpAMD64AVGQU)
-               v.AddArg(x)
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWEQF)
                v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpBitLen32_0(v *Value) bool {
-       b := v.Block
-       _ = b
-       typ := &b.Func.Config.Types
-       _ = typ
-       // match: (BitLen32 x)
-       // cond:
-       // result: (BitLen64 (MOVLQZX <typ.UInt64> x))
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWNEF y x cond)
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpBitLen64)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
-               v0.AddArg(x)
-               v.AddArg(v0)
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNEF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpBitLen64_0(v *Value) bool {
+func rewriteValueAMD64_OpCondSelect_40(v *Value) bool {
        b := v.Block
        _ = b
        typ := &b.Func.Config.Types
        _ = typ
-       // match: (BitLen64 <t> x)
-       // cond:
-       // result: (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGTF y x cond)
        for {
                t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = 1
-               v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t)
-               v1 := b.NewValue0(v.Pos, OpSelect0, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v2.AddArg(x)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
-               v3.AuxInt = -1
-               v0.AddArg(v3)
-               v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v5 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v5.AddArg(x)
-               v4.AddArg(v5)
-               v0.AddArg(v4)
-               v.AddArg(v0)
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGTF)
+               v.AddArg(y)
+               v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpBswap32_0(v *Value) bool {
-       // match: (Bswap32 x)
-       // cond:
-       // result: (BSWAPL x)
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGEF y x cond)
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64BSWAPL)
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGEF)
+               v.AddArg(y)
                v.AddArg(x)
+               v.AddArg(cond)
                return true
        }
-}
-func rewriteValueAMD64_OpBswap64_0(v *Value) bool {
-       // match: (Bswap64 x)
-       // cond:
-       // result: (BSWAPQ x)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 1
+       // result: (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64BSWAPQ)
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 1) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
                v.AddArg(x)
+               v.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpCeil_0(v *Value) bool {
-       // match: (Ceil x)
-       // cond:
-       // result: (ROUNDSD [2] x)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 2
+       // result: (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64ROUNDSD)
-               v.AuxInt = 2
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
                v.AddArg(x)
+               v.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpClosureCall_0(v *Value) bool {
-       // match: (ClosureCall [argwid] entry closure mem)
-       // cond:
-       // result: (CALLclosure [argwid] entry closure mem)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 4
+       // result: (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
        for {
-               argwid := v.AuxInt
+               t := v.Type
                _ = v.Args[2]
-               entry := v.Args[0]
-               closure := v.Args[1]
-               mem := v.Args[2]
-               v.reset(OpAMD64CALLclosure)
-               v.AuxInt = argwid
-               v.AddArg(entry)
-               v.AddArg(closure)
-               v.AddArg(mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCom16_0(v *Value) bool {
-       // match: (Com16 x)
-       // cond:
-       // result: (NOTL x)
-       for {
                x := v.Args[0]
-               v.reset(OpAMD64NOTL)
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 4) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
                v.AddArg(x)
+               v.AddArg(y)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpCom32_0(v *Value) bool {
-       // match: (Com32 x)
-       // cond:
-       // result: (NOTL x)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64NOTL)
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQNE)
+               v.AddArg(y)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = 0
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpCom64_0(v *Value) bool {
-       // match: (Com64 x)
-       // cond:
-       // result: (NOTQ x)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
+       // result: (CMOVLNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64NOTQ)
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNE)
+               v.AddArg(y)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = 0
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpCom8_0(v *Value) bool {
-       // match: (Com8 x)
-       // cond:
-       // result: (NOTL x)
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
+       // result: (CMOVWNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
+               _ = v.Args[2]
                x := v.Args[0]
-               v.reset(OpAMD64NOTL)
+               y := v.Args[1]
+               check := v.Args[2]
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNE)
+               v.AddArg(y)
                v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = 0
+               v0.AddArg(check)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
 func rewriteValueAMD64_OpConst16_0(v *Value) bool {
        // match: (Const16 [val])
diff --git a/test/codegen/condmove.go b/test/codegen/condmove.go
new file mode 100644
index 0000000..1f51505
--- /dev/null
+++ b/test/codegen/condmove.go
@@ -0,0 +1,178 @@
+// asmcheck
+
+package codegen
+
+func cmovint(c int) int {
+       x := c + 4
+       if x < 0 {
+               x = 182
+       }
+       // amd64:"CMOVQLT"
+       // arm64:"CSEL\tLT"
+       return x
+}
+
+func cmovchan(x, y chan int) chan int {
+       if x != y {
+               x = y
+       }
+       // amd64:"CMOVQNE"
+       // arm64:"CSEL\tNE"
+       return x
+}
+
+func cmovuintptr(x, y uintptr) uintptr {
+       if x < y {
+               x = -y
+       }
+       // amd64:"CMOVQCS"
+       // arm64:"CSEL\tLO"
+       return x
+}
+
+func cmov32bit(x, y uint32) uint32 {
+       if x < y {
+               x = -y
+       }
+       // amd64:"CMOVLCS"
+       // arm64:"CSEL\tLO"
+       return x
+}
+
+func cmov16bit(x, y uint16) uint16 {
+       if x < y {
+               x = -y
+       }
+       // amd64:"CMOVWCS"
+       // arm64:"CSEL\tLO"
+       return x
+}
+
+// Floating point comparison. For EQ/NE, we must
+// generate special code to handle NaNs.
+func cmovfloateq(x, y float64) int {
+       a := 128
+       if x == y {
+               a = 256
+       }
+       // amd64:"CMOVQNE","CMOVQPC"
+       // arm64:"CSEL\tEQ"
+       return a
+}
+
+func cmovfloatne(x, y float64) int {
+       a := 128
+       if x != y {
+               a = 256
+       }
+       // amd64:"CMOVQNE","CMOVQPS"
+       // arm64:"CSEL\tNE"
+       return a
+}
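+
+// Why the pair of CMOVs above: UCOMISD reports an unordered (NaN)
+// comparison through the parity flag, so a lone CMOVQEQ/CMOVQNE would
+// mishandle NaN operands. The EQF/NEF pseudo-ops are therefore expanded
+// into a CMOVQNE plus a parity-checking CMOVQPC (for ==) or CMOVQPS
+// (for !=), which is what the asmcheck patterns expect.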
+
+//go:noinline
+func frexp(f float64) (frac float64, exp int) {
+       return 1.0, 4
+}
+
+//go:noinline
+func ldexp(frac float64, exp int) float64 {
+       return 1.0
+}
+
+// Generate a CMOV with a floating comparison and integer move.
+func cmovfloatint2(x, y float64) float64 {
+       yfr, yexp := 4.0, 5
+
+       r := x
+       for r >= y {
+               rfr, rexp := frexp(r)
+               if rfr < yfr {
+                       rexp = rexp - 1
+               }
+               // amd64:"CMOVQHI"
+               // arm64:"CSEL\tGT"
+               r = r - ldexp(y, (rexp-yexp))
+       }
+       return r
+}
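+
+// Note that UCOMISD sets the flags the way an unsigned integer compare
+// does, so the float comparison above is consumed with an unsigned
+// condition: CMOVQHI (above) rather than CMOVQGT.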
+
+func cmovloaded(x [4]int, y int) int {
+       if x[2] != 0 {
+               y = x[2]
+       } else {
+               y = y >> 2
+       }
+       // amd64:"CMOVQNE"
+       // arm64:"CSEL\tNE"
+       return y
+}
+
+func cmovuintptr2(x, y uintptr) uintptr {
+       a := x * 2
+       if a == 0 {
+               a = 256
+       }
+       // amd64:"CMOVQEQ"
+       // arm64:"CSEL\tEQ"
+       return a
+}
+
+// Floating point CMOVs are not supported by the amd64/arm64 backends:
+// the CondSelect lowering only matches integer and pointer result types,
+// so a conditionally assigned float still compiles to a branch.
+func cmovfloatmove(x, y int) float64 {
+       a := 1.0
+       if x <= y {
+               a = 2.0
+       }
+       // amd64:-"CMOV"
+       // arm64:-"CSEL"
+       return a
+}
+
+// On amd64, the following patterns trigger comparison inversion: the
+// operands of the generated CMP end up swapped (InvertFlags), so the
+// CMOV must test the opposite condition. Test that we invert it
+// correctly.
+var gsink int64
+var gusink uint64
+
+func cmovinvert1(x, y int64) int64 {
+       if x < gsink {
+               y = -y
+       }
+       // amd64:"CMOVQGT"
+       return y
+}
+func cmovinvert2(x, y int64) int64 {
+       if x <= gsink {
+               y = -y
+       }
+       // amd64:"CMOVQGE"
+       return y
+}
+func cmovinvert3(x, y int64) int64 {
+       if x == gsink {
+               y = -y
+       }
+       // amd64:"CMOVQEQ"
+       return y
+}
+func cmovinvert4(x, y int64) int64 {
+       if x != gsink {
+               y = -y
+       }
+       // amd64:"CMOVQNE"
+       return y
+}
+func cmovinvert5(x, y uint64) uint64 {
+       if x > gusink {
+               y = -y
+       }
+       // amd64:"CMOVQCS"
+       return y
+}
+func cmovinvert6(x, y uint64) uint64 {
+       if x >= gusink {
+               y = -y
+       }
+       // amd64:"CMOVQLS"
+       return y
+}
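+
+// A hypothetical extra check (editor's sketch, not part of this patch):
+// per the lowering rules above, a materialized bool condition is first
+// zero-extended (MOVBQZX) and then compared against zero, so selecting
+// on a plain bool should also become a CMOV.
+func cmovstoredbool(x, y int64, c bool) int64 {
+	if c {
+		x = y
+	}
+	// amd64:"CMOVQNE"
+	return x
+}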